
Commit

Initial upload
Harsha H L authored and committed on Jul 25, 2022
0 parents commit 070b1a1
Showing 56 changed files with 1,400 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
.DS_Store
*.pyc
110 changes: 110 additions & 0 deletions .ipynb_checkpoints/nlp-checkpoint.ipynb
@@ -0,0 +1,110 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import nltk\n",
"sentence = \"\"\"At eight o'clock on Thursday morning\n",
"... Arthur didn't feel very good.\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['At',\n",
" 'eight',\n",
" \"o'clock\",\n",
" 'on',\n",
" 'Thursday',\n",
" 'morning',\n",
" '...',\n",
" 'Arthur',\n",
" 'did',\n",
" \"n't\",\n",
" 'feel',\n",
" 'very',\n",
" 'good',\n",
" '.']"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tokens = nltk.word_tokenize(sentence)\n",
"tokens"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('At', 'IN'),\n",
" ('eight', 'CD'),\n",
" (\"o'clock\", 'NN'),\n",
" ('on', 'IN'),\n",
" ('Thursday', 'NNP'),\n",
" ('morning', 'NN')]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tagged = nltk.pos_tag(tokens)\n",
"tagged[0:6]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from nltk.corpus import stopwords"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"vscode": {
"interpreter": {
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
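The notebook's last cell imports stopwords but does not use it yet; below is a minimal sketch of how that import is typically applied to the tokens produced earlier (it assumes the NLTK "stopwords" corpus has already been downloaded via nltk.download('stopwords')):

# Sketch only: filter the tokens from the earlier cell against NLTK's English stop list.
from nltk.corpus import stopwords

stop_words = set(stopwords.words("english"))
filtered = [t for t in tokens if t.lower() not in stop_words]
# drops common function words such as 'At', 'on', 'did', 'very' and keeps the content words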
1 change: 1 addition & 0 deletions README.md
@@ -0,0 +1 @@
# NLTK
85 changes: 85 additions & 0 deletions app.py
@@ -0,0 +1,85 @@
# Run with: FLASK_APP=app.py FLASK_ENV=development flask run
import json

import text_preprocessing
from flask import Flask, render_template

app = Flask(__name__)

# @app.route("/")
# def home():
# return render_template("h.html")


@app.route("/experiment")
def experiment():
objects = [] # a Python object (dict):
new=[]
check_box=[]
f = open("static/text/salt_analysis.txt", "r")
para= f.read()
#length = text_preprocessing.sen() #var is the no of sentences in the paragraph
qwe=text_preprocessing.main() # qwe={(name:testtube,pos:up),(name:beaker,pos:down)}
var = text_preprocessing.sen()
apparatus = text_preprocessing.apparatus()
print("this is var",var)
for i in range(var): #if return render_template is within this for loop only the objects of first sentence are displayed
print("this is newest",qwe[i]) #qwe[i] is each sentence
for q in qwe[i]:
print("this is individual objects in each sent",q)
objects.append(json.dumps(q))
new.append(objects)
check_box.append(objects)
objects=[]
return render_template("experimentPage.html", objs = new, para=para,abc=check_box, instruments = apparatus)


@app.route("/experiment1")
def experiment1():
objects = [] # a Python object (dict):
new=[]
check_box=[]
f = open("static/text/basic_radical.txt", "r")
para= f.read()
#length = text_preprocessing.sen() #var is the no of sentences in the paragraph
qwe=text_preprocessing.main1() # qwe={(name:testtube,pos:up),(name:beaker,pos:down)}
var = text_preprocessing.sen1()
apparatus = text_preprocessing.apparatus()
print("this is var",var)
for i in range(var): #if return render_template is within this for loop only the objects of first sentence are displayed
print("this is newest",qwe[i]) #qwe[i] is each sentence
for q in qwe[i]:
print("this is individual objects in each sent",q)
objects.append(json.dumps(q))
new.append(objects)
check_box.append(objects)
objects=[]
return render_template("experimentPage.html", objs = new, para=para,abc=check_box, instruments = apparatus)



@app.route("/experiment2")
def experiment2():
objects = [] # a Python object (dict):
new=[]
check_box=[]
f = open("static/text/titration.txt", "r")
para= f.read()
#length = text_preprocessing.sen() #var is the no of sentences in the paragraph
qwe=text_preprocessing.main2() # qwe={(name:testtube,pos:up),(name:beaker,pos:down)}
var = text_preprocessing.sen2()
apparatus = text_preprocessing.apparatus()
print("this is var",var)
for i in range(var): #if return render_template is within this for loop only the objects of first sentence are displayed
print("this is newest",qwe[i]) #qwe[i] is each sentence
for q in qwe[i]:
print("this is individual objects in each sent",q)
objects.append(json.dumps(q))
new.append(objects)
check_box.append(objects)
objects=[]

return render_template("experimentPage.html", objs = new, para=para,abc=check_box, instruments = apparatus)

if __name__ == '__main__':
    app.run(debug=True, port=5000)
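app.py imports a text_preprocessing module that is not part of this diff. The stub below is a hypothetical illustration of the interface the routes appear to assume (function names taken from the calls above, return shapes inferred from the inline comments); it is not the repository's actual implementation.

# text_preprocessing stub -- hypothetical, for illustration only.
# The routes expect main()/main1()/main2() to return, per sentence, the objects
# detected in it (e.g. {"name": "testtube", "pos": "up"}), sen()/sen1()/sen2()
# to return the sentence count, and apparatus() to return the instrument names.

def main():
    return [
        [{"name": "testtube", "pos": "up"}, {"name": "beaker", "pos": "down"}],  # sentence 1
        [{"name": "burner", "pos": "under"}],                                    # sentence 2
    ]

def sen():
    return len(main())

def apparatus():
    return ["testtube", "beaker", "burner"]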
71 changes: 71 additions & 0 deletions data.xml
@@ -0,0 +1,71 @@
<?xml version="1.0" encoding="utf-8" ?>


<data>
<noun>
<obj name="beaker"> beaker </obj>
<obj name="burette"> burette </obj>
<obj name="pipette"> pipette </obj>
<obj name="burner"> bunsenburner </obj>
<obj name="roundbottomflask"> roundbottomflask </obj>
<obj name="flask"> conicalflask </obj>
<obj name="tripodstand"> tripodstand </obj>
<obj name="wiregauze"> wiregauze </obj>
<obj name="tube"> testtube </obj>
<obj name="paper"> paper </obj>
<obj name="precipitate">ppt </obj>
<obj name="dish"> petri </obj>
<obj name="container"> container </obj>
<obj name="gas"> gas </obj>
<obj name="ring"> ring </obj>
<obj name="rod"> rod </obj>
</noun>
<position>
<pos name= "under" x="500" y="-710"> under </pos>
<pos name= "over" x="500" y="-450"> over </pos>
<pos name= "above" x="500" y="-450"> above </pos>
<pos name= "below" x="500" y="-710"> below </pos>
<pos name= "near" x="500" y="-600"> near </pos>
<pos name= "in" x="500" y="-600"> in </pos>
<pos name= "inside" x="500" y="-600"> in </pos>
<pos name= "on" x="500" y="-450"> on </pos>

</position>

<image>
<img name="beaker" src="static/beaker.png"> beaker </img>
<img name="burette" src="static/burette.png"> burette </img>
<img name="pipette" src=""> pipette </img>
<img name="burner" src="static/bunsen_burner.png"> bunsenburner </img>
<img name="roundbottomflask" src="static/round_bottom_flask.png"> roundbottomflask </img>
<img name="flask" src="static/conical.png"> conicalflask </img>
<img name="tripodstand" src="static/tripod.png"> tripodstand </img>
<img name="wiregauze" src=""> wiregauze </img>
<img name="tube" src="static/fullTestTube.png"> testtube </img>
<img name="precipitate" src="static/pptTestTube.png"> ppt </img>
<img name="dish" src="static/dish.png"> petri </img>
<img name="container" src="static/beaker.png"> container </img>
<img name="gas" src="static/gas.png"> gas </img>
<img name="ring" src="static/ring.png"> ring </img>
<img name="rod" src="static/rod.png"> rod </img>
</image>

<verbs>
<verb name="pour" deg="45" > pour </verb>
<verb name="place" deg="0"> place </verb>
<verb name="add"> add </verb>

</verbs>
<colours>
<colour name="green" hex="#368370"> green </colour>
<colour name="blue" hex="#151871"> blue </colour>
<colour name="black" hex="#565656"> black </colour>
<colour name="brown" hex="#5C4033"> brown </colour>
<colour name="white" hex="#ffffff"> white </colour>
<colour name="yellow" hex="#ffff00"> yellow </colour>
<colour name="purple" hex="#A020F0"> purple </colour>

</colours>

</data>

79 changes: 79 additions & 0 deletions pos_tagging.py
@@ -0,0 +1,79 @@
import nltk
#nltk.download('treebank')
import pprint
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import Pipeline
from nltk.tokenize import sent_tokenize, word_tokenize

tagged_sentences = [
    [('chocolate', 'NN'), ('brown', 'JJ'), ('precipitate', 'NN'), ('of', 'IN'), ('copper', 'NN'), ('ferrocyanide', 'NN'), ('is', 'VBZ'), ('formed', 'VBN'), ('in', 'IN'), ('test', 'NN'), ('tube', 'NN'), ('confirming', 'VBG'), ('the', 'DT'), ('presence', 'NN'), ('of', 'IN'), ('cu2', 'NN'), ('ions', 'NNS')],
    [('solution', 'NN'), ('in', 'IN'), ('test', 'NN'), ('tube', 'NN'), ('turns', 'VBZ'), ('green', 'JJ')]
]

print(tagged_sentences[0])
print("Tagged sentences: ", len(tagged_sentences))
print("Tagged words:", len(nltk.corpus.treebank.tagged_words()))
def features(sentence, index):
    """sentence: [w1, w2, ...], index: the index of the word"""
    return {
        'word': sentence[index],
        'is_first': index == 0,
        'is_last': index == len(sentence) - 1,
        'is_capitalized': sentence[index][0].upper() == sentence[index][0],
        'is_all_caps': sentence[index].upper() == sentence[index],
        'is_all_lower': sentence[index].lower() == sentence[index],
        'prefix-1': sentence[index][0],
        'prefix-2': sentence[index][:2],
        'prefix-3': sentence[index][:3],
        'suffix-1': sentence[index][-1],
        'suffix-2': sentence[index][-2:],
        'suffix-3': sentence[index][-3:],
        'prev_word': '' if index == 0 else sentence[index - 1],
        'next_word': '' if index == len(sentence) - 1 else sentence[index + 1],
        'has_hyphen': '-' in sentence[index],
        'is_numeric': sentence[index].isdigit(),
        'capitals_inside': sentence[index][1:].lower() != sentence[index][1:]
    }

# pprint.pprint(features(['This', 'is', 'a', 'sentence'], 2))

def untag(tagged_sentence):
    return [w for w, t in tagged_sentence]

def transform_to_dataset(tagged_sentences):
    X, y = [], []

    for tagged in tagged_sentences:
        for index in range(len(tagged)):
            X.append(features(untag(tagged), index))
            y.append(tagged[index][1])

    return X, y

# Split the dataset for training and testing

cutoff = int(.75 * len(tagged_sentences))
training_sentences = tagged_sentences[:cutoff]
test_sentences = tagged_sentences[cutoff:]

print(len(training_sentences))
print(len(test_sentences))

X, y = transform_to_dataset(training_sentences)

clf = Pipeline([
    ('vectorizer', DictVectorizer(sparse=False)),
    ('classifier', DecisionTreeClassifier(criterion='entropy'))
])

clf.fit(X, y)  # the toy training set here is a single sentence, so fitting on all of it is cheap

print('Training completed')

X_test, y_test = transform_to_dataset(test_sentences)

print("Accuracy:", clf.score(X_test, y_test))

def pos_tag(sentence):
    tags = clf.predict([features(sentence, index) for index in range(len(sentence))])
    return sentence, tags

print(pos_tag(word_tokenize('Chocolate brown precipitate of Copper ferrocyanide is formed in test tube confirming the presence of Cu2+ ions.')))
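The script imports nltk.corpus.treebank but trains only on the two hand-tagged chemistry sentences above. A sketch of fitting the same pipeline on the Penn Treebank sample instead is shown below; it assumes nltk.download('treebank') has been run, and training on 10K samples takes a while:

# Sketch only: swap the toy tagged_sentences for the treebank sample shipped with NLTK.
treebank_sentences = nltk.corpus.treebank.tagged_sents()
cutoff = int(.75 * len(treebank_sentences))
X_tb, y_tb = transform_to_dataset(treebank_sentences[:cutoff])
clf.fit(X_tb[:10000], y_tb[:10000])  # limit to the first 10K word samples to keep training time manageable
X_tb_test, y_tb_test = transform_to_dataset(treebank_sentences[cutoff:])
print("Treebank accuracy:", clf.score(X_tb_test, y_tb_test))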
Empty file added requirements.txt
Empty file.
Binary file added static/beaker.png
Binary file added static/beaker_pour.png
Binary file added static/bunsen_burner.png
Binary file added static/bunsen_burner1.png
Binary file added static/burette.png
Binary file added static/cat1_exp1.png
Binary file added static/cat1_exp2.png
Binary file added static/cat1_exp3.png
Binary file added static/conical.png
Binary file added static/conical1.png
Binary file added static/container_pour.png
Binary file added static/dish.png
Binary file added static/emptyTestTube.png
Binary file added static/flask_pour.png
Binary file added static/fullTestTube.png
Binary file added static/gas.png
Binary file added static/gas1.png
Binary file added static/halfTestTube.png