-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Harsha H L
authored and
Harsha H L
committed
Jul 25, 2022
0 parents
commit 070b1a1
Showing
56 changed files
with
1,400 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.DS_Store | ||
*.pyc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import nltk\n", | ||
"sentence = \"\"\"At eight o'clock on Thursday morning\n", | ||
"... Arthur didn't feel very good.\"\"\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"['At',\n", | ||
" 'eight',\n", | ||
" \"o'clock\",\n", | ||
" 'on',\n", | ||
" 'Thursday',\n", | ||
" 'morning',\n", | ||
" '...',\n", | ||
" 'Arthur',\n", | ||
" 'did',\n", | ||
" \"n't\",\n", | ||
" 'feel',\n", | ||
" 'very',\n", | ||
" 'good',\n", | ||
" '.']" | ||
] | ||
}, | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"tokens = nltk.word_tokenize(sentence)\n", | ||
"tokens" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[('At', 'IN'),\n", | ||
" ('eight', 'CD'),\n", | ||
" (\"o'clock\", 'NN'),\n", | ||
" ('on', 'IN'),\n", | ||
" ('Thursday', 'NNP'),\n", | ||
" ('morning', 'NN')]" | ||
] | ||
}, | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"tagged = nltk.pos_tag(tokens)\n", | ||
"tagged[0:6]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from nltk.corpus import stopwords" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.12" | ||
}, | ||
"vscode": { | ||
"interpreter": { | ||
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" | ||
} | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# NLTK |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# FLASK_APP=app.py FLASK_ENV=development flask run
import json

# The original had a redundant bare `from flask import Flask` followed by
# this combined import; the single combined import is sufficient.
from flask import Flask, flash, redirect, render_template, request, session, jsonify

import text_preprocessing

app = Flask(__name__)

# @app.route("/")
# def home():
#     return render_template("h.html")
|
||
|
||
@app.route("/experiment")
def experiment():
    """Render the salt-analysis experiment page.

    Reads the experiment paragraph from a static text file, asks
    text_preprocessing for the per-sentence object descriptions, and passes
    everything to the experimentPage.html template.
    """
    per_sentence = []  # one list of JSON strings per sentence
    check_box = []     # same lists, consumed by the template's checkboxes
    # Context manager closes the handle; the original leaked the open() result.
    with open("static/text/salt_analysis.txt", "r") as f:
        para = f.read()
    sentence_objects = text_preprocessing.main()  # per-sentence object dicts
    sentence_count = text_preprocessing.sen()     # number of sentences
    apparatus = text_preprocessing.apparatus()
    for i in range(sentence_count):
        objects = [json.dumps(obj) for obj in sentence_objects[i]]
        # The same list object is deliberately shared between both
        # collections, matching the original behaviour.
        per_sentence.append(objects)
        check_box.append(objects)
    return render_template("experimentPage.html", objs=per_sentence, para=para,
                           abc=check_box, instruments=apparatus)
|
||
|
||
@app.route("/experiment1")
def experiment1():
    """Render the basic-radical experiment page.

    Same flow as experiment(): load the paragraph, collect per-sentence
    object descriptions from text_preprocessing, render the template.
    """
    per_sentence = []  # one list of JSON strings per sentence
    check_box = []     # same lists, consumed by the template's checkboxes
    # Context manager closes the handle; the original leaked the open() result.
    with open("static/text/basic_radical.txt", "r") as f:
        para = f.read()
    sentence_objects = text_preprocessing.main1()  # per-sentence object dicts
    sentence_count = text_preprocessing.sen1()     # number of sentences
    apparatus = text_preprocessing.apparatus()
    for i in range(sentence_count):
        objects = [json.dumps(obj) for obj in sentence_objects[i]]
        # The same list object is deliberately shared between both
        # collections, matching the original behaviour.
        per_sentence.append(objects)
        check_box.append(objects)
    return render_template("experimentPage.html", objs=per_sentence, para=para,
                           abc=check_box, instruments=apparatus)
|
||
|
||
|
||
@app.route("/experiment2")
def experiment2():
    """Render the titration experiment page.

    Same flow as experiment(): load the paragraph, collect per-sentence
    object descriptions from text_preprocessing, render the template.
    """
    per_sentence = []  # one list of JSON strings per sentence
    check_box = []     # same lists, consumed by the template's checkboxes
    # Context manager closes the handle; the original leaked the open() result.
    with open("static/text/titration.txt", "r") as f:
        para = f.read()
    sentence_objects = text_preprocessing.main2()  # per-sentence object dicts
    sentence_count = text_preprocessing.sen2()     # number of sentences
    apparatus = text_preprocessing.apparatus()
    for i in range(sentence_count):
        objects = [json.dumps(obj) for obj in sentence_objects[i]]
        # The same list object is deliberately shared between both
        # collections, matching the original behaviour.
        per_sentence.append(objects)
        check_box.append(objects)
    return render_template("experimentPage.html", objs=per_sentence, para=para,
                           abc=check_box, instruments=apparatus)
|
||
# Start the Flask development server (not for production use) on port 5000.
if __name__=='__main__':
    app.run(debug=True,port=5000)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
<?xml version="1.0" encoding="utf-8" ?> | ||
|
||
|
||
<data> | ||
<noun> | ||
<obj name="beaker"> beaker </obj> | ||
<obj name="burette"> burette </obj> | ||
<obj name="pipette"> pipette </obj> | ||
<obj name="burner"> bunsenburner </obj> | ||
<obj name="roundbottomflask"> roundbottomflask </obj> | ||
<obj name="flask"> conicalflask </obj> | ||
<obj name="tripodstand"> tripodstand </obj> | ||
<obj name="wiregauze"> wiregauze </obj> | ||
<obj name="tube"> testtube </obj> | ||
<obj name="paper"> paper </obj> | ||
<obj name="precipitate">ppt </obj> | ||
<obj name="dish"> petri </obj> | ||
<obj name="container"> container </obj> | ||
<obj name="gas"> gas </obj> | ||
<obj name="ring"> ring </obj> | ||
<obj name="rod"> rod </obj> | ||
</noun> | ||
<position> | ||
<pos name= "under" x="500" y="-710"> under </pos> | ||
<pos name= "over" x="500" y="-450"> over </pos> | ||
<pos name= "above" x="500" y="-450"> above </pos> | ||
<pos name= "below" x="500" y="-710"> below </pos> | ||
<pos name= "near" x="500" y="-600"> near </pos> | ||
<pos name= "in" x="500" y="-600"> in </pos> | ||
<pos name= "inside" x="500" y="-600"> in </pos> | ||
<pos name= "on" x="500" y="-450"> on </pos> | ||
|
||
</position> | ||
|
||
<image> | ||
<img name="beaker" src="static/beaker.png"> beaker </img> | ||
<img name="burette" src="static/burette.png"> burette </img> | ||
<img name="pipette" src=""> pipette </img> | ||
<img name="burner" src="static/bunsen_burner.png"> bunsenburner </img> | ||
<img name="roundbottomflask" src="static/round_bottom_flask.png"> roundbottomflask </img> | ||
<img name="flask" src="static/conical.png"> conicalflask </img> | ||
<img name="tripodstand" src="static/tripod.png"> tripodstand </img> | ||
<img name="wiregauze" src=""> wiregauze </img> | ||
<img name="tube" src="static/fullTestTube.png"> testtube </img> | ||
<img name="precipitate" src="static/pptTestTube.png"> ppt </img> | ||
<img name="dish" src="static/dish.png"> petri </img> | ||
<img name="container" src="static/beaker.png"> container </img> | ||
<img name="gas" src="static/gas.png"> gas </img> | ||
<img name="ring" src="static/ring.png"> ring </img> | ||
<img name="rod" src="static/rod.png"> rod </img> | ||
</image> | ||
|
||
<verbs> | ||
<verb name="pour" deg="45" > pour </verb> | ||
<verb name="place" deg="0"> place </verb> | ||
<verb name="add"> add </verb> | ||
|
||
</verbs> | ||
<colours> | ||
<colour name="green" hex="#368370"> green </colour> | ||
<colour name="blue" hex="#151871"> blue </colour> | ||
<colour name="black" hex="#565656"> black </colour> | ||
<colour name="brown" hex="#5C4033"> brown </colour> | ||
<colour name="white" hex="#ffffff"> white </colour> | ||
<colour name="yellow" hex="#ffff00"> yellow </colour> | ||
<colour name="purple" hex="#A020F0"> purple </colour> | ||
|
||
</colours> | ||
|
||
</data> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import nltk | ||
#nltk.download('treebank') | ||
import pprint | ||
from sklearn.tree import DecisionTreeClassifier | ||
from sklearn.feature_extraction import DictVectorizer | ||
from sklearn.pipeline import Pipeline | ||
from nltk.tokenize import sent_tokenize, word_tokenize | ||
|
||
# Hand-tagged training data: two chemistry-experiment sentences as lists of
# (word, POS-tag) pairs.
tagged_sentences = [
    [('chocolate', 'NN'), ('brown', 'JJ'), ('precipitate', 'NN'), ('of', 'IN'),
     ('copper', 'NN'), ('ferrocyanide', 'NN'), ('is', 'VBZ'), ('formed', 'VBN'),
     ('in', 'IN'), ('test', 'NN'), ('tube', 'NN'), ('confirming', 'VBG'),
     ('the', 'DT'), ('presence', 'NN'), ('of', 'IN'), ('cu2', 'NN'),
     ('ions', 'NNS')],
    [('solution', 'NN'), ('in', 'IN'), ('test', 'NN'), ('tube', 'NN'),
     ('turns', 'VBZ'), ('green', 'JJ')],
]

print(tagged_sentences[0])
print("Tagged sentences: ", len(tagged_sentences))
# The treebank corpus is used only for this informational print; the original
# crashed with a LookupError when the corpus had not been downloaded, so skip
# it gracefully instead of requiring an unused download.
try:
    print("Tagged words:", len(nltk.corpus.treebank.tagged_words()))
except LookupError:
    print("Tagged words: (treebank corpus not downloaded)")
def features(sentence, index):
    """Build the feature dict for the word at *index* in *sentence*.

    *sentence* is a list of word strings; the returned dict feeds the
    DictVectorizer used by the POS classifier.
    """
    word = sentence[index]
    last = len(sentence) - 1
    return {
        'word': word,
        'is_first': index == 0,
        'is_last': index == last,
        # Capitalisation cues help distinguish proper nouns and acronyms.
        'is_capitalized': word[0].upper() == word[0],
        'is_all_caps': word.upper() == word,
        'is_all_lower': word.lower() == word,
        # Short prefixes/suffixes capture morphology (e.g. "-ing", "-ed").
        'prefix-1': word[0],
        'prefix-2': word[:2],
        'prefix-3': word[:3],
        'suffix-1': word[-1],
        'suffix-2': word[-2:],
        'suffix-3': word[-3:],
        # Neighbouring words give local context; empty string at boundaries.
        'prev_word': sentence[index - 1] if index else '',
        'next_word': sentence[index + 1] if index != last else '',
        'has_hyphen': '-' in word,
        'is_numeric': word.isdigit(),
        'capitals_inside': word[1:].lower() != word[1:],
    }
|
||
# pprint.pprint(features(['This', 'is', 'a', 'sentence'], 2)) | ||
|
||
def untag(tagged_sentence):
    """Strip the tags from a list of (word, tag) pairs, keeping the words."""
    words = []
    for word, _tag in tagged_sentence:
        words.append(word)
    return words
|
||
def transform_to_dataset(tagged_sentences):
    """Flatten tagged sentences into parallel lists: per-word feature
    dicts (X) and their POS tags (y)."""
    X = []
    y = []
    for tagged in tagged_sentences:
        words = untag(tagged)
        for position, (_word, tag) in enumerate(tagged):
            X.append(features(words, position))
            y.append(tag)
    return X, y
|
||
# Split the dataset for training and testing
# With only 2 tagged sentences, cutoff == 1: one sentence trains the model
# and the other evaluates it.
cutoff = int(.75 * len(tagged_sentences))
training_sentences = tagged_sentences[:cutoff]
test_sentences = tagged_sentences[cutoff:]

print( len(training_sentences))
print( len(test_sentences))

X, y = transform_to_dataset(training_sentences)

# Pipeline: vectorize the per-word feature dicts into dense arrays, then fit
# an entropy-criterion decision tree on them.
clf = Pipeline([
    ('vectorizer', DictVectorizer(sparse=False)),
    ('classifier', DecisionTreeClassifier(criterion='entropy'))
])

# NOTE(review): this fits on a single sample (X[:1]) although the trailing
# comment talks about 10K samples — presumably a leftover slice from a larger
# corpus; confirm whether clf.fit(X, y) was intended.
clf.fit(X[:1], y[:1]) # Use only the first 10K samples if you're running it multiple times. It takes a fair bit :)

print('Training completed')

X_test, y_test = transform_to_dataset(test_sentences)

# Mean per-word tagging accuracy on the held-out sentence(s).
print("Accuracy:", clf.score(X_test, y_test))
|
||
def pos_tag(sentence):
    """Tag each token of *sentence* (a list of word strings) with the
    module-level `clf` pipeline trained above.

    Returns the (sentence, tags) pair; `tags` is whatever clf.predict
    yields (one predicted tag per token).
    """
    tags = clf.predict([features(sentence, index) for index in range(len(sentence))])
    return sentence, tags

# Demo: tokenize a sample observation sentence and print its predicted tags.
print(pos_tag(word_tokenize('Chocolate brown precipitate of Copper ferrocyanide is formed in test tube confirming the presence of Cu2+ ions.')))
Empty file.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.