Skip to content

Commit

Permalink
Merge pull request #605 from ajdapretnar/senti-art
Browse files Browse the repository at this point in the history
[ENH] Sentiment Analysis: Add SentiArt method
  • Loading branch information
lanzagar authored Jan 20, 2021
2 parents 53e0da6 + e7308cd commit aa10616
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 21 deletions.
46 changes: 46 additions & 0 deletions orangecontrib/text/sentiment/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ def read_file(file):
return f.read().split('\n')


def read_pickle(file):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    file : str or path-like
        Path of the pickle file to read.

    Returns
    -------
    object
        The object that was serialised into ``file``.
    """
    # SECURITY: unpickling can execute arbitrary code, so only call this on
    # files that come from a trusted source.
    with open(file, 'rb') as f:
        # Let pickle stream from the file object rather than first copying
        # the whole file into an intermediate bytes object.
        return pickle.load(f)


def compute_from_dict(tokens, pos, neg):
scores = []
for doc in tokens:
Expand Down Expand Up @@ -195,6 +200,47 @@ def __setstate__(self, state):
self.__init__(state['language'])


class SentiArtDictionaries(SentimentDictionaries):
    """SentiArt lexicons served from ``server_url``.

    Lexicon files are named ``SentiArt_<language>.pickle``; the language
    part of the file name is what ``supported_languages`` reports and what
    ``__getitem__`` expects as its key.
    """
    server_url = "http://file.biolab.si/files/sentiart/"

    def __init__(self):
        super().__init__()

    def __getitem__(self, language):
        """Return the unpickled lexicon stored in ``SentiArt_<language>.pickle``.

        The local path of the file is obtained through
        ``self.localfiles.localpath_download``.
        """
        # NOTE(review): the lexicon is unpickled and ``server_url`` is plain
        # HTTP; unpickling executes code from the file, so the download
        # source must be trusted.
        filtering_dict = read_pickle(self.localfiles.localpath_download(
            f"SentiArt_{language}.pickle"))
        return filtering_dict

    def supported_languages(self):
        """Return the set of language codes that have a SentiArt lexicon file.

        A code is the ``<language>`` part of every entry of
        ``self.lang_files`` whose first element fully matches
        ``SentiArt_<language>.pickle``.
        """
        # Raw string: "\." in a normal string literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        pattern = re.compile(r"SentiArt_(.*)\.pickle")
        matches = (pattern.fullmatch(entry[0]) for entry in self.lang_files)
        return {match.group(1) for match in matches if match}


class SentiArt(Sentiment):
    """Sentiment method that averages per-word SentiArt lexicon values."""
    sentiments = ('sentiment', 'anger', 'fear', 'disgust', 'happiness',
                  'sadness', 'surprise')
    name = 'SentiArt'

    # Maps the language name to the code used in the lexicon file name.
    LANGS = {'English': 'EN', 'German': 'DE'}

    def __init__(self, language='English'):
        """Load the lexicon for ``language`` (a key of ``LANGS``)."""
        self.language = language
        self.dictionary = SentiArtDictionaries()[self.LANGS[self.language]]

    def get_scores(self, corpus):
        """Return one score vector per document of ``corpus.tokens``.

        Each vector is the column-wise mean of the lexicon values of the
        document's tokens that are present in the lexicon.  A document with
        no such token gets a vector of zeros with one entry per sentiment.
        """
        scores = []
        for doc in corpus.tokens:
            # assumes every lexicon entry maps to values ordered like
            # ``sentiments`` - TODO confirm against the lexicon files
            rows = [list(self.dictionary[word].values())
                    for word in doc if word in self.dictionary]
            if rows:
                score = np.array(rows).mean(axis=0)
            else:
                # The mean of an empty array is a scalar NaN (plus a
                # RuntimeWarning), which breaks the fixed-width output.
                score = np.zeros(len(self.sentiments))
            scores.append(score)
        return scores


class CustomDictionaries(Sentiment):
sentiments = ('sentiment',)
name = 'Custom Dictionaries'
Expand Down
10 changes: 8 additions & 2 deletions orangecontrib/text/sentiment/filter_lexicon.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import fnmatch
import os
import pickle


class FilterSentiment:
    """Base class providing the file reader shared by the lexicon filters."""

    @staticmethod
    def read_file(file):
        """Read a lexicon file.

        Files whose name matches ``*.pickle`` are unpickled and the stored
        object is returned; any other file is read as text and returned as
        a list of its lines (split on ``'\\n'``).
        """
        if fnmatch.fnmatch(file, '*.pickle'):
            # SECURITY: unpickling executes code from the file; only use
            # this on trusted lexicon files.
            with open(file, 'rb') as f:
                return pickle.load(f)
        with open(file, 'r') as f:
            return f.read().split('\n')


class SloSentiment(FilterSentiment):
Expand Down
9 changes: 8 additions & 1 deletion orangecontrib/text/tests/test_sentiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from orangecontrib.text.corpus import Corpus
from orangecontrib.text.sentiment import LiuHuSentiment, VaderSentiment, \
MultiSentiment
MultiSentiment, SentiArt


class LiuHuTest(unittest.TestCase):
Expand Down Expand Up @@ -93,5 +93,12 @@ def setUp(self):
self.new_cols = 1


class SentiArtTest(LiuHuTest):
    """Re-run the tests inherited from LiuHuTest with the SentiArt method."""

    def setUp(self):
        # SentiArt declares seven sentiments; presumably the inherited tests
        # compare this count with the number of added columns - verify
        # against LiuHuTest.
        self.new_cols = 7
        self.method = SentiArt()
        self.corpus = Corpus.from_file('deerwester')


if __name__ == "__main__":
unittest.main()
65 changes: 47 additions & 18 deletions orangecontrib/text/widgets/owsentimentanalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from Orange.widgets.widget import OWWidget, Msg
from orangecontrib.text import Corpus, preprocess
from orangecontrib.text.sentiment import VaderSentiment, LiuHuSentiment, \
MultiSentiment, CustomDictionaries, MultisentimentDictionaries
MultiSentiment, CustomDictionaries, SentiArt, MultisentimentDictionaries, \
SentiArtDictionaries
from orangecontrib.text.widgets.owpreprocess import FileLoader, _to_abspath
from orangecontrib.text.preprocess import PreprocessorList
from orangewidget.utils.filedialogs import RecentPath
Expand All @@ -31,17 +32,20 @@ class Outputs:
autocommit = settings.Setting(True)
liu_language = settings.Setting('English')
multi_language = settings.Setting('English')
senti_language = settings.Setting('English')
want_main_area = False
resizing_enabled = False

METHODS = [
LiuHuSentiment,
VaderSentiment,
MultiSentiment,
SentiArt,
CustomDictionaries
]
LANG = ['English', 'Slovenian']
MULTI_LANG = MultiSentiment.LANGS.keys()
SENTI_LANG = SentiArt.LANGS.keys()
DEFAULT_NONE = None

class Warning(OWWidget.Warning):
Expand All @@ -58,6 +62,7 @@ def __init__(self):
self.pp_corpus = None
self.pos_file = None
self.neg_file = None
self.senti_dict = None

self.form = QGridLayout()
self.method_box = box = gui.radioButtonsInBox(
Expand All @@ -77,6 +82,12 @@ def __init__(self):
sendSelectedValue=True,
contentsLength=10, items=[''],
callback=self._method_changed)
self.senti_art = gui.appendRadioButton(box, "SentiArt",
addToLayout=False)
self.senti_box = gui.comboBox(None, self, 'senti_language',
sendSelectedValue=True,
contentsLength=10, items=[''],
callback=self._method_changed)
self.custom_list = gui.appendRadioButton(box, "Custom dictionary",
addToLayout=False)

Expand All @@ -97,9 +108,12 @@ def __init__(self):
self.form.addWidget(self.multi_sent, 2, 0, Qt.AlignLeft)
self.form.addWidget(QLabel("Language:"), 2, 1, Qt.AlignRight)
self.form.addWidget(self.multi_box, 2, 2, Qt.AlignRight)
self.form.addWidget(self.custom_list, 3, 0, Qt.AlignLeft)
self.form.addWidget(self.senti_art, 3, 0, Qt.AlignLeft)
self.form.addWidget(QLabel("Language:"), 3, 1, Qt.AlignRight)
self.form.addWidget(self.senti_box, 3, 2, Qt.AlignRight)
self.form.addWidget(self.custom_list, 4, 0, Qt.AlignLeft)
self.filegrid = QGridLayout()
self.form.addLayout(self.filegrid, 4, 0, 1, 3)
self.form.addLayout(self.filegrid, 5, 0, 1, 3)
self.filegrid.addWidget(QLabel("Positive:"), 0, 0, Qt.AlignRight)
self.filegrid.addWidget(self.__posfile_loader.file_combo, 0, 1)
self.filegrid.addWidget(self.__posfile_loader.browse_btn, 0, 2)
Expand All @@ -109,9 +123,11 @@ def __init__(self):
self.filegrid.addWidget(self.__negfile_loader.browse_btn, 1, 2)
self.filegrid.addWidget(self.__negfile_loader.load_btn, 1, 3)

self.senti_dict = MultisentimentDictionaries()
self.update_multi_box()
self.senti_online = self.senti_dict.online
self.multi_dict = MultisentimentDictionaries()
self.senti_dict = SentiArtDictionaries()
self.update_box(self.multi_box, self.multi_dict, MultiSentiment)
self.update_box(self.senti_box, self.senti_dict, SentiArt)
self.online = self.multi_dict.online
self.check_sentiment_online()

ac = gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
Expand Down Expand Up @@ -140,24 +156,31 @@ def __set_lx_path(self, path: RecentPath, paths: List[RecentPath] = []):
self.__negfile_loader.set_current_file(_to_abspath(path))
self.neg_file = self.__negfile_loader.get_current_file()

def update_multi_box(self):
if self.senti_dict.supported_languages():
self.multi_box.clear()
items = sorted([key for (key, value) in MultiSentiment.LANGS.items()
if value in self.senti_dict.supported_languages()])
self.multi_box.addItems(items)
self.multi_box.setCurrentIndex(items.index("English"))
def update_box(self, box, dictionary, method):
    """Refill a language combo box with the currently available languages.

    Parameters
    ----------
    box
        Combo box to refill; it is always cleared first.
    dictionary
        Object whose ``supported_languages()`` returns the available
        language codes.
    method
        Sentiment method class whose ``LANGS`` maps language names to
        language codes.

    "English" is pre-selected when it is available; otherwise the first
    language in alphabetical order is selected.  The box stays empty when
    no language is available.
    """
    box.clear()
    supported_languages = dictionary.supported_languages()
    if not supported_languages:
        return
    items = sorted(name for name, code in method.LANGS.items()
                   if code in supported_languages)
    if not items:
        return
    box.addItems(items)
    # items.index("English") alone raises ValueError when English is not
    # among the available languages.
    box.setCurrentIndex(items.index("English") if "English" in items else 0)

def check_sentiment_online(self):
    """Refresh the language boxes and the offline warnings.

    When the online state reported by ``self.multi_dict`` differs from the
    cached ``self.online``, both language boxes are refilled and the cache
    is updated.  Both offline warnings are then cleared and, if the
    dictionaries are offline and a dictionary-backed method is selected,
    one of them is raised again: ``senti_offline`` when some lexicon is
    still available, ``senti_offline_no_lang`` when none is.
    """
    current_state = self.multi_dict.online
    if self.online != current_state:
        self.update_box(self.multi_box, self.multi_dict, MultiSentiment)
        self.update_box(self.senti_box, self.senti_dict, SentiArt)
        self.online = current_state

    self.Warning.senti_offline.clear()
    self.Warning.senti_offline_no_lang.clear()
    if current_state:
        return

    # Indices into METHODS: 2 is MultiSentiment, 3 is SentiArt.
    if self.method_idx == 2:
        dictionary = self.multi_dict
    elif self.method_idx == 3:
        # Was ``self.senti_dict_dict``, which does not exist.
        dictionary = self.senti_dict
    else:
        return

    if dictionary.supported_languages():
        self.Warning.senti_offline()
    else:
        self.Warning.senti_offline_no_lang()
Expand Down Expand Up @@ -192,6 +215,12 @@ def commit(self):
return
else:
out = method(language=self.multi_language).transform(corpus)
elif method.name == 'SentiArt':
if not self.senti_dict.online:
self.Warning.senti_offline()
self.update_box(self.senti_box, self.senti_dict, SentiArt)
return
out = method(language=self.senti_language).transform(corpus)
elif method.name == 'Custom Dictionaries':
out = method(self.pos_file, self.neg_file).transform(corpus)
if (self.pos_file and not self.neg_file) or \
Expand Down
5 changes: 5 additions & 0 deletions orangecontrib/text/widgets/tests/test_owsentimentanalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ def test_output(self):
out_corpus = self.get_output(self.widget.Outputs.corpus)
self.assertEqual(len(out_corpus.domain), len(self.corpus.domain) + 1)

# test SentiArt
self.widget.senti_art.click()
out_corpus = self.get_output(self.widget.Outputs.corpus)
self.assertEqual(len(out_corpus.domain), len(self.corpus.domain) + 7)

# test liu hu
self.widget.liu_hu.click()
out_corpus = self.get_output(self.widget.Outputs.corpus)
Expand Down

0 comments on commit aa10616

Please sign in to comment.