forked from SebastianRestrepoA/NLP-codes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
text_processing_examples.py
36 lines (22 loc) · 1.59 KB
/
text_processing_examples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from text_processing_fns import *
import pandas as pd
# --- Examples 1-5 -----------------------------------------------------------
# Read the knowledge-base workbook once; every example in this group is fed
# its 'Utterance' column.
path = './KnowledgeBase.xlsx'
vKnowledgeBase = pd.read_excel(path)

# The helper names below are not defined in this script; presumably they are
# supplied by the wildcard import of text_processing_fns -- confirm there.
example_1 = count_words(vKnowledgeBase['Utterance'])
example_2 = remove_stopwords(vKnowledgeBase['Utterance'])
example_3 = remove_characters(vKnowledgeBase['Utterance'])
example_4 = lowercase_transform(vKnowledgeBase['Utterance'])
example_5 = lemmatization_transform(vKnowledgeBase['Utterance'])

# --- Example 6 --------------------------------------------------------------
# Workbook handed to fn_calculate_total_utterances_tagged.
vPathTaggedFile = (
    'C:/Users/Administrator/Documents/CHATBOT SOFY/Hipotecario/programas de gobierno/Refinamiento 1/'
    'Evaluacion inicial/Tagger_evaluacion_inicial.xlsx'
)
example_6 = fn_calculate_total_utterances_tagged(vPathTaggedFile)

# --- Examples 7-8 and the knowledge-base analyses ---------------------------
# Workbook shared by all remaining calls in this script.
vPathKnowledgeBase = (
    'C:/Users/Administrator/Documents/CHATBOT SOFY/Hipotecario/programas de gobierno/Refinamiento 1/'
    'Curacion/Programas de Gobierno.xlsx'
)

example_7 = fn_calculate_total_utterances_per_intent(vPathKnowledgeBase, plot=True)

# The second and third arguments are passed positionally; their meaning is
# defined by the helper's signature (looks like a threshold and an output
# name -- verify against text_processing_fns).
fn_utterances_similarity_between_intents(vPathKnowledgeBase, 0.4, 'similarity_analysis_programas')

example_8 = fn_calculate_word_frequency_per_intents(vPathKnowledgeBase, generate_excel=True)

# Second workbook required only by the fail-utterance analysis just below.
vPathSuccesFailFile = (
    'C:/Users/Administrator/Documents/CHATBOT SOFY/Hipotecario/programas de gobierno/Refinamiento 1/'
    'metrics/Iter_4/Kfolds/success_fail_confidence_programas_gobierno.xlsx'
)
fn_word_frequency_analysis_fail_utterances(vPathKnowledgeBase, vPathSuccesFailFile)

# Defined here but not consumed by any call in the visible part of the script.
vPathNoAddUtterances = (
    'C:/Users/Administrator/Documents/CHATBOT SOFY/Hipotecario/programas de gobierno/Refinamiento 1/'
    'Curacion/utterances_no_agregadas.xlsx'
)