# Lemmatization.py
# Forked from krishnaik06/Natural-Language-Processing.
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
# Sample input text for the lemmatization demo. It is split into sentences
# with nltk.sent_tokenize further down in this script.
# NOTE(review): the word "transcendent" is broken across a line break in the
# literal ("a t" / "ranscendent"), so it will presumably be tokenized as two
# separate tokens. Left untouched because the literal is runtime data --
# confirm whether this is intentional.
paragraph = """Thank you all so very much. Thank you to the Academy.
Thank you to all of you in this room. I have to congratulate
the other incredible nominees this year. The Revenant was
the product of the tireless efforts of an unbelievable cast
and crew. First off, to my brother in this endeavor, Mr. Tom
Hardy. Tom, your talent on screen can only be surpassed by
your friendship off screen … thank you for creating a t
ranscendent cinematic experience. Thank you to everybody at
Fox and New Regency … my entire team. I have to thank
everyone from the very onset of my career … To my parents;
none of this would be possible without you. And to my
friends, I love you dearly; you know who you are. And lastly,
I just want to say this: Making The Revenant was about
man's relationship to the natural world. A world that we
collectively felt in 2015 as the hottest year in recorded
history. Our production needed to move to the southern
tip of this planet just to be able to find snow. Climate
change is real, it is happening right now. It is the most
urgent threat facing our entire species, and we need to work
collectively together and stop procrastinating. We need to
support leaders around the world who do not speak for the
big polluters, but who speak for all of humanity, for the
indigenous people of the world, for the billions and
billions of underprivileged people out there who would be
most affected by this. For our children’s children, and
for those people out there whose voices have been drowned
out by the politics of greed. I thank you all for this
amazing award tonight. Let us not take this planet for
granted. I do not take tonight for granted. Thank you so very much."""
# Split the paragraph into individual sentences; each one is rewritten in
# place below with its lemmatized, stop-word-free form.
sentences = nltk.sent_tokenize(paragraph)
lemmatizer = WordNetLemmatizer()

# Build the stop-word set once. The original rebuilt it for every token,
# which is loop-invariant work repeated inside the comprehension.
stop_words = set(stopwords.words('english'))

# Lemmatization
# For every sentence: tokenize into words, drop tokens that are in the
# stop-word set, lemmatize what remains, and join the result back into a
# single space-separated string stored at the same index of `sentences`.
# NOTE(review): the stop-word test is case-sensitive, so a capitalised token
# is only dropped if the stop-word list contains that exact casing --
# confirm whether tokens should be lowercased before the check.
for i, sentence in enumerate(sentences):
    words = [
        lemmatizer.lemmatize(word)
        for word in nltk.word_tokenize(sentence)
        if word not in stop_words
    ]
    sentences[i] = ' '.join(words)