Skip to content

Commit 30a1512

Browse files
committed
ENH Use matutils.corpus2dense instead of looping
1 parent cd69e60 commit 30a1512

File tree

1 file changed

+4
-8
lines changed

1 file changed

+4
-8
lines changed

ch04/blei_lda.py

+4 -8
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
from __future__ import print_function
99
from wordcloud import create_cloud
1010
try:
11-
from gensim import corpora, models
11+
from gensim import corpora, models, matutils
1212
except:
1313
print("import gensim failed.")
1414
print()
@@ -44,15 +44,11 @@
4444
# We first identify the most discussed topic, i.e., the one with the
4545
# highest total weight
4646

47-
# First, we need to sum up the weights across all the documents
48-
weight = np.zeros(model.num_topics)
49-
for doc in corpus:
50-
for col, val in model[doc]:
51-
weight[col] += val
52-
# As a reasonable alternative, we could have used the log of val:
53-
# weight[col] += np.log(val)
47+
topics = matutils.corpus2dense(model[corpus], num_terms=model.num_topics)
48+
weight = topics.sum(1)
5449
max_topic = weight.argmax()
5550

51+
5652
# Get the top 64 words for this topic
5753
# Without the argument, show_topic would return only 10 words
5854
words = model.show_topic(max_topic, 64)

0 commit comments

Comments
 (0)