Skip to content

Commit

Permalink
Merge pull request selfteaching#891 from shanchongyue/master
Browse files Browse the repository at this point in the history
Create d6_exercise_stats_word.py
  • Loading branch information
CnPyYang authored Mar 27, 2019
2 parents 67e301c + b44ecaf commit cf65762
Showing 1 changed file with 56 additions and 0 deletions.
56 changes: 56 additions & 0 deletions 19100203/shanchongyue/d6_exercise_stats_word.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Sample text mixing English and Chinese sentences.
text = '''
Do one thing at a time, and do well.
  一次只做一件事,做到最好!
Never forget to say “thanks”.
  永远不要忘了说“谢谢”!
Keep on going never give up.
  勇往直前, 决不放弃!
'''

# Four independent module-level dictionaries, each starting out empty.
dict1, dict2, dict3, dict4 = {}, {}, {}, {}


def stats_text_en(text):
    """Count how often each English word occurs in *text*.

    A word is a maximal run of ASCII letters (A-Z, a-z); every other
    character (digits, punctuation, whitespace, Chinese characters, ...)
    acts as a separator.  Matching is case-sensitive, so "Do" and "do"
    are counted as two different words.

    Args:
        text: The string to analyse; it may mix English with other scripts.

    Returns:
        A newly built dict mapping each word to its number of occurrences,
        ordered from the most to the least frequent word.  Words with the
        same count keep the order in which they first appear in *text*.
    """
    # Imported locally so the function stays self-contained.
    import re
    from collections import Counter

    # One pass extracts the words directly; this replaces the previous
    # substitute / split / strip-empty-strings sequence.
    words = re.findall(r"[A-Za-z]+", text)

    # The result is built from scratch on every call, so counts from an
    # earlier call can never leak into a later one.
    return dict(Counter(words).most_common())
# Demo: print the English word frequencies of the sample text.
# The former `str = ''` assignment was removed: its value was never read and
# it shadowed the built-in `str` type for the rest of the module.
print(stats_text_en(text))



# Chinese character frequency statistics.


def stats_text_cn(checkstr):
    """Count how often each Chinese character occurs in *checkstr*.

    Only characters in the range U+4E00..U+9FFF are counted; everything
    else (Latin letters, digits, punctuation, whitespace) is ignored.

    Args:
        checkstr: The string to analyse; it may mix Chinese with other text.

    Returns:
        A newly built dict mapping each Chinese character to its number of
        occurrences, ordered from the most to the least frequent character.
        Characters with the same count keep the order in which they first
        appear in *checkstr*.
    """
    # Imported locally so the function stays self-contained.
    from collections import Counter

    # A single pass over the string; the result lives in a local object so
    # repeated calls are independent of each other and of module globals.
    counts = Counter(ch for ch in checkstr if '\u4e00' <= ch <= '\u9fff')
    return dict(counts.most_common())


# A text mixing Chinese and English.
text = '''
Do one thing at a time, and do well.
  一次只做一件事,做到最好!
Never forget to say “thanks”.
  永远不要忘了说“谢谢”!
Keep on going never give up.
  勇往直前, 决不放弃!
'''

# (character, count) pairs for the sample text, most frequent first.  The
# name `cndic` is kept so the module still exposes the same final value.
cndic = list(stats_text_cn(text).items())

print(cndic)

0 comments on commit cf65762

Please sign in to comment.