forked from selfteaching/selfteaching-python-camp
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request selfteaching#891 from shanchongyue/master
Create d6_exercise_stats_word.py
- Loading branch information
Showing
1 changed file
with
56 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# Sample text mixing English and Chinese sentences; used as input for the
# frequency statistics in this file.
text = '''
Do one thing at a time, and do well.
一次只做一件事,做到最好!
Never forget to say “thanks”.
永远不要忘了说“谢谢”!
Keep on going never give up.
勇往直前, 决不放弃!
'''

# Module-level dictionaries, initially empty. They are kept under their
# original names so that any code referring to them still resolves.
dict1 = {}
dict2 = {}
dict3 = {}
dict4 = {}
|
||
|
||
def stats_text_en(text):
    """Count how often each English word occurs in *text*.

    A word is a maximal run of ASCII letters (A-Z, a-z); every other
    character (digits, punctuation, whitespace, Chinese characters, ...)
    acts as a separator. Matching is case-sensitive, so "Do" and "do" are
    counted as two different words.

    Args:
        text: The string to analyse. May be empty or contain no English.

    Returns:
        A new dict mapping each word to its number of occurrences, ordered
        from most to least frequent. Words with equal counts keep the order
        in which they first appear in *text*.
    """
    # Function-scope imports keep this block self-contained.
    import re
    from collections import Counter

    words = re.findall(r"[A-Za-z]+", text)
    # The counts live in a local Counter, so repeated calls never see results
    # left over from a previous call. most_common() is a stable descending
    # sort, so ties stay in first-appearance order.
    return dict(Counter(words).most_common())
# Print the English word frequencies of the sample text.
print(stats_text_en(text))
|
||
|
||
|
||
# Character-frequency statistics for Chinese text.


def stats_text_cn(checkstr):
    """Count how often each Chinese character occurs in *checkstr*.

    A character is counted when its code point lies in the range U+4E00
    through U+9FFF inclusive; every other character is ignored.

    Args:
        checkstr: The string to analyse. May be empty or contain no Chinese.

    Returns:
        A new dict mapping each counted character to its number of
        occurrences, in order of first appearance in *checkstr*.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    # A fresh Counter per call keeps calls independent of each other and
    # counts in a single pass over the text.
    counts = Counter(ch for ch in checkstr if "\u4e00" <= ch <= "\u9fff")
    return dict(counts)


# A sample text mixing English and Chinese.
text = '''
Do one thing at a time, and do well.
一次只做一件事,做到最好!
Never forget to say “thanks”.
永远不要忘了说“谢谢”!
Keep on going never give up.
勇往直前, 决不放弃!
'''

# Count the Chinese characters, then sort the (character, count) pairs from
# most to least frequent; the sort is stable, so ties keep first-appearance
# order.
cndic = sorted(stats_text_cn(text).items(), key=lambda item: item[1], reverse=True)

print(cndic)