Pythonでファイル内の単語数、単語の平均の長さ、および最も多く出現する5つの単語を求めます
1291 ワード
ファイルから、ユニークな単語の総数(the total number of unique words)、テキスト中の全単語の平均の長さ(the average length of all words in the text)、および最も多く使われている上位5つの単語(the top five most commonly used words in the text)を求めます。
#!/usr/bin/python
# -*- coding: UTF-8 -*-
def getText(path='Rental.txt'):
    """Read a text file and return its contents normalised for word counting.

    The text is lower-cased, and every character in the strip list below
    (punctuation and the digits 0-9) is replaced by a single space, so that a
    later ``str.split()`` yields bare words. Characters that are not listed
    (for example the ASCII apostrophe) are left untouched.

    Args:
        path: File to read, decoded as UTF-8. Defaults to ``'Rental.txt'``.

    Returns:
        The normalised text as a ``str``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Text mode is required here: open() rejects an ``encoding`` argument in
    # binary mode. The ``with`` block guarantees the handle is closed.
    with open(path, 'r', encoding='UTF-8') as f:
        txt = f.read().lower()
    strip_chars = '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~1234567890'
    # A single translate() pass maps each unwanted character to a space.
    return txt.translate(str.maketrans(strip_chars, ' ' * len(strip_chars)))
# Script body: report the number of unique words, the average word length,
# and the five most frequent words of the normalised text from getText().
from collections import Counter

hamletTxt = getText()
words = hamletTxt.split()

# Counter tallies occurrences; most_common() returns (word, count) pairs
# sorted by descending count, keeping first-seen order among equal counts.
counts = Counter(words)
items = counts.most_common()

print("the total number of unique words in the {}\n".format(len(counts)))

# Average length is weighted by frequency: total characters over all word
# occurrences divided by the number of occurrences.
total_chars = sum(len(word) * count for word, count in items)
total_words = len(words)
# Guard against an empty file, which would otherwise divide by zero.
average_length = total_chars / total_words if total_words else 0.0
print("The average length of all words in the text is {}\n".format(average_length))

print("the top five most commonly used words in the text ")
# Slicing tolerates texts with fewer than five distinct words.
for word, count in items[:5]:
    print("{0:<10}{1:>5}".format(word, count))