Pythonでファイル内のユニークな単語の総数、単語の平均の長さ、および最も多く出現する上位5つの単語を求めます

1291 ワード

ファイルの中から、the total number of unique words、the average length of all words in the text、the top five most commonly used words in the text を求めます。
#!/usr/bin/python
# -*- coding: UTF-8 -*-
def getText(path='Rental.txt'):
    """Read a UTF-8 text file and return it normalised for word counting.

    The text is lower-cased, then every punctuation character and digit
    listed in ``separators`` below is replaced by a single space, so that
    a plain ``str.split()`` on the result yields the words.

    Args:
        path: File to read. Defaults to ``'Rental.txt'`` in the current
            working directory.

    Returns:
        The normalised text as a ``str``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Characters treated as word separators. The apostrophe is not in the
    # set, so contractions such as "it's" stay in one piece.
    separators = '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~1234567890'

    # Text mode is required here: open() raises ValueError when an
    # encoding is combined with binary mode ('rb').
    with open(path, 'r', encoding='UTF-8') as f:
        txt = f.read().lower()

    # One translate() pass instead of one replace() call per character.
    return txt.translate(str.maketrans(separators, ' ' * len(separators)))
# Report word statistics for the normalised text: the number of distinct
# words, the average word length, and the five most frequent words.
hamletTxt = getText()
words = hamletTxt.split()

# Tally how many times each distinct word occurs.
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1

# Most frequent first. sorted() is stable, so words with equal counts keep
# the order in which they were first seen.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

print("the total number of unique words in the {}\n".format(len(items)))

# Average over every occurrence, not over distinct words.
num = len(words)
total_chars = 0
for word in words:
    total_chars += len(word)
# Guard against an empty file, which would otherwise divide by zero.
average = total_chars / num if num else 0
print("The average length of all words in the text is {}\n".format(average))

print("the top five most commonly used words in the text ")
# Slicing tolerates texts with fewer than five distinct words.
for word, count in items[:5]:
    print("{0:<10}{1:>5}".format(word, count))