Pythonで1つの文章の中から最も多く現れる10個の単語を探し出す

970 ワード

#!/usr/bin/python
#Filename: readlinepy.py

import sys,re
from collections import Counter

# Path of the text file analysed when this file is run as a script.
urldir = r"C:\python27\a.txt"

# Compiled once; a "word" is a maximal run of \w characters, i.e. everything
# that is left after treating each \W character as a separator.
_WORD_RE = re.compile(r"\w+")


def count_words(lines):
    """Count how often each word occurs in *lines*.

    Args:
        lines: Any iterable of strings (an open text file works, and is
            consumed lazily line by line).

    Returns:
        A ``collections.Counter`` mapping each word to its number of
        occurrences. Matching is case-sensitive: ``"The"`` and ``"the"``
        are counted as different words.
    """
    counts = Counter()
    for line in lines:
        counts.update(_WORD_RE.findall(line))
    return counts


def top_words(counts, limit=10):
    """Return the ``(word, count)`` pairs with the highest counts.

    The cut-off is made on *distinct count values*, not on the number of
    words: every word whose count is one of the ``limit`` largest distinct
    counts is returned, so ties are never dropped and the result may
    contain more than ``limit`` pairs.

    Args:
        counts: Mapping of word -> occurrence count.
        limit: How many distinct count values to keep (default 10).

    Returns:
        A list of ``(word, count)`` tuples ordered by count, highest first.
        Words with equal counts keep the iteration order of *counts*.
        An empty list is returned for an empty mapping or ``limit <= 0``.
    """
    if limit <= 0:
        # Guard explicitly: a slice of [-0:] would keep *every* count.
        return []
    top_counts = set(sorted(set(counts.values()))[-limit:])
    selected = [pair for pair in counts.items() if pair[1] in top_counts]
    # sorted() is stable, so tied words stay in their original order.
    return sorted(selected, key=lambda pair: pair[1], reverse=True)


if __name__ == "__main__":
    # The context manager closes the file even if counting raises.
    with open(urldir, 'r') as f:
        word_counts = count_words(f)

    # One (word, count) tuple per line, most frequent first.
    for entry in top_words(word_counts):
        print(entry)