NLTKインスタンス動作1.2
3957 ワード
直接コード:
1、実例テスト1
2、実例テスト2
1、実例テスト1
# -*- coding: UTF-8 -*-
# !/usr/python/bin
# Filename:NltkTest42
''' '''
from __future__ import division
import nltk
from nltk.book import *
import time
import datetime
class NltkTest42:
    """Demonstrate simple word-filtering expressions on a text and a sentence.

    ``text`` and ``sent`` are stored unchanged. The methods iterate over both
    and call ``len()`` and string methods on each element, so both are
    expected to be iterables of strings.
    """

    def __init__(self, text, sent):
        """Store ``text`` and ``sent`` on the instance and print both."""
        self.text = text
        self.sent = sent
        print(self.text)
        print(self.sent)

    def SomeTests(self):
        """Print several filtered views of ``self.sent`` and ``self.text``.

        Output, in order: the sentence itself; its words shorter than 4, at
        most 4, exactly 4 and not exactly 4 characters long; the sorted
        distinct words of the text ending in 'ableness'; the sorted distinct
        words containing 'gnt'; and finally, one per line, every distinct
        word longer than 10 characters that contains 'cie' or 'cei'.

        Returns None.
        """
        sent = self.sent
        print(sent)
        print([w for w in sent if len(w) < 4])
        print([w for w in sent if len(w) <= 4])
        print([w for w in sent if len(w) == 4])
        print([w for w in sent if len(w) != 4])

        # Build the distinct-word set once; every vocabulary query reuses it.
        vocab = set(self.text)
        print(sorted([w for w in vocab if w.endswith('ableness')]))
        print(sorted([term for term in vocab if 'gnt' in term]))

        # The next four expressions are evaluated but their results are
        # discarded, exactly as before.
        # NOTE(review): presumably left unprinted because the output is very
        # long for a full corpus -- confirm whether they should be printed.
        sorted([item for item in vocab if item.istitle()])
        sorted([item for item in set(sent) if item.isdigit()])
        [len(w) for w in self.text]
        [w.upper() for w in self.text]

        tricky = sorted([w for w in vocab if 'cie' in w or 'cei' in w])
        for word in tricky:
            if len(word) > 10:
                print(word)
# Driver: run NltkTest42.SomeTests() once and report wall-clock timing.
# NOTE(review): text1 and sent7 are presumably supplied by the wildcard
# `from nltk.book import *` at the top of the script -- confirm.
nt42 = NltkTest42(text1, sent7)

starttime = datetime.datetime.now()
print('Start at:')
# Print the same instant that is used for the elapsed-time calculation.
print(starttime.strftime('%Y-%m-%d %H:%M:%S'))

nt42.SomeTests()

endtime = datetime.datetime.now()
print('Finish at:')
print(endtime.strftime('%Y-%m-%d %H:%M:%S'))
# total_seconds() covers the whole interval; timedelta.seconds is only the
# seconds component and silently drops whole days.
print(' %d ' % (endtime - starttime).total_seconds())
print(' , ?')
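# String test methods used above (s and t are strings):
# s.startswith(t)   test if s starts with t
# s.endswith(t)     test if s ends with t
# t in s            test if t is a substring of s
# s.islower()       test if s contains cased characters and all are lowercase
# s.isupper()       test if s contains cased characters and all are uppercase
# s.isalpha()       test if s is non-empty and all characters are alphabetic
# s.isalnum()       test if s is non-empty and all characters are alphanumeric
# s.isdigit()       test if s is non-empty and all characters are digits
# s.istitle()       test if s is titlecased (every word in s starts with a capital)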
2、実例テスト2
# -*- coding: UTF-8 -*-
#!/user/python/bin
#filename:Nltk_test091902 //
import nltk
from nltk.book import *
class NltkTest38:
    """Frequency-distribution demonstrations over a sequence of word tokens.

    ``text`` is stored unchanged. ``BigramsCheck`` additionally calls
    ``text.collocations()``, so for that method ``text`` must provide it
    (presumably an ``nltk.text.Text`` -- confirm against the caller).
    """

    def __init__(self, text):
        """Store ``text`` on the instance and print it."""
        self.text = text
        print(self.text)

    def FreqAnalyse(self, queryStr):
        """Print hapaxes, the most frequent words and the count of ``queryStr``.

        Prints the first 50 hapaxes (words occurring once), then the 50 most
        frequent words, plots the non-cumulative frequency of the top 50
        samples, and finally prints how often ``queryStr`` occurs.
        """
        fdist = FreqDist(self.text)
        # Rank by count explicitly: FreqDist.keys() is frequency-ordered (and
        # sliceable) only on old NLTK releases, so slicing it is not reliable.
        vocabulary = sorted(fdist, key=fdist.get, reverse=True)
        hapaxesWord = fdist.hapaxes()
        print(hapaxesWord[:50])
        print(vocabulary[:50])
        # cumulative=False plots per-sample counts rather than a running total.
        fdist.plot(50, cumulative=False)
        print(fdist[queryStr])

    def LongWord(self):
        """Print the sorted distinct words longer than 15 characters."""
        voc = set(self.text)
        longWords = [word for word in voc if len(word) > 15]
        print('longword:')
        print(sorted(longWords))

    def CheckUseless(self):
        """Print the sorted distinct words that are both long and frequent.

        A word qualifies when it has more than 7 characters and occurs more
        than 7 times in the text.
        """
        fdist = FreqDist(self.text)
        print(' ')
        print(sorted([word for word in set(self.text)
                      if len(word) > 7 and fdist[word] > 7]))

    def BigramsCheck(self):
        """Print the bigrams of a fixed sample sentence, then the collocations."""
        print(' ')
        # list() materialises the pairs so they are printed even when
        # bigrams() returns a lazy iterator.
        print(list(bigrams(['more', 'is', 'said', 'than', 'done'])))
        # collocations() writes its result to stdout itself and returns None;
        # wrapping it in print() would emit an extra "None" line.
        self.text.collocations()

    def Others(self):
        """Print and plot statistics about the distribution of word lengths."""
        fdist = FreqDist([len(word) for word in self.text])
        # list() so the contents are shown rather than a view object's repr.
        print(list(fdist.keys()))
        print(list(fdist.items()))
        # Look up the most common word length once and reuse it.
        commonest = fdist.max()
        print(fdist[commonest])
        print(fdist.freq(commonest))
        fdist.tabulate()
        fdist.plot()
# Driver: build the demo object, then run every demonstration in order.
nt38 = NltkTest38(text1)
nt38.FreqAnalyse('whale')
# The remaining demos take no arguments, so run them from one ordered tuple.
for _step in (nt38.LongWord, nt38.CheckUseless, nt38.BigramsCheck, nt38.Others):
    _step()