NLTKインスタンス動作1.2


直接コード:
1、実例テスト1
# -*- coding: UTF-8 -*-
# !/usr/python/bin
# Filename:NltkTest42
'''              '''
from __future__ import division
import nltk
from nltk.book import *
import time
import datetime

class NltkTest42:
    """Demonstrate word-filtering list comprehensions on a text and a sentence.

    Both arguments are iterables of word strings; the accompanying script
    passes ``text1`` and ``sent7`` from ``nltk.book``.
    """

    def __init__(self, text, sent):
        """Store the two word sequences and echo them to stdout."""
        self.text = text
        self.sent = sent
        # Single-argument print(...) produces the same output on Python 2
        # and is also valid Python 3 syntax.
        print(self.text)
        print(self.sent)

    def SomeTests(self):
        """Print several filtered views of the sentence and of the text vocabulary."""
        sent = self.sent
        print(sent)
        # Filter the sentence by word length.
        print([w for w in sent if len(w) < 4])
        print([w for w in sent if len(w) <= 4])
        print([w for w in sent if len(w) == 4])
        print([w for w in sent if len(w) != 4])

        # Build the vocabulary once instead of re-scanning the whole text
        # for every query below.
        vocab = set(self.text)
        print(sorted(w for w in vocab if w.endswith('ableness')))
        print(sorted(term for term in vocab if 'gnt' in term))

        # The next four expressions are evaluated but their results are not
        # printed, exactly as in the original.
        # NOTE(review): presumably left unprinted because the output would be
        # very long -- confirm before printing or removing them.
        sorted(item for item in vocab if item.istitle())
        sorted(item for item in set(sent) if item.isdigit())
        [len(w) for w in self.text]
        [w.upper() for w in self.text]

        # Words containing 'cie' or 'cei' that are longer than 10 characters.
        tricky = sorted(w for w in vocab if 'cie' in w or 'cei' in w)
        for word in tricky:
            if len(word) > 10:
                print(word)


# Run the NltkTest42 demo on text1/sent7 and report how long SomeTests() took.
nt42 = NltkTest42(text1, sent7)
starttime = datetime.datetime.now()
print('Start at:')
# Print the very timestamp that is used for the elapsed-time calculation.
print(starttime.strftime('%Y-%m-%d %H:%M:%S'))
nt42.SomeTests()
endtime = datetime.datetime.now()
print('Finish at:')
print(endtime.strftime('%Y-%m-%d %H:%M:%S'))
# total_seconds() covers the whole interval; the .seconds attribute alone
# drops the days component of the timedelta.
# NOTE(review): the label text in the two strings below appears to have been
# lost (only whitespace and punctuation remain); they are kept byte-for-byte.
print('      %d ' % (endtime - starttime).total_seconds())
print('       ,         ?')


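# Word comparison operators (s and t are strings)
# s.startswith(t)    test if s starts with t
# s.endswith(t)      test if s ends with t
# t in s             test if t is contained inside s
# s.islower()        test if all cased characters in s are lowercase
# s.isupper()        test if all cased characters in s are uppercase
# s.isalpha()        test if all characters in s are alphabetic
# s.isalnum()        test if all characters in s are alphanumeric
# s.isdigit()        test if all characters in s are digits
# s.istitle()        test if s is titlecased (every word in s starts with a capital)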

2、実例テスト2
# -*- coding: UTF-8 -*-
#!/user/python/bin
#filename:Nltk_test091902   //             
import nltk
from nltk.book import *
class NltkTest38:
    """Frequency-distribution experiments on a tokenised text.

    Relies on ``FreqDist`` and ``bigrams`` being in scope through the file's
    ``from nltk.book import *``. ``text`` is an iterable of word strings; the
    ``BigramsCheck`` method additionally needs it to offer ``collocations()``
    (the accompanying script passes ``text1``).
    """

    def __init__(self, text):
        """Store the text and echo it to stdout."""
        self.text = text
        # Single-argument print(...) produces the same output on Python 2
        # and is also valid Python 3 syntax.
        print(self.text)

    def FreqAnalyse(self, queryStr):
        """Print 50 hapaxes, the 50 most frequent words and the count of ``queryStr``.

        Also plots the 50 most frequent samples.
        """
        fdist = FreqDist(self.text)
        # Rank by frequency explicitly: only old NLTK releases returned
        # keys() as a frequency-sorted list, and newer ones return an
        # unsorted (and, on Python 3, unsliceable) view.
        vocabulary = sorted(fdist, key=fdist.get, reverse=True)
        hapaxesWord = fdist.hapaxes()
        # Words that occur exactly once.
        print(hapaxesWord[:50])
        # Most frequent words.
        print(vocabulary[:50])
        # cumulative=False plots per-sample counts rather than a running total.
        fdist.plot(50, cumulative=False)
        print(fdist[queryStr])

    def LongWord(self):
        """Print the sorted vocabulary words longer than 15 characters."""
        voc = set(self.text)
        longWords = [word for word in voc if len(word) > 15]
        print('longword:')
        print(sorted(longWords))

    def CheckUseless(self):
        """Print words longer than 7 characters that occur more than 7 times."""
        fdist = FreqDist(self.text)
        # NOTE(review): this label appears to have lost its text (only
        # whitespace remains); it is kept byte-for-byte.
        print('       ')
        print(sorted([word for word in set(self.text) if len(word) > 7 and fdist[word] > 7]))

    def BigramsCheck(self):
        """Print the bigrams of a sample sentence, then the text's collocations."""
        # NOTE(review): label text appears to have been lost; kept byte-for-byte.
        print('   ')
        # list(...) so the pairs are shown even when bigrams() returns a
        # generator (as it does in NLTK 3).
        print(list(bigrams(['more', 'is', 'said', 'than', 'done'])))
        # collocations() prints its own result and returns nothing, so
        # wrapping it in print() would emit a stray "None".
        self.text.collocations()

    def Others(self):
        """Print, tabulate and plot statistics on the distribution of word lengths."""
        fdist = FreqDist([len(word) for word in self.text])
        # list(...) keeps the output a plain list on Python 3 as well.
        print(list(fdist.keys()))
        print(list(fdist.items()))
        # Count and relative frequency of the most common word length.
        print(fdist[fdist.max()])
        print(fdist.freq(fdist.max()))
        fdist.tabulate()
        fdist.plot()
# Build the analyser for text1, look up 'whale', then run the remaining
# experiments in their original order.
nt38 = NltkTest38(text1)
nt38.FreqAnalyse('whale')
for _experiment in (nt38.LongWord, nt38.CheckUseless, nt38.BigramsCheck, nt38.Others):
    _experiment()