Python--酷我音楽(Kuwo)からすべての曲の歌詞をクロールし、wordcloud でワードクラウドを生成します。

3748 ワード

参考記事:http://blog.csdn.net/fontthrone/article/details/72775865
requestsライブラリの勉強:https://zhuanlan.zhihu.com/p/20410446
クローラーの原理紹介:http://www.cnblogs.com/zhaof/p/6898138.html
ここから本題(実際のコード)です。
import requests
import re
from bs4 import BeautifulSoup

#          user-Agent    http://www.cnblogs.com/rwxwsblog/p/4575894.html
# Browser-like User-Agent header sent with the requests below.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) Chrome/50.0.2661.102'}

# Site root; the song links extracted below are site-relative paths.
website1 = 'http://bd.kuwo.cn'
# Artist page to scrape (the last path segment is a percent-encoded name).
url = 'http://bd.kuwo.cn/mingxing/%E6%AF%9B%E4%B8%8D%E6%98%93.htm'

# Fetch the artist page exactly once.  The original issued a second request
# for the same page with a trailing space in the URL and without the headers;
# its result was never read.  A timeout keeps a stalled connection from
# hanging the script, and raise_for_status() stops early on an HTTP error
# instead of silently scraping an error page.
res = requests.get(url, headers=headers, timeout=10)
res.raise_for_status()
page = res  # the name `page` is kept as an alias of the single response

# Site-relative song links look like "/yinyue/<digits>".
repr1 = r'/yinyue/\d+'
# The first two matches are skipped, exactly as the original script did.
# NOTE(review): the reason for skipping them is not recorded -- confirm.
# The recorded sample output listed the same link twice in a row, so the
# remaining links are de-duplicated (first occurrence wins, order preserved)
# to avoid downloading every song page twice.
LaternUrls = list(dict.fromkeys(re.findall(repr1, res.text)[2:]))

# Pattern for the "lId<digits>" element ids looked up on each song page.
repr2 = r'lId\d+'

# Crawl every song page and collect the text of its lyric elements.
singList = []  # one entry per lyric element, in crawl order
for url in LaternUrls:
    url = website1 + url  # extracted links are site-relative
    try:
        # Send the same headers as the artist-page request; the timeout
        # keeps one stalled page from blocking the whole crawl.
        page = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # Best effort: one unreachable song must not abort the crawl.
        print('skipping %s: %s' % (url, exc))
        continue
    page.encoding = 'utf-8'
    bs = BeautifulSoup(page.text, 'html.parser')
    # Every "lId<digits>" token in the page source is tried as an element id.
    id1 = re.findall(repr2, page.text)
    for i in id1:
        node = bs.find(id=i)
        if node is None:
            # The token occurred in the source but not as an element id.
            continue
        singList.append(node.get_text())

# Join once at the end instead of growing the string with += per element.
singText = ''.join(singList)
print(singText)
print(len(singList))

# Segment the collected lyrics into words with jieba and join them with spaces
import jieba #       

# Tokens removed from the segmented lyrics: line breaks and punctuation.
# The two line-break entries are written as escape sequences; in the damaged
# source they had been expanded into real line breaks inside the quotes,
# which left the string literals unterminated.
# NOTE(review): ', ' is reproduced exactly as found -- confirm it is the
# token that was meant (it may have been a single punctuation character).
ExceptChars = ['\n', '\r\n', ', ', '(', ')']

# Segment the lyrics and drop the excluded tokens.
L = [x for x in jieba.cut(singText) if x not in ExceptChars]

# WordCloud.generate() receives one string, so the kept tokens are joined
# with single spaces.
words_split_space = ' '.join(L)

# Build and display the word cloud
import matplotlib.pyplot as plt
from wordcloud import WordCloud,ImageColorGenerator
import numpy as np
import PIL.Image as Image

# Image used both as the word-cloud mask and as the colour source.
imgShape = 'SmallCat.jpg'

# Open the image once and release the file handle as soon as the pixels have
# been copied into an array (the original opened the file twice and never
# closed the first handle).
with Image.open(imgShape) as img:
    w, h = img.size  # not used in the visible part of the script
    coloring = np.array(img)

# NOTE(review): font_path is a hard-coded Windows font location; presumably
# chosen so that CJK glyphs render -- adjust on other systems.
my_wordcloud = WordCloud(
    background_color="white",
    max_words=2000,
    mask=coloring,
    max_font_size=40,
    random_state=42,
    font_path="C:/Windows/Fonts/simkai.ttf",
)
my_wordcloud.generate(words_split_space)
image_colors = ImageColorGenerator(coloring)

# Figure 1: the word cloud with its default colours.
plt.imshow(my_wordcloud)
plt.axis("off")
plt.figure()

# Figure 2: the same cloud recoloured with colours taken from the image.
plt.imshow(my_wordcloud.recolor(color_func=image_colors), interpolation="bilinear", cmap=plt.cm.ocean)
plt.axis("off")
plt.figure()

# Figure 3: the source image itself.
plt.imshow(coloring, cmap=plt.cm.gray, interpolation="bilinear")
plt.axis("off")
plt.show()

Python--酷我音楽(Kuwo)からすべての曲の歌詞をクロールし、wordcloud でワードクラウドを生成_1