Python--Kuwo Music(酷我音楽)上のすべての歌曲をクロールし、WordCloudでワードクラウドを生成します。
3748 ワード
参考記事:http://blog.csdn.net/fontthrone/article/details/72775865
requestsライブラリの勉強:https://zhuanlan.zhihu.com/p/20410446
クローラーの原理紹介:http://www.cnblogs.com/zhaof/p/6898138.html
以下、実践的な内容(コード)です。
requestsライブラリの勉強:https://zhuanlan.zhihu.com/p/20410446
クローラーの原理紹介:http://www.cnblogs.com/zhaof/p/6898138.html
以下、実践的な内容(コード)です。
import requests
import re
from bs4 import BeautifulSoup
# Fetch the listing page and prepare the regex patterns used by the scraper.
# Background on the User-Agent header: http://www.cnblogs.com/rwxwsblog/p/4575894.html
# A browser-like User-Agent is sent with the first request below.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) Chrome/50.0.2661.102'}
# payload = {'wd': 'GitHub'} # unused example of query parameters
# Listing page on bd.kuwo.cn; the percent-encoded path segment decodes to "毛不易".
url = 'http://bd.kuwo.cn/mingxing/%E6%AF%9B%E4%B8%8D%E6%98%93.htm' # presumably the artist page - TODO confirm
res = requests.get(url, headers=headers)
# print(res.text) # debug: dump the fetched HTML
# Step 1: collect the relative song-page links from the listing page.
website1 = 'http://bd.kuwo.cn' # site root, prepended to the relative song paths
# NOTE(review): this literal ends with a trailing space inside the quotes, and
# the request below is sent without `headers`. No visible code reads this
# response before `page` is rebound in the per-song loop - looks redundant; confirm.
url = 'http://bd.kuwo.cn/mingxing/%E6%AF%9B%E4%B8%8D%E6%98%93.htm ' # same page as above, plus a trailing space
page = requests.get(url) # second GET of the listing page
# print(page.text) # debug: dump the HTML of the second response
repr1 = r'/yinyue/\d+' # matches relative paths of the form "/yinyue/<digits>"
# All matches in the first response, minus the first two.
LaternUrls = re.findall(repr1,res.text)[2:] # NOTE(review): reason for skipping two matches is not shown - confirm
# print(LaternUrls) # e.g. ['/yinyue/27101590', '/yinyue/27101590', ...]
repr2 = r'lId\d+' # matches tokens of the form "lId<digits>"; used later as element ids
....
# Visit every song page found on the listing page and collect the text of
# each element whose id looks like "lId<digits>".
singList = []  # one entry per extracted element, in page order
for url in LaternUrls:
    url = website1 + url  # relative "/yinyue/<digits>" path -> absolute URL
    page = requests.get(url)
    page.encoding = 'utf-8'  # decode page.text as UTF-8 regardless of the declared charset
    bs = BeautifulSoup(page.text, 'html.parser')
    # Every "lId<digits>" occurrence in the raw HTML is treated as a candidate id,
    # e.g. ["lId1", "lId2", "lId3", ...].
    id1 = re.findall(repr2, page.text)
    for i in id1:
        node = bs.find(id=i)
        if node is None:
            # The regex can match text that is not an element id; find() then
            # returns None and calling get_text() on it would raise AttributeError.
            continue
        ciju = node.get_text()
        singList.append(ciju)
# Build the full lyric text once instead of growing a string with `+=` per element.
singText = ''.join(singList)
print(singText)
print(len(singList))
# ,
import jieba #
# Tokens discarded after segmentation: line breaks and punctuation.
# The line-break entries must be written as escapes; a raw line break inside a
# single-quoted literal is a SyntaxError.
ExceptChars = ['\n', '\r\n', ', ', '(', ')']
# Segment the collected lyric text with jieba and drop the excluded tokens.
L = [x for x in jieba.cut(singText) if x not in ExceptChars]
print(L)
# WordCloud.generate() is fed a single string, so join the tokens with spaces.
words_split_space = ' '.join(L)
print(words_split_space, len(words_split_space), 'wwww')
#
import matplotlib.pyplot as plt
from wordcloud import WordCloud,ImageColorGenerator
import numpy as np
import PIL.Image as Image
# Render the segmented lyrics as a word cloud shaped and colored by an image,
# then show three figures: the plain cloud, the recolored cloud, and the image.
imgShape = 'SmallCat.jpg'  # image used both as the cloud mask and as the color source

# Open the image once and close the file deterministically; the original
# opened it twice and never closed either handle.
with Image.open(imgShape) as img:
    w, h = img.size  # image dimensions; not used by the code visible here
    coloring = np.array(img)  # pixel array passed as `mask` and to ImageColorGenerator

my_wordcloud = WordCloud(
    background_color="white",
    max_words=2000,
    mask=coloring,
    max_font_size=40,
    random_state=42,
    # NOTE(review): hard-coded Windows font path; presumably chosen so that
    # Chinese glyphs render - confirm and adjust on other platforms.
    font_path="C:/Windows/Fonts/simkai.ttf",
)
my_wordcloud.generate(words_split_space)
image_colors = ImageColorGenerator(coloring)

# Figure 1: the cloud as generated.
plt.imshow(my_wordcloud)
plt.axis("off")

# Figure 2: the same cloud recolored from the image's pixels.
plt.figure()
plt.imshow(
    my_wordcloud.recolor(color_func=image_colors),
    interpolation="bilinear",
    cmap=plt.cm.ocean,
)
plt.axis("off")

# Figure 3: the source image itself.
plt.figure()
plt.imshow(coloring, cmap=plt.cm.gray, interpolation="bilinear")
plt.axis("off")
plt.show()
:
・