import requests
from lxml import etree
from urllib import request
import os
# Scrape one ximalaya.com album page and download every track it lists
# into the local "mjx" directory as .m4a files.

# Browser-like User-Agent sent with every `requests` call below.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0'
}
url = 'https://www.ximalaya.com/lishi/4164479/'

# Seconds to wait on the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 10
DOWNLOAD_DIR = 'mjx'
# Characters that are path separators or are rejected in file names on
# common file systems; each one is replaced with "_" in the saved name.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

# Fetch the album page and collect the per-track links from it.
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of parsing an error page.
response.raise_for_status()
html_str = response.text
html_ele = etree.HTML(html_str)
href_list = html_ele.xpath('//ul[@class="dOi2"]/li/div[2]/a/@href')

# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

for href in href_list:
    # The track id is the last path segment of the link; strip a trailing
    # slash first so it cannot come out empty.
    track_id = href.rstrip('/').split('/')[-1]
    xiangqing_url = 'https://www.ximalaya.com/revision/play/tracks?trackIds=' + track_id
    print(xiangqing_url)

    track_response = requests.get(
        xiangqing_url, headers=headers, timeout=REQUEST_TIMEOUT
    )
    track_response.raise_for_status()
    json_dict = track_response.json()

    # Expected shape: {"data": {"tracksForAudioPlay": [{"src": ..., "trackName": ...}]}}
    tracks = (json_dict.get('data') or {}).get('tracksForAudioPlay') or []
    if not tracks:
        print('skipped (no track data): ' + xiangqing_url)
        continue

    track = tracks[0]
    src_str = track.get('src')
    if not src_str:
        # NOTE(review): presumably the API leaves "src" empty for tracks
        # that are not freely playable - confirm. Skip rather than abort
        # the remaining downloads.
        print('skipped (no audio source): ' + xiangqing_url)
        continue

    # Fall back to the track id if the title is missing, and neutralise
    # characters that would break the path or escape DOWNLOAD_DIR.
    trackName = str(track.get('trackName') or track_id).translate(
        _UNSAFE_FILENAME_CHARS
    )
    request.urlretrieve(src_str, os.path.join(DOWNLOAD_DIR, trackName + '.m4a'))