五彩歌词1——信息收集
作者:互联网
歌手信息
通过歌手预览页获取每位歌手详情页的 ID,并以歌手名为键、歌手 ID 为值保存在字典里。
get_artist_list.py
import requests
from bs4 import BeautifulSoup
import json
import re
import os
from download_lrc import download_artist_lyric
def get_artist(id):
    """Map artist names to artist IDs for one artist-category page.

    Fetches ``https://music.163.com/discover/artist/cat?id=<id>`` and reads
    every artist link inside the ``<ul id="m-artist-box">`` element.

    Args:
        id: Category ID of the artist overview page (converted with ``str``).

    Returns:
        dict mapping the link text (artist name) to the part of the link's
        ``href`` after the first ``=`` (the artist ID, as a string).  An empty
        dict is returned when the page does not contain the artist list.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    artist_url = 'https://music.163.com/discover/artist/cat?id=' + str(id)
    heads = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
        'Host': 'music.163.com'
    }
    # NOTE(review): verify=False turns off TLS certificate verification for
    # this HTTPS request; kept as in the original, but consider removing it.
    # The timeout keeps a stalled connection from hanging the whole crawl.
    artist_html = requests.get(
        artist_url, verify=False, headers=heads, timeout=10
    ).text
    soup = BeautifulSoup(artist_html, 'html.parser')

    artist_box = soup.find('ul', {'id': 'm-artist-box'})
    if artist_box is None:
        # Unexpected page layout (blocked request, changed markup, ...).
        return {}

    artist_dic = {}
    for link in artist_box.find_all('a', {'class': 'nm nm-icn f-thide s-fc0'}):
        href = str(link.attrs['href'])
        # Everything after the first '='; the whole href when there is none.
        artist_id = href.split('=', 1)[-1]
        artist_dic[str(link.text)] = artist_id
    return artist_dic
def download(id):
    """Download the lyrics of every artist listed on one category page.

    For each artist returned by ``get_artist(id)`` a directory named after the
    artist is created under the current working directory, the process
    changes into it, and ``download_artist_lyric`` is called with the artist
    ID (that function writes its files into the current directory).

    Args:
        id: Category ID of the artist overview page.
    """
    start_dir = os.getcwd()
    for artist, artist_id in get_artist(id).items():
        # Path separators inside a name (e.g. "AC/DC") would otherwise be
        # interpreted as nested directories and make directory creation fail.
        folder = artist.replace('/', '_').replace('\\', '_')
        target = os.path.join(start_dir, folder)
        # exist_ok lets an interrupted crawl be re-run without crashing.
        os.makedirs(target, exist_ok=True)
        os.chdir(target)
        try:
            download_artist_lyric(artist_id)
        finally:
            # Always return to the starting directory, even on failure, so
            # the next artist is not created inside this artist's folder.
            os.chdir(start_dir)
歌曲信息
通过歌手详情页获取歌曲 ID,并以歌曲名为键、歌曲 ID 为值保存在字典里。要注意处理歌曲名里的特殊字符(括号、斜杠、反斜杠、【 等):代码会把歌曲名从这些字符处截断,避免用歌曲名命名文件时出现非法文件名。
get_music_id.py
import requests
import json
import ast
from bs4 import BeautifulSoup
# Silence urllib3 warnings for the whole process; the request below is sent
# with verify=False, which would otherwise emit a warning on every call.
requests.packages.urllib3.disable_warnings()
def get_music_ids_by_musican_id(singer_id):
    """Collect the songs listed on an artist's detail page.

    Fetches ``http://music.163.com/artist?id=<singer_id>`` and parses the JSON
    stored in the ``<textarea id="song-list-pre-data">`` element.

    Args:
        singer_id: Artist ID (converted with ``str``).

    Returns:
        dict mapping each song name, cleaned by ``form_name``, to its song ID.
        Songs whose cleaned names are equal share one key (the later entry
        wins).  An empty dict is returned when the page has no song list.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    singer_url = 'http://music.163.com/artist?id=' + str(singer_id)
    heads = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
        'Host': 'music.163.com'
    }
    # The timeout keeps a stalled connection from hanging the whole crawl.
    page = requests.get(
        singer_url, verify=False, headers=heads, timeout=10
    ).text
    soup = BeautifulSoup(page, 'html.parser')

    song_data = soup.find('textarea', attrs={'id': 'song-list-pre-data'})
    if song_data is None:
        # Unexpected page layout (blocked request, changed markup, ...).
        return {}

    ids = {}
    for item in json.loads(song_data.text):
        ids[form_name(item['name'])] = item['id']
    return ids
def form_name(name):
    """Reduce a song title to a string that is safe to use as a file name.

    The title is cut at the first occurrence of any of ``(``, ``(``, ``\\``,
    ``/`` or ``【`` (typically the start of a "(Live)"-style suffix or a path
    separator) and surrounding whitespace is removed.

    If nothing is left after cutting (the title starts with one of those
    characters), the full title is kept instead, with path separators replaced
    by ``_``, so that the result is not an empty file name.

    Args:
        name: Raw song title.

    Returns:
        The cleaned title.
    """
    delimiters = ('(', '(', '\\', '/', '【')
    cut = len(name)
    for mark in delimiters:
        pos = name.find(mark)
        if pos != -1 and pos < cut:
            cut = pos

    cleaned = name[:cut].strip()
    if cleaned:
        return cleaned
    # Brackets are legal in file names; only the separators must go.
    return name.replace('\\', '_').replace('/', '_').strip()
歌词下载
通过歌词接口按歌曲 ID 下载歌词,并保存在对应歌手名的文件夹下。
download_lrc.py
import requests
import json
import re
import os
from get_music_id import get_music_ids_by_musican_id
# Silence urllib3 warnings for the whole process.
requests.packages.urllib3.disable_warnings()
def download_by_music_id(music_id):
    """Fetch the plain lyric text of one song.

    Calls ``http://music.163.com/api/song/lyric`` for the given song, takes
    ``response['lrc']['lyric']`` and removes every ``[...]`` tag (time stamps
    and similar markers) from it.

    Args:
        music_id: Song ID (converted with ``str``).

    Returns:
        The lyric text with tags removed and surrounding whitespace stripped,
        or ``''`` when the response is not valid JSON or carries no lyric.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = 'http://music.163.com/api/song/lyric?' + 'id=' + str(music_id) + '&lv=1&kv=1&tv=-1'
    heads = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
        'Host': 'music.163.com'
    }
    # The timeout keeps a stalled connection from hanging the whole crawl.
    r = requests.get(url, headers=heads, timeout=10)
    try:
        lrc = json.loads(r.text)['lrc']['lyric']
    except (ValueError, KeyError, TypeError):
        # Not JSON, or no 'lrc'/'lyric' entry (e.g. instrumental tracks).
        return ''
    if not isinstance(lrc, str):
        return ''
    # Non-greedy: a greedy '.*' would also delete lyric text sitting between
    # the first '[' and the last ']' on a line.
    return re.sub(r'\[.*?\]', '', lrc).strip()
def download_artist_lyric(uid):
    """Save the lyrics of every song of one artist into the current directory.

    For each song returned by ``get_music_ids_by_musican_id(uid)`` the lyric
    is fetched with ``download_by_music_id``; songs with an empty lyric are
    skipped.  Each remaining song is appended to ``<song name>.txt`` (UTF-8)
    as the song name on the first line followed by the lyric text.

    Args:
        uid: Artist ID.
    """
    music_ids = get_music_ids_by_musican_id(uid)
    print(music_ids)
    for name, music_id in music_ids.items():
        text = download_by_music_id(music_id)
        if not text:
            continue
        try:
            # 'with' closes the file even when a write fails part-way.
            with open(name + '.txt', 'a', encoding='utf-8') as lyric_file:
                lyric_file.write(name + '\n')
                lyric_file.write(text)
        except (OSError, ValueError):
            # Best effort: an unusable file name or unencodable text must not
            # stop the remaining songs.  (UnicodeError is a ValueError.)
            print('写入失败')
开始下载
import requests
import json
import re
from bs4 import BeautifulSoup
import os
from get_artist_list import download
# The artist category (overview page) ID can be changed below.
if __name__ == '__main__':
    # Guarded so that importing this module does not start a crawl.
    download(1001)
标签:name,收集,artist,歌词,ids,music,五彩,import,id 来源: https://blog.csdn.net/ruyihen/article/details/112756142