其他分享
首页 > 其他分享 > 五彩歌词1——信息收集

五彩歌词1——信息收集

作者:互联网

歌手信息

通过歌手预览页来获取歌手详情页的ID,并将其和歌手名对应,保存在字典里。
在这里插入图片描述
get_artist_list.py

import requests
from bs4 import BeautifulSoup
import json
import re
import os
from download_lrc import download_artist_lyric


def get_artist(id):
    """Scrape an artist listing page and map artist names to artist IDs.

    Args:
        id: Value appended to the ``discover/artist/cat?id=`` URL.

    Returns:
        dict mapping each artist link's text to the part of its ``href``
        after the first ``=``. Empty when the response does not contain
        the ``m-artist-box`` list.
    """
    artist_url = 'https://music.163.com/discover/artist/cat?id=' + str(id)
    heads = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
        'Host': 'music.163.com'
    }
    artist_html = requests.get(artist_url, verify=False, headers=heads).text
    soup = BeautifulSoup(artist_html, 'html.parser')
    artist_box = soup.find('ul', {'id': 'm-artist-box'})
    if artist_box is None:
        # The list container is absent from the response: nothing to parse.
        return {}
    artist_dic = {}
    for link in artist_box.find_all('a', {'class': 'nm nm-icn f-thide s-fc0'}):
        href = str(link.attrs['href'])
        # The artist ID is whatever follows the first '=' in the link target.
        artist_dic[str(link.text)] = href[href.find('=') + 1:]
    return artist_dic


def download(id):
    """Download lyrics for every artist returned by ``get_artist(id)``.

    A directory named after each artist is created under the current
    working directory, and ``download_artist_lyric`` is run from inside it.

    Args:
        id: Value passed through to ``get_artist``.
    """
    start_dir = os.getcwd()
    for artist, artist_id in get_artist(id).items():
        # exist_ok lets a re-run reuse directories left by an earlier run.
        os.makedirs(artist, exist_ok=True)
        os.chdir(artist)
        try:
            download_artist_lyric(artist_id)
        finally:
            # Always return to the starting directory, even if the download
            # raises, so the next artist is not nested inside this one.
            os.chdir(start_dir)


歌曲信息

通过歌手详情页来获取歌曲ID,并将其和歌曲名对应,保存在字典里。要注意去除歌曲里的特殊字符,避免在用歌曲名命名文件时,特殊歌曲名造成的文件名错误。
在这里插入图片描述
get_music_id.py

import requests
import json
import ast
from bs4 import BeautifulSoup

# Silence urllib3 warnings; the request below is sent with verify=False.
requests.packages.urllib3.disable_warnings()


def get_music_ids_by_musican_id(singer_id):
    """Collect the songs embedded in an artist page.

    The page is expected to carry a JSON array inside the
    ``<textarea id="song-list-pre-data">`` element; each entry's ``name``
    and ``id`` are read.

    Args:
        singer_id: Value appended to the ``artist?id=`` URL.

    Returns:
        dict mapping each song name (cleaned by ``form_name``) to its song
        ID. Entries whose cleaned names coincide overwrite one another.
        Empty when the textarea is not present in the response.
    """
    singer_url = 'http://music.163.com/artist?id=' + str(singer_id)
    heads = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
        'Host': 'music.163.com'
    }
    page = requests.get(singer_url, verify=False, headers=heads).text
    soup = BeautifulSoup(page, 'html.parser')
    song_data = soup.find('textarea', attrs={'id': 'song-list-pre-data'})
    if song_data is None:
        # No embedded song list in this response: report no songs.
        return {}
    ids = {}
    for item in json.loads(song_data.text):
        ids[form_name(item['name'])] = item['id']
    return ids


def form_name(name):
    """Cut a song name at the first character that marks a suffix.

    The name is truncated at the earliest occurrence of an ASCII ``(``,
    a full-width ``(``, a backslash, a forward slash, or ``【``, so the
    result can be used as a file name.

    Args:
        name: Raw song name.

    Returns:
        The part of ``name`` before the first such character, or ``name``
        unchanged when none is present.
    """
    # Cutting at each delimiter in turn leaves the prefix that ends at
    # whichever delimiter occurs first in the name.
    for delimiter in ('(', '(', '\\', '/', '【'):
        name = name.partition(delimiter)[0]
    return name


歌词下载

通过歌曲页面下载歌词,并保存在以歌手名命名的文件夹下。
download_lrc.py

import requests
import json
import re
import os
from get_music_id import get_music_ids_by_musican_id
# Silence urllib3 warnings raised through requests.
requests.packages.urllib3.disable_warnings()


def download_by_music_id(music_id):
    """Fetch the lyric text for one song, with bracketed tags removed.

    Args:
        music_id: Song ID placed in the lyric API query string.

    Returns:
        The lyric with every ``[...]`` tag stripped and surrounding
        whitespace trimmed, or ``''`` when the response carries no
        ``lrc.lyric`` string.
    """
    url = 'http://music.163.com/api/song/lyric?' + 'id=' + str(music_id) + '&lv=1&kv=1&tv=-1'
    heads = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
        'Host': 'music.163.com'
    }
    response = requests.get(url, headers=heads)
    payload = json.loads(response.text)
    try:
        lrc = payload['lrc']['lyric']
    except (KeyError, TypeError):
        # Response has no lyric entry (or is not shaped like a mapping).
        return ''
    if not isinstance(lrc, str):
        return ''
    # Non-greedy match: remove each tag on its own, so text sitting between
    # two bracketed tags on the same line is kept.
    return re.sub(r'\[.*?\]', '', lrc).strip()


def download_artist_lyric(uid):
    """Write one ``<song name>.txt`` lyric file per song of an artist.

    Files are opened in append mode in the current working directory; each
    write adds the song name on its own line followed by the lyric text.
    Songs with an empty lyric are skipped. A failed write is reported and
    does not stop the remaining songs.

    Args:
        uid: Artist ID passed to ``get_music_ids_by_musican_id``.
    """
    music_ids = get_music_ids_by_musican_id(uid)
    print(music_ids)
    for name, music_id in music_ids.items():
        text = download_by_music_id(music_id)
        if not text:
            continue
        try:
            # The context manager closes the file even if a write fails.
            with open(name + '.txt', 'a', encoding='utf-8') as lyric_file:
                lyric_file.write(name + '\n')
                lyric_file.write(text)
        except (OSError, ValueError):
            # OSError: unusable path or I/O failure; ValueError: a name or
            # text that cannot be used or encoded.
            print('写入失败')

开始下载

import requests
import json
import re
from bs4 import BeautifulSoup
import os
from get_artist_list import download

# Change the artist overview page ID below to crawl a different page.
# Guarded so that importing this module does not start the crawl.
if __name__ == '__main__':
    download(1001)

标签:name,收集,artist,歌词,ids,music,五彩,import,id
来源: https://blog.csdn.net/ruyihen/article/details/112756142