其他分享
首页 > 其他分享 > 爬虫实现qq音乐歌单无vip批量下载

爬虫实现qq音乐歌单无vip批量下载

作者:互联网

爬虫实现qq音乐歌单无vip批量下载

分享歌单链接
电脑网页无法获取歌单完整信息,所以需要借助手机下载网页文件
利用下载网站实现批量下载

music.py

import requests
from fake_useragent import UserAgent
from lxml import html
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities


# 读取qq音乐分享文件
def get_html_file():
    """Ask the user for the saved playlist page and return its contents.

    The path is read interactively from standard input; a bare file name
    is resolved by ``open`` relative to the current working directory.

    Returns:
        The whole file as a string, decoded as UTF-8.
    """
    path = input('请输入html文件地址(项目目录下则直接输入文件名包括后缀名):')
    with open(path, mode='r', encoding='utf-8') as page:
        return page.read()


# 从html中获取歌曲的信息(歌名和歌手)
def get_music_name_and_singer(html_):
    """Extract the song entries (title and artist text) from the page.

    Args:
        html_: HTML source of the saved QQ Music playlist share page.

    Returns:
        The text nodes of every ``<span class="song_list__txt">`` element,
        as returned by the XPath query.
    """
    # Selector used for a playlist whose songs share one artist. Per the
    # original author's note, a playlist of different songs needs
    # '//p[@class="song_list__desc"]/text()' instead.
    song_xpath = '//span[@class="song_list__txt"]/text()'
    document = html.etree.HTML(html_)
    return document.xpath(song_xpath)


# 从下载网站获取歌曲下载地址
def get_download_url(music_info):
    """Look up a song's download link on the third-party download site.

    The page is loaded in a headless Chrome instance and the link is read
    from the rendered page source after a fixed wait, so that content the
    site fills in asynchronously is present.

    Args:
        music_info: Search text for the song (title and artist as scraped
            from the playlist page). It is converted with ``str``.

    Returns:
        The ``href`` of the ``<a id="j-src-btn">`` element, or ``None`` when
        the rendered page has no such link or the link is empty.
    """
    from urllib.parse import quote

    # Percent-encode the search text so characters such as '&', '#' or '/'
    # in a song title cannot truncate or corrupt the ``name`` parameter.
    url = 'https://www.musictool.top/?name={}&type=qq'.format(
        quote(str(music_info), safe=''))

    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    # Chrome takes its user agent from a command-line switch. The previous
    # 'chrome.page.settings.userAgent' capability key is not among
    # ChromeDriver's documented capabilities, and the deprecated
    # ``chrome_options`` / ``desired_capabilities`` keywords are rejected by
    # current Selenium 4 releases.
    option.add_argument('--user-agent=' + UserAgent().chrome)
    driver = webdriver.Chrome(options=option)
    try:
        driver.get(url)
        # Fixed pause: gives the page time to render and throttles requests.
        sleep(6)
        response = driver.page_source
    finally:
        # Always shut the browser down; otherwise every song leaves a
        # Chrome process behind.
        driver.quit()

    # xpath() returns a list (empty when nothing matches), never None.
    hrefs = html.etree.HTML(response).xpath('//a[@id="j-src-btn"]/@href')
    download_url = ''.join(hrefs)
    if not download_url:
        return None
    print(download_url)
    return download_url


# 下载歌曲,保存歌曲
def download_music(download_url, name):
    """Download one song and store it as ``music/<name>.mp3``.

    Failures are reported on standard output instead of being raised, so a
    single bad song does not abort a batch run.

    Args:
        download_url: Direct URL of the audio file, or ``None`` / an empty
            string when no link could be found.
        name: File name (without extension) to save the song under; it is
            also used in the progress messages.

    Returns:
        None.
    """
    import os

    # Nothing to fetch: report the failure and stop here rather than
    # handing an unusable URL to requests.
    if not download_url:
        print(name + '---下载失败')
        return

    headers = {
        'User-Agent': UserAgent().random
    }
    try:
        # The timeout keeps a stalled connection from hanging the batch.
        response = requests.get(download_url, headers=headers, timeout=60)
    except requests.RequestException:
        print(name + '---下载失败')
        return

    if response.status_code != 200:
        print(name + '---下载失败')
        return

    # Create the target directory on first use instead of assuming it exists.
    os.makedirs('music', exist_ok=True)
    with open('music/' + name + '.mp3', 'wb') as f:
        f.write(response.content)
    print('下载完成---' + name)
    print('--------------------')


# 主方法,遍历歌曲信息执行下载歌曲
def main():
    """Download every song listed in a saved playlist page.

    Reads the page, extracts the song entries, and for each entry looks up
    its download link and saves the file. The link is looked up with the
    entry text exactly as scraped (e.g. '马良/孙茜茹 往后余生'); only
    afterwards is the text cleaned up for use as a file name.
    """
    page_source = get_html_file()
    music_infos = get_music_name_and_singer(page_source)
    print(music_infos)
    for raw_info in music_infos:
        print('开始下载---' + raw_info)
        download_url = get_download_url(raw_info)
        # Build the file name: drop the ' · ' separator and '?', and turn
        # '/' into '_' so it is not taken as a path separator.
        file_stem = raw_info.replace(' · ', ' ')
        file_stem = file_stem.replace('/', '_')
        file_stem = file_stem.replace('?', '')
        download_music(download_url, file_stem.strip())


# Run the batch download only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


说明:

1.歌曲下载网站使用的是Ajax异步请求,所以不能通过常规方法爬取歌曲
 2.采用selenium来获取网页完整的代码,从而获取歌曲的下载地址
 3.爬取过程中ua很重要,一定要设置
 4.访问速度一定不要太快,所以采用了sleep()方法来减慢爬取速度,防止被检测到电脑操作,从而报错

标签:qq,info,name,url,html,vip,歌单,music,download
来源: https://www.cnblogs.com/zq98/p/15028019.html