Python 爬取网易云音乐排行榜
# -*- coding: utf-8 -*-
"""
 * @Date : 2020-09-29 15:16
 * @Auth : xiaoshuai.zhu
 * @File :Python 爬取网易云音乐排行榜.py
 * @IDE :PyCharm
 * @Version 1.0
"""
import os
from time import sleep
import requests
import bs4
import json
import openpyxl
from openpyxl.chart import Reference, BarChart, PieChart
from openpyxl.chart.marker import DataPoint
from openpyxl.styles import Font

def open_url(url, IP=None):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
    }
    if (IP != None):
        # proxy = {'http': 'http://' + str(IP)}
        proxy = {'http': 'http://' + "1.85.5.66:8060"}
        print(f"IP:{proxy}")
        res = requests.get(url, headers=headers, proxies=proxy)
    else:
        res = requests.get(url, headers=headers)
    return res

def getTopList(url):
    """
        获取排行榜
        :return: 返回一个字典,name: 歌曲名称,artists: 歌曲作者
    """
    global rawjsonloads
    TopList = []
    res = open_url(url)
    topSongJson = json.loads(res.text)
    if topSongJson["code"] == 405:
        print("操作频繁,请稍后再试")
        return
    for item in topSongJson["result"]["tracks"]:
        songInfo = {}
        songInfo["id"] = item["id"]
        songInfo["name"] = item["name"]
        songInfo["artists"] = item["artists"][0]["name"]
        songInfo["url"] = "https://music.163.com/#/song?id=" + str(item["id"])
        TopList.append(songInfo)
    return TopList

def getSongConnect(url):
    """
        获取歌曲评论
        :return: 返回一个字典,user: 评论用户,content: 评论内容
    """
    nameID = url.split('=')[1]
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "referer": "https://music.163.com"
    }
    data = {
        "params": "BEHhnfg5V0r7dbXfxcEUEs/WFPXShib778VClI5bvPwG0OeuIH7HPwU17zgk+fBm1uvFQXAyKyi/wGjWR/pHe22WldVOhBGliKCOei+8rbKeESgze+JoG/uIq8Rhzsum7zywdx6OjK4hqm21mO30HLKIfNc9ZJ0GEdcgdS+VCXQOdmccxwtot/qcmOIb4FKy",
        "encSecKey": "4ce93300df4e8eccfcf6a390f8b6985b10ac903e4af7764cbd73b177a16e1b927942156c6c2220f47eea62c7ba5329069b6d96c5a316b36f1f81dcbfd06f3a2e86088fac64d6c059c8a59dce42ce07dc59ef2dd546c038bea917ef871387efe427cb1a4181d37dcade75cfcaafa57dea14d7cb95de4e11875b03871dd4983236"
    }
    targetUrl = "https://music.163.com/api/v1/resource/comments/R_SO_4_{}?csrf_token=".format(
        nameID)
    res = requests.get(targetUrl, headers=headers, data=data)
    # 解析json
    AllJson = json.loads(res.text)
    hotComments = AllJson['hotComments']
    # 输出
    ConnectList = []
    for item in hotComments:
        ConnectInfo = {}
        ConnectInfo["user"] = item['user']['nickname']
        ConnectInfo["content"] = item['content']
        ConnectList.append(ConnectInfo)
    return ConnectList

def createExcelForm(form, name, titles, TopList):
    form.create_sheet(name)
    # form1 = form.active
    form1 = form[name]

    form1.title = name
    # 生成标题
    for item in titles:
        form1.cell(1, titles.index(item) + 1).value = item
    # 编辑表格
    for item in TopList:
        text = [TopList.index(item) + 1, item['name'], item['artists'], f"=COUNTIF(C:C,C{TopList.index(item) + 2})"]
        form1.append(text)
    # 设置列宽
    form1.column_dimensions['B'].width = 40.0
    form1.column_dimensions['C'].width = 20.0
    # 创建图表
    pie = PieChart()
    labels = Reference(form1, min_col=3, min_row=2, max_row=102)
    data = Reference(form1, min_col=4, min_row=2, max_row=102)
    pie.add_data(data, titles_from_data=True)
    pie.set_categories(labels)
    pie.title = "歌手频率"

    # Cut the first slice out of the pie
    slice = DataPoint(idx=0, explosion=20)
    pie.series[0].data_points = [slice]
    form1.add_chart(pie, "G5")

    # values = Reference(form1, min_col=3, min_row=2, max_col=4, max_row=100)
    # chart = PieChart()
    # chart.add_data(values)
    # form1.add_chart(chart, "E15")
    # 设置字体
    font = Font(name=u'微软雅黑')
    # 第 1 到 110 行
    for x in range(1, 110):
        form1.cell(row=x, column=1).font = font
        form1.cell(row=x, column=2).font = font
        form1.cell(row=x, column=3).font = font
        form1.cell(row=x, column=4).font = font

def main():
    form = openpyxl.Workbook()
    titles = ["序号", "名称", "作者", "歌手频率"]

    print("正在爬取 云音乐飙升榜")
    TopList = getTopList("https://music.163.com/api/playlist/detail?id=19723756")
    if TopList:
        createExcelForm(form, "云音乐飙升榜", titles, TopList)
    else:
        print("爬取 云音乐飙升榜 失败")

    sleep(1)
    print("正在爬取 云音乐新歌榜")
    TopList = getTopList("https://music.163.com/api/playlist/detail?id=3779629")
    if TopList:
        createExcelForm(form, "云音乐新歌榜", titles, TopList)
    else:
        print("爬取 云音乐新歌榜 失败")

    sleep(1)
    print("正在爬取 网易原创歌曲榜")
    TopList = getTopList("https://music.163.com/api/playlist/detail?id=2884035")
    if TopList:
        createExcelForm(form, "网易原创歌曲榜", titles, TopList)
    else:
        print("爬取 网易原创歌曲榜 失败")

    sleep(1)
    print("正在爬取 云音乐热歌榜")
    TopList = getTopList("https://music.163.com/api/playlist/detail?id=3778678")
    if TopList:
        createExcelForm(form, "云音乐热歌榜", titles, TopList)
    else:
        print("爬取 云音乐热歌榜 失败")

    # 保存表格
    print("正在保存表格")
    try:
        # 删除默认表
        form.remove_sheet(form.get_sheet_by_name("Sheet"))
        form.save("网易云音乐排行.xlsx")
        form.close()
        print("表格保存成功,已存放于本目录下")
        os.popen("网易云音乐排行.xlsx")
    except Exception:
        print("表格保存失败")

    # -------------------------------------------------------------------------------------------
    # -------------------------------------------------------------------------------------------
    # 云音乐飙升榜
    # print("\n云音乐飙升榜")
    # TopList = getTopList("https://music.163.com/discover/toplist")
    # print(TopList)

    # 云音乐新歌榜
    # print("\n云音乐新歌榜")
    # TopList = getTopList("https://music.163.com/discover/toplist?id=3779629")
    # for item in TopList:
    #     print(f"\n名称:{item['name']},作者:{item['artists']}")
    # # 网易原创歌曲榜
    # print("\n网易原创歌曲榜")
    # TopList = getTopList("https://music.163.com/discover/toplist?id=2884035")
    # for item in TopList:
    #     print(f"\n名称:{item['name']},作者:{item['artists']}")
    # # 云音乐热歌榜
    # print("\n云音乐热歌榜")
    # TopList = getTopList("https://music.163.com/discover/toplist?id=3778678")
    # for item in TopList:
    #     print(f"\n名称:{item['name']},作者:{item['artists']}")

if __name__ == '__main__':
    main()
上一篇
下一篇