0


爬虫很难吗?手把手酷我音乐解析


Nodejs酷我爬虫参考


1.排行榜列表

  1. 打开 kuwo.cn, 浏览器F12 监听网络请求,刷新一次页面
  2. 根据网页信息,定位接口地址。
  3. F12界面全局搜索(ctrl+shift+F)排行榜中的向云端 (你看到什么就搜什么 不固定)

在这里插入图片描述

  1. 点击搜索结果,跳转至关键词数据所在页面,在此页面搜索(ctrl+F)向云端,结论:数据来源于页面 window.__NUXT__.data[0].bang 对象。如下图:在这里插入图片描述在这里插入图片描述 页面用的是Nuxt服务器端渲染,看到a,b,c,d,e,f...了吧,这段js被混淆了
  2. 数据提取。可能首先想到的是用正则匹配,那有没有办法直接获取到 __NUXT__ 对象呢?既然是js,那就想办法运行这段js。
  3. 技术选型。那就用 nodejs 吧! nodejs 爬虫实战,看代码:
// Extract the ranking ("bang") list that kuwo.cn embeds in its home page.
// `get` is the author's wrapper around the `request` library (defined elsewhere).
const url = 'http://www.kuwo.cn/'; // simulate a visit to the home page
const result = await get(url);

// The page is rendered server-side by Nuxt; the first inline <script> element
// assigns the page state to window.__NUXT__.
const mc = result.match(/<script>.*?<\/script>/gi);
// console.log(mc);

// Receives the value assigned by the evaluated script below.
let __NUXT__;

// `match` returns null when nothing matches, so guard before indexing.
let sc = mc?.[0] ?? '';
if (sc) {
  sc = sc.replace('<script>', '');
  sc = sc.replace('</script>', '');
  // There is no `window` object outside the browser, so drop the prefix and
  // let the script assign to the local __NUXT__ variable instead.
  sc = sc.replace('window.', '');
  sc = sc.replace(/\r/gi, '');
  sc = sc.replace(/\n/gi, '');
  // console.log(sc);
}

// SECURITY NOTE(review): eval executes JavaScript downloaded from a remote
// server with this process's full privileges. It is kept because the tutorial
// relies on it; consider running the script in an isolated context instead.
const js = eval(sc);
// console.log(js);

// Fail with a clear message instead of an opaque TypeError when the page
// layout changed and nothing was extracted.
if (!__NUXT__?.data?.[0]) {
  throw new Error('Could not extract __NUXT__ data from the kuwo.cn home page');
}
const bangs = __NUXT__.data[0].bang;

结果:

[{"leader":"酷我热歌榜","num":"300","name":"酷我热歌榜","pic":"https://img4.kuwo.cn/star/albumcover/120/1/29/1934979845.jpg","id":"16","pub":"2023-07-14","musicList":[{"musicrid":"MUSIC_279292599","barrage":"0","ad_type":"","artist":"小霞&海洋Bo","mvpayinfo":{"play":0,"vid":0,"down":0},"pic":"https://img4.kuwo.cn/star/albumcover/500/1/29/1934979845.jpg","isstar":0,"rid":279292599,"duration":251,"score100":"93","ad_subtype":"0","content_type":"0","track":1,"hasmv":0,"releaseDate":"2023-06-29","album":"向云端","albumid":38777035,"pay":"16515324","artistid":8854278,"albumpic":"https://img4.kuwo.cn/star/albumcover/500/1/29/1934979845.jpg","originalsongtype":0,"isListenFee":false,"pic120":"https://img4.kuwo.cn/star/albumcover/120/1/29/1934979845.jpg","name":"向云端","online":1,

2.排行榜榜单详情

  1. 点击首页排行榜更多,监听网络请求在这里插入图片描述

详情地址为:

GET => https://www.kuwo.cn/api/www/bang/bang/musicList?bangId=93&pn=1&rn=20&httpsStatus=1&reqId=16fffd81-21fa-11ee-9cdf-25abfdc1606c&plat=web_www&from=
bangId=93//上个步骤中的id
pn=1//页码
rn=20//每页多少条
reqId=16fffd81-21fa-11ee-9cdf-25abfdc1606c//可有可无

同样用 nodejs 模拟这个请求,结果:

{"success":false,"message":"CSRF token Invalid!","now":"2023-07-14T05:49:44.720Z"}
  1. 爬虫的关键部分来了,请求头模拟。简单来说,就是用程序把请求做的像浏览器发出的一样。通过分析请求头,以及反复测试,最终得出以下两个头信息为必备:(把以下所有头信息都模拟更好)在这里插入图片描述

  2. F12 界面全局搜Cross 定位出算法:在这里插入图片描述 由上面代码可以看出来,Cross 是通过以 Hm_token 为参数,调用A()方法生成的。打断点调试A():在这里插入图片描述

  3. 梳理这两个参数的关系:

    Created with Raphaël 2.3.0

    Hm_token->EzAGdH3z6e3SPteBeHHSAYNKiBTGf4QP

    A()<-9b0e5cae88dc5782dc85c789a0f3d21f0e08dc69

    Cross-> d30d2c87eac9797e2376acadbabba058

Cross 是由 Hm_token 加密处理得到。

**由经验得知:Cross = md5(sha1(Hm_token))**

常用的加密算法:

md5 aes des sha

不知道数据用什么加密方式就全局搜,或者一个一个加密方法试。最后才选择单步调试~

  1. 加密算法有了,寻找 Hm_token :在这里插入图片描述 每次请求,都会往cookie里面写入Hm_token ,用程序模拟并获取cookie:
/**
 * Request the kuwo.cn home page and read the `Hm_token` value from the
 * cookies the server sets on the response.
 *
 * @returns {Promise<string>} The token, or '' when no Set-Cookie entry
 *   carries an `Hm_token` value.
 */
async function hmToken() {
  const url = `https://www.kuwo.cn/`;
  const result = await superagent.get(url);
  // Set-Cookie may be missing, a single string, or a list of several cookies;
  // normalise to an array and scan every entry rather than assuming the
  // token is always the first one.
  const cookies = [].concat(result?.header?.['set-cookie'] ?? []);
  // console.log(cookies);
  for (const cookie of cookies) {
    const mc = /Hm_token=([^;]*)/.exec(String(cookie));
    if (mc) {
      return mc[1];
    }
  }
  return '';
}
  1. 排行榜榜单详情最终代码:
/**
 * Fetch one page of a ranking's song list, authenticating with the `Cross`
 * request header, which is md5(sha1(Hm_token)) in hex.
 *
 * Relies on `hmToken` and the `get` request wrapper defined elsewhere.
 *
 * @param {number|string} id - Ranking id (`bangId`) taken from the ranking list.
 * @param {number|string} pn - Page number.
 * @param {number|string} rn - Number of songs per page.
 * @returns {Promise<object>} The parsed JSON response.
 * @throws {SyntaxError} When the response body is not valid JSON.
 */
async function getBangMusicList(id, pn, rn) {
  const hm = await hmToken();
  console.log(hm);

  const hmSha = crypto.createHash('sha1').update(hm).digest('hex');
  console.log(hmSha);

  // 'hex' renders the digest as a hexadecimal string.
  const cross = crypto.createHash('md5').update(hmSha).digest('hex');
  console.log(cross);

  const query =
    `bangId=${encodeURIComponent(id)}&pn=${encodeURIComponent(pn)}` +
    `&rn=${encodeURIComponent(rn)}&httpsStatus=1&plat=web_www&reqId=`;
  const url = `http://www.kuwo.cn/api/www/bang/bang/musicList?${query}`;

  // The server checks the Cross header against the Hm_token cookie, so both
  // must be sent together.
  const result = await get(url, {}, { Cross: cross, cookie: `Hm_token=${hm}` });
  return JSON.parse(result);
}

3.根据rid获取歌曲mp3地址

  1. 点首歌,监听网络请求:在这里插入图片描述
  2. 程序模拟:
/**
 * Request the play URL of a song by its rid.
 *
 * Relies on the `get` request wrapper defined elsewhere.
 *
 * @param {number|string} id - The song's rid, sent as the `mid` query parameter.
 * @returns {Promise<*>} Whatever `get` resolves to for the playUrl endpoint.
 */
async function getAudio(id) {
  const url = `https://www.kuwo.cn/api/v1/www/music/playUrl?mid=${encodeURIComponent(id)}&type=music&httpsStatus=1&reqId=&plat=web_www&from=`;
  const result = await get(url);
  // console.log(result);
  return result;
}
// ======================================
// Example: getAudio(279292599) resolves to the response shown below.
{"code":200,"msg":"success","reqId":"b9ca2ee89fec9ad131105ce494d05a2b","data":{"url":"https://other-web-nf01-sycdn.kuwo.cn/b09d144c5526bbb95e8d1166f1c8db8a/64b0fa4a/resource/n2/60/65/902137489.mp3?from$unkown&plat$web_www"},"profileId":"site","curTime":1689320311016,"success":true}

4.2023.07.17更新,请求头由“Cross”改为“Secret”

在这里插入图片描述

F12 定位 Secret:
在这里插入图片描述
单步调试这个 h() 方法:
在这里插入图片描述

分析完毕,看具体实现:

  1. 新增一个加密函数:(来源:某度)
/**
 * Obfuscate `str` with a password-seeded linear congruential keystream.
 *
 * Each character code of `str` is XOR-ed with a keystream byte and written as
 * hex (at least two digits per character); the random salt used to seed the
 * stream is appended as 8 hex digits. The salt comes from Math.random(), so
 * repeated calls with the same arguments give different outputs.
 *
 * @param {string} str - Text to encrypt.
 * @param {string} pwd - Password that seeds the keystream.
 * @returns {string|null} The hex string, or null when `pwd` is missing/empty
 *   or no usable multiplier can be derived from it.
 */
function encrypt(str, pwd) {
  // The original called alert(), which does not exist in Node; report the
  // problem on stderr and keep the "return null" contract.
  if (pwd == null || pwd.length <= 0) {
    console.error("Please enter a password with which to encrypt the message.");
    return null;
  }

  // Concatenate the decimal character codes of the password.
  let prand = "";
  for (let i = 0; i < pwd.length; i++) {
    prand += pwd.charCodeAt(i).toString();
  }

  // Derive the LCG parameters from that digit string.
  const sPos = Math.floor(prand.length / 5);
  const mult = Number.parseInt(
    prand.charAt(sPos) +
      prand.charAt(sPos * 2) +
      prand.charAt(sPos * 3) +
      prand.charAt(sPos * 4) +
      prand.charAt(sPos * 5),
    10,
  );
  const incr = Math.ceil(pwd.length / 2);
  const modu = Math.pow(2, 31) - 1;
  if (mult < 2) {
    console.error("Algorithm cannot find a suitable hash. Please choose a different password. \nPossible considerations are to choose a more complex or longer password.");
    return null;
  }

  // Mix in a random salt, then fold the digit string down to <= 10 digits.
  const salt = Math.round(Math.random() * 1000000000) % 100000000;
  prand += salt;
  while (prand.length > 10) {
    prand = (
      Number.parseInt(prand.substring(0, 10), 10) +
      Number.parseInt(prand.substring(10, prand.length), 10)
    ).toString();
  }
  let seed = (mult * Number(prand) + incr) % modu;

  // XOR every character with the next keystream byte and hex-encode it.
  let encStr = "";
  for (let i = 0; i < str.length; i++) {
    const encChr = str.charCodeAt(i) ^ Math.floor((seed / modu) * 255);
    encStr += encChr < 16 ? "0" + encChr.toString(16) : encChr.toString(16);
    seed = (mult * seed + incr) % modu;
  }

  // Append the salt as exactly 8 hex digits.
  return encStr + salt.toString(16).padStart(8, "0");
}
  1. 新增获取请求头方法:
/**
 * Request the kuwo.cn home page and split the first cookie it sets into its
 * name and value.
 *
 * @returns {Promise<{key: string, token: string}>} Cookie name and value;
 *   both are '' when the response sets no cookie.
 */
async function getHmIuvt() {
  const url = `https://www.kuwo.cn/`;
  const result = await superagent.get(url);
  // Set-Cookie may be missing or a bare string; normalise before indexing.
  const cookie = String([].concat(result?.header?.['set-cookie'] ?? [])[0] ?? '');
  // console.log(cookie);

  // Keep only the leading "name=value" pair, dropping attributes such as path.
  const pair = cookie.split(';', 1)[0];

  // Split on the FIRST '=' only, so values that themselves contain '='
  // (e.g. base64 padding) are not truncated.
  const eq = pair.indexOf('=');
  if (eq === -1) {
    return { key: pair, token: '' };
  }
  return { key: pair.slice(0, eq), token: pair.slice(eq + 1) };
}
  1. 完整代码示例(获取榜单列表):
/**
 * Fetch one page of a ranking's song list, authenticating with the `Secret`
 * request header (which replaced `Cross` as of the 2023-07-17 update).
 *
 * The secret is `encrypt(cookieValue, cookieName)` for the cookie returned by
 * `getHmIuvt`. Relies on `getHmIuvt`, `encrypt` and the `get` request wrapper
 * defined elsewhere.
 *
 * @param {number|string} id - Ranking id (`bangId`).
 * @param {number|string} pn - Page number.
 * @param {number|string} rn - Number of songs per page.
 * @returns {Promise<object>} The parsed JSON response.
 * @throws {SyntaxError} When the response body is not valid JSON.
 */
async function getBangMusicList(id, pn, rn) {
  const iuvt = await getHmIuvt();
  const secret = encrypt(iuvt.token, iuvt.key);

  const query =
    `bangId=${encodeURIComponent(id)}&pn=${encodeURIComponent(pn)}` +
    `&rn=${encodeURIComponent(rn)}&httpsStatus=1&plat=web_www&reqId=`;
  const url = `http://www.kuwo.cn/api/www/bang/bang/musicList?${query}`;

  // The Secret header is derived from the cookie, so the same cookie must be
  // sent along with it.
  const headers = { Secret: secret, cookie: `${iuvt.key}=${iuvt.token}` };
  const result = await get(url, {}, headers);
  return JSON.parse(result);
}

5.根据rid获取VIP/高音质音乐链接

rid:281472652 音乐Url,请打开以下链接↓↓↓↓

https://nmobi.kuwo.cn/mobi.s?f=kuwo&q=NI8S5evAnmGldi4g47EsqtfDbGsJckckbTQQd2LAgmDPITUWSd51OkjHRFj6xHPEQxNN6u+tD3K2e3HYhbE4U0pUYwqjd2kt
结果是有过期时间的哦~每打开一次会获取最新

format=flac
bitrate=2000
url=http://ll.sycdn.kuwo.cn/0f304a22b30673380bb9646d6737302d/64b512bc/resource/s1/77/85/2165046264.flac?bitrate$2000&format$flac&type$convert_url2
sig=9298802900736320721
rid=281472652
type=0

rid:228908 音乐Url,请打开以下链接↓↓↓↓

http://nmobi.kuwo.cn/mobi.s?f=kuwo&q=NI8S5evAnmGldi4g47EsqtfDbGsJckckbTQQd2LAgmDPITUWSd51OkjHRFj6xHPEQxNN6u+tD3KtKpZdbgOTWA==
结果是有过期时间的哦~每打开一次会获取最新

format=flac
bitrate=2000
url=http://sy.sycdn.kuwo.cn/188862a666b9fff315836ac0dc168441/64b51570/resource/s1/96/9/3483822377.flac?bitrate$2000&format$flac&type$convert_url2
sig=14962903177885333588
rid=260839262
type=1

**注意:电脑端访问时,需按 F12 将浏览器切换为手机模式才可访问;或者发送到手机端直接访问即可!**

6.根据rid获取歌词

/**
 * Fetch the lyric lines of a song by its rid from the mobile endpoint.
 *
 * @param {number|string} rid - The song's rid, sent as `musicId`.
 * @returns {Promise<Array|null>} The `data.lrclist` array from the response,
 *   or null when the response has no body or no lyric list.
 * @throws {SyntaxError} When the response body is not valid JSON.
 */
async function musicLrc(rid) {
  const url = `https://m.kuwo.cn/newh5/singles/songinfoandlrc?musicId=${rid}`;
  const result = await superagent.get(url);
  if (!result?.text) {
    return null;
  }
  const data = JSON.parse(result.text);
  // Error payloads may carry no `data` object; report "no lyrics" as null
  // instead of throwing a TypeError.
  return data?.data?.lrclist ?? null;
}

rid:228908 歌词 musicLrc(228908)

[{"lineLyric":"晴天-周杰伦","time":"0.0"},{"lineLyric":"词:周杰伦","time":"1.88"},{"lineLyric":"曲:周杰伦","time":"3.76"},{"lineLyric":"编曲:周杰伦","time":"5.65"},{"lineLyric":"制作人:周杰伦","time":"7.53"},{"lineLyric":"合声:周杰伦","time":"9.42"},{"lineLyric":"合声编写:周杰伦","time":"11.3"},{"lineLyric":"吉他:蔡科俊Again","time":"13.18"},{"lineLyric":"贝斯:陈任佑","time":"15.07"},{"lineLyric":"鼓:陈柏州","time":"16.95"},{"lineLyric":"录音助理:刘勇志","time":"18.84"},{"lineLyric":"录音工程:杨瑞代Alfa Studio","time":"20.73"},{"lineLyric":"混音工程:杨大纬杨大纬录音工作室","time":"27.72"},{"lineLyric":"故事的小黄花","time":"29.22"},{"lineLyric":"从出生那年就飘着","time":"32.67"},{"lineLyric":"童年的荡秋千","time":"36.15"},{"lineLyric":"随记忆一直晃到现在","time":"39.6"},{"lineLyric":"Re So So Si Do Si La","time":"44.55"},{"lineLyric":"So La Si Si Si Si La Si La So","time":"45.72"},{"lineLyric":"吹着前奏望着天空","time":"49.74"},{"lineLyric":"我想起花瓣试着掉落","time":"53.1"},{"lineLyric":"为你翘课的那一天","time":"56.58"},{"lineLyric":"花落的那一天","time":"58.8"},{"lineLyric":"教室的那一间","time":"60.51"},{"lineLyric":"我怎么看不见","time":"62.16"},{"lineLyric":"消失的下雨天","time":"64.01"},{"lineLyric":"我好想再淋一遍","time":"65.76"},{"lineLyric":"没想到失去的勇气我还留着","time":"69.75"},{"lineLyric":"好想再问一遍","time":"75.81"},{"lineLyric":"你会等待还是离开","time":"77.88"},{"lineLyric":"刮风这天我试过握着你手","time":"84.75"},{"lineLyric":"但偏偏雨渐渐大到我看你不见","time":"90.51"},{"lineLyric":"还要多久我才能在你身边","time":"98.7"},{"lineLyric":"等到放晴的那天也许我会比较好一点","time":"105.39"},{"lineLyric":"从前从前有个人爱你很久","time":"112.740005"},{"lineLyric":"但偏偏风渐渐把距离吹得好远","time":"118.53"},{"lineLyric":"好不容易又能再多爱一天","time":"126.72"},{"lineLyric":"但故事的最后你好像还是说了拜拜","time":"133.38"},{"lineLyric":"为你翘课的那一天","time":"154.59"},{"lineLyric":"花落的那一天","time":"156.81"},{"lineLyric":"教室的那一间","time":"158.52"},{"lineLyric":"我怎么看不见","time":"160.32"},{"lineLyric":"消失的下雨天","time":"162.03"},{"lineLyric":"我好想再淋一遍","time":"163.83"},{"lineLyric":"没想到失去的勇气我还留着","time":"167.97"},{"lineLyric":"好想再问一遍","time":"172.14"},{"lineLyric":"你会等待还是离开","time":"175.98"},{"lineLyric":"刮风这天我试过握着你手","time":"182.82"},{"lineLyric
":"但偏偏雨渐渐大到我看你不见","time":"188.37"},{"lineLyric":"还要多久我才能在你身边","time":"196.8"},{"lineLyric":"等到放晴的那天也许我会比较好一点","time":"203.45999"},{"lineLyric":"从前从前有个人爱你很久","time":"210.87"},{"lineLyric":"偏偏风渐渐把距离吹得好远","time":"216.99"},{"lineLyric":"好不容易又能再多爱一天","time":"224.79001"},{"lineLyric":"但故事的最后你好像还是说了拜拜","time":"231.45"},{"lineLyric":"刮风这天我试过握着你手","time":"238.95"},{"lineLyric":"但偏偏雨渐渐大到我看你不见","time":"241.77"},{"lineLyric":"还要多久我才能够在你身边","time":"245.43"},{"lineLyric":"等到放晴那天也许我会比较好一点","time":"248.94"},{"lineLyric":"从前从前有个人爱你很久","time":"252.69"},{"lineLyric":"但偏偏雨渐渐把距离吹得好远","time":"255.78"},{"lineLyric":"好不容易又能再多爱一天","time":"259.26"},{"lineLyric":"但故事的最后你好像还是说了拜","time":"262.74"}]

声明:仅用于学习交流,切勿用于其他用途~


本文转载自: https://blog.csdn.net/u012981972/article/details/131717687
版权归原作者 宁不凡啊 所有, 如有侵权,请联系我们删除。

“爬虫很难吗?手把手酷我音乐解析”的评论:

还没有评论