python爬虫——爬取网络各种资源
1.某讯视频
import requests
import re
from tqdm import tqdm

# URL of the video's .m3u8 playlist (Tencent Video CDN).
url ="https://apd-vlive.apdcdn.tc.qq.com/defaultts.tc.qq.com/B_JxNyiJmktHRgresXhfyMem1E4_DPhVbhxv28spVNp5Dj6vs6uhjyh7JsYzrUOQcL/svp_50112/ZV6e2op5S_S1AyUVjIbzXsJek1I7zANtM2Tv2peQ2YVY3YFimvlfjsXz1DQmrgxOvXrMl6Vs6HiozYNZAtgUo-JKZKtrgs6Vnubhh-IFRlbEUIcUZOu39XJX7hJt5uDrq9jZ-uScgH0wZi5gJSD03ZA0p0pU32ocepjRtSdPw3Zw-tx5nWAPXVGQZgfcOS3TTPtCNs0qoCwEgtP3z-i0YoIZT-MACU25AB2ILMv_z8HX2bCMw-pYKQ/gzc_1000102_0b53zuabqaaahiae4ebljvrmbtodddfqahca.f322062.ts.m3u8?ver=4"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"}

# Download the playlist text.
response = requests.get(url=url, headers=headers).text

# Drop '#EXT...' comment/metadata lines; what remains is the ordered
# list of .ts segment names.
m3u8_data = re.sub('#.*', '', response).split()

# Open the output file ONCE in 'wb' (the original re-opened it in 'ab'
# for every segment, which duplicates data when the script is rerun)
# and append each downloaded segment in playlist order.
with open('葫芦娃.mp4', 'wb') as f:
    for ts in tqdm(m3u8_data):
        ts_url ='https://ltscsy.qq.com/B_JxNyiJmktHRgresXhfyMeulWsW_l0JzF9NWhW-VqfOrj6vs6uhjyh7JsYzrUOQcL/svp_50112/vDKS4TspZpx8uhYKG9EVBe5I0alPqhW0tx6JBvJ2aS25FDZoNU5KZ6zqkZHI0oluZXeMLWOdHJVJkwU7hTESavdDeIvxTvVGzzDbdV2aXouqP0rqMwh7iS-HBpSSyoJ7-2trKnnldoZQZ49UsJ97yCUsFgW4sYeCBUsR2eKR2-HnO6bayh1rWhDvF63Nr5aLs8_zJIy0ARYOUMGtem6NWCkxgFVaQdLf2-dyEgVe40V1g7FupCtIRw/'+ts
        # Send the same UA headers the playlist request used (the
        # original omitted them here).
        video_data = requests.get(ts_url, headers=headers).content
        f.write(video_data)
2.某音视频
import requests

# Direct (signed, time-limited) URL of the Douyin MP4 stream.
url ="https://v3-web.douyinvod.com/f32cffe441fd98a917184c59b4c4e876/65f8333a/video/tos/cn/tos-cn-ve-15/oYBBlyMPtABcQwziAatpj9EgFfecM9iB8DhIAw/?a=6383&ch=5&cr=3&dr=0&lr=all&cd=0%7C0%7C0%7C3&cv=1&br=1024&bt=1024&cs=0&ds=4&ft=LjhJEL998xI7uEPmH0P5H4eaciDXt0YbZ_QEe09-mB~D1Inz&mime_type=video_mp4&qs=0&rc=aTs4aWY8aGk2ODZmNWU1ZkBpMzRxdjQ6ZmtzcTMzNGkzM0AxLmJhLjEtNWMxY15fM2MuYSNsbC1ycjRfMGhgLS1kLTBzcw%3D%3D&btag=e00008000&cquery=100a&dy_q=1710761249&feature_id=46a7bb47b4fd1280f3d3825bf2b29388&l=20240318192728651A9F612C610A0B9193"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"}

response = requests.get(url=url, headers=headers)
video_data = response.content

# 'wb' instead of the original 'ab': this is a single whole-file
# download, and append mode would corrupt the file on a rerun.
with open('aa.mp4', 'wb') as f:
    f.write(video_data)
3.某站视频
import json
import re
import requests
import os

# Bilibili serves DASH streams: video and audio are separate files
# that must be merged with ffmpeg afterwards.
url ="https://www.bilibili.com/video/BV1kC411a7cn/?spm_id_from=333.1007.tianma.2-2-4.click&vd_source=f4be0a001848558927c3212d18de2626"
headers ={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36","Cookie":"CURRENT_FNVAL=4048; DedeUserID=539138603; DedeUserID__ckMd5=671cb5f4af58c4e5; enable_web_push=DISABLE; buvid3=A9DB3A97-A849-C537-2902-0CA83CA2F72A78091infoc; b_nut=1706950178; i-wanna-go-back=-1; b_ut=5; _uuid=112B10126-29107-64D10-4BF1-610B365B5847376377infoc; buvid_fp_plain=undefined; buvid4=98D90CF7-909F-21A5-857B-E2A5352CB3A970761-024031409-FeaNf0N026PHa1xHiEcvIw%3D%3D; FEED_LIVE_VERSION=V8; header_theme_version=CLOSE; is-2022-channel=1; rpdid=|(RlRRR)lRR0J'u~u|R|mY)J; CURRENT_QUALITY=80; fingerprint=d35797e9afc601e4d1f9c94226939e95; buvid_fp=d35797e9afc601e4d1f9c94226939e95; bp_video_offset_539138603=909006115716464659; b_lsid=1A79ABE1_18E5076F703; bmg_af_switch=1; bmg_src_def_domain=i1.hdslb.com; SESSDATA=23faecec%2C1726298960%2C1a4a3%2A31CjBgBTtVdw4XOBT-_73RNdtnfi3F-w5kEs7_tl50_QHZHhu9sQ025YYZXx4OVxkF7GASVmttUFUxdGdCcnZZT2p0Y0VTRmMzVzhKXzRybzhGYkp4ZTQwQWNIQ256MWNwQjRYRnRLaC0wMlE3eVZ4S294Z3NYWVFjam9zMG5sNGJfVnlkYWVMWF9RIIEC; bili_jct=a32fd0781da03d9df2a4e7c79b3bc9ad; sid=75goizpx; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MTEwMDYxNjcsImlhdCI6MTcxMDc0NjkwNywicGx0IjotMX0.jOzBgRd1WLdIwFX7y_xFu1h20PjzadWHz5538s0qCOs; bili_ticket_expires=1711006107; home_feed_column=4; browser_resolution=778-730; PVID=2"}
response = requests.get(url=url, headers=headers)

# Video title from the page <h1>; used as the output filename.
title = re.findall('<h1 title="(.*?)"', response.text)[0]

# Stream metadata is embedded in the page as JSON assigned to
# window.__playinfo__ inside a <script> tag.
playinfo = re.findall('<script>window.__playinfo__=(.*?)</script>', response.text)[0]
playinfo = json.loads(playinfo)
audio_url = playinfo['data']['dash']['audio'][0]['baseUrl']
video_url = playinfo['data']['dash']['video'][0]['baseUrl']

# Reuse the same headers for the CDN requests (the original omitted
# them here).
audio_data = requests.get(audio_url, headers=headers).content
video_data = requests.get(video_url, headers=headers).content

# 'wb' so a rerun overwrites rather than appends.
with open('audio.mp3', 'wb') as f:
    f.write(audio_data)
with open('video.mp4', 'wb') as f:
    f.write(video_data)

# Merge the two streams without re-encoding (stream copy).
# NOTE(review): `title` comes from scraped HTML and is interpolated
# into a shell command — a title containing quotes would break or
# inject into the shell; consider subprocess.run([...], shell=False).
command =f'D:\\PackageDown\\ffmpeg-6.0-full_build\\bin\\ffmpeg.exe -i audio.mp3 -i video.mp4 -acodec copy -vcodec copy "{title}.mp4"'
os.system(command=command)

# Remove the intermediate single-stream files.
os.remove('video.mp4')
os.remove('audio.mp3')
4.音乐
import json
import requests

headers ={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"}
# Kugou song-info API; the response JSON carries the song name and a
# direct play URL.
url ="https://wwwapi.kugou.com/play/songinfo?srcappid=2919&clientver=20000&clienttime=1710753441574&mid=94dda63306ec019da57becefaf677248&uuid=94dda63306ec019da57becefaf677248&dfid=4FHz9d0RpBdS3oyFkd3iivge&appid=1014&platid=4&encode_album_audio_id=6ts59xd9&token=&userid=0&signature=f9070fa15e1408f6c86a667aecfc7b5a"
response = requests.get(url=url, headers=headers)
data = json.loads(response.text)

song_name = data['data']['song_name']
play_url = data['data']['play_url']          # direct .mp3 link

# Download the audio and write it under the song's name.
music = requests.get(play_url, headers=headers).content
with open(f"{song_name}.mp3", "wb") as f:
    f.write(music)
最后
- 某讯视频是采用m3u8视频流格式,先找到你所需要爬取电影的m3u8文件的url,然后通过访问这个url得到 .ts文件的地址
- 某音和音乐只需要找到视频的链接就可以直接下载
- 某站的视频有所不同,某站视频的视频和音频是分开的,需要搜索 .m4s文件,分别找到视频和音频的url。将视频和音频下载下来之后,会得到两个文件:一个.mp3文件和一个.mp4文件。需要借助第三方软件ffmpeg将视频和音频合并,这样就得到了完整的视频资源
上面爬取的某讯视频和音乐都是免费的,是会员的就不能爬(如果你充了会员的话那也可以爬),爬虫是可见即可爬,切记爬虫不等于破解!!!
上面代码仅供参考,请勿商用!!!
本文转载自: https://blog.csdn.net/qq_71450730/article/details/140248263
版权归原作者 太阳照着月亮 所有, 如有侵权,请联系我们删除。