import os
from re import search  # regex search helper

from requests import Session  # a Session keeps the connection alive, which speeds up downloads
# Input records, already formatted. Only a single entry is listed here to
# keep the file short.
items = [
    {
        "bookId": 124114,
        "bookName": "(21下)大学英语四级考试超详解真题+模拟",
        "canDown": True,
        "canShare": True,
        "content": "/resourceservice/mediaplay.do?resId=29822410&resIdSign=82e981&mediaType=3",
        "createUser": 7345927,
        "dirId": 29822409,
        # Query parameters used by the downloader: resId -> rid, resIdSign -> sign.
        "downUrl": "/resourceservice/mediaplay.do?resId=29822410&resIdSign=82e981&mediaType=3",
        "fkId": 3002989,
        "gmtCreate": 1625205456000,
        "gmtModified": 1625205542000,
        "id": 29822410,
        "idSign": "82e981",
        "isDelete": 0,
        "length": 18219091,
        "mediaType": 3,
        "pcrId": 22859080,
        "pcrName": "真题听力音频",
        "pv": 323,
        "status": 1,
        "thumbnails": "https://cdnqn-user.xdfsjj.com/7345927_352468E47886D235F25FD74A22E5B3FC.png?fix=no&imageView2/0/w/312/h/462",
        "times": 1518,
        "title": "2019年12月四级真题(第一套)",
        "type": 2,
        "viewCount": 323,
    },
]
# Directory the audio files are written into (relative to the working directory).
_OUTPUT_DIR = 'mp3'
# Seconds to wait for the server; without a timeout requests can block forever.
_REQUEST_TIMEOUT = 30
# Map characters that are not allowed in file names (on Windows, plus the
# POSIX path separator) to '_' so a title can never escape _OUTPUT_DIR or
# make open() fail.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def _extract_ids(content):
    """Return ``(rid, sign)`` parsed from an item's ``content`` URL, or ``None``.

    ``[^&]+`` stops the sign at the next query separator, so it cannot
    swallow additional ``&key=value`` pairs the way a greedy ``.*`` can.
    """
    found = search(r'resId=(\d+)&resIdSign=([^&]+)&mediaType=3', content)
    return found.groups() if found else None


def _find_mp3_url(html):
    """Return the direct mp3 link from the audio player page, or ``None``."""
    found = search(
        r'<audio id="audio_media" height="0" width="0" preload="auto" '
        r'src="(https://ali-media\.xdfsjj\.com/[^"]+\.mp3)"></audio>',
        html,
    )
    return found.group(1) if found else None


x = Session()  # a single Session so the connection is reused for every request
os.makedirs(_OUTPUT_DIR, exist_ok=True)  # open() below does not create directories

for item in items:
    name = item['title']

    ids = _extract_ids(item['content'])
    if ids is None:
        # Report and move on instead of dying on `None.groups()`.
        print(name, 'skipped: resId/resIdSign not found in content')
        continue
    rid, sign = ids

    # bid/cid are taken from the item itself; for the sample item they equal
    # the previously hard-coded 124114 / 22859080, which remain the fallback.
    # NOTE(review): assumes bid == bookId and cid == pcrId -- confirm against
    # the site before relying on this for other books.
    page = x.get(
        'https://mp.xdfsjj.com/share/audio.htm',
        params={
            'bid': item.get('bookId', 124114),
            'cid': item.get('pcrId', 22859080),
            'rid': rid,
            'sign': sign,
        },
        timeout=_REQUEST_TIMEOUT,
    )
    page.raise_for_status()

    src = _find_mp3_url(page.text)
    if src is None:
        print(name, 'skipped: mp3 link not found on the player page')
        continue

    print(name, src)  # progress output

    # Download before opening the target file so that a failed request does
    # not leave an empty or error-page ".mp3" behind.
    audio = x.get(src, timeout=_REQUEST_TIMEOUT)
    audio.raise_for_status()

    safe_name = name.translate(_UNSAFE_FILENAME_CHARS)
    with open(f'{_OUTPUT_DIR}/{safe_name}.mp3', 'wb') as f:
        f.write(audio.content)