# -*- coding: utf-8 -*-
"""
* target : 爬朱雯轩的所有短视频
* @Date : 2020-09-29 13:03
* @Auth : xiaoshuai.zhu
* @File :爬取抖音用户视频.py
* @IDE :PyCharm
* @Version 1.0
"""
import os
import requests
import random
import time
import urllib3
def timestamp():
    """Return the current Unix time as a pair of decimal strings.

    The clock is read only once so that both values describe the same
    instant; reading it twice could yield a seconds value and a
    milliseconds value that straddle a second boundary.

    :return: ``(seconds, milliseconds)`` -- whole seconds and whole
        milliseconds since the epoch, each rendered as ``str``.
    """
    millis = int(time.time() * 1000)
    # Derive the seconds from the same reading instead of a second clock call.
    return str(millis // 1000), str(millis)
def fabu_time(t):
    """Format a Unix timestamp as a local-time string.

    :param t: seconds since the epoch, as accepted by ``time.localtime``.
    :return: the local time rendered as ``YYYY-MM-DD HH:MM:SS``.
    """
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t))
def _safe_filename(text):
    """Replace characters that are not allowed in a Windows file name with '_'."""
    return ''.join('_' if ch in '\\/:*?"<>|' or ord(ch) < 32 else ch for ch in text)


def save(dict):
    """Download one video and write it into the per-user folder.

    :param dict: parsed video record; the keys read here are ``desc``
        (the title, which may be empty), ``user_name`` and ``video_url``.
        The parameter name shadows the builtin but is kept unchanged so
        existing callers keep working.
    :return: None. Nothing is written when the download answers with an
        HTTP error status.
    """
    info = dict
    # Videos without a title would all map to the same file name, so they
    # are named after the author plus a random number instead.
    # NOTE(review): only 101 distinct suffixes exist, so untitled videos of
    # one author can still overwrite each other.
    if info['desc'] == '':
        name = info['user_name'] + '的作品{}'.format(random.randint(0, 100)) + '.mp4'
    else:
        name = info['desc'] + '.mp4'
    # Titles and nicknames are free text; a path separator, a newline or
    # another reserved character would make open() fail or write elsewhere.
    name = _safe_filename(name)
    folder = 'D:/Ponfey/Object/Study/抖音短视频/抖音短视频_' + _safe_filename(info['user_name'])

    res = requests.get(info['video_url'])
    if not res.ok:
        # An error body is not a video; skip it rather than store a broken .mp4.
        print('{}下载失败,状态码:{}'.format(name, res.status_code))
        return

    # makedirs also creates missing parent folders and tolerates an existing one.
    os.makedirs(folder, exist_ok=True)
    with open(folder + '/' + name, 'wb') as f:
        f.write(res.content)
    print('{}下载完成!'.format(name))
def get_data(url, max_retries=None, retry_delay=0.5):
    """Request one page of a user's videos and return the raw entries.

    A response whose ``aweme_list`` is empty is requested again. The
    retries are now spaced by ``retry_delay`` seconds instead of being
    issued back to back, and can optionally be capped.

    :param url: fully built request URL.
    :param max_retries: maximum number of extra requests after an empty
        ``aweme_list``; ``None`` (the default) keeps retrying without limit.
    :param retry_delay: seconds to wait before each retry.
    :return: ``(data, max_cursor)`` where ``data`` is the non-empty
        ``aweme_list`` and ``max_cursor`` is the value to send with the
        next request. ``(None, None)`` is returned when the response
        lacks the expected keys or the retry limit is exhausted.
    """
    # The setting is process-wide, so it does not need repeating per request.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    attempts = 0
    while True:
        min_t, _ = timestamp()
        # NOTE(review): the cookie and token values below look like captured
        # session credentials; confirm whether they belong in configuration
        # rather than in source.
        headers = {'Connection': 'keep-alive',
                   'Cookie': 'd_ticket=4c5b44a063bf078fae71bffec25ddad8ca4ea; odin_tt=6450ec41def6afd0731d426731b508fcf43650505f50460244a368ac5847f0d1cbf6747a7ad4e89fa3b0f0e15754002e; sid_guard=a6001040b2e52133062ca1e743097c06%7C1588771917%7C5183999%7CSun%2C+05-Jul-2020+13%3A31%3A56+GMT; uid_tt=de424823d2132aab28fa581760578e06; uid_tt_ss=de424823d2132aab28fa581760578e06; sid_tt=a6001040b2e52133062ca1e743097c06; sessionid=a6001040b2e52133062ca1e743097c06; sessionid_ss=a6001040b2e52133062ca1e743097c06; install_id=4019449375779358; ttreq=1$4e594dc75197827452871cc4798ece9879de8c47',
                   'X-SS-REQ-TICKET': '1588773397905',
                   'X-Tt-Token': '00a6001040b2e52133062ca1e743097c06cdb862c09a8bd8d850534d8a1ddf1bcda1297dfe483093add3539ae384dd657413',
                   'sdk-version': '1',
                   'X-SS-DP': '1128',
                   'x-tt-trace-id': '00-ea46271a0d9c7aafc2c2647f8e990468-ea46271a0d9c7aaf-01',
                   'User-Agent': 'com.ss.android.ugc.aweme/100901 (Linux; U; Android 5.1.1; zh_CN; SM-N960F; Build/JLS36C; Cronet/TTNetVersion:8109b77c 2020-04-15 QuicVersion:0144d358 2020-03-24)',
                   'Accept-Encoding': 'gzip',
                   'X-Gorgon': '0404b8014001a0ff70a608a72b1c1754b85bab86f700fceac8f4',
                   'X-Khronos': min_t,
                   'x-common-params-v2': 'os_api=22&device_platform=android&device_type=SM-N960F&iid=4019449375779358&version_code=100900&app_name=aweme&openudid=9a7fc881896f46bf&device_id=2752811979515463&os_version=5.1.1&aid=1128&channel=tengxun_new&ssmix=a&manifest_version_code=100901&dpi=240&cdid=1421f378-62fa-44ad-af4f-49d33aa7a58a&version_name=10.9.0&resolution=720*1280&language=zh&device_brand=samsung&app_type=normal&ac=wifi&update_version_code=10909900&uuid=355757648741243'}
        response = requests.get(url, headers=headers, verify=False).json()
        try:
            max_cursor = response['max_cursor']
            data = response['aweme_list']
        except (KeyError, TypeError):
            # The payload does not have the expected shape: report "no data".
            return None, None
        if data:
            return data, max_cursor
        attempts += 1
        if max_retries is not None and attempts > max_retries:
            return None, None
        # Pause before asking again so an empty answer does not turn into a
        # tight request loop.
        time.sleep(retry_delay)
def data_parse(data):
    """Reduce raw video entries to the fields used for downloading.

    :param data: iterable of raw video entries (mappings) as returned by
        the video-list request.
    :return: list with one dict per entry holding ``user_name``, ``desc``,
        ``time`` (formatted through ``fabu_time``), ``music_author``,
        ``music_name`` and ``video_url`` (the first ``play_addr_lowbr`` URL).
    """
    return [
        {
            'user_name': entry['author']['nickname'],
            'desc': entry['desc'],
            'time': fabu_time(entry['create_time']),
            'music_author': entry['music']['author'],
            'music_name': entry['music']['title'],
            'video_url': entry['video']['play_addr_lowbr']['url_list'][0],
        }
        for entry in data
    ]
# If the header parameters are not correct, the request returns no data.
def attention_me_uid(url, max_retries=None, retry_delay=0.5):
    """Request one page of the followed-users list.

    A response whose ``followings`` list is empty is requested again. The
    retries are now spaced by ``retry_delay`` seconds instead of being
    issued back to back, and can optionally be capped.

    :param url: fully built request URL.
    :param max_retries: maximum number of extra requests after an empty
        ``followings`` list; ``None`` (the default) keeps retrying without
        limit.
    :param retry_delay: seconds to wait before each retry.
    :return: ``(data, max_time)`` where ``data`` is the non-empty
        ``followings`` list and ``max_time`` is the value to send with the
        next page request. ``(None, None)`` is returned when the response
        lacks the expected keys or the retry limit is exhausted.
    """
    # The setting is process-wide, so it does not need repeating per request.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    attempts = 0
    while True:
        min_t, _ = timestamp()
        # NOTE(review): the cookie and token values below look like captured
        # session credentials; confirm whether they belong in configuration
        # rather than in source.
        headers = {'Connection': "keep-alive",
                   'Cookie': 'd_ticket=4c5b44a063bf078fae71bffec25ddad8ca4ea; odin_tt=6450ec41def6afd0731d426731b508fcf43650505f50460244a368ac5847f0d1cbf6747a7ad4e89fa3b0f0e15754002e; sid_guard=a6001040b2e52133062ca1e743097c06%7C1588771917%7C5183999%7CSun%2C+05-Jul-2020+13%3A31%3A56+GMT; uid_tt=de424823d2132aab28fa581760578e06; uid_tt_ss=de424823d2132aab28fa581760578e06; sid_tt=a6001040b2e52133062ca1e743097c06; sessionid=a6001040b2e52133062ca1e743097c06; sessionid_ss=a6001040b2e52133062ca1e743097c06; install_id=4019449375779358; ttreq=1$4e594dc75197827452871cc4798ece9879de8c47',
                   'X-SS-REQ-TICKET': '1588777054732',
                   'X-Tt-Token': '00a6001040b2e52133062ca1e743097c06cdb862c09a8bd8d850534d8a1ddf1bcda1297dfe483093add3539ae384dd657413',
                   'sdk-version': '1',
                   'X-SS-DP': '1128',
                   'x-tt-trace-id': '00-ea7df3860d9c7aafc2c2647233a00468-ea7df3860d9c7aaf-01',
                   'User-Agent': 'com.ss.android.ugc.aweme/100901 (Linux; U; Android 5.1.1; zh_CN; SM-N960F; Build/JLS36C; Cronet/TTNetVersion:8109b77c 2020-04-15 QuicVersion:0144d358 2020-03-24)',
                   'Accept-Encoding': 'gzip',
                   'X-Gorgon': '0404b8014001fc330e5d08a72b1c1754b85bab86f700fca29ee3',
                   'X-Khronos': min_t,
                   'x-common-params-v2': 'os_api=22&device_platform=android&device_type=SM-N960F&iid=4019449375779358&version_code=100900&app_name=aweme&openudid=9a7fc881896f46bf&device_id=2752811979515463&os_version=5.1.1&aid=1128&channel=tengxun_new&ssmix=a&manifest_version_code=100901&dpi=240&cdid=1421f378-62fa-44ad-af4f-49d33aa7a58a&version_name=10.9.0&resolution=720*1280&language=zh&device_brand=samsung&app_type=normal&ac=wifi&update_version_code=10909900&uuid=355757648741243'}
        response = requests.get(url, headers=headers, verify=False).json()
        print(response)
        try:
            max_time = response['max_time']
            data = response['followings']
        except (KeyError, TypeError):
            # The payload does not have the expected shape: report "no data".
            return None, None
        if data:
            return data, max_time
        attempts += 1
        if max_retries is not None and attempts > max_retries:
            return None, None
        # Pause before asking again so an empty answer does not turn into a
        # tight request loop.
        time.sleep(retry_delay)
# Parse the data fetched for the followed Douyin users.
def uid_parse(data):
    """Keep only the ``sec_uid`` of every followed-user entry.

    :param data: list of user entries (mappings containing ``sec_uid``).
    :return: list of ``{'sec_uid': ...}`` dicts in the same order as ``data``.
    """
    return [{'sec_uid': entry['sec_uid']} for entry in data]
# Sends the first request (max_time=0) for the list of followed Douyin users.
def first_user():
    """Send the first followed-users request (``max_time=0``) and print the decoded JSON.

    :return: None; the decoded response is only printed.
    """
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    seconds, millis = timestamp()
    endpoint = 'https://api3-normal-c-lq.amemv.com/aweme/v1/user/following/list/?user_id=86304636253&sec_user_id=MS4wLjABAAAAliUfImgLRYe1ih0ZL0_GQ3dzUAOGZ1JEInos9icA04w&max_time=0&count=20&offset=0&source_type=2&address_book_access=2&gps_access=1&vcd_count=0&vcd_auth_first_time=0&ts={}&host_abi=armeabi-v7a&_rticket={}&mcc_mnc=46007&'.format(
        seconds, millis)
    # Unlike the other request helpers in this file, X-Khronos is a fixed value here.
    request_headers = {
        'Connection': 'keep-alive',
        'Cookie': 'd_ticket=4c5b44a063bf078fae71bffec25ddad8ca4ea; odin_tt=6450ec41def6afd0731d426731b508fcf43650505f50460244a368ac5847f0d1cbf6747a7ad4e89fa3b0f0e15754002e; sid_guard=a6001040b2e52133062ca1e743097c06%7C1588771917%7C5183999%7CSun%2C+05-Jul-2020+13%3A31%3A56+GMT; uid_tt=de424823d2132aab28fa581760578e06; uid_tt_ss=de424823d2132aab28fa581760578e06; sid_tt=a6001040b2e52133062ca1e743097c06; sessionid=a6001040b2e52133062ca1e743097c06; sessionid_ss=a6001040b2e52133062ca1e743097c06; install_id=4019449375779358; ttreq=1$4e594dc75197827452871cc4798ece9879de8c47',
        'X-SS-REQ-TICKET': '1588780059093',
        'X-Tt-Token': '00a6001040b2e52133062ca1e743097c06cdb862c09a8bd8d850534d8a1ddf1bcda1297dfe483093add3539ae384dd657413',
        'sdk-version': '1',
        'X-SS-DP': '1128',
        'x-tt-trace-id': '00-eaabcb4c0d9c7aafc2c26472519d0468-eaabcb4c0d9c7aaf-01',
        'User-Agent': 'com.ss.android.ugc.aweme/100901 (Linux; U; Android 5.1.1; zh_CN; SM-N960F; Build/JLS36C; Cronet/TTNetVersion:8109b77c 2020-04-15 QuicVersion:0144d358 2020-03-24)',
        'Accept-Encoding': 'gzip',
        'X-Gorgon': '0404b8014001be0483eb08a72b1c1754b85bab86f700fc923f8b',
        'X-Khronos': '1588780059',
        'x-common-params-v2': 'os_api=22&device_platform=android&device_type=SM-N960F&iid=4019449375779358&version_code=100900&app_name=aweme&openudid=9a7fc881896f46bf&device_id=2752811979515463&os_version=5.1.1&aid=1128&channel=tengxun_new&ssmix=a&manifest_version_code=100901&dpi=240&cdid=1421f378-62fa-44ad-af4f-49d33aa7a58a&version_name=10.9.0&resolution=720*1280&language=zh&device_brand=samsung&app_type=normal&ac=wifi&update_version_code=10909900&uuid=355757648741243',
    }
    reply = requests.get(endpoint, headers=request_headers, verify=False)
    payload = reply.json()
    print(payload)
# Page through the followed Douyin users' sec_uid values and crawl each user's videos as they are found.
def run():
    """Page through the followed users and crawl every one of them.

    Each page is fetched with ``attention_me_uid``; every ``sec_uid`` on it
    is handed to ``user_get``. The loop ends when ``attention_me_uid``
    returns ``None`` for either the data or the next ``max_time``.

    :return: None
    """
    max_time = 1588778511  # starting value for the first page request
    while True:
        min_t, max_t = timestamp()
        # The template previously read "ts={}4", which appended a stray digit
        # to the timestamp; every other URL in this file sends "ts={}".
        url = 'https://api3-normal-c-lq.amemv.com/aweme/v1/user/following/list/?user_id=86304636253&sec_user_id=MS4wLjABAAAAliUfImgLRYe1ih0ZL0_GQ3dzUAOGZ1JEInos9icA04w&max_time={}&count=20&offset=0&source_type=1&address_book_access=2&gps_access=1&vcd_count=0&vcd_auth_first_time=0&ts={}&host_abi=armeabi-v7a&_rticket={}&mcc_mnc=46007&'.format(
            max_time, min_t, max_t)
        print(url)
        data, max_time = attention_me_uid(url)
        print(data)
        if data is None or max_time is None:
            print('您关注的抖音用户已爬完!')
            break
        for user in uid_parse(data):
            user_get(user['sec_uid'])
def user_get(sec_uid):
    """Download every video of one Douyin user, page by page.

    :param sec_uid: the ``sec_uid`` of the target user; it is inserted
        into the request URL as ``sec_user_id``.
    :return: None. The loop stops when ``get_data`` returns ``None`` for
        either the data or the next cursor.
    """
    url_template = 'https://api3-normal-c-lq.amemv.com/aweme/v1/aweme/post/?source=0&publish_video_strategy_type=0&max_cursor={}&sec_user_id={}&count=20&ts={}&host_abi=armeabi-v7a&_rticket={}&mcc_mnc=46007&'
    max_cursor = 0
    while True:
        seconds, millis = timestamp()
        page_url = url_template.format(max_cursor, sec_uid, seconds, millis)
        data, max_cursor = get_data(page_url)
        if data is None or max_cursor is None:
            print('您喜欢的抖音用户已爬完!')
            break
        # Parse the whole page first, then download its videos one by one.
        for production in data_parse(data):
            save(production)
if __name__ == '__main__':
    # sec_uid of the profile page whose videos are downloaded
    target_sec_uid = 'MS4wLjABAAAAqszdrdTPFmmpQXXvMMxrx2Dmj-4sdMQYOIcduwNAfjM'
    user_get(target_sec_uid)
