补天SRC公益爬虫

闲来无事在补天商城溜达时,深感币子之稀少,就打算对公益SRC下手,爬一下资产

注:2023.4.13可用 自测

运行方式:python3 butian.py

img

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Session cookie for www.butian.net. The placeholder must be replaced with a
# real cookie captured from a logged-in browser session (e.g. via Burp).
cookie = '自己bp抓包填一下啦'

# Headers sent with every request in this script; they mirror the browser's
# XHR request. Key order is kept as in the captured request.
headers = {
    'Host': 'www.butian.net',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'X-Requested-With': 'XMLHttpRequest',
    'Cookie': cookie,
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/112.0.0.0 Safari/537.36'
    ),
    'Origin': 'https://www.butian.net',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
}

# Initial POST (no form data) to the listing endpoint; the reply is JSON.
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() reports an HTTP error directly instead of letting
# it surface later as a JSON/KeyError.
butian = requests.post(
    'https://www.butian.net/Reward/pub',
    headers=headers,
    timeout=30,
)
butian.raise_for_status()
# data.count is read as the number of listing pages to walk.
pages = int(butian.json()['data']['count'])
print('获取公益SRC厂商列表页数:' ,pages ,'页')

# Parallel lists, one entry per vendor; company_url is filled in by the
# per-vendor lookup that follows this loop.
company_id = []
company_name = []
company_url = []

# Walk listing pages 1..pages and collect every vendor's id and name.
for p in range(1, pages + 1):
    data = {'s': 1, 'p': p, 'token': ''}
    butian = requests.post(
        'https://www.butian.net/Reward/pub',
        headers=headers,
        data=data,
        timeout=30,  # do not hang forever on a stalled connection
    )
    butian.raise_for_status()
    # Named `vendors` rather than `list` so the builtin is not shadowed.
    vendors = butian.json()['data']['list']
    for item in vendors:
        company_id.append(item['company_id'])
        company_name.append(item['company_name'])
    print('第' ,p,'页厂商名字与ID获取成功')
print('共获取到', len(company_id),'个公益厂商')

# Look up each vendor's submit page and read the value of its
# <input name="host"> element as the vendor URL.
#
# company_url must end up with exactly one entry per company_id, because the
# three lists are later combined column-wise. A failed request or a page
# without the host input therefore records an empty string instead of
# aborting the run and losing everything collected so far.
total = len(company_id)
for i, cid in enumerate(company_id):
    host = ''
    try:
        resp = requests.get(
            'https://www.butian.net/Loo/submit',
            headers=headers,
            params={'cid': cid},
            timeout=30,  # do not hang forever on a stalled connection
        )
        resp.raise_for_status()
    except requests.RequestException as exc:
        print('厂商ID', cid, '请求失败,URL留空:', exc)
    else:
        html = BeautifulSoup(resp.text, 'lxml')
        host_input = html.find(name='input', attrs={'name': 'host'})
        if host_input is None:
            # find() returns None when no matching tag exists.
            print('厂商ID', cid, '未找到URL,已留空')
        else:
            host = host_input.get('value', '')
    company_url.append(host)
    print('正在获取厂商ID', cid, '的URL,还剩', total - (i + 1), '个厂商未获取')

# Each collected list becomes one column of the output table; pandas is used
# to write it out as butian.csv without the index column.
table_columns = {'ID': company_id, 'NAME': company_name, 'URL': company_url}
butian = pd.DataFrame(table_columns)
butian.to_csv("butian.csv", index=False)