用于处理彩票的大数据算法
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

46 lines
1.0 KiB

import json
import time
import requests as req
from bs4 import BeautifulSoup
import lxml
import copy
# Scrape the paginated result listing under ``base_url`` and append every
# parsed row to ``num_new.txt`` as one JSON object per line.

# Common prefix of every listing-page URL; the page suffix is appended below.
base_url = 'http://kaijiang.zhcw.com/zhcw/html/ssq/list'
# Template for a single record: 'red' holds six values, 'blue' holds one.
data = {'date': '', 'id': '', 'red': [], 'blue': ''}
# Serialized records (one newline-terminated JSON string each).
data_list = []

PAGE_COUNT = 148       # number of listing pages to fetch
REQUEST_TIMEOUT = 10   # seconds; without a timeout a stalled server hangs the run

for n in range(PAGE_COUNT):
    # Pause half a second before each request.
    time.sleep(0.5)

    # The first page has no numeric suffix; later pages are numbered from 2.
    if n == 0:
        u = base_url + '.html'
    else:
        u = base_url + '_' + str(n + 1) + '.html'
    print(u)

    response = req.get(u, timeout=REQUEST_TIMEOUT)
    # Fail loudly on HTTP errors instead of parsing an error page as data.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    table = soup.find('table')
    if table is None:
        # Previously this surfaced as an opaque AttributeError on None.
        raise RuntimeError('no <table> found in ' + u)

    for row in table.find_all('tr'):
        tds = row.find_all('td')
        ems = row.find_all('em')
        # A usable row needs the date and id cells plus seven <em> values
        # (six red, one blue). Header/footer rows and malformed rows are
        # skipped rather than raising IndexError.
        if len(tds) < 2 or len(ems) < 7:
            continue
        d = copy.deepcopy(data)
        d['date'] = tds[0].text
        d['id'] = tds[1].text
        d['red'] = [e.text for e in ems[0:6]]
        d['blue'] = ems[6].text
        data_list.append(json.dumps(d) + '\n')

# Write the records in the reverse of scrape order.
# NOTE(review): presumably the site lists newest draws first, making the
# output chronological - confirm against the live pages.
data_list.reverse()

# Append mode: existing contents of num_new.txt are kept. The ``with``
# statement closes the file, so no explicit close() is needed.
with open('num_new.txt', 'a', encoding='utf-8') as f:
    f.writelines(data_list)