You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
47 lines
1.0 KiB
47 lines
1.0 KiB
2 years ago
|
import json
|
||
|
import time
|
||
|
|
||
|
import requests as req
|
||
|
from bs4 import BeautifulSoup
|
||
|
import lxml
|
||
|
import copy
|
||
|
|
||
|
# Scrape the paginated result tables found under ``base_url`` and append one
# JSON record per table row to ``num_new.txt``.
# NOTE(review): the 'ssq' path segment and the red/blue fields suggest these
# are lottery draw results -- confirm against the site before relying on it.

# Common prefix of every list page: the first page is '<base_url>.html',
# every later page k is '<base_url>_k.html'.
base_url = 'http://kaijiang.zhcw.com/zhcw/html/ssq/list'

# Template that fixes the keys (and their order) of every emitted record.
data = {'date': '', 'id': '', 'red': [], 'blue': ''}

# JSON lines, collected in page order and reversed just before writing.
data_list = []

PAGE_COUNT = 148       # number of list pages to fetch
REQUEST_DELAY = 0.5    # seconds to pause before each request
REQUEST_TIMEOUT = 10   # seconds before a stalled request is abandoned
RED_COUNT = 6          # leading <em> cells stored under 'red'; the next is 'blue'

for n in range(PAGE_COUNT):
    time.sleep(REQUEST_DELAY)

    if n == 0:
        u = base_url + '.html'
    else:
        u = base_url + '_' + str(n + 1) + '.html'
    print(u)

    # Without a timeout, requests waits forever on an unresponsive server.
    response = req.get(u, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, 'lxml')

    table = soup.find('table')
    if table is None:
        # A page without a table (error page, layout change) used to raise
        # AttributeError and throw away every record collected so far.
        print('no table found, skipping: ' + u)
        continue

    for row in table.find_all('tr'):
        tds = row.find_all('td')
        ems = row.find_all('em')

        # A usable row needs the two leading <td> cells (date, id) and
        # RED_COUNT + 1 <em> cells; header or malformed rows are skipped
        # instead of raising IndexError.
        if len(tds) < 2 or len(ems) <= RED_COUNT:
            continue

        record = dict(
            data,
            date=tds[0].text,
            id=tds[1].text,
            red=[e.text for e in ems[:RED_COUNT]],
            blue=ems[RED_COUNT].text,
        )
        data_list.append(json.dumps(record) + '\n')

# Rows were gathered first page first; flip the whole list before writing.
data_list.reverse()

# json.dumps escapes non-ASCII by default, so the output is plain ASCII.
# The context manager closes the file; no explicit close() is needed.
with open('num_new.txt', 'a') as f:
    f.writelines(data_list)