You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
46 lines
1.0 KiB
46 lines
1.0 KiB
# Scrape the paginated 'ssq' result listing on kaijiang.zhcw.com and append
# one JSON object per usable table row to num_new.txt.
#
# Each output line has the shape
#     {"date": <1st cell text>, "id": <2nd cell text>,
#      "red": [<6 strings>], "blue": <string>}
# where the red/blue values are the text of the row's <em> elements.

import copy
import json
import time

# Not referenced directly; kept so a missing lxml install fails at import time
# rather than when BeautifulSoup is asked for the 'lxml' parser.
import lxml  # noqa: F401
import requests as req
from bs4 import BeautifulSoup

base_url = 'http://kaijiang.zhcw.com/zhcw/html/ssq/list'

# Number of listing pages to walk: list.html, list_2.html, ..., list_148.html.
PAGE_COUNT = 148
# Pause before every request, in seconds.
REQUEST_DELAY = 0.5
# Give up on a request after this many seconds instead of hanging forever.
REQUEST_TIMEOUT = 10
# The first RED_COUNT <em> elements of a row are stored under 'red',
# the one right after them under 'blue'.
RED_COUNT = 6

# Template for one record; copied per row so every JSON line has the same keys
# in the same order.
data = {'date': '', 'id': '', 'red': [], 'blue': ''}

data_list = []

for n in range(PAGE_COUNT):
    time.sleep(REQUEST_DELAY)

    # The first page has no numeric suffix; page k (k >= 2) is 'list_k.html'.
    if n == 0:
        u = base_url + '.html'
    else:
        u = base_url + '_' + str(n + 1) + '.html'
    print(u)

    response = req.get(u, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    table = soup.find('table')
    if table is None:
        # find() returns None when the page has no <table>; skip the page
        # rather than crash with AttributeError.
        print('no <table> found, skipping: ' + u)
        continue

    for row in table.find_all('tr'):
        tds = row.find_all('td')
        ems = row.find_all('em')

        # Header/footer rows lack the cells read below: two <td> and
        # RED_COUNT + 1 <em> elements are needed to index safely.
        if len(tds) < 2 or len(ems) <= RED_COUNT:
            continue

        d = copy.deepcopy(data)
        d['date'] = tds[0].text
        d['id'] = tds[1].text
        d['red'] = [e.text for e in ems[:RED_COUNT]]
        d['blue'] = ems[RED_COUNT].text
        data_list.append(json.dumps(d) + '\n')

# Rows were collected in page order; write them in the opposite order.
# NOTE(review): presumably the site lists the newest draw first, which would
# make the file oldest-first -- confirm against the live pages.
data_list.reverse()

# Mode 'a' appends: running the script twice duplicates the records.
# The with-block closes the file, so no explicit close() is needed.
with open('num_new.txt', 'a', encoding='utf-8') as f:
    f.writelines(data_list)
|
|
|