import copy
import json
import time

import requests as req
from bs4 import BeautifulSoup  # the 'lxml' parser package must also be installed

base_url = 'http://kaijiang.zhcw.com/zhcw/html/ssq/list'
data = {'date': '', 'id': '', 'red': [], 'blue': ''}
data_list = []

for n in range(0, 148):
    time.sleep(0.5)  # throttle requests to be polite to the server
    # The first results page has no index suffix; later pages are list_2.html, list_3.html, ...
    if n == 0:
        u = base_url + '.html'
    else:
        u = base_url + '_' + str(n + 1) + '.html'
    print(u)

    response = req.get(u)
    soup = BeautifulSoup(response.text, 'lxml')
    table = soup.find('table')
    rows = table.findAll('tr')

    for row in rows:
        tds = row.findAll('td')  # draw date and draw id are in <td> cells
        ems = row.findAll('em')  # the seven drawn numbers are in <em> tags
        if len(tds) and len(ems):
            d = copy.deepcopy(data)
            d['date'] = tds[0].text
            d['id'] = tds[1].text
            d['red'] = [e.text for e in ems[0:6]]  # first six numbers are the red balls
            d['blue'] = ems[6].text                # the seventh is the blue ball
            data_list.append(json.dumps(d) + '\n')

# Pages list the newest draws first, so reverse to get chronological order.
data_list.reverse()

with open('num_new.txt', 'a') as f:
    f.writelines(data_list)
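# Each line written to num_new.txt is a standalone JSON object, so the file can be
# read back line by line. A minimal sketch, assuming the file and field names
# produced by the scraper above:
#
#     import json
#
#     with open('num_new.txt') as f:
#         draws = [json.loads(line) for line in f]
#
#     # Each entry has 'date', 'id', 'red' (list of 6 strings), and 'blue'.
#     print(draws[0]['id'], draws[0]['red'], draws[0]['blue'])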