|
|
|
import datetime
import os

import pymysql
from pymysql.cursors import DictCursor
|
|
|
|
|
|
|
|
# Connection settings passed as keyword arguments to pymysql.Connection.
# Every value can be overridden with a SCRAPY_DB_* environment variable so
# that credentials do not have to be edited in this file; the literals are
# fallbacks that keep the previous behaviour when no variable is set.
# NOTE(review): a root password is hard-coded as a fallback below -- consider
# rotating it and supplying it only through SCRAPY_DB_PASSWORD.
db_config = {
    'host': os.environ.get('SCRAPY_DB_HOST', '192.168.66.101'),
    'user': os.environ.get('SCRAPY_DB_USER', 'root'),
    'password': os.environ.get('SCRAPY_DB_PASSWORD', 'Sxzgx1209'),
    'database': os.environ.get('SCRAPY_DB_NAME', 'scrapy'),
}
|
|
|
|
|
|
|
|
|
|
|
|
class DbAction:
    """Helper around a single pymysql connection to the ``scrapyh`` table.

    Construction builds the date filter, opens a connection from the
    module-level ``db_config`` and creates a ``DictCursor``.  Instances may
    also be used as context managers, in which case the cursor and
    connection are closed on exit.
    """

    def __init__(self, end_date='today'):
        """Build the date filter and connect to the database.

        Args:
            end_date: ``'today'`` to select only today's date, or a
                ``YYYY-MM-DD`` string giving the oldest date to include;
                the filter then covers every day from today back to it.

        Raises:
            ValueError: if ``end_date`` is neither ``'today'`` nor a valid
                ``YYYY-MM-DD`` date (raised by ``strptime``).
        """
        # Validate the argument before connecting so that a bad date does
        # not leave an open connection behind.
        self.date_list = self.get_date_list(end_date)
        self.conn = pymysql.Connection(**db_config)
        self.cursor = self.conn.cursor(cursor=DictCursor)

    def __enter__(self):
        """Return the instance itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the cursor and connection; exceptions are not suppressed."""
        self.disconnect_db()
        return False

    def get_db_data(self):
        """Fetch rows for the selected dates that have no file name yet.

        Only rows with a non-empty ``save_link`` and ``code`` and a NULL
        ``file_name`` are selected.

        Returns:
            The result of ``cursor.fetchall()``, or an empty list when the
            date filter is empty (no query is issued in that case).
        """
        if not self.date_list:
            # An empty filter would render as ``IN ()``, which is not valid
            # SQL; there is nothing to match, so skip the query.
            return []

        # ``self.date_list`` is produced by get_date_list() from strftime
        # output, so it is interpolated rather than bound as parameters.
        query_sql = (
            "SELECT h.id, h.cate, h.date, h.`name`, h.save_link, h.`code`, h.unzip_pwd "
            f"FROM scrapyh h WHERE h.date IN ({self.date_list})"
            " AND save_link != '' AND code != '' AND file_name IS NULL "
        )
        print(query_sql)
        # Debugging aid: append an extra ``AND id IN (...)`` condition to
        # query_sql here to restrict the query to specific rows.
        self.cursor.execute(query_sql)
        return self.cursor.fetchall()

    def update_file_name(self, data_id, file_name):
        """Store ``file_name`` on the row with id ``data_id`` and commit."""
        query_sql = "UPDATE scrapyh SET file_name = %s WHERE id = %s"
        self.cursor.execute(query_sql, (file_name, data_id))
        self.conn.commit()
        print(f'{data_id}文件名为{file_name}')

    def mark_failed(self, data_id):
        """Mark the row with id ``data_id`` as having a dead link and commit.

        The marker is written into ``file_name``, so the row no longer
        matches the ``file_name IS NULL`` filter used by get_db_data().
        """
        query_sql = "UPDATE scrapyh SET file_name = '链接失效' WHERE id = %s"
        self.cursor.execute(query_sql, (data_id,))
        self.conn.commit()

    def disconnect_db(self):
        """Close the cursor and then the connection."""
        try:
            self.cursor.close()
        finally:
            # Close the connection even if closing the cursor raised.
            self.conn.close()

    @staticmethod
    def get_date_list(_end_date):
        """Build the quoted, comma-separated date list for the SQL filter.

        Args:
            _end_date: ``'today'`` for just today's date, or a
                ``YYYY-MM-DD`` string; every day from today back to and
                including that date is listed, newest first.

        Returns:
            A string such as ``"'2023-07-30','2023-07-29'"``; empty when
            ``_end_date`` lies in the future.

        Raises:
            ValueError: if ``_end_date`` is not ``'today'`` and does not
                match ``YYYY-MM-DD``.
        """
        date_format = '%Y-%m-%d'
        current = datetime.date.today()
        if _end_date == 'today':
            oldest = current
        else:
            oldest = datetime.datetime.strptime(_end_date, date_format).date()

        one_day = datetime.timedelta(days=1)
        date_list = []
        # Compare calendar dates only, so whether the end date is included
        # does not depend on the time of day at which this runs.
        while current >= oldest:
            date_list.append(f"'{current.strftime(date_format)}'")
            current -= one_day
        print(date_list)
        return ','.join(date_list)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual run: print every pending row dated from today back to 2023-07-28.
    db_action = DbAction('2023-07-28')
    try:
        result = db_action.get_db_data()
        print(result)
    finally:
        # Release the cursor and connection even if the query fails.
        db_action.disconnect_db()
|