osc/research/jd_data/api_request/file_downloader.py

104 lines
4.0 KiB
Python
Raw Normal View History

2025-05-28 19:16:17 +08:00
import json
import time
import pymysql
import os
import wget
import random
from api_request import ApiRequest
def create_path(target_dir):
    """Ensure that the directory *target_dir* exists.

    Missing parent directories are created as well. If *target_dir* already
    exists nothing is done, so the call is safe to repeat.

    Args:
        target_dir: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created (e.g. permission denied
            or an empty path).
    """
    if not os.path.exists(target_dir):
        # makedirs (rather than mkdir) also creates missing parents, and
        # exist_ok=True tolerates another process creating the directory
        # between the existence check above and this call.
        os.makedirs(target_dir, exist_ok=True)
def custom_download(url, path):
    """Download *url* to the local file *path* and report the outcome.

    A success or failure message containing the file name (the last
    component of *path*) is printed to stdout.

    Args:
        url: Address of the file to fetch. An empty or None value is treated
            as a failed download without contacting the network.
        path: Destination file path passed to ``wget.download``.

    Returns:
        True if the download completed, False otherwise.
    """
    file_name = os.path.basename(path)
    if not url:
        # Nothing to fetch; report it as a failure instead of handing an
        # invalid URL to wget.
        print('!!!下载文件“{}”失败'.format(file_name))
        return False
    try:
        wget.download(url, path)
    except Exception as exc:
        # Catch Exception rather than using a bare except so that Ctrl+C
        # (KeyboardInterrupt) and SystemExit still stop the program.
        print('!!!下载文件“{}”失败'.format(file_name))
        print('    原因: {!r}'.format(exc))
        return False
    print('下载文件“{}”成功'.format(file_name))
    return True
def exec_sql(local_db, local_cursor, sql, params=None):
    """Execute one SQL statement and commit it, retrying once on failure.

    If the first attempt fails, the caller's connection is re-established in
    place with ``local_db.ping(reconnect=True)`` and the statement is executed
    one more time. Reviving the existing connection object (instead of opening
    a new one bound only to local names) avoids leaking a connection on every
    failure and keeps the caller's handles usable afterwards.

    Args:
        local_db: Open PyMySQL connection; must provide ``commit()`` and
            ``ping(reconnect=True)``.
        local_cursor: Cursor created from *local_db*.
        sql: SQL statement to run. May contain ``%s`` placeholders when
            *params* is given.
        params: Optional sequence or mapping of values bound to the
            placeholders in *sql* by the driver. Defaults to None, in which
            case *sql* is sent unchanged.

    Raises:
        Exception: Whatever the second attempt (or the reconnect) raises,
            e.g. when the statement itself is invalid.
    """
    try:
        local_cursor.execute(sql, params)
        local_db.commit()
    except Exception:
        # The usual cause is a dropped connection: reconnect and retry once.
        # A genuine SQL error simply fails again and propagates to the caller.
        local_db.ping(reconnect=True)
        local_cursor.execute(sql, params)
        local_db.commit()
if __name__ == '__main__':
    # Batch driver: repeatedly pick one random row of jd_data.data_detail that
    # is available, not yet downloaded and smaller than 20 000 000 (data_size),
    # ask the API for its download location, record the reply in the table,
    # download the archive and finally mark the row as downloaded. The loop
    # ends when the SELECT returns no more rows.
    target_root_path = "F:/jd_data"
    retry_delay_seconds = 5
    create_path(target_root_path)

    # NOTE(review): database credentials are hard-coded in source. They can
    # now be overridden through environment variables; the literals remain
    # only as a backward-compatible fallback and should be rotated and removed.
    db = pymysql.connect(
        host=os.environ.get('JD_DB_HOST', '39.101.194.63'),
        port=int(os.environ.get('JD_DB_PORT', '23306')),
        user=os.environ.get('JD_DB_USER', 'root'),
        passwd=os.environ.get('JD_DB_PASSWORD', 'passok123A'),
        db='jd_data', charset='utf8mb4')
    cursor = db.cursor()
    ar = ApiRequest()

    sql_select = "SELECT id, experiment, variable, model, level, data_size FROM jd_data.data_detail " \
                 "WHERE available = 1 AND downloaded = 0 AND model != 'miroc_es2h' AND data_size < 20000000;"
    # Values are bound through placeholders instead of string interpolation:
    # the reply text comes from a remote service and may contain quotes or
    # backslashes that would otherwise break (or inject into) the statement.
    sql_update_status_tpl = "UPDATE data_detail " \
                            "SET available = 1, url = %s, data_size = %s, update_time = %s, remarks = %s " \
                            "WHERE id = %s"
    sql_update_download_tpl = "UPDATE jd_data.data_detail SET downloaded = 1, downloaded_time = %s " \
                              "WHERE id = %s"

    try:
        while True:
            try:
                # Revive the connection if it was dropped since the last pass;
                # otherwise every following query would fail forever.
                db.ping(reconnect=True)
                cursor.execute(sql_select)
                result = cursor.fetchall()
                if not result:
                    break
                print("剩余{}个小文件需要下载".format(len(result)))

                row_id, experiment, variable, model, level = random.choice(result)[:5]
                target_file_name = " + ".join([experiment, variable, model, level]) + ".zip"
                target_file_full_path = target_root_path + '/' + target_file_name
                print('%s, %s, %s, %s' % (row_id, experiment, variable, model))

                reply = ar.get_reply({
                    'experiment': experiment,
                    'variable': variable,
                    'model': model,
                    'level': level,
                })
                if not reply:
                    continue

                remarks = json.dumps(reply)
                location = ''
                content_length = 0
                if reply.get('state') == 'completed':
                    location = reply.get('location') or ''
                    content_length = reply.get('content_length') or 0
                # Convert the ISO-style timestamp ("...T...Z") to the
                # "YYYY-MM-DD HH:MM:SS" form; None is stored as SQL NULL.
                sent_to_rmq_at = reply.get('sent_to_rmq_at')
                update_time = None
                if sent_to_rmq_at:
                    update_time = sent_to_rmq_at.replace("T", " ").replace("Z", "")
                print(reply)

                # mogrify() lets the driver escape the values and returns the
                # final statement text, which is what exec_sql expects.
                sql_update_status = cursor.mogrify(
                    sql_update_status_tpl,
                    (location, content_length, update_time, remarks, row_id))
                exec_sql(db, cursor, sql_update_status)

                # Without a location there is nothing to download yet.
                if location and custom_download(location, target_file_full_path):
                    current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    sql_update_download = cursor.mogrify(
                        sql_update_download_tpl, (current_time, row_id))
                    exec_sql(db, cursor, sql_update_download)
            except Exception as exc:
                # Keep the batch running on a per-row failure, but report it
                # and pause briefly so a persistent error does not turn into
                # a silent busy loop. KeyboardInterrupt is not caught here,
                # so Ctrl+C still stops the script.
                print('!!!处理出错,{}秒后重试: {!r}'.format(retry_delay_seconds, exc))
                time.sleep(retry_delay_seconds)
    finally:
        db.close()