"""Copy page logo/icon assets out of saved HTML pages and record them in MySQL.

For every ``.html`` file in ``target_file_path`` the script parses the page,
reads its internal id via ``parse_base_info``, copies the first page-relative
icon and logo image it finds into ``icon_path`` / ``logo_path``, and stores the
two file names on the matching ``nfm.m_base_page`` row.
"""
import os
import shutil

import pymysql
from lxml import etree

from nfm.mstruct.webpage_parser import parse_base_info

target_file_path = r"E:\yuxin\nuofang-data\base\webpage"
logo_path = r"E:\yuxin\nuofang-data\base\logo"
icon_path = r"E:\yuxin\nuofang-data\base\icon"

# XPath expressions selecting the ``src`` attribute of the icon / logo images.
_ICON_XPATH = "//div[@class='top cls']/span/a/img[@class='img-svg']/@src"
_LOGO_XPATH = (
    "//div[@class='good-info-box']/div[@class='tab ']"
    "//div[@class='cItem active']//img/@src"
)

# Placeholders are filled in by the driver, so quotes or other special
# characters in file names and ids cannot break (or inject into) the statement.
_SQL_UPDATE = "UPDATE nfm.m_base_page SET logo = %s, icon = %s WHERE internal_id = %s"


def _copy_first_local_asset(uris, dest_dir):
    """Copy the first URI of *uris* into *dest_dir* when it is page-relative.

    Only ``uris[0]`` is considered, and only if it starts with ``"./"``; the
    source file is resolved relative to ``target_file_path``.

    Args:
        uris: ``src`` attribute values returned by an XPath query (may be empty).
        dest_dir: Directory the asset is copied into.

    Returns:
        The copied file's name (last ``/``-separated component of the URI), or
        ``''`` when there was nothing to copy.

    Raises:
        OSError: Propagated from ``shutil.copy`` if the copy fails.
    """
    if not uris or not uris[0].startswith("./"):
        return ''
    # Drop the leading "." so "./dir/file" becomes "/dir/file" under the page dir.
    source_path = target_file_path + uris[0][1:]
    file_name = source_path.split("/")[-1]
    shutil.copy(source_path, dest_dir + "/" + file_name)
    return file_name


def main():
    """Process every saved HTML page and update its logo/icon columns."""
    count = 0
    # NOTE(review): host and root credentials are hard-coded in source control;
    # they should be moved to configuration or environment variables.
    db = pymysql.connect(host='39.101.194.63', port=23306, user='root', passwd='passok123A', db='nfm',
                         charset='utf8mb4')
    try:
        with db.cursor() as cursor:
            for _path in os.listdir(target_file_path):
                # To debug a single page, temporarily replace the suffix test
                # with that page's full file name.
                if not _path.endswith('.html'):
                    continue
                count += 1

                with open(os.path.join(target_file_path, _path), mode='r', encoding='utf-8') as target_file:
                    html_content = target_file.read().replace('\n', '')

                selector = etree.HTML(html_content)
                base_info = parse_base_info(selector)
                internal_id = base_info['内部编号']

                icon_file_name = _copy_first_local_asset(selector.xpath(_ICON_XPATH), icon_path)
                logo_file_name = _copy_first_local_asset(selector.xpath(_LOGO_XPATH), logo_path)

                print("[No. {}] {}: logo - {} , icon - {}".format(count, internal_id, logo_file_name,
                                                                  icon_file_name))
                cursor.execute(_SQL_UPDATE, (logo_file_name, icon_file_name, internal_id))
                # Commit per page so already-processed rows survive a later failure.
                db.commit()
    finally:
        # Always release the connection, even if a page fails to process.
        db.close()


if __name__ == '__main__':
    main()