49 lines
2.3 KiB
Python
49 lines
2.3 KiB
Python
import os
|
|
import shutil
|
|
|
|
import pymysql
|
|
from lxml import etree
|
|
|
|
from nfm.mstruct.webpage_parser import parse_base_info
|
|
|
|
# Directory that is scanned for the saved ``.html`` pages; page-relative
# image URIs found in those pages are resolved against it.
target_file_path = 'E:/yuxin/nuofang-data/structure/0420/webpage'
# Directory that receives the copied logo images.
logo_path = 'E:/yuxin/nuofang-data/structure/0420/logo'
# Directory that receives the copied icon images.
icon_path = 'E:/yuxin/nuofang-data/structure/0420/icon'
|
|
|
|
def _copy_first_asset(uris, source_root, dest_dir):
    """Copy the first image referenced by *uris* into *dest_dir*.

    Only a page-relative URI (one starting with ``"./"``) is copied; it is
    resolved against *source_root* by dropping the leading dot.

    Args:
        uris: Sequence of ``src`` attribute values returned by an XPath query.
        source_root: Directory the page-relative URI is resolved against.
        dest_dir: Directory the image is copied into.

    Returns:
        The copied file's name, or ``''`` when *uris* is empty or its first
        entry is not page-relative.

    Raises:
        OSError: Propagated from ``shutil.copy`` (e.g. the source is missing).
    """
    if not uris or not uris[0].startswith("./"):
        return ''
    # "./dir/img.png" -> "<source_root>/dir/img.png"
    source_path = source_root + uris[0][1:]
    file_name = source_path.split("/")[-1]
    shutil.copy(source_path, dest_dir + "/" + file_name)
    return file_name


if __name__ == '__main__':
    # For every saved .html page: parse it, copy its icon and logo images
    # into their own directories, and record both file names on the matching
    # nfm.m_struct_page row.
    count = 0

    # Placeholders let the driver escape the values; the file names and the
    # id come from scraped HTML and must not be spliced into the SQL text.
    sql_update = "UPDATE nfm.m_struct_page SET logo = %s, icon = %s WHERE internal_code = %s"

    # NOTE(review): host and credentials are hard-coded in source control;
    # they should be moved to configuration / environment and rotated.
    db = pymysql.connect(host='39.101.194.63', port=23306,
                         user='root', passwd='passok123A', db='nfm', charset='utf8mb4')
    try:
        with db.cursor() as cursor:
            for _path in os.listdir(target_file_path):
                # To debug a single page, narrow this filter to that page's
                # exact file name.
                if not _path.endswith('.html'):
                    continue
                count += 1

                page_file = os.path.join(target_file_path, _path)
                with open(page_file, mode='r', encoding='utf-8') as target_file:
                    html_content = target_file.read().replace('\n', '')

                selector = etree.HTML(html_content)
                base_info = parse_base_info(selector)
                # '内部编号' reads "internal code"; it is matched against the
                # internal_code column below.
                internal_id = base_info['内部编号']

                icon_uris = selector.xpath("//div[@class='top cls']/span/a/img[@class='img-svg']/@src")
                logo_uris = selector.xpath(
                    "//div[@class='good-info-box']/div[@class='tab ']//div[@class='cItem active']//img/@src")

                icon_file_name = _copy_first_asset(icon_uris, target_file_path, icon_path)
                logo_file_name = _copy_first_asset(logo_uris, target_file_path, logo_path)

                print("[No. {}] {}: logo - {} , icon - {}".format(count, internal_id, logo_file_name, icon_file_name))

                cursor.execute(sql_update, (logo_file_name, icon_file_name, internal_id))
                # Commit per page so rows already processed survive a failure
                # on a later page; the UPDATE is safe to re-run.
                db.commit()
    finally:
        # Release the connection even when a page fails to parse or copy.
        db.close()