osc/research/nuofang-db/nfm/mperson/convert_to_db.py

73 lines
2.3 KiB
Python
Raw Normal View History

2025-05-28 19:16:17 +08:00
import json
import os
import pymysql
# Maps each m_person_page column name to the (Chinese) field label under which
# the value appears in the scraped JSON. Insertion order is significant: it
# determines the column order of the generated INSERT statements.
base_props = {
    # --- identity ---
    "name": "姓名",                  # full name
    "gender": "性别",                # gender
    "birthday": "出生日期",          # date of birth
    "region": "国家/地区",           # country / region
    # --- affiliation ---
    "type_or_dept": "军种/部门",     # service branch / department
    "category": "人员类别",          # personnel category
    "military_rank": "军衔",         # military rank
    "occupation": "单位职务",        # unit and position
    "specialty": "专长领域",         # area of expertise
    # --- contact and provenance ---
    "contact": "联系方式",           # contact information
    "social_media": "社交媒体账号",  # social media accounts
    "update_time": "数据日期",       # data date
    # --- biography ---
    "introduction": "个人简介",      # personal profile
    "honors": "所获荣誉",            # honors received
    "education": "教育背景",         # educational background
    "source": "信息来源",            # information source
    "career": "从业背景",            # career background
    "keywords": "关键词",            # keywords
    "website": "个人主页",           # personal homepage
    "achievements": "主要成就",      # main achievements
    "rank_up": "军衔晋升",           # rank promotions
    "party": "政党",                 # political party
    "remarks": "备注",               # remarks
    "foreigners_info": "外裔情况",   # foreign-descent information
    "religion": "宗教信仰",          # religious belief
    "other_contacts": "其他联系方式",  # other contact methods
    "address": "个人住址",           # home address
}
# Directory holding one scraped JSON result file per person.
result_file_path = r"E:\yuxin\nuofang-data\person\result0515"


def extract_person_row(result_json, props):
    """Map one scraped person record onto database columns.

    Args:
        result_json: Parsed content of one result file. It must contain the
            keys ``url_info``, ``base_info`` and ``website_info``.
        props: Mapping of column name -> label used as the key inside
            ``base_info`` / ``website_info``.

    Returns:
        A dict of column name -> value. ``id`` is always present and comes
        first; the remaining columns follow the order of ``props``. A column
        whose label appears in neither dict is omitted. When a label appears
        in both, ``base_info`` wins.

    Raises:
        KeyError: If one of the three required top-level keys is missing.
    """
    url_info = result_json['url_info']
    base_info = result_json['base_info']
    website_info = result_json['website_info']

    # The id is the 8-character window [37:45] of the URL.
    # NOTE(review): presumably every URL shares the same fixed-length prefix;
    # confirm against the scraper, since a different URL shape would silently
    # produce a wrong or truncated id.
    row = {'id': url_info[37:45]}
    for column, label in props.items():
        if label in base_info:
            row[column] = base_info[label]
        elif label in website_info:
            row[column] = website_info[label]
    return row


def build_insert_statement(row, table='m_person_page'):
    """Build a parameterized INSERT for ``row``.

    Values are never interpolated into the SQL text; they are returned
    separately so the driver can escape them. This keeps apostrophes and
    backslashes in the data intact instead of stripping or breaking on them.

    Args:
        row: Mapping of column name -> value. Column names must come from
            trusted code (they are placed into the statement verbatim).
        table: Target table name (trusted, placed verbatim).

    Returns:
        ``(sql, params)`` where ``sql`` uses one ``%s`` placeholder per column
        and ``params`` is the list of values in matching order.
    """
    columns = list(row)
    cols = ", ".join(columns)
    placeholders = ", ".join(["%s"] * len(columns))
    sql = 'INSERT INTO {} ({}) VALUES ({})'.format(table, cols, placeholders)
    return sql, [row[column] for column in columns]


def main():
    """Load every result file in ``result_file_path`` into ``m_person_page``."""
    # NOTE(security): the fallback values below are the credentials that were
    # previously hard-coded here. They are kept only so existing invocations
    # keep working; set the NFM_DB_* environment variables instead, then
    # rotate this password and remove the fallbacks.
    db = pymysql.connect(
        host=os.environ.get('NFM_DB_HOST', '39.101.194.63'),
        port=int(os.environ.get('NFM_DB_PORT', '23306')),
        user=os.environ.get('NFM_DB_USER', 'root'),
        passwd=os.environ.get('NFM_DB_PASSWORD', 'passok123A'),
        db='nfm', charset='utf8mb4')
    try:
        cursor = db.cursor()
        for count, single_json_file in enumerate(os.listdir(result_file_path), start=1):
            json_path = os.path.join(result_file_path, single_json_file)
            with open(json_path, "r", encoding='utf-8') as result_file:
                result_json = json.load(result_file)

            row = extract_person_row(result_json, base_props)
            sql_insert, params = build_insert_statement(row)
            # mogrify renders the statement exactly as the driver will send
            # it, so the log line still shows the concrete values.
            print("[No. {}] {}".format(count, cursor.mogrify(sql_insert, params)))
            cursor.execute(sql_insert, params)
            # Commit per file so rows already inserted survive a failure on a
            # later file.
            db.commit()
    finally:
        # Release the connection even if a file is malformed or an INSERT fails.
        db.close()


if __name__ == '__main__':
    main()