import scrapy


class ForumspidersItem(scrapy.Item):
    """Empty item: declares no fields of its own."""


class ZhihuItem(scrapy.Item):
    """One schema shared by question, answer and comment records.

    The field notes give the meaning per record kind in the order
    question / answer / comment.  A few fields were annotated with fewer
    than three meanings and the column alignment of those notes has been
    lost, so the record kinds they apply to are marked as assumptions.
    """

    # --- identity -------------------------------------------------------
    item_id = scrapy.Field()  # randomly generated ID (all three kinds)
    content_type = scrapy.Field()  # q / a / c
    # question ID / related question ID / related question ID
    question_id = scrapy.Field()
    # answer ID / related answer ID
    # (only two meanings given; presumably answer and comment -- TODO confirm)
    answer_id = scrapy.Field()
    # comment ID
    # (only one meaning given; presumably comment records -- TODO confirm)
    comment_id = scrapy.Field()

    # --- titles and links -----------------------------------------------
    question_title = scrapy.Field()  # question title (all three kinds)
    question_link = scrapy.Field()  # question link (all three kinds)
    # answer link / answer link
    # (only two meanings given; presumably answer and comment -- TODO confirm)
    answer_link = scrapy.Field()

    # --- content --------------------------------------------------------
    content_text = scrapy.Field()  # content as text (all three kinds)
    content_html = scrapy.Field()  # content as HTML (all three kinds)
    image_urls = scrapy.Field()  # image links (all three kinds)

    # --- author ---------------------------------------------------------
    author_id = scrapy.Field()  # author ID (all three kinds)
    author_name = scrapy.Field()  # author name (all three kinds)
    author_link = scrapy.Field()  # author home page (all three kinds)

    # --- counters -------------------------------------------------------
    # "good question" count / agree count / like count
    upvote_num = scrapy.Field()
    # comment count / comment count
    # (only two meanings given; presumably question and answer -- TODO confirm)
    comment_num = scrapy.Field()

    # --- timestamps and extras ------------------------------------------
    date_created = scrapy.Field()  # publication timestamp (all three kinds)
    date_modified = scrapy.Field()  # modification timestamp (all three kinds)
    remarks = scrapy.Field()  # remarks (all three kinds)
    meta = scrapy.Field()  # metadata (all three kinds)


class MdcTwItem(scrapy.Item):
    """Item declaring author, post and content-image fields."""

    item_id = scrapy.Field()

    author_name = scrapy.Field()
    author_link = scrapy.Field()
    author_info = scrapy.Field()

    post_time = scrapy.Field()
    post_content = scrapy.Field()
    content_image_urls = scrapy.Field()
    post_title = scrapy.Field()
    post_id = scrapy.Field()


class EsItem(scrapy.Item):
    """Main-post / reply record whose fields all use the ``es_`` prefix."""

    es_sid = scrapy.Field()  # sid
    # ID generated from the post content, time, author, etc.
    es_hkey = scrapy.Field()
    # URL of the post content; for a reply the hkey is appended as well
    es_urlname = scrapy.Field()
    es_sitename = scrapy.Field()  # site name
    es_srcname = scrapy.Field()  # source site URL
    # Post title (the main post's title; replies reuse the main post's title)
    es_urltitle = scrapy.Field()
    # Main post ID; this field links replies to their main post
    es_urltopic = scrapy.Field()
    es_isrepost = scrapy.Field()  # 0 = main post; 1 = reply

    es_lasttime = scrapy.Field()  # update time
    es_loadtime = scrapy.Field()  # time the record was stored
    es_urltime = scrapy.Field()  # publication time

    es_authors = scrapy.Field()  # author
    es_urlimage = scrapy.Field()  # image content
    es_doclength = scrapy.Field()  # post length
    es_content = scrapy.Field()  # post content
    es_urlcontent = scrapy.Field()  # post content with HTML tags removed
    es_carriertype = scrapy.Field()  # assigned the value "forum"

    # Reply count; not collected here -- left for the business system to
    # query in real time and fill in
    es_commentcount = scrapy.Field()
    es_likecount = scrapy.Field()  # number of likes or upvotes
    es_attachment = scrapy.Field()  # attachment content