change:org_id和org_name修改为list类型

main
zhaoxiangpeng 2 months ago
parent 7cda5cc406
commit 576260f52d

@ -35,9 +35,9 @@ class WosLatestIncrementSpider(scrapy.Spider):
def __init__(self, task_obj):
scrapy.Spider.__init__(self)
self.task_obj = task_obj
self.record_id = task_obj['id']
self.org_id = task_obj['org_id']
self.org_name = task_obj['org_name']
self.record_id = task_obj['task_id']
self.org_id = self.tolist(task_obj['org_id'])
self.org_name = self.tolist(task_obj['org_name'])
self.query_id = task_obj['query_id']
self.query_content = task_obj['content']
self.query_condition = task_obj['task_condition']
@ -45,6 +45,13 @@ class WosLatestIncrementSpider(scrapy.Spider):
self.first_page = task_obj.get('first_page', 1)
self._records_found = 0
@staticmethod
def tolist(datas) -> list:
    """Return the distinct items of ``datas`` as a list, in first-seen order.

    Args:
        datas: A ``list``, ``tuple`` or ``set`` of hashable values.

    Returns:
        A new list containing each distinct item once. For ``list`` and
        ``tuple`` input the items keep the order of their first occurrence;
        for ``set`` input the order is the set's own iteration order.

    Raises:
        TypeError: If ``datas`` is not a ``list``, ``tuple`` or ``set``
            (e.g. a bare ``str`` or ``int``), or if an item is unhashable.
    """
    # Guard clause: reject anything that is not one of the accepted
    # containers (the message text means "unsupported type").
    if not isinstance(datas, (list, tuple, set)):
        raise TypeError("不支持的类型:%s" % (type(datas)))
    # dict.fromkeys de-duplicates while keeping insertion order, whereas
    # list(set(...)) returns the items in an arbitrary order that can
    # differ between runs for strings.
    return list(dict.fromkeys(datas))
async def start(self):
full_query = self.query_content
if self.query_condition is not None:
@ -62,9 +69,9 @@ class WosLatestIncrementSpider(scrapy.Spider):
async def parse(self, response: JsonResponse, **kwargs):
meta = response.meta
request: scrapy.Request = response.request
task_query_id = self.query_id
task_org_id = self.org_id
task_record_id = self.record_id
task_query_id: int = self.query_id
task_org_id: list = self.org_id
task_record_id: int = self.record_id
if response.status != 200:
self.logger.warning("""
@ -97,7 +104,7 @@ class WosLatestIncrementSpider(scrapy.Spider):
relation_item = WosIdRelationItem()
relation_item['third_id'] = record.get("uid")
relation_item['query_ids'] = [task_query_id]
relation_item['school_ids'] = [task_org_id]
relation_item['school_ids'] = task_org_id
relation_item['task_ids'] = [task_record_id]
relation_item['updated_at'] = batch_time
yield relation_item

Loading…
Cancel
Save