From 3b3a647fc749bad73b1e8289da5e2221e76a264a Mon Sep 17 00:00:00 2001
From: zhaoxiangpeng <1943364377@qq.com>
Date: Thu, 12 Mar 2026 14:08:02 +0800
Subject: [PATCH] =?UTF-8?q?wos:=E4=B8=80=E9=94=AE=E8=BF=90=E8=A1=8C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
NOTE(review): re-rolled from 3b3a647 -- starter_latest_by_record now closes
its DB connection and raises ValueError when the record is missing. The
post-image blob id on the "index" line still refers to the original revision.

 .../starter/crawl_article_latest.py           | 42 ++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/science_article_wos/starter/crawl_article_latest.py b/science_article_wos/starter/crawl_article_latest.py
index 960d91d..1d40914 100644
--- a/science_article_wos/starter/crawl_article_latest.py
+++ b/science_article_wos/starter/crawl_article_latest.py
@@ -112,7 +112,46 @@ def starter_latest_all():
         client.close()
 
     process = CrawlerProcess(get_project_settings())
-    f(True)
+    f(running=True)
+    process.start()
+    process.stop()
+
+
+def starter_latest_by_record(record_id: int):
+    """Run WosLatestIncrementSpider for a single task batch record.
+
+    Looks up ``task_batch_record`` row ``record_id`` (joined to its
+    ``task_search_strategy`` row, ``source_type=1`` only), attaches the
+    org ids/names bound to that query in ``relation_org_query``, and
+    hands the resulting dict to ``WosLatestIncrementSpider`` as
+    ``task_obj``.
+
+    :param record_id: ``task_batch_record.id`` of the batch to crawl.
+    :raises ValueError: if no matching batch record exists.
+    """
+    client: pymysql.Connection = get_connect()
+    try:
+        cursor = client.cursor(cursors.DictCursor)
+        cursor.execute(
+            'select b.id as task_id, q.id as query_id, q.content as content, b.task_condition as task_condition, q.source_type as source_type, b.is_done as is_done from task_batch_record as b join task_search_strategy as q on b.query_id=q.id where b.id=%s and q.source_type=1 limit 1',
+            (record_id,))
+        result = cursor.fetchone()
+        # fetchone() yields None when nothing matches; fail with a clear
+        # message instead of a TypeError on the subscript below.
+        if result is None:
+            raise ValueError(f'no task_batch_record with id={record_id} and source_type=1')
+        cursor.execute('select org_id, org_name from relation_org_query where query_id=%s', (result['query_id'],))
+        org_results: List[dict] = cursor.fetchall()
+    finally:
+        # Always release the connection, even when a query fails.
+        client.close()
+    result['org_id'] = [org_result['org_id'] for org_result in org_results]
+    result['org_name'] = [org_result['org_name'] for org_result in org_results]
+
+    process = CrawlerProcess(get_project_settings())
+    # One crawl needs no inlineCallbacks chain: schedule it directly and
+    # let process.start() below run the reactor until it finishes.
+    process.crawl(WosLatestIncrementSpider, task_obj=result)
     process.start()
     process.stop()
 
@@ -132,3 +171,4 @@ def starter():
 
 if __name__ == '__main__':
     starter_latest_all()
+    # starter_latest_by_record(8278)