You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

33 lines
791 B
Python

# -*- coding: utf-8 -*-
# @Time : 2025/7/25 13:39
# @Author : zhaoxiangpeng
# @File : run.py
import os
import re
from scrapy.crawler import CrawlerProcess
from scrapy.settings import Settings
from scrapy.utils.project import get_project_settings
# Module names of interest have the shape ``org_news_<anything>_lib``.
regex = re.compile(r'^(org_news_).*?(_lib)$')


def load_spider_script(path):
    """Return the names of the spider scripts found directly in ``path``.

    A directory entry is selected when it ends with ``.py`` and its name
    without that suffix matches the module-level ``regex``
    (``org_news_..._lib``). The returned values are file stems.
    NOTE(review): presumably each stem equals the ``name`` of the spider
    defined in that file -- confirm before passing them to ``crawl()``.

    Args:
        path: Directory to scan. Only its immediate entries are examined;
            sub-directories are not walked.

    Returns:
        A list of matching names (``.py`` suffix removed) in ascending
        order. The list is empty when nothing matches.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``path`` cannot be
            listed (missing, not a directory, permission denied, ...).
    """
    # Strip the 3-character ".py" suffix to get the bare module name.
    stems = (entry[:-3] for entry in os.listdir(path) if entry.endswith('.py'))
    # os.listdir() yields entries in arbitrary order, so sort to give
    # callers a stable, reproducible sequence.
    return sorted(stem for stem in stems if regex.search(stem))
def main(spider_name='org_news_xjtu_lib'):
    """Run a single spider of this Scrapy project and wait for it to finish.

    Args:
        spider_name: Name of the spider to schedule. Defaults to
            ``'org_news_xjtu_lib'``; ``'org_news_sjtu_lib'`` is another
            spider name that has been used with this script.
    """
    # Build the crawler process from the project's own settings.
    process = CrawlerProcess(get_project_settings())
    process.crawl(spider_name)
    # Starts the crawl; per the Scrapy docs this call blocks until all
    # scheduled crawls have finished.
    process.start()


# Only crawl when executed as a script, so that importing this module
# does not start a blocking crawl as a side effect.
if __name__ == '__main__':
    main()