# -*- coding: utf-8 -*-
# @Time : 2025/7/25 13:39
# @Author : zhaoxiangpeng
# @File : run.py
import os
import re

from scrapy.crawler import CrawlerProcess
from scrapy.settings import Settings
from scrapy.utils.project import get_project_settings

# Matches module names that start with "org_news_" and end with "_lib".
regex = re.compile(r'^(org_news_).*?(_lib)$')


def load_spider_script(path):
    """Return the names of the spider scripts found directly in *path*.

    Only entries whose file name ends with ``.py`` are considered. The
    extension is stripped and the remaining name is kept when it matches
    the module-level ``regex`` pattern. Names are returned in the order
    ``os.listdir`` yields them.
    """
    suffix = '.py'
    # Drop the extension from every Python file name in the directory.
    module_names = (
        entry[:-len(suffix)]
        for entry in os.listdir(path)
        if entry.endswith(suffix)
    )
    return [name for name in module_names if regex.search(name) is not None]


# Build the crawler process from the project settings, schedule one spider
# by name, and start the process.
process = CrawlerProcess(get_project_settings())
# process.crawl('org_news_sjtu_lib')
process.crawl('org_news_xjtu_lib')
process.start()