# -*- coding: utf-8 -*-
# @Time    : 2025/7/25 13:39
# @Author  : zhaoxiangpeng
# @File    : run.py
import os
import re
from scrapy.crawler import CrawlerProcess
from scrapy.settings import Settings
from scrapy.utils.project import get_project_settings
# Spider-name filter: the name must start with ``org_news_`` and end with ``_lib``.
regex = re.compile(r'^(org_news_).*?(_lib)$')


def load_spider_script(path):
    """
    Collect the names of the spider scripts found directly inside ``path``.

    A directory entry is selected when its file name ends with ``.py`` and
    the name with that suffix removed matches the module-level ``regex``.
    Sub-directories are not descended into.

    :param path: directory to scan.
    :return: list of matching names (without the ``.py`` suffix), sorted
        alphabetically so the result is the same on every platform.
    :raises OSError: propagated from ``os.listdir`` when ``path`` cannot be
        listed (for example, it does not exist).
    """
    # Strip the ".py" suffix up front so the regex only ever sees bare names.
    candidates = (
        entry[:-3] for entry in os.listdir(path) if entry.endswith('.py')
    )
    # os.listdir yields entries in arbitrary order; sort for reproducible output.
    return sorted(name for name in candidates if regex.search(name))


# Build one crawler process configured from the Scrapy project settings.
project_settings = get_project_settings()
process = CrawlerProcess(project_settings)

# Schedule the spider to run by its name. The other spider is kept here,
# disabled, so it can be switched in by hand.
# process.crawl('org_news_sjtu_lib')
process.crawl('org_news_xjtu_lib')

# Kick off the scheduled crawl (per the Scrapy docs this call blocks until
# crawling has finished).
process.start()