You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
33 lines
791 B
Python
33 lines
791 B
Python
# -*- coding: utf-8 -*-
|
|
# @Time : 2025/7/25 13:39
|
|
# @Author : zhaoxiangpeng
|
|
# @File : run.py
|
|
import os
|
|
import re
|
|
from scrapy.crawler import CrawlerProcess
|
|
from scrapy.settings import Settings
|
|
from scrapy.utils.project import get_project_settings
|
|
# Spider module names look like ``org_news_<something>_lib``.
regex = re.compile(r'^(org_news_).*?(_lib)$')


def load_spider_script(path):
    """Collect the spider script names found directly inside ``path``.

    An entry qualifies when its name ends with ``.py`` and the name without
    that suffix matches the module-level ``regex``.

    Args:
        path: Directory whose entries are listed (not searched recursively).

    Returns:
        A sorted list of the matching names with the ``.py`` suffix removed.
        The list is empty when nothing matches.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``path`` cannot be
            listed (for example, it does not exist).
    """
    stems = (
        entry[:-3]  # drop the trailing ".py"
        for entry in os.listdir(path)
        if entry.endswith('.py')
    )
    # os.listdir yields entries in arbitrary order; sorting makes the result
    # reproducible across platforms and filesystems.
    return sorted(stem for stem in stems if regex.search(stem))
|
|
|
|
|
|
# Create the crawler process, configured from the Scrapy project settings.
process = CrawlerProcess(settings=get_project_settings())

# Only the 'org_news_xjtu_lib' spider is scheduled for this run; the
# equivalent call for 'org_news_sjtu_lib' is currently disabled.
process.crawl('org_news_xjtu_lib')

# Start crawling with the spider scheduled above.
process.start()
|