diff --git a/douban_book/README.md b/douban_book/README.md new file mode 100644 index 0000000..72d01b6 --- /dev/null +++ b/douban_book/README.md @@ -0,0 +1,43 @@ +# 安装 +```shell +pip install scrapy +``` + +# 快速开始项目 + +## 创建项目 +```shell +scrapy startproject douban +``` + +## 创建爬虫 +```shell +scrapy genspider douban_top250 book.douban.com +``` + +## 运行爬虫 +```shell +scrapy crawl douban_top250 +``` + +## 运行多个爬虫 +在项目根目录(与 `scrapy.cfg` 同级)新建一个 `.py` 文件,内容如下: +```python +from scrapy.crawler import CrawlerProcess +from scrapy.utils.project import get_project_settings + +process = CrawlerProcess(get_project_settings()) +# 参数为爬虫的名字 +process.crawl('douban_***') +process.start() +``` + +## 导出数据 +```shell +scrapy crawl douban_top250 -o output.json +``` + +## 使用 scrapy shell +```shell +scrapy shell "http://example.com" +``` diff --git a/douban_book/run.py b/douban_book/run.py new file mode 100644 index 0000000..f19f63f --- /dev/null +++ b/douban_book/run.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# @Time : 2025/8/19 17:13 +# @Author : zhaoxiangpeng +# @File : run.py