From f8da4997f48466611b0fe04000f1f0a1d8f53dd7 Mon Sep 17 00:00:00 2001 From: zhaoxiangpeng <1943364377@qq.com> Date: Tue, 19 Aug 2025 17:14:51 +0800 Subject: [PATCH] =?UTF-8?q?first=20commit:=E8=B1=86=E7=93=A3start?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- douban_book/README.md | 43 +++++++++++++++++++++++++++++++++++++++++++ douban_book/run.py | 4 ++++ 2 files changed, 47 insertions(+) create mode 100644 douban_book/README.md create mode 100644 douban_book/run.py diff --git a/douban_book/README.md b/douban_book/README.md new file mode 100644 index 0000000..72d01b6 --- /dev/null +++ b/douban_book/README.md @@ -0,0 +1,43 @@ +# 安装 +```shell +pip install scrapy +``` + +# 快速开始项目 + +## 创建项目 +```shell +scrapy startproject douban +``` + +## 创建爬虫 +```shell +scrapy genspider douban_top250 book.douban.com +``` + +## 运行爬虫 +```shell +scrapy crawl douban_top250 +``` + +## 运行多个爬虫 +在项目的根目录,`scrapy.cfg` 的同级目录新建 `.py` 文件 +```python +from scrapy.crawler import CrawlerProcess +from scrapy.utils.project import get_project_settings + +process = CrawlerProcess(get_project_settings()) +# 参数为爬虫的名字 +process.crawl('douban_***') +process.start() +``` + +## 导出数据 +```shell +scrapy crawl douban_top250 -o output.json +``` + +## 使用scrapy shell +```shell +scrapy shell "http://example.com" +``` diff --git a/douban_book/run.py b/douban_book/run.py new file mode 100644 index 0000000..f19f63f --- /dev/null +++ b/douban_book/run.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# @Time : 2025/8/19 17:13 +# @Author : zhaoxiangpeng +# @File : run.py