From 3bdb9289a578d12397e2bef37e55b1ae8c8b6584 Mon Sep 17 00:00:00 2001 From: zhaoxiangpeng <1943364377@qq.com> Date: Tue, 19 Aug 2025 17:21:59 +0800 Subject: [PATCH] =?UTF-8?q?add:=20=E4=BF=AE=E6=94=B9=E8=AF=B7=E6=B1=82?= =?UTF-8?q?=E5=A4=B4=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- douban_book/douban_book/settings.py | 9 +++++---- douban_book/douban_book/spiders/douabn_top250.py | 10 ---------- douban_book/douban_book/spiders/douban_top250.py | 10 ++++++++++ 3 files changed, 15 insertions(+), 14 deletions(-) delete mode 100644 douban_book/douban_book/spiders/douabn_top250.py create mode 100644 douban_book/douban_book/spiders/douban_top250.py diff --git a/douban_book/douban_book/settings.py b/douban_book/douban_book/settings.py index e590050..bf07438 100644 --- a/douban_book/douban_book/settings.py +++ b/douban_book/douban_book/settings.py @@ -33,10 +33,11 @@ DOWNLOAD_DELAY = 1 #TELNETCONSOLE_ENABLED = False # Override the default request headers: -#DEFAULT_REQUEST_HEADERS = { -# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", -# "Accept-Language": "en", -#} +DEFAULT_REQUEST_HEADERS = { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en", + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36" +} # Enable or disable spider middlewares # See https://docs.scrapy.org/en/latest/topics/spider-middleware.html diff --git a/douban_book/douban_book/spiders/douabn_top250.py b/douban_book/douban_book/spiders/douabn_top250.py deleted file mode 100644 index 25019c8..0000000 --- a/douban_book/douban_book/spiders/douabn_top250.py +++ /dev/null @@ -1,10 +0,0 @@ -import scrapy - - -class DouabnTop250Spider(scrapy.Spider): - name = "douabn_top250" - allowed_domains = ["book.douban.com"] - start_urls = ["https://book.douban.com"] - - def parse(self, response): - pass diff --git a/douban_book/douban_book/spiders/douban_top250.py b/douban_book/douban_book/spiders/douban_top250.py new file mode 100644 index 0000000..95bcae3 --- /dev/null +++ b/douban_book/douban_book/spiders/douban_top250.py @@ -0,0 +1,10 @@ +import scrapy + + +class DoubanTop250Spider(scrapy.Spider): + name = "douban_top250" + allowed_domains = ["book.douban.com"] + start_urls = ["https://book.douban.com/top250?start=0"] + + def parse(self, response): + pass