Compare commits

...

2 Commits

Author SHA1 Message Date
zhaoxiangpeng 6e21275234 add: item添加链接字段 2 weeks ago
zhaoxiangpeng 3bdb9289a5 add: 修改请求头配置 2 weeks ago

@ -22,3 +22,5 @@ class DoubanBookTop250Item(scrapy.Item):
rank_type = scrapy.Field() # 榜单类型 rank_type = scrapy.Field() # 榜单类型
rank_name = scrapy.Field() # 榜单名 rank_name = scrapy.Field() # 榜单名
ranking = scrapy.Field() # 排名 ranking = scrapy.Field() # 排名
book_link = scrapy.Field() # 图书链接
book_cover_link = scrapy.Field() # 图书封面链接

@ -33,10 +33,11 @@ DOWNLOAD_DELAY = 1
#TELNETCONSOLE_ENABLED = False #TELNETCONSOLE_ENABLED = False
# Override the default request headers: # Override the default request headers:
#DEFAULT_REQUEST_HEADERS = { DEFAULT_REQUEST_HEADERS = {
# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
# "Accept-Language": "en", "Accept-Language": "en",
#} "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
}
# Enable or disable spider middlewares # Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html # See https://docs.scrapy.org/en/latest/topics/spider-middleware.html

@ -1,10 +0,0 @@
import scrapy
class DouabnTop250Spider(scrapy.Spider):
    """Placeholder spider restricted to ``book.douban.com``.

    Crawling starts at the site root and ``parse`` does nothing with the
    response yet.
    """

    # NOTE(review): "douabn" looks like a transposition of "douban" (compare
    # the domain below) -- confirm before relying on this spider name.
    name = "douabn_top250"
    allowed_domains = ["book.douban.com"]
    start_urls = ["https://book.douban.com"]

    def parse(self, response):
        """Handle a downloaded page; currently a no-op that returns None."""

@ -0,0 +1,10 @@
import scrapy
class DoubanTop250Spider(scrapy.Spider):
    """Spider skeleton for the ``/top250`` listing on ``book.douban.com``.

    The single start URL requests the listing with ``start=0``. ``parse`` is
    still a stub, so no items or follow-up requests are produced.
    """

    name = "douban_top250"
    allowed_domains = ["book.douban.com"]
    start_urls = ["https://book.douban.com/top250?start=0"]

    def parse(self, response):
        """Handle a downloaded page; currently a no-op that returns None."""
Loading…
Cancel
Save