Compare commits

..

2 Commits

Author SHA1 Message Date
zhaoxiangpeng 6e21275234 add: item添加链接字段 2 weeks ago
zhaoxiangpeng 3bdb9289a5 add: 修改请求头配置 2 weeks ago

@ -22,3 +22,5 @@ class DoubanBookTop250Item(scrapy.Item):
rank_type = scrapy.Field() # type of the ranking list this entry came from
rank_name = scrapy.Field() # display name of the ranking list
ranking = scrapy.Field() # the book's position within the list
book_link = scrapy.Field() # URL of the book's detail page
book_cover_link = scrapy.Field() # URL of the book's cover image

@ -33,10 +33,11 @@ DOWNLOAD_DELAY = 1
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
#    "Accept-Language": "en",
#}
# Headers sent with every request; a browser-like User-Agent is set so the
# target site does not reject the default Scrapy identification.
DEFAULT_REQUEST_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en",
    # Canonical header casing ("User-Agent") to match the other keys; HTTP
    # treats header names case-insensitively, so behavior is unchanged.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html

@ -1,10 +0,0 @@
import scrapy


class DouabnTop250Spider(scrapy.Spider):
    """Stub spider for the Douban Books Top 250 site (parsing not implemented)."""

    name = "douabn_top250"
    allowed_domains = ["book.douban.com"]
    start_urls = ["https://book.douban.com"]

    def parse(self, response):
        """Placeholder callback; yields nothing yet."""
        return None

@ -0,0 +1,10 @@
import scrapy


class DoubanTop250Spider(scrapy.Spider):
    """Stub spider targeting the first page of the Douban Books Top 250 list."""

    name = "douban_top250"
    allowed_domains = ["book.douban.com"]
    start_urls = ["https://book.douban.com/top250?start=0"]

    def parse(self, response):
        """Placeholder callback; yields nothing yet."""
        return None
Loading…
Cancel
Save