Compare commits

..

No commits in common. '6e212752343e8274f6cc8a5863872d682030c9b1' and '2ebf883df7b513b00d8dcc9de29bb3b0fd10812f' have entirely different histories.

@ -22,5 +22,3 @@ class DoubanBookTop250Item(scrapy.Item):
rank_type = scrapy.Field() # 榜单类型 rank_type = scrapy.Field() # 榜单类型
rank_name = scrapy.Field() # 榜单名 rank_name = scrapy.Field() # 榜单名
ranking = scrapy.Field() # 排名 ranking = scrapy.Field() # 排名
book_link = scrapy.Field() # 图书链接
book_cover_link = scrapy.Field() # 图书封面链接

@ -33,11 +33,10 @@ DOWNLOAD_DELAY = 1
#TELNETCONSOLE_ENABLED = False #TELNETCONSOLE_ENABLED = False
# Override the default request headers: # Override the default request headers:
# Default request headers for the project: content negotiation (Accept,
# Accept-Language) plus a desktop-Chrome "user-agent" string.
# NOTE(review): the trailing `#...` fragment on each line looks like the
# commented-out counterpart of that line from the other side of the
# comparison -- confirm before removing.
DEFAULT_REQUEST_HEADERS = { #DEFAULT_REQUEST_HEADERS = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", # "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en", # "Accept-Language": "en",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36" #}
}
# Enable or disable spider middlewares # Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html # See https://docs.scrapy.org/en/latest/topics/spider-middleware.html

@ -0,0 +1,10 @@
import scrapy
class DouabnTop250Spider(scrapy.Spider):
    """Spider stub named ``douabn_top250`` targeting book.douban.com.

    NOTE(review): "douabn" reads like a transposition of "douban". The
    identifiers are left untouched here because the class name and the
    ``name`` attribute are the spider's public interface -- confirm
    before renaming.
    """

    name = "douabn_top250"
    allowed_domains = ["book.douban.com"]
    start_urls = ["https://book.douban.com"]

    def parse(self, response):
        """Accept ``response``; placeholder that does nothing and returns None."""
        return None

@ -1,10 +0,0 @@
import scrapy
class DoubanTop250Spider(scrapy.Spider):
    """Spider stub named ``douban_top250`` starting at the top250 listing.

    The single start URL carries ``start=0`` in its query string.
    """

    name = "douban_top250"
    allowed_domains = ["book.douban.com"]
    start_urls = ["https://book.douban.com/top250?start=0"]

    def parse(self, response):
        """Accept ``response``; placeholder that does nothing and returns None."""
        return None
Loading…
Cancel
Save