add: 豆瓣图书top250
parent
6e21275234
commit
601cc86af8
@ -0,0 +1,9 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# @Time : 2025/8/20 15:05
|
||||||
|
# @Author : zhaoxiangpeng
|
||||||
|
# @File : main.py
|
||||||
|
|
||||||
|
from scrapy.cmdline import execute
|
||||||
|
|
||||||
|
|
||||||
|
# Run the `douban_top250` spider through Scrapy's command-line entry point.
# The command is written exactly as it would be typed in a shell and then
# split on whitespace into the argv list that `execute` receives.
crawl_command = 'scrapy crawl douban_top250 -o data_douban_top250_3.csv'
execute(crawl_command.split())
|
@ -0,0 +1,4 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# @Time : 2025/8/20 9:29
|
||||||
|
# @Author : zhaoxiangpeng
|
||||||
|
# @File : __init__.py
|
@ -0,0 +1,51 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# @Time : 2025/8/20 9:30
|
||||||
|
# @Author : zhaoxiangpeng
|
||||||
|
# @File : book.py
|
||||||
|
|
||||||
|
# XPath expressions for the fields of a book detail page.
#
# Every expression is bound to two names: an upper-case pinyin constant and
# an English snake_case alias. Both names refer to the same string.
# Fields marked "[multiple]" select every matching node rather than one.

# Book title
# NOTE(review): the pinyin of "书名" is "shuming"; the constant name is kept
# as-is so existing references keep working.
SHUOMING_XPATH = book_name_xpath = '//*[@id="wrapper"]/h1/span/text()'

# Cover (the href of the link inside #mainpic)
FENGMIAN_XPATH = book_cover_xpath = '//*[@id="mainpic"]/a/@href'

# Author [multiple]
ZUOZHE_XPATH = author_xpath = '//span[contains(text(), "作者")]/following-sibling::a/text()'

# Publisher (first following link only)
CHUBANSHE_XPATH = press_xpath = '//span[contains(text(), "出版社")]/following-sibling::a[1]/text()'

# Producer [multiple]
# Fixed: the expression previously matched the author label ("作者") and wrote
# the axis as `following-sibling/a`, which XPath reads as a child element
# literally named "following-sibling" and therefore never matches anything.
# NOTE(review): with no positional predicate this selects every later sibling
# link of the label span; confirm against the page markup that links of
# subsequent fields are not picked up as well.
CHUPINFANG_XPATH = producer_xpath = '//span[contains(text(), "出品方")]/following-sibling::a/text()'

# Subtitle (first text node after the label span)
FUBIAOTI_XPATH = title_subhead_xpath = '//span[contains(text(), "副标题")]/following-sibling::text()[1]'

# Original title
YUANZUOMING_XPATH = book_name_original_xpath = '//span[contains(text(), "原作名")]/following-sibling::text()[1]'

# Translator [multiple]
YIZHE_XPATH = translator_xpath = '//span[contains(text(), "译者")]/following-sibling::a/text()'

# Publication year
CHUBANNIAN_XPATH = publish_year_xpath = '//span[contains(text(), "出版年")]/following-sibling::text()[1]'

# Page count
YESHU_XPATH = pages_xpath = '//span[contains(text(), "页数")]/following-sibling::text()[1]'

# List price
DINGJIA_XPATH = price_xpath = '//span[contains(text(), "定价")]/following-sibling::text()[1]'

# Binding
ZHUANGZHEN_XPATH = binding_xpath = '//span[contains(text(), "装帧")]/following-sibling::text()[1]'

# Series (first following link only)
CONGSHU_XPATH = series_xpath = '//span[contains(text(), "丛书")]/following-sibling::a[1]/text()'

# Rating
PINGFEN_XPATH = rating_nums_xpath = '//*[@id="interest_sectl"]/div/div[2]/strong/text()'

# ISBN
ISBN_XPATH = isbn_xpath = '//span[contains(text(), "ISBN")]/following-sibling::text()[1]'

# Content summary: union (`|`) of three alternative locations.
NEIRONGJIANJIE_XPATH = book_summery_xpath = (
    '//span[contains(text(), "内容简介")]/parent::*/following-sibling::div[1]/span[2]//text()'
    ' | //*[@class="all hidden"]/div/div//text()'
    ' | //*[@id="content"]/div/div[1]/div[3]/div[1]/div/div//text()'
)

# Table of contents: union (`|`) of two alternative locations.
MULU_XPATH = book_catalog_xpath = (
    '//span[contains(text(), "目录")]/parent::*/following-sibling::div[2]//text()'
    ' | //*[@class="related_info"]/div[5]/text()'
)

# About the author
ZUOZHEJIANJIE_XPATH = author_summery_xpath = '//span[contains(text(), "作者简介")]/parent::*/following-sibling::div[1]//text()'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE(review): this looks like a leftover scratch template for a
# multi-valued "label span followed by links" field; confirm whether it is
# still needed.
# The same assignment used to be repeated four times; a single assignment
# leaves the name bound to the same final value.
# Fixed: the sibling axis must be written `following-sibling::a`. Without the
# `::` separator XPath reads the step as a child element literally named
# "following-sibling", so the expression never matches.
XPATH = '//span[contains(text(), "作者")]/following-sibling::a/text()'  # XPATH [multiple]
|
Loading…
Reference in New Issue