test:cnki测试
parent
a0a8d05c61
commit
752521c87c
@ -0,0 +1,20 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 2026/1/13 14:54
|
||||
# @Author : zhaoxiangpeng
|
||||
# @File : test_item_exists.py
|
||||
|
||||
from pymongo import MongoClient
|
||||
from pymongo.database import Database
|
||||
from pymongo.collection import Collection
|
||||
from science_article_cnki.db_utils.mongo import MongoDBUtils
|
||||
from science_article_cnki.settings import MONGO_URI, MONGO_DATABASE
|
||||
|
||||
# Module-level MongoDB handles shared by the tests in this file.
# The connection URI and database name come from the project settings.
client: MongoClient = MongoClient(MONGO_URI)
db: Database = client.get_database(MONGO_DATABASE)
|
||||
|
||||
|
||||
def test_item_exists():
    """Query ``data_cnki_article`` for one known ``third_id`` and print the result.

    The query uses ``$in`` with a single-element list, and the projection
    drops ``_id`` and keeps only ``third_id``. Whatever ``find_one`` returns
    is printed as-is; nothing is asserted.
    """
    articles: Collection = db.get_collection('data_cnki_article')
    query = {"third_id": {"$in": ['SCJI202502004']}}
    wanted_fields = {"_id": 0, "third_id": 1}
    print(articles.find_one(filter=query, projection=wanted_fields))
|
||||
|
||||
@ -0,0 +1,26 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 2026/1/13 16:08
|
||||
# @Author : zhaoxiangpeng
|
||||
# @File : test_more_so.py
|
||||
|
||||
from parsel import Selector
|
||||
|
||||
# English keys assigned, in order, to the cells of each parsed table row.
# NOTE(review): 'volum' looks like a misspelling of 'volume', but it is a
# runtime dictionary key, so it is kept as-is — confirm before renaming.
TABLE_HEAD_EN = [
    'src_db',
    'title',
    'author',
    'org',
    'journal',
    'keyword',
    'abstract',
    'pub_time',
    'first_duty',
    'fund',
    'year',
    'volum',
    'issue',
    'page',
    'classification_code',
    'issn',
    'url',
    'doi',
]
|
||||
|
||||
|
||||
def test_parser():
    """Parse a CNKI export file and print each table row as a dict.

    The file is read as UTF-8 text, printed in full, and handed to
    ``parsel.Selector``. Every ``<tr>`` after the first is converted to a
    dict by zipping ``TABLE_HEAD_EN`` with the stripped text of its ``<td>``
    cells. Rows whose ``src_db`` value equals the literal header label
    ``SrcDatabase-来源库`` are skipped; all other rows are printed.
    """
    # Raw string: the Windows path contains backslashes that would otherwise
    # be read as (invalid) escape sequences and trigger a SyntaxWarning.
    with open(r'Y:\cnki-metadata\CNKI-20260112161602991.xls', encoding='utf-8') as f:
        raw_markup = f.read()
    print(raw_markup)

    selector = Selector(raw_markup)
    rows = selector.xpath(r'//tr')
    # Skip the first <tr>; it is not treated as a data row.
    for row in rows[1:]:
        # string(.) concatenates all descendant text of the cell.
        cell_texts = [
            col.xpath('string(.)').get().strip()
            for col in row.xpath('./td')
        ]
        # zip() truncates to the shorter side, so extra cells or keys are dropped.
        record = dict(zip(TABLE_HEAD_EN, cell_texts))
        # A repeated header row can appear among the data rows; ignore it.
        if record.get('src_db') == 'SrcDatabase-来源库':
            continue
        print(record)
|
||||
Loading…
Reference in New Issue