test:cnki测试

main
zhaoxiangpeng 1 week ago
parent a0a8d05c61
commit 752521c87c

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# @Time : 2026/1/13 14:54
# @Author : zhaoxiangpeng
# @File : test_item_exists.py
from pymongo import MongoClient
from pymongo.database import Database
from pymongo.collection import Collection
from science_article_cnki.db_utils.mongo import MongoDBUtils
from science_article_cnki.settings import MONGO_URI, MONGO_DATABASE
# Module-level MongoDB handles, created at import time from the project settings.
# `client` is built from MONGO_URI.
client: MongoClient = MongoClient(MONGO_URI)
# `db` is the database named by MONGO_DATABASE; test_item_exists reads from it.
db: Database = client[MONGO_DATABASE]
def test_item_exists():
    """Query ``data_cnki_article`` for a known ``third_id`` and print the result.

    Issues a single ``find_one`` with an ``$in`` filter on ``third_id`` and a
    projection that excludes ``_id`` and includes ``third_id``, then prints
    whatever ``find_one`` returns. Nothing is asserted; this is a manual
    smoke check against the configured database.
    """
    articles: Collection = db.get_collection('data_cnki_article')
    # The ``$in`` list holds one id here; the query shape accepts several.
    query = {"third_id": {"$in": ['SCJI202502004']}}
    wanted_fields = {"_id": 0, "third_id": 1}
    print(articles.find_one(filter=query, projection=wanted_fields))

@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
# @Time : 2026/1/13 16:08
# @Author : zhaoxiangpeng
# @File : test_more_so.py
from parsel import Selector
# English keys for the columns of the exported table. test_parser zips this list
# positionally with the cell text of each row, so the order here is significant.
# NOTE(review): 'volum' looks like a misspelling of 'volume' — confirm against any
# consumers of these keys before renaming.
TABLE_HEAD_EN = ['src_db', 'title', 'author', 'org', 'journal', 'keyword', 'abstract', 'pub_time', 'first_duty', 'fund', 'year', 'volum', 'issue', 'page', 'classification_code', 'issn', 'url', 'doi']
def test_parser():
    """Parse a CNKI export file as a markup table and print each record.

    The file carries an ``.xls`` extension but is read as UTF-8 text and
    handed to ``Selector``. The raw text is printed first. Every ``<tr>``
    after the first is then converted to a dict by pairing the stripped text
    of its ``<td>`` cells positionally with ``TABLE_HEAD_EN`` (``zip`` stops
    at the shorter of the two). Rows whose ``src_db`` value equals the header
    caption ``'SrcDatabase-来源库'`` are skipped; all others are printed.
    """
    # Raw string: the original plain literal contained the invalid escape
    # sequences "\c" and "\C", which trigger a warning at compile time. The
    # resulting path value is unchanged.
    with open(r'Y:\cnki-metadata\CNKI-20260112161602991.xls', encoding='utf-8') as f:
        html = f.read()
    print(html)

    selector = Selector(html)
    # rows[0] is skipped unconditionally; repeated header rows further down
    # are filtered by the ``src_db`` check below.
    for row in selector.xpath('//tr')[1:]:
        cells = [col.xpath('string(.)').get().strip() for col in row.xpath('./td')]
        # A separate name keeps the per-row dict from shadowing the file text.
        record = dict(zip(TABLE_HEAD_EN, cells))
        if record.get('src_db') == 'SrcDatabase-来源库':
            continue
        print(record)
Loading…
Cancel
Save