From 752521c87c37b56e16a24680c39c867097f66a2d Mon Sep 17 00:00:00 2001
From: zhaoxiangpeng <1943364377@qq.com>
Date: Thu, 12 Mar 2026 14:11:33 +0800
Subject: [PATCH] =?UTF-8?q?test:cnki=E6=B5=8B=E8=AF=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../tests/test_item_exists.py                 | 20 ++++++++++++++
 science_article_cnki/tests/test_more_so.py    | 26 +++++++++++++++++++
 2 files changed, 46 insertions(+)
 create mode 100644 science_article_cnki/tests/test_item_exists.py
 create mode 100644 science_article_cnki/tests/test_more_so.py

diff --git a/science_article_cnki/tests/test_item_exists.py b/science_article_cnki/tests/test_item_exists.py
new file mode 100644
index 0000000..006ec92
--- /dev/null
+++ b/science_article_cnki/tests/test_item_exists.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2026/1/13 14:54
+# @Author  : zhaoxiangpeng
+# @File    : test_item_exists.py
+
+from pymongo import MongoClient
+from pymongo.database import Database
+from pymongo.collection import Collection
+from science_article_cnki.db_utils.mongo import MongoDBUtils
+from science_article_cnki.settings import MONGO_URI, MONGO_DATABASE
+
+# Shared handles for this module, built from the project settings at import time.
+client: MongoClient = MongoClient(MONGO_URI)
+db: Database = client[MONGO_DATABASE]
+
+
+def test_item_exists():
+    """Look up one article by ``third_id`` and print the projected document.
+
+    ``find_one`` returns ``None`` when nothing matches, so the printed value is
+    either ``None`` or a dict holding only ``third_id``.
+    """
+    articles: Collection = db.get_collection('data_cnki_article')
+    wanted_ids = ['SCJI202502004']
+    query = {"third_id": {"$in": wanted_ids}}
+    fields = {"_id": 0, "third_id": 1}
+    match = articles.find_one(filter=query, projection=fields)
+    print(match)
+
diff --git a/science_article_cnki/tests/test_more_so.py b/science_article_cnki/tests/test_more_so.py
new file mode 100644
index 0000000..7743025
--- /dev/null
+++ b/science_article_cnki/tests/test_more_so.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2026/1/13 16:08
+# @Author  : zhaoxiangpeng
+# @File    : test_more_so.py
+
+from parsel import Selector
+
+TABLE_HEAD_EN = ['src_db', 'title', 'author', 'org', 'journal', 'keyword', 'abstract', 'pub_time', 'first_duty', 'fund', 'year', 'volum', 'issue', 'page', 'classification_code', 'issn', 'url', 'doi']
+
+
+def test_parser():
+    """Parse a CNKI export file as markup and print each row as a dict.
+
+    The file is read as UTF-8 text and queried for ``<tr>``/``<td>`` elements;
+    every row after the first becomes a dict keyed by ``TABLE_HEAD_EN``.
+    """
+    # Raw string: ``\c`` and ``\C`` are not valid escape sequences, so a plain
+    # literal triggers an invalid-escape warning on current Python versions.
+    with open(r'Y:\cnki-metadata\CNKI-20260112161602991.xls', encoding='utf-8') as f:
+        html = f.read()
+    print(html)
+    selector = Selector(html)
+    # The first <tr> is skipped unconditionally.
+    for row in selector.xpath('//tr')[1:]:
+        cells = [col.xpath('string(.)').get().strip() for col in row.xpath('./td')]
+        # zip() stops at the shorter side, so surplus cells or headers are dropped.
+        record = dict(zip(TABLE_HEAD_EN, cells))
+        # Presumably a repeated header row inside the export; it is not printed.
+        if record.get('src_db') == 'SrcDatabase-来源库':
+            continue
+        print(record)