|
|
# -*- coding: utf-8 -*-
|
|
|
# @Time : 2023/7/13 9:40
|
|
|
# @Author : zhaoxiangpeng
|
|
|
# @File : model.py
|
|
|
|
|
|
import json
|
|
|
import enum
|
|
|
import warnings
|
|
|
from typing import List, Tuple, Any, Dict, Union
|
|
|
from urllib.parse import urlencode
|
|
|
|
|
|
from science_article_wos.configs import wos as config
|
|
|
|
|
|
# JSON-style spellings of the Python singletons, used by the payload builders
# in this module. Presumably this lets request bodies copied from the browser
# be pasted in unchanged -- TODO confirm.
false, true, null = False, True, None
|
|
|
|
|
|
|
|
|
@enum.unique
class WosDB(enum.Enum):
    """Numeric database ids; the member *name* is what gets sent as ``databaseId``."""

    WOS = 1
    CSCD = 2
|
|
|
|
|
|
|
|
|
class AnalyzesEnum(enum.Enum):
    """
    Per-product lists of ``analyzes`` entries.

    The member name is the product code; ``make_advanced_search_ut`` looks the
    member up by that code and sends its value as ``retrieve.analyzes``.
    """

    WOSCC = [
        "TP.Value.6",
        "REVIEW.Value.6",
        "EARLY ACCESS.Value.6",
        "OA.Value.6",
        "DR.Value.6",
        "ECR.Value.6",
        "PY.Field_D.6",
        "DT.Value.6",
        "AU.Value.6",
        "DX2NG.Value.6",
        "PEERREVIEW.Value.6",
    ]
    CSCD = [
        "TP.Value.6",
        "DR.Value.6",
        "OA.Value.6",
        "PY.Field_D.6",
        "DT.Value.6",
        "SJ.Value.6",
        "AU.Value.6",
        "OG.Value.6",
        "SO.Value.6",
    ]
|
|
|
|
|
|
|
|
|
# Maps the short collection alias accepted by the builders (``col_name``)
# to the product code used in the request payload.
ColNameMap = {
    'WOS': 'WOSCC',
    'CSCD': 'CSCD',
}
|
|
|
|
|
|
|
|
|
def calculate_next_page(next_page: int = 1, page_size: int = 100):
    """
    Compute the cursor of a page, i.e. the sequence number of its first record.

    :param next_page: page number of the page to fetch
    :param page_size: number of records per page
    :return: ``(next_page - 1) * page_size + 1`` (page 1 starts at record 1)
    """
    records_before_page = page_size * (next_page - 1)
    return records_before_page + 1
|
|
|
|
|
|
|
|
|
def lite_base_model(usr_query: str, db_id: int = None, first_record: int = 1, page_size: int = 100, **kwargs):
    """
    Build the parameter dict for a query request (not URL-encoded).

    :param usr_query: query expression, sent as ``usrQuery``
    :param db_id: value of a ``WosDB`` member; ``None`` falls back to 1 (``WosDB.WOS``)
    :param first_record: sequence number of the first record to return; a
        warning is emitted when it lies outside 1 ~ 100000
    :param page_size: number of records per page, sent as ``count``
    :param kwargs: accepted for call-site compatibility and ignored
    :return: dict with ``databaseId``, ``firstRecord``, ``count`` and ``usrQuery``
    :raises ValueError: if ``db_id`` is not a valid ``WosDB`` value
    """
    if db_id is None:
        db_id = 1
    # The message promises a 1 ~ 100000 range, so check both bounds
    # (previously only the upper bound was checked).
    if not 1 <= first_record <= 100000:
        warnings.warn('first_record 必须在 1 ~ 100000 之间')
    return {
        'databaseId': WosDB(db_id).name,
        'firstRecord': first_record,
        'count': page_size,
        'usrQuery': usr_query,
    }
|
|
|
|
|
|
|
|
|
def lite_query_model(db_id: int = None, first_record: int = 1, page_size: int = 100, **kwargs):
    """
    Build the URL-encoded paging parameters for a request.

    :param db_id: value of a ``WosDB`` member; ``None`` falls back to 1
    :param first_record: sequence number of the first record to return
    :param page_size: number of records per page, sent as ``count``
    :param kwargs: accepted and ignored
    :return: query string with ``databaseId``, ``firstRecord`` and ``count``
    """
    database = WosDB(1 if db_id is None else db_id)
    pairs = [
        ('databaseId', database.name),
        ('firstRecord', first_record),
        ('count', page_size),
    ]
    return urlencode(pairs)
|
|
|
|
|
|
|
|
|
def starter_documents_uid_get(uid, detail: str = None):
    """
    Build the query parameters for fetching a single document by its uid.

    :param uid: document identifier; it is not part of the query parameters
        (presumably it goes into the URL path -- TODO confirm against the caller)
    :param detail: optional detail level; only added when not ``None``
    :return: list of ``(name, value)`` query-parameter tuples (may be empty)
    """
    _query_params: List[Tuple[str, str]] = []
    if detail is not None:
        _query_params.append(("detail", detail))
    # The list used to be built and then dropped, so callers always got None.
    return _query_params
|
|
|
|
|
|
|
|
|
def starter_documents_get(q, db: Union[WosDB, str] = WosDB.WOS.name, limit: int = config.WOS_STARTER_PER_PAGE_LIMIT, page: int = 1, sort_field: str = None,
                          modified_time_span=None, tc_modified_time_span=None, detail=None, **kwargs):
    """
    Build the query parameters for a document search request.

    :param q: query expression
    :param db: database to search, either a ``WosDB`` member or its name
    :param limit: page size, at most 50
    :param page: page number; with a limit of 50 the range is 1~2000, i.e. at
        most 100,000 records
    :param sort_field: optional sort order, sent as ``sortField``
    :param modified_time_span: optional filter, sent as ``modifiedTimeSpan``
    :param tc_modified_time_span: optional filter, sent as ``tcModifiedTimeSpan``
    :param detail: full records by default; ``short`` returns fewer fields
        (uid, links{record,citingArticles,references,related},
        citations[{db,count}], identifiers{doi,issn})
    :param kwargs: accepted and ignored
    :return: list of ``(name, value)`` query-parameter tuples
    """
    # Accept the enum member as well as its name.
    if isinstance(db, WosDB):
        db = db.name

    _query_params: List[Tuple[str, Any]] = [("q", q)]
    if db is not None:
        _query_params.append(("db", db))
    _query_params.append(("limit", limit))
    _query_params.append(("page", page))

    # Optional parameters are only sent when the caller supplied them; the
    # first three used to be accepted and then silently discarded.
    optional_params = (
        ("sortField", sort_field),
        ("modifiedTimeSpan", modified_time_span),
        ("tcModifiedTimeSpan", tc_modified_time_span),
        ("detail", detail),
    )
    _query_params.extend((name, value) for name, value in optional_params if value is not None)
    return _query_params
|
|
|
|
|
|
|
|
|
def make_advanced_search_ut(query: str = None, wos_ids: List = None, limit: int = 50, col_name: str = "WOS") -> Dict[
        str, Any]:
    """
    Build the search payload for an advanced search.

    :param query: ready-made query text; when ``None`` it is built from ``wos_ids``
    :param wos_ids: record ids, joined as ``UT=(id) OR UT=(id) ...``
    :param limit: number of records to retrieve, sent as ``retrieve.count``
    :param col_name: key of ``ColNameMap`` selecting the product
    :return: request payload
    :raises ValueError: if both ``query`` and ``wos_ids`` are ``None``
    :raises KeyError: if ``col_name`` is not a key of ``ColNameMap``
    """
    if query is None and wos_ids is None:
        raise ValueError('query 和 wos_ids 必须满足其中一个不为None')
    if query is None:
        query = ' OR '.join(f'UT=({wos_id})' for wos_id in wos_ids)

    # Resolve the product code from the caller-facing alias.
    product = ColNameMap[col_name]

    search_section = {
        "mode": "general",
        "database": product,
        "query": [{"rowText": query}],
        "sets": [],
        "options": {"lemmatize": "On"},
    }
    retrieve_section = {
        "count": limit,
        "history": True,
        "jcr": True,
        "sort": "relevance",
        "analyzes": getattr(AnalyzesEnum, product).value,
    }
    return {
        "product": product,
        "searchMode": "general",
        "viewType": "search",
        "serviceMode": "summary",
        "search": search_section,
        "retrieve": retrieve_section,
        "eventMode": None,
        "isPreprintReview": False,
    }
|
|
|
|
|
|
|
|
|
def export_search_data_to_txt(
        q_id: str,
        mark_from: int = 1,
        mark_to: int = 500,
        col_name: str = "WOS",
        filters: str = config.DEFAULT_EXPORT_RECORD_FILTER
) -> Dict[str, Any]:
    """
    Build the payload for exporting records of a previous search.

    :param q_id: id of the search result obtained from a query
    :param mark_from: first record to export, inclusive
    :param mark_to: last record to export, inclusive; clamped so that at most
        500 records are requested
    :param col_name: source database / core collection
    :param filters: fullRecord (full record) / fullRecordPlus (full record and
        cited references)
    :return: request payload
    """
    max_records = 500
    # Both bounds are inclusive, so the record count is mark_to - mark_from + 1.
    # The previous check (difference > 500) let a 501-record range through.
    if mark_to - mark_from + 1 > max_records:
        mark_to = mark_from + max_records - 1

    model = {
        "parentQid": q_id,
        "sortBy": "relevance",
        "displayTimesCited": "true",
        "displayCitedRefs": "true",
        "product": "UA",
        "colName": col_name,
        "displayUsageInfo": "true",
        "fileOpt": "othersoftware",
        "action": "saveToTab",
        "markFrom": str(mark_from),
        "markTo": str(mark_to),
        "view": "summary",
        "isRefQuery": "false",
        "locale": "zh_CN",
        "filters": filters,
    }
    return model
|
|
|
|
|
|
|
|
|
def article_detail_model(uts: Union[List[str], str], core: str = "WOSCC"):
    """
    Build the payload for fetching record details by UT.

    Detail page https://webofscience.clarivate.cn/wos/woscc/full-record/{wos_id}
    Endpoint https://webofscience.clarivate.cn/api/wosnx/core/runQuerySearch

    :param uts: a single UT or an iterable of UTs; a single string is wrapped
        in a list and any other iterable is copied into a new list
    :param core: product / database code used for ``product`` and ``search.database``
    :return: request payload
    """
    # Copy into a fresh list: tuples and other iterables are accepted and the
    # payload never aliases the caller's list.
    uts = [uts] if isinstance(uts, str) else list(uts)

    secondary_retrieve = {
        "associated_data": {"sort": "relevance", "count": 10},
        "cited_references": {"sort": "author-ascending", "count": 30},
        "citing_article": {"sort": "date", "count": 2, "links": None, "view": "mini"},
        "cited_references_with_context": {"sort": "date", "count": 135, "view": "mini"},
        "recommendation_articles": {
            "sort": "recommendation-relevance", "count": 5, "links": None, "view": "mini",
        },
        "grants_to_wos_records": {
            "sort": "date-descending", "count": 30, "links": None, "view": "mini",
        },
    }
    model = {
        "eventMode": None,
        "isPreprintReview": False,
        "product": core,
        "retrieve": {
            "first": 1,
            "links": "retrieve",
            "sort": "relevance",
            "count": 1,
            "view": "super",
            "coll": None,
            "activity": False,
            "analyzes": None,
            "jcr": True,
            "reviews": True,
            "highlight": None,
            "secondaryRetrieve": secondary_retrieve,
        },
        "search": {
            "database": core,
            "mode": "record_ids",
            "uts": uts,
        },
        "searchMode": "record_ids",
        "viewType": "search",
        "serviceMode": "summary",
    }
    return model
|
|
|
|
|
|
|
|
|
# Payload builder dedicated to "cited by" (citing articles) queries
def get_wos_core_cites(
        uts_or_qid: str,
        year_range: tuple = None,
        core: str = "WOSCC",
        parent_db: str = "WOSCC",
        is_refine: bool = False
):
    """
    Build the payload for a citing-articles search or for refining one.

    https://webofscience.clarivate.cn/api/wosnx/core/runQuerySearch

    :param uts_or_qid: for a plain search, the value sent as ``search.parentId``;
        for a refine request, the value sent as ``qid``
    :param year_range: year range to filter on; the first and last items are
        treated as inclusive bounds, so ``(2020, 2023)`` selects 2020..2023 and
        ``(2021,)`` selects 2021 only. Supplying it forces a refine request.
    :param core: database to search
    :param parent_db: database of the parent record, sent as ``search.parentDatabase``
    :param is_refine: whether this is a refine request
    :return: request payload
    """
    refines = []
    if year_range:
        # Inclusive bounds, tolerant of a single year and of reversed order.
        # The old ``range(*year_range)`` expansion turned a 1-tuple into every
        # year from 0 upwards.
        first_year, last_year = sorted((year_range[0], year_range[-1]))
        refines.append({
            "index": "PY",
            "value": [str(year) for year in range(first_year, last_year + 1)],
        })
        is_refine = True

    if is_refine:
        # A refine request addresses an existing result set by qid, so it
        # carries neither "search" nor "isPreprintReview".
        model = {
            "eventMode": None,
            "product": core,
            "searchMode": "citing_article",
            "serviceMode": "summary",
            "viewType": "refine",
        }
        if refines:
            model["refines"] = refines
        model["qid"] = uts_or_qid
        return model

    return {
        "eventMode": None,
        "isPreprintReview": False,
        "product": core,
        "search": {
            "database": core,
            "mode": "citing_article",
            "parentDatabase": parent_db,
            "parentDoc": None,
            "parentId": {"type": "colluid", "value": uts_or_qid},
            "parentQid": None,
            "parentSort": None,
        },
        "searchMode": "citing_article",
        "serviceMode": "summary",
        "viewType": "search",
    }
|
|
|
|
|
|
|
|
|
def get_aggregation_wos_cited(q_id: str, core: str = "WOSCC"):
    """
    Build the payload for the per-core aggregation of citations.

    https://webofscience.clarivate.cn/api/wosnx/core/runQueryGetRecordsStream

    :param q_id: query id, sent as ``qid``
    :param core: product code, sent as ``product``
    :return: request payload
    """
    retrieve_section = {"analyzes": ["EDN.Value.200"]}
    return dict(
        product=core,
        qid=q_id,
        retrieve=retrieve_section,
        searchMode="citing_article",
        viewType="records",
    )
|
|
|
|
|
|
|
|
|
def get_refine_count(q_id: str, count: int = 5):
    """
    Build the refine payload restricting a result set to three editions.

    The refine filters on index ``EDN`` with the values ``WOS.SCI``,
    ``WOS.SSCI`` and ``WOS.AHCI``.

    :param q_id: query id of the result set to refine, sent as ``qid``
    :param count: currently unused -- no ``retrieve`` section is sent; kept so
        existing call sites keep working
    :return: request payload
    """
    return {
        "eventMode": None,
        "product": "WOSCC",
        "qid": q_id,
        "refines": [
            {"index": "EDN", "value": ["WOS.SCI", "WOS.SSCI", "WOS.AHCI"]},
        ],
        "searchMode": "citing_article",
        "serviceMode": "summary",
        "viewType": "refine",
    }
|
|
|
|
|
|
|
|
|
def get_record_info(body: bytes, sep: Union[str, bytes] = b'\n'):
    """
    Extract the query id and hit count from a row-delimited JSON response.

    Each non-blank row of ``body`` is parsed as JSON; the first row whose
    ``key`` is ``"searchInfo"`` supplies the result and ends the scan.

    :param body: response body (bytes or str), one JSON document per row
    :param sep: row separator; converted to the type of ``body`` when they differ
    :return: ``(query_id, records_found)``; ``(None, 0)`` when no
        ``searchInfo`` row is present
    :raises json.JSONDecodeError: if a non-blank row is not valid JSON
    """
    # str.split / bytes.split need a separator of the same type as the body.
    if isinstance(body, str) and isinstance(sep, (bytes, bytearray)):
        sep = sep.decode()
    elif isinstance(body, (bytes, bytearray)) and isinstance(sep, str):
        sep = sep.encode()

    query_id = None
    records_found = 0
    for raw_row in body.split(sep):
        raw_row = raw_row.strip()
        if not raw_row:
            # An empty body or blank rows would make json.loads raise.
            continue
        row = json.loads(raw_row)
        if not isinstance(row, dict) or row.get("key") != "searchInfo":
            continue
        # "payload" may be missing or null.
        payload = row.get("payload") or {}
        query_id = payload.get("QueryID")
        records_found = payload.get("RecordsFound")  # number of records found
        break  # stop at the first searchInfo row
    return query_id, records_found
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test. The first positional argument of lite_base_model is
    # the query text, so the database goes in db_id (an enum member used to be
    # passed as the query). The query below is only a sample.
    m1 = lite_base_model('TS=(graphene)', db_id=WosDB.WOS.value)
    print(m1)
|