You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

332 lines
10 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
# @Time : 2023/7/13 9:40
# @Author : zhaoxiangpeng
# @File : model.py
import json
import enum
import warnings
from typing import List, Tuple, Any, Dict, Union
from urllib.parse import urlencode
from science_article_wos.configs import wos as config
# Aliases that mirror the JSON spellings of Python's False / True / None.
# The request-model dict literals in this module are written with these names.
false, true, null = False, True, None
class WosDB(enum.Enum):
    """Databases selectable by numeric id.

    The model builders in this module look a member up by its integer value
    and send the member *name* as the ``databaseId`` field.
    """

    WOS = 1
    CSCD = 2
class AnalyzesEnum(enum.Enum):
    """Per-product lists used as the ``analyzes`` entry of a search request.

    Member names are product codes; each value is the list of analysis
    field identifiers sent for that product.
    """

    WOSCC = [
        "TP.Value.6",
        "REVIEW.Value.6",
        "EARLY ACCESS.Value.6",
        "OA.Value.6",
        "DR.Value.6",
        "ECR.Value.6",
        "PY.Field_D.6",
        "DT.Value.6",
        "AU.Value.6",
        "DX2NG.Value.6",
        "PEERREVIEW.Value.6",
    ]
    CSCD = [
        "TP.Value.6",
        "DR.Value.6",
        "OA.Value.6",
        "PY.Field_D.6",
        "DT.Value.6",
        "SJ.Value.6",
        "AU.Value.6",
        "OG.Value.6",
        "SO.Value.6",
    ]
# Maps the caller-facing collection name to the product code used in request models.
ColNameMap = {"WOS": "WOSCC", "CSCD": "CSCD"}
def calculate_next_page(next_page: int = 1, page_size: int = 100):
    """
    Compute the cursor of a page, i.e. the 1-based ordinal of its first record.

    :param next_page: page number of the page to fetch
    :param page_size: number of records per page
    :return: ordinal of the first record on that page
    """
    records_before_page = page_size * (next_page - 1)
    return records_before_page + 1
def lite_base_model(usr_query: str, db_id: int = None, first_record: int = 1, page_size: int = 100, **kwargs):
    """
    Build the parameter dict for a "lite" search request.

    :param usr_query: query string, sent as ``usrQuery``
    :param db_id: numeric value of a ``WosDB`` member; ``None`` means 1
    :param first_record: ordinal of the first record to return, sent as ``firstRecord``
    :param page_size: number of records per request, sent as ``count``
    :param kwargs: accepted for call-site compatibility and ignored
    :return: the parameters as a plain dict (not URL-encoded)
    :raises ValueError: if ``db_id`` is not the value of a ``WosDB`` member
    """
    if db_id is None:
        db_id = 1
    # The warning text states the valid range is 1 ~ 100000, so check both
    # ends. This only warns; the request model is still built.
    if not 1 <= first_record <= 100000:
        warnings.warn('first_record 必须在 1 ~ 100000 之间')
    return {
        'databaseId': WosDB(db_id).name,
        'firstRecord': first_record,
        'count': page_size,
        'usrQuery': usr_query,
    }
def lite_query_model(db_id: int = None, first_record: int = 1, page_size: int = 100, **kwargs):
    """
    Build the URL-encoded paging parameters for a "lite" request.

    :param db_id: numeric value of a ``WosDB`` member; ``None`` means 1
    :param first_record: ordinal of the first record to return, sent as ``firstRecord``
    :param page_size: number of records per request, sent as ``count``
    :param kwargs: accepted and ignored
    :return: query string produced by ``urlencode``
    """
    database = WosDB(1 if db_id is None else db_id)
    pairs = [
        ('databaseId', database.name),
        ('firstRecord', first_record),
        ('count', page_size),
    ]
    return urlencode(pairs)
def starter_documents_uid_get(uid, detail: str = None):
    """
    Build the query parameters for fetching a single document by uid.

    :param uid: document identifier; it is not part of the returned query
        parameters (presumably the caller places it in the URL path — confirm)
    :param detail: optional ``detail`` value; omitted from the result when ``None``
    :return: list of ``(name, value)`` query-parameter pairs
    """
    _query_params: List[Tuple[str, str]] = []
    if detail is not None:
        _query_params.append(("detail", detail))
    # The parameters were previously built and then discarded; return them
    # like starter_documents_get does.
    return _query_params
def starter_documents_get(q, db: str = WosDB.WOS.name, limit: int = config.WOS_STARTER_PER_PAGE_LIMIT, page: int = 1, sort_field: str = None,
                          modified_time_span=None, tc_modified_time_span=None, detail=None, **kwargs):
    """
    Build the query parameters for a document search.

    :param q: search query
    :param db: database name; defaults to the name of ``WosDB.WOS``
    :param limit: records per page, at most 50
    :param page: page number; with ``limit`` 50 the range is 1~2000, i.e. at most 100,000 records
    :param sort_field: optional sort order, sent as ``sortField``
    :param modified_time_span: optional value sent as ``modifiedTimeSpan``
    :param tc_modified_time_span: optional value sent as ``tcModifiedTimeSpan``
    :param detail: all fields are returned by default; ``short`` returns fewer fields
        (uid, links{record,citingArticles,references,related}, citations[{db,count}], identifiers{doi,issn})
    :param kwargs: accepted and ignored
    :return: list of ``(name, value)`` query-parameter pairs
    """
    _query_params: List[Tuple[str, Any]] = [
        ("q", q),
        ("db", db),
        ("limit", limit),
        ("page", page),
    ]
    # Optional parameters are only sent when the caller supplied them.
    # sort_field and the two time spans used to be accepted but dropped.
    optional_params = (
        ("sortField", sort_field),
        ("modifiedTimeSpan", modified_time_span),
        ("tcModifiedTimeSpan", tc_modified_time_span),
        ("detail", detail),
    )
    _query_params.extend((name, value) for name, value in optional_params if value is not None)
    return _query_params
def make_advanced_search_ut(query: str = None, wos_ids: List = None, limit: int = 50, col_name: str = "WOS") -> Dict[
    str, Any]:
    """
    Build the request model for a search, either from a ready-made query or
    from a list of record ids.

    :param query: query text; when ``None`` it is built from ``wos_ids``
    :param wos_ids: record ids, joined as ``UT=(id) OR UT=(id) ...`` when ``query`` is ``None``
    :param limit: number of records to retrieve, sent as ``retrieve.count``
    :param col_name: key of ``ColNameMap`` selecting the product
    :return: request model as a dict
    :raises ValueError: if both ``query`` and ``wos_ids`` are ``None``
    :raises KeyError: if ``col_name`` is not a key of ``ColNameMap``
    """
    if query is None:
        if wos_ids is None:
            raise ValueError('query 和 wos_ids 必须满足其中一个不为None')
        query = ' OR '.join([f'UT=({wos_id})' for wos_id in wos_ids])
    # Resolve the caller-facing collection name to the product code.
    product = ColNameMap[col_name]
    # Copy the list so that a caller mutating the returned model cannot
    # alter the list stored on the enum member.
    analyzes = list(getattr(AnalyzesEnum, product).value)
    model = {
        "product": product,
        "searchMode": "general",
        "viewType": "search",
        "serviceMode": "summary",
        "search": {
            "mode": "general",
            "database": product,
            "query": [
                {
                    "rowText": query
                }
            ],
            "sets": [],
            "options": {
                "lemmatize": "On"
            }
        },
        "retrieve": {
            "count": limit,
            "history": True,
            "jcr": True,
            "sort": "relevance",
            "analyzes": analyzes
        },
        "eventMode": None,
        "isPreprintReview": False
    }
    return model
def export_search_data_to_txt(
        q_id: str,
        mark_from: int = 1,
        mark_to: int = 500,
        col_name: str = "WOS",
        filters: str = config.DEFAULT_EXPORT_RECORD_FILTER
) -> Dict[str, Any]:
    """
    Build the request model for exporting records of a search result.

    :param q_id: id of the search result obtained from a previous search
    :param mark_from: first record to export, inclusive
    :param mark_to: last record to export, inclusive; reduced so that at most
        500 records are requested
    :param col_name: source collection / core, sent as ``colName``
    :param filters: fullRecord (full record) / fullRecordPlus (full record and cited references)
    :return: request model as a dict
    """
    # Both bounds are inclusive, so a span of 500 records means
    # mark_to - mark_from == 499. The previous "> 500" check let a
    # difference of exactly 500 (501 records) through unclamped.
    if mark_to - mark_from >= 500:
        mark_to = mark_from + 499
    model = {"parentQid": q_id, "sortBy": "relevance",
             "displayTimesCited": "true", "displayCitedRefs": "true", "product": "UA", "colName": col_name,
             "displayUsageInfo": "true", "fileOpt": "othersoftware", "action": "saveToTab",
             "markFrom": str(mark_from), "markTo": str(mark_to),
             "view": "summary", "isRefQuery": "false", "locale": "zh_CN", "filters": filters}
    return model
def article_detail_model(uts: Union[List[str], str], core: str = "WOSCC"):
    """
    Build the request model for fetching full records by record id.

    Detail page: https://webofscience.clarivate.cn/wos/woscc/full-record/{wos_id}
    Endpoint:    https://webofscience.clarivate.cn/api/wosnx/core/runQuerySearch

    :param uts: one record id or a list of record ids; a single string is wrapped in a list
    :param core: product / database code used for ``product`` and ``search.database``
    :return: request model as a dict
    """
    record_ids = [uts] if isinstance(uts, str) else uts

    # Settings for the related record lists requested alongside the main record.
    secondary_retrieve = {
        "associated_data": {
            "sort": "relevance", "count": 10
        },
        "cited_references": {
            "sort": "author-ascending", "count": 30
        },
        "citing_article": {
            "sort": "date", "count": 2, "links": null, "view": "mini"
        },
        "cited_references_with_context": {
            "sort": "date", "count": 135, "view": "mini"
        },
        "recommendation_articles": {
            "sort": "recommendation-relevance", "count": 5, "links": null, "view": "mini"
        },
        "grants_to_wos_records": {
            "sort": "date-descending", "count": 30, "links": null, "view": "mini"
        }
    }
    retrieve_section = {
        "first": 1, "links": "retrieve", "sort": "relevance", "count": 1, "view": "super",
        "coll": null, "activity": false, "analyzes": null, "jcr": true, "reviews": true,
        "highlight": null,
        "secondaryRetrieve": secondary_retrieve
    }
    search_section = {
        "database": core,
        "mode": "record_ids",
        "uts": record_ids
    }
    return {
        "eventMode": null,
        "isPreprintReview": false,
        "product": core,
        "retrieve": retrieve_section,
        "search": search_section,
        "searchMode": "record_ids",
        "viewType": "search",
        "serviceMode": "summary",
    }
# 被引用专用model
def get_wos_core_cites(
        uts_or_qid: str,
        year_range: tuple = None,
        core: str = "WOSCC",
        parent_db: str = "WOSCC",
        is_refine: bool = False
):
    """
    Build the request model for listing the articles that cite a record.

    https://webofscience.clarivate.cn/api/wosnx/core/runQuerySearch

    :param uts_or_qid: sent as ``search.parentId.value`` for a plain request,
        or as ``qid`` for a refine request
    :param year_range: years to filter on; expanded as ``range(*year_range)``
        plus its last element, and a truthy value forces a refine request
    :param core: database to search
    :param parent_db: sent as ``search.parentDatabase``
    :param is_refine: whether to build a refine request
    :return: request model as a dict
    """
    # No "retrieve" section is sent with this model.
    search_section = {"database": core, "mode": "citing_article", "parentDatabase": parent_db,
                      "parentDoc": null,
                      "parentId": {"type": "colluid", "value": uts_or_qid},
                      "parentQid": null, "parentSort": null}
    model = {
        "eventMode": null,
        "isPreprintReview": false,
        "product": core,
        "search": search_section,
        "searchMode": "citing_article",
        "serviceMode": "summary",
        "viewType": "search",
    }

    if year_range:
        # A year filter is only expressible as a refine request.
        is_refine = True
        leading_years = list(range(*year_range))
        closing_year = year_range[-1]
        year_values = [str(year) for year in leading_years + [closing_year]]
        model["refines"] = [{"index": "PY", "value": year_values}]

    if is_refine:
        # A refine request addresses an existing query by id, so it carries
        # neither "search" nor "isPreprintReview".
        model["qid"] = uts_or_qid
        del model["search"]
        del model["isPreprintReview"]
        model["viewType"] = "refine"
    return model
def get_aggregation_wos_cited(q_id: str, core: str = "WOSCC"):
    """
    Build the request model for the per-core aggregation of citing records.

    https://webofscience.clarivate.cn/api/wosnx/core/runQueryGetRecordsStream

    :param q_id: query id, sent as ``qid``
    :param core: product code, sent as ``product``
    :return: request model as a dict
    """
    retrieve_section = {"analyzes": ["EDN.Value.200"]}
    return dict(
        product=core,
        qid=q_id,
        retrieve=retrieve_section,
        searchMode="citing_article",
        viewType="records",
    )
def get_refine_count(q_id: str, count: int = 5):
    """
    Build a refine request that restricts a query to the SCI, SSCI and AHCI editions.

    :param q_id: query id, sent as ``qid``
    :param count: currently unused; the model carries no ``retrieve`` section
    :return: request model as a dict
    """
    edition_refine = {"index": "EDN", "value": ["WOS.SCI", "WOS.SSCI", "WOS.AHCI"]}
    return {
        "eventMode": null,
        "product": "WOSCC",
        "qid": q_id,
        "refines": [edition_refine],
        "searchMode": "citing_article",
        "serviceMode": "summary",
        "viewType": "refine",
    }
def get_record_info(body: Union[str, bytes], sep: Union[str, bytes] = b'\n'):
    """
    Extract the query id and the number of hits from a delimited JSON response.

    The body is split on ``sep`` and each piece is parsed as JSON; the first
    object whose ``key`` is ``searchInfo`` supplies the result.

    :param body: response body, one JSON document per ``sep``-delimited row
    :param sep: row separator; converted to the type of ``body`` when they differ
    :return: ``(query_id, records_found)``; ``(None, 0)`` when no
        ``searchInfo`` row is present
    :raises json.JSONDecodeError: if a non-blank row is not valid JSON
    """
    # str.split / bytes.split need a separator of the same type as the body.
    if isinstance(body, str):
        if isinstance(sep, bytes):
            sep = sep.decode()
    elif isinstance(sep, str):
        sep = sep.encode()

    query_id = None
    records_found = 0
    for resp_text in body.strip().split(sep):
        # An empty body or consecutive separators yield blank rows, which
        # are not JSON and carry no information.
        if not resp_text.strip():
            continue
        resp_row = json.loads(resp_text)
        if isinstance(resp_row, dict) and resp_row.get("key") == "searchInfo":
            # "payload" may be missing or null; treat both as empty.
            payload = resp_row.get("payload") or {}
            query_id = payload.get("QueryID")
            records_found = payload.get("RecordsFound")  # number of records found
            break  # the first searchInfo row is enough
    return query_id, records_found
# Manual smoke check when the module is run as a script.
if __name__ == '__main__':
    # NOTE(review): the enum member is passed positionally, so it lands in
    # ``usr_query`` rather than ``db_id`` — confirm this is intended.
    m1 = lite_base_model(WosDB.WOS)