You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
53 lines
1.5 KiB
Python
53 lines
1.5 KiB
Python
# -*- coding: utf-8 -*-
|
|
# @Time : 2022/5/31 8:52
|
|
# @Author : ZhaoXiangPeng
|
|
# @File : gz.py
|
|
|
|
import gzip
|
|
import json
|
|
import ujson
|
|
|
|
|
|
def data_parse(io):
    """Print a summary of every record found in a JSON document.

    The document is read with ``json.load`` and must be an object with a
    top-level ``'items'`` list. For each item a block with its title, DOI,
    type, ISSN and container title is printed. Items with a truthy
    ``'reference-count'`` then have their ``'reference'`` entries scanned:
    each entry is represented by its ``'DOI'`` or, failing that, by its
    ``'article-title'`` (which is also printed).

    :param io: readable file-like object holding the JSON text. The name
        shadows the stdlib ``io`` module inside this function; it is kept
        so existing keyword callers continue to work.
    :return: None; results are only printed.
    :raises KeyError: if the document has no ``'items'`` key.
    """
    data = json.load(io)
    for item in data['items']:
        titles = item.get('title')
        sources = item.get('container-title')
        # ``x and x[0]`` leaves a missing (None) or empty value untouched
        # instead of failing on the index.
        title = titles and titles[0]
        source = sources and sources[0]
        print('****************************************************\n'
              'TITLE: %s\n'
              'DOI: %s\n'
              'TYPE: %s\n'
              'ISSN: %s\n'
              'SOURCE: %s\n' % (title, item.get('DOI'), item.get('type'),
                                item.get('ISSN'), source))

        if not item.get('reference-count', 0):
            continue

        # Every lookup below uses dict.get, so no KeyError can occur and no
        # exception handler is needed around this loop.
        # NOTE(review): reference_list is collected but not consumed yet.
        reference_list = []
        for reference in item.get('reference', []):
            ref_doi = reference.get('DOI')
            if ref_doi:
                reference_list.append(ref_doi)
                continue
            ref_at = reference.get('article-title')
            if ref_at:
                print(ref_at)
                reference_list.append(ref_at)
|
|
|
|
|
|
def un_gz(file_name):
    """Open a gzip-compressed file and hand back the file object.

    :param file_name: path of the ``.gz`` file to open.
    :return: a ``gzip.GzipFile`` opened with the default mode; the caller
        is responsible for reading from it and closing it.
    """
    return gzip.GzipFile(filename=file_name)
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: open one hard-coded gzip archive.
    # NOTE(review): the returned handle is neither read nor closed here.
    archive_path = 'H:/crossref_public_data_file_2021_01/1.json.gz'
    un_gz(archive_path)
|