You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

53 lines
1.5 KiB
Python

# -*- coding: utf-8 -*-
# @Time : 2022/5/31 8:52
# @Author : ZhaoXiangPeng
# @File : gz.py
import gzip
import json
import ujson
def data_parse(io):
    """Print a summary of every record in a JSON metadata dump.

    The document read from ``io`` must be a JSON object with an ``items``
    list of dicts. For each item the first title, DOI, type, ISSN and first
    container title are printed. For items whose ``reference-count`` is
    non-zero, the article title of every reference that has no DOI is
    printed as well.

    NOTE(review): the key names look like Crossref work metadata -- confirm
    against the actual data files.

    Args:
        io: Readable file-like object holding the JSON document (anything
            ``json.load`` accepts).

    Raises:
        KeyError: If the top-level JSON object has no ``items`` key.
    """
    data = json.load(io)
    for item in data['items']:
        titles = item.get('title')
        sources = item.get('container-title')
        # ``x and x[0]`` passes a missing (None) or empty value through
        # unchanged instead of raising on the index.
        title = titles and titles[0]
        source = sources and sources[0]
        print('****************************************************\n'
              'TITLE: %s\n'
              'DOI: %s\n'
              'TYPE: %s\n'
              'ISSN: %s\n'
              'SOURCE: %s\n' % (title, item.get('DOI'), item.get('type'),
                                item.get('ISSN'), source))
        if not item.get('reference-count', 0):
            continue

        # NOTE(review): the identifiers are collected but nothing consumes
        # them yet; kept as the hook for the pending "do something" step.
        reference_list = []
        for reference in item.get('reference', []):
            # Prefer the DOI; fall back to the article title when absent.
            ref_doi = reference.get('DOI')
            if ref_doi:
                reference_list.append(ref_doi)
                continue
            ref_at = reference.get('article-title')
            if ref_at:
                print(ref_at)
                reference_list.append(ref_at)
def un_gz(file_name):
    """Open a gzip-compressed file and return the decompressing handle.

    Args:
        file_name: Path of the gzip file to open.

    Returns:
        gzip.GzipFile: Handle opened with ``GzipFile``'s default mode
        (binary read). The caller owns the handle and must close it,
        e.g. by using it in a ``with`` block.
    """
    return gzip.GzipFile(file_name)
if __name__ == '__main__':
    # Open the compressed dump, parse it, and close the handle afterwards
    # (the handle was previously opened and then discarded unused).
    with un_gz('H:/crossref_public_data_file_2021_01/1.json.gz') as g_file:
        data_parse(g_file)