# -*- coding: utf-8 -*-
# @Time : 2022/5/31 8:52
# @Author : ZhaoXiangPeng
# @File : gz.py
"""Read a gzip-compressed JSON dump and print a summary of each item in it."""

import gzip
import json

try:
    # Nothing in this file calls ujson; keep the name available when the
    # package is installed, but do not make the module unusable without it.
    import ujson
except ImportError:
    ujson = None


def _first(value):
    """Return ``value[0]`` for a non-empty sequence, else ``value`` itself.

    A missing field (``None``) or an empty list is passed through unchanged.
    """
    return value and value[0]


def _collect_references(item):
    """Return one identifier per usable entry of ``item['reference']``.

    The reference's ``'DOI'`` is preferred.  When it is absent or empty the
    ``'article-title'`` is used instead (and echoed to stdout).  References
    carrying neither field are skipped.
    """
    collected = []
    for reference in item.get('reference', []):
        ref_doi = reference.get('DOI')
        if ref_doi:
            collected.append(ref_doi)
            continue
        ref_title = reference.get('article-title')
        if ref_title:
            print(ref_title)
            collected.append(ref_title)
    return collected


def data_parse(io):
    """Print a summary of every item in a JSON document and gather references.

    Args:
        io: A readable file-like object holding one JSON document whose
            top-level ``'items'`` key is a list of dicts.
            NOTE(review): presumably one chunk of a Crossref public data
            file, judging by the path used in the script entry point.

    Returns:
        A list of ``(doi, references)`` tuples, one per item whose
        ``'reference-count'`` is non-zero, in input order.  ``references``
        lists the DOI of each reference, or its article title when it has no
        DOI; it is empty when the item has no ``'reference'`` list.

    Raises:
        KeyError: If the document has no top-level ``'items'`` key.
    """
    data = json.load(io)
    parsed = []
    for item in data['items']:
        doi = item.get('DOI')
        dtype = item.get('type')
        issn = item.get('ISSN')
        # 'title' and 'container-title' are lists; only the first entry is shown.
        title = _first(item.get('title'))
        source = _first(item.get('container-title'))
        print('****************************************************\n'
              'TITLE: %s\n'
              'DOI: %s\n'
              'TYPE: %s\n'
              'ISSN: %s\n'
              'SOURCE: %s\n' % (title, doi, dtype, issn, source))
        # Items declaring zero references are summarised but not collected.
        if not item.get('reference-count', 0):
            continue
        parsed.append((doi, _collect_references(item)))
    return parsed


def un_gz(file_name):
    """Open the gzip archive at ``file_name`` for reading.

    Returns:
        A ``gzip.GzipFile`` yielding the decompressed bytes.  The caller owns
        the handle and should close it, e.g. by using it in a ``with`` block.
    """
    g_file = gzip.GzipFile(file_name)
    return g_file


if __name__ == '__main__':
    # Parse the archive and close it deterministically instead of opening it
    # and dropping the handle.
    with un_gz('H:/crossref_public_data_file_2021_01/1.json.gz') as gz_file:
        data_parse(gz_file)