# -*- coding: utf-8 -*-
# @date:2024/8/9 9:24
# @Author:LiuYiJie
# @file: find_es_article
"""Copy article documents from an Elasticsearch index into a Phoenix table."""
import json
import os

from elasticsearch import Elasticsearch
from hbase_server.phoenix_operate import phoenixServer

# Connection settings for Elasticsearch. Each value can be overridden through
# an environment variable; the fallbacks keep the previous behaviour.
# NOTE(review): the fallback password is committed to the repository -- rotate
# it and drop the defaults once deployments set ES_HOST/ES_PORT/ES_USER/ES_PASSWD.
ES_CONF = {
    "host": os.environ.get("ES_HOST", "101.43.232.153"),
    "port": os.environ.get("ES_PORT", "9200"),
    "user": os.environ.get("ES_USER", "elastic"),
    "passwd": os.environ.get("ES_PASSWD", "kcidea1509!%)("),
}

# Parameterized statement handed to ``phoenixServer.upsert`` together with the
# (id, article_msg, update_time) row tuples.
_UPSERT_SQL = (
    'upsert into SCIENCE.SCIENCE_ARTICLE_METADATA'
    '(id, C1."article_msg",C1."update_time") values(?,?,?)'
)


class findEsArticle:
    """Read matching articles from Elasticsearch and upsert them via Phoenix."""

    def __init__(self, es_index=None):
        """Create the Elasticsearch client and the Phoenix helper.

        Args:
            es_index: Name of the Elasticsearch index that ``search_article``
                queries.
        """
        self.es = Elasticsearch(
            [ES_CONF['host']],
            http_auth=(ES_CONF['user'], ES_CONF['passwd']),
            port=ES_CONF['port'],
        )
        self.es_index = es_index
        # Initialize the Phoenix helper used for the upsert.
        self.phoenix = phoenixServer()

    @staticmethod
    def _build_query(school_id, source_type, size):
        """Return the search body: both term filters must match, one page."""
        return {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"school_ids": {"value": school_id}}},
                        {"term": {"source_type": {"value": source_type}}},
                    ]
                }
            },
            "from": 0,
            "size": size,
        }

    @staticmethod
    def _rows_from_hits(hits, update_time):
        """Turn search hits into (id, JSON source, update_time) row tuples."""
        return [
            (
                str(hit['_source']['id']),
                # ensure_ascii=False keeps non-ASCII text readable in the column.
                json.dumps(hit['_source'], ensure_ascii=False),
                update_time,
            )
            for hit in hits
        ]

    def search_article(self, school_id="51", source_type=6, size=3000,
                       update_time='2024-08-16'):
        """Fetch matching articles and upsert them into Phoenix.

        Only the first ``size`` hits are read (``from`` is fixed at 0); there
        is no pagination. ``self.phoenix.pool.close_all()`` is always called
        before returning, including when the search or the upsert raises.
        NOTE(review): presumably the instance cannot write again after that
        call -- confirm against ``phoenixServer``.

        Args:
            school_id: Value matched against the ``school_ids`` field.
            source_type: Value matched against the ``source_type`` field.
            size: Maximum number of hits requested from Elasticsearch.
            update_time: Value stored in the ``update_time`` column of every
                row written by this call.

        Raises:
            KeyError: If a hit's ``_source`` has no ``id`` field.
        """
        query_json = self._build_query(school_id, source_type, size)
        try:
            response = self.es.search(index=self.es_index, body=query_json)
            rows = self._rows_from_hits(response['hits']['hits'], update_time)
            # Nothing matched: skip the database round trip entirely.
            if rows:
                self.phoenix.upsert(_UPSERT_SQL, rows)
        finally:
            # Release the pool even when the search or the upsert fails.
            self.phoenix.pool.close_all()


if __name__ == '__main__':
    c = findEsArticle(es_index='science-article-metadata-v3')
    c.search_article()