|
|
|
@ -0,0 +1,74 @@
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
# @date:2024/8/9 9:24
|
|
|
|
|
# @Author:LiuYiJie
|
|
|
|
|
# @file: find_es_article
|
|
|
|
|
import json
import os

from elasticsearch import Elasticsearch

from hbase_server.phoenix_operate import phoenixServer
|
|
|
|
|
|
|
|
|
|
# Elasticsearch connection settings used by findEsArticle.
# Every value can be overridden through an environment variable (ES_HOST,
# ES_PORT, ES_USER, ES_PASSWD). The literals are the previously hard-coded
# values, kept only as fallbacks so existing runs behave exactly as before.
# NOTE(review): the fallback below looks like a live password committed to
# source control -- rotate it and remove the fallback once the environment
# variables are provisioned.
ES_CONF = {
    "host": os.environ.get("ES_HOST", "101.43.232.153"),
    "port": os.environ.get("ES_PORT", "9200"),
    "user": os.environ.get("ES_USER", "elastic"),
    "passwd": os.environ.get("ES_PASSWD", "kcidea1509!%)("),
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class findEsArticle:
    """Copy article documents from an Elasticsearch index into Phoenix.

    The instance holds an Elasticsearch client built from ``ES_CONF`` and a
    ``phoenixServer`` helper. ``search_article`` runs one filtered search and
    upserts every hit into ``SCIENCE.SCIENCE_ARTICLE_METADATA``.
    """

    def __init__(self, es_index=None):
        """Create the Elasticsearch client and the Phoenix helper.

        Args:
            es_index: Name of the index that ``search_article`` queries.
        """
        self.es = Elasticsearch(
            [ES_CONF['host']],
            http_auth=(ES_CONF['user'], ES_CONF['passwd']),
            port=ES_CONF['port'],
        )
        self.es_index = es_index
        # Initialise the Phoenix helper used to write the fetched documents.
        self.phoenix = phoenixServer()

    def search_article(self, school_id="51", source_type=6, size=3000,
                       update_time='2024-08-16'):
        """Fetch matching articles from Elasticsearch and upsert them.

        Only the first ``size`` hits are requested (``from`` is fixed at 0);
        no paging is performed beyond that.

        Args:
            school_id: Value for the ``school_ids`` term filter.
            source_type: Value for the ``source_type`` term filter.
            size: Maximum number of hits requested from Elasticsearch.
            update_time: String written to the ``update_time`` column of
                every upserted row.

        Raises:
            Whatever the Elasticsearch search or the Phoenix upsert raises;
            ``KeyError`` if a hit's ``_source`` has no ``id`` field.

        Note:
            ``self.phoenix.pool.close_all()`` is always called before this
            method returns or raises, so the instance is presumably not
            reusable for a second Phoenix write -- confirm against
            ``phoenixServer`` before reusing it.
        """
        query_json = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"school_ids": {"value": school_id}}},
                        {"term": {"source_type": {"value": source_type}}},
                    ]
                }
            },
            "from": 0,
            "size": size,
        }
        sql = """upsert into SCIENCE.SCIENCE_ARTICLE_METADATA(id, C1."article_msg",C1."update_time") values(?,?,?)"""

        try:
            hits = self.es.search(index=self.es_index, body=query_json)['hits']['hits']
            # One row per hit: (id as text, full document as JSON, timestamp).
            # ensure_ascii=False keeps non-ASCII text readable in the stored JSON.
            rows = [
                (
                    str(hit['_source']['id']),
                    json.dumps(hit['_source'], ensure_ascii=False),
                    update_time,
                )
                for hit in hits
            ]
            self.phoenix.upsert(sql, rows)
        finally:
            # Release the Phoenix pool even when the search or upsert fails;
            # previously a failure skipped this call.
            self.phoenix.pool.close_all()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: run one search-and-upsert pass over the
    # 'science-article-metadata-v3' index.
    findEsArticle(es_index='science-article-metadata-v3').search_article()
|