From 648f690252eaab2c3e4276611387803df7b478f8 Mon Sep 17 00:00:00 2001
From: wangyitong
Date: Thu, 22 Aug 2024 14:40:43 +0800
Subject: [PATCH] feat(etl): dataX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

org测试流程,dataX项目导出
---
 .gitignore                                    |  33 ++++++
 ...l_org_subject_article_data_hdfs2mysql.json | 101 ++++++++++++++++++
 2 files changed, 134 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 etl_org/datax/etl_org_subject_article_data_hdfs2mysql.json

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..549e00a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,33 @@
+HELP.md
+target/
+!.mvn/wrapper/maven-wrapper.jar
+!**/src/main/**/target/
+!**/src/test/**/target/
+
+### STS ###
+.apt_generated
+.classpath
+.factorypath
+.project
+.settings
+.springBeans
+.sts4-cache
+
+### IntelliJ IDEA ###
+.idea
+*.iws
+*.iml
+*.ipr
+
+### NetBeans ###
+/nbproject/private/
+/nbbuild/
+/dist/
+/nbdist/
+/.nb-gradle/
+build/
+!**/src/main/**/build/
+!**/src/test/**/build/
+
+### VS Code ###
+.vscode/
diff --git a/etl_org/datax/etl_org_subject_article_data_hdfs2mysql.json b/etl_org/datax/etl_org_subject_article_data_hdfs2mysql.json
new file mode 100644
index 0000000..3dba0ef
--- /dev/null
+++ b/etl_org/datax/etl_org_subject_article_data_hdfs2mysql.json
@@ -0,0 +1,101 @@
+{
+  "job": {
+    "setting": {
+      "speed": {
+        "channel": 1
+      }
+    },
+    "content": [
+      {
+        "reader": {
+          "name": "hdfsreader",
+          "csvReaderConfig": {
+            "safetySwitch": false,
+            "skipEmptyRecords": false,
+            "useTextQualifier": false
+          },
+          "parameter": {
+            "path": "/user/hive/warehouse/science_etl_dws.db/etl_org_subject_article_data/*",
+            "defaultFS": "hdfs://hadoop01:8020",
+            "column": [
+              {
+                "index": 0,
+                "type": "String"
+              },
+              {
+                "index": 1,
+                "type": "String"
+              },
+              {
+                "index": 2,
+                "type": "String"
+              },
+              {
+                "index": 3,
+                "type": "String"
+              },
+              {
+                "index": 4,
+                "type": "String"
+              },
+              {
+                "index": 5,
+                "type": "String"
+              },
+              {
+                "index": 6,
+                "type": "String"
+              },
+              {
+                "index": 7,
+                "type": "String"
+              },
+              {
+                "index": 8,
+                "type": "String"
+              },
+              {
+                "index": 9,
+                "type": "String"
+              }
+            ],
+            "fileType": "orc",
+            "encoding": "UTF-8",
+            "fieldDelimiter": "\t"
+          }
+        },
+        "writer": {
+          "name": "mysqlwriter",
+          "parameter": {
+            "writeMode": "insert",
+            "username": "root",
+            "password": "123456",
+            "column": [
+              "source_type",
+              "school_id",
+              "subject_id",
+              "org_id",
+              "article_count",
+              "first_count",
+              "rep_count",
+              "cited_count",
+              "first_cited_count",
+              "rep_cited_count"
+            ],
+            "preSql": [
+              "delete from etl_org_subject_article_data"
+            ],
+            "connection": [
+              {
+                "jdbcUrl": "jdbc:mysql://192.168.1.31:3305/datax?characterEncoding=UTF-8&connectionCollation=utf8mb4_general_ci&useUnicode=true&useSSL=false&allowMultiQueries=true&serverTimezone=GMT%2b8",
+                "table": [
+                  "etl_org_subject_article_data"
+                ]
+              }
+            ]
+          }
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file