Example: exporting data from the Sensors Analytics (神策) platform to HDFS
Refer to the official documentation:
https://manual.sensorsdata.cn/sa/docs/tech_export_jdbc/v0204
1. Test environment address
jdbc:hive2://11.22.33.44:21050/rawdata;auth=noSasl
--Test connectivity
ping 11.22.33.44
telnet 11.22.33.44 21050
2. Connecting via JDBC (beeline)
beeline -u "jdbc:hive2://11.22.33.44:21050/rawdata;auth=noSasl"
beeline -u "jdbc:hive2://11.22.33.44:21050/rawdata;auth=noSasl" -e "select 1"
Syntax:
--List databases
show databases;
use rawdata;
--List tables
show tables;
--Show table structure (the /*SA(ylfx)*/ comment tells Sensors which project to query; here the project name is ylfx)
desc events /*SA(ylfx)*/;
--Count rows in the table
SELECT
count(1)
FROM rawdata.events
/*SA(ylfx)*/;
1798483
--Query a few sample rows
SELECT
*
FROM rawdata.events limit 3
/*SA(ylfx)*/;
--Export data to the HDFS cluster where Sensors is deployed (default field delimiter is '\001'); this beeline export produced 0 rows
create table test0117 STORED AS textfile LOCATION '/tmp' as
/*SA_BEGIN*/
select event,user_id,day,event_id,month_id,week_id,distinct_id,date,time
FROM rawdata.events
where date >= '2025-01-15 00:00:00'
/*SA_END*/;
3. Connecting via impala-shell
impala-shell -i 11.22.33.44
impala-shell -i 11.22.33.44 -q "select 1"
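impala-shell can likewise dump query results to a local delimited file instead of HDFS. A minimal sketch (the output path and comma delimiter are just examples; check the flags against your impala-shell version):
impala-shell -i 11.22.33.44 -B --output_delimiter=',' \
  -q "select event, user_id, date from rawdata.events where date >= '2025-01-15 00:00:00' /*SA(ylfx)*/" \
  -o /tmp/events_sample.csv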
--Count rows in the table
SELECT
count(1) as cnt
FROM rawdata.events
where date >= '2025-01-15 00:00:00'
/*SA(ylfx)*/;
+-------+
| cnt   |
+-------+
| 23976 |
+-------+
Fetched 1 row(s) in 0.42s
--Export data to the HDFS cluster where Sensors is deployed (default field delimiter is '\001')
create table test_0116 STORED AS textfile LOCATION '/tmp' as
/*SA_BEGIN*/
select event,user_id,day,event_id,month_id,week_id,distinct_id,date,time
FROM rawdata.events
where date >= '2025-01-15 00:00:00'
/*SA_END*/;
+-----------------------+
| summary               |
+-----------------------+
| Inserted 25346 row(s) |
+-----------------------+
Fetched 1 row(s) in 0.62s
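Before copying the data off, you can confirm that the export landed under the table LOCATION used above. A quick check (<exported_data_file> is a placeholder for whichever file the listing shows; fields in the exported files are separated by '\001'):
hdfs dfs -ls /tmp
hdfs dfs -cat /tmp/<exported_data_file> | head -n 3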
--Use hadoop distcp to copy the exported data to the target HDFS cluster
hadoop distcp source_path dist_path
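A fuller invocation under assumed addresses (sensors-nn and target-nn below are placeholders for the Sensors-side and destination NameNodes, and 8020 is the common default NameNode RPC port; -update copies only files that are missing or changed on the destination):
hadoop distcp -update \
  hdfs://sensors-nn:8020/tmp \
  hdfs://target-nn:8020/tmp/sensors_export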
Original article: https://blog.csdn.net/weixin_45547818/article/details/145268916