二百六十五、Hive——目前Hive数仓各层表样例
一、目的
梳理一下目前Hive数仓各层表
注意:只建立一个数据库hurys_db
二、ODS层
2.1 建表语句
create external table if not exists hurys_db.ods_queue( device_no string comment '设备编号', source_device_type string comment '设备类型', sn string comment '设备序列号 ', model string comment '设备型号', create_time string comment '创建时间', lane_no int comment '车道编号', lane_type int comment '车道类型 0:渠化1:来向2:出口3:去向4:左弯待转区5:直行待行区6:右转专用道99:未定义车道', queue_count int comment '排队车辆数', queue_len float comment '排队长度(m)', queue_head float comment '排队头车距停止线距离(m)', queue_tail float comment '排队尾车距停止线距离(m)' ) comment '静态排队数据外部表——静态分区' partitioned by (day string) row format delimited fields terminated by ',' stored as SequenceFile ;
2.2 SQL语句
--刷新表分区 msck repair table ods_queue; --查看表分区 show partitions ods_queue; --查看表数据 select * from ods_queue;
三、DWD层
3.1 脱敏清洗表
3.1.1 建表语句
create table if not exists hurys_db.dwd_queue( id string comment '唯一ID', device_no string comment '设备编号', source_device_type string comment '设备类型', sn string comment '设备序列号 ', model string comment '设备型号', create_time string comment '创建时间', lane_no int comment '车道编号', lane_type int comment '车道类型 0:渠化1:来向2:出口3:去向4:左弯待转区5:直行待行区6:右转专用道99:未定义车道', queue_count int comment '排队车辆数', queue_len decimal(10,2) comment '排队长度(m)', queue_head decimal(10,2) comment '排队头车距停止线距离(m)', queue_tail decimal(10,2) comment '排队尾车距停止线距离(m)' ) comment '静态排队数据表——动态分区' partitioned by (day string) stored as orc ;
3.1.2 SQL语句
with t1 as ( select device_no, source_device_type, sn, model, create_time, lane_no, lane_type, case when queue_count is null then 0 else queue_count end as queue_count, case when queue_len is null then 0 else cast(queue_len as decimal(10,2)) end as queue_len, case when queue_head is null then 0 else cast(queue_head as decimal(10,2)) end as queue_head, case when queue_tail is null then 0 else cast(queue_tail as decimal(10,2)) end as queue_tail, substr(create_time,1,10) day from hurys_db.ods_queue where day = '2024-09-10' ) insert overwrite table hurys_db.dwd_queue partition(day) select UUID() as id, device_no, source_device_type, sn, model, create_time, lane_no, lane_type, queue_count, queue_len, queue_head, queue_tail, day from t1 where day = '2024-09-10' and device_no is not null and create_time is not null and queue_len between 0 and 500 and queue_head between 0 and 500 and lane_no between 0 and 255 and queue_tail between 0 and 500 and queue_count between 0 and 100 group by device_no, source_device_type, sn, model, create_time, lane_no, lane_type, queue_count, queue_len, queue_head, queue_tail, day ; --查看分区 show partitions hurys_db.dwd_queue; --查看数据 select * from hurys_db.dwd_queue where day='2024-09-10'; --删掉表分区 alter table hurys_db.dwd_queue drop partition (day='2024-09-04');
四、维度表
4.1 节假日表
4.1.1 原始表建表语句
create external table if not exists hurys_db.tb_holiday( id int comment '主键id', day date comment '日期', holiday string comment '节假日', year string comment '年份' ) comment '节假日表' row format delimited fields terminated by ',' stored as textfile location '/data/tb_holiday' tblproperties("skip.header.line.count"="1") ;
4.1.2 清洗表建表语句
create table if not exists hurys_db.dwd_holiday( day date comment '日期', holiday string comment '节假日', year string comment '年份' ) comment '节假日表' stored as orc ;
五、DWS层
5.1 统计数据流量表——动态分区——1天周期
5.1.1 建表语句
create table if not exists hurys_db.dws_statistics_volume_1day( device_no string comment '设备编号', scene_name string comment '场景名称', lane_no int comment '车道编号', lane_direction string comment '车道流向', section_no int comment '断面编号', device_direction string comment '雷达朝向', sum_volume_day int comment '每天总流量', week_day string comment '周几', month string comment '月份' ) comment '统计数据流量表——动态分区——1天周期' partitioned by (day string) stored as orc ;
5.1.2 SQL语句
insert overwrite table hurys_db.dws_statistics_volume_1day partition(day) select dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, sum(volume_sum) sum_volume_day, case when pmod(datediff(create_time,'2023-11-27') + 1,7) = 1 then '周一' when pmod(datediff(create_time,'2023-11-27') + 1,7) = 2 then '周二' when pmod(datediff(create_time,'2023-11-27') + 1,7) = 3 then '周三' when pmod(datediff(create_time,'2023-11-27') + 1,7) = 4 then '周四' when pmod(datediff(create_time,'2023-11-27') + 1,7) = 5 then '周五' when pmod(datediff(create_time,'2023-11-27') + 1,7) = 6 then '周六' else '周日' end as week_day, substr(day,1,7) month, day from hurys_db.dwd_statistics as dwd_st right join hurys_db.dwd_radar_lane as dwd_rl on dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_no right join hurys_db.dwd_device_scene as dwd_ds on dwd_ds.device_no=dwd_st.device_no right join hurys_db.dwd_scene as dwd_sc on dwd_sc.scene_id = dwd_ds.scene_id right join hurys_db.dwd_radar_config as dwd_rc on dwd_rc.device_no=dwd_st.device_no where dwd_st.create_time is not null and dwd_st.day='2024-09-05' group by dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, case when pmod(datediff(create_time,'2023-11-27') + 1,7) = 1 then '周一' when pmod(datediff(create_time,'2023-11-27') + 1,7) = 2 then '周二' when pmod(datediff(create_time,'2023-11-27') + 1,7) = 3 then '周三' when pmod(datediff(create_time,'2023-11-27') + 1,7) = 4 then '周四' when pmod(datediff(create_time,'2023-11-27') + 1,7) = 5 then '周五' when pmod(datediff(create_time,'2023-11-27') + 1,7) = 6 then '周六' else '周日' end, day ;
这就是目前数仓各层的表样例,这边只是逻辑分层,并没有给每一层建立一个单独的数据库,那样在多层的表联合查询时速度会相比慢很多!!!
原文地址:https://blog.csdn.net/tiantang2renjian/article/details/142358568
免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!