0


二百七十六、ClickHouse——Hive和ClickHouse非常不同的DWS指标数据SQL语句

一、目的

在完成数据之后对业务指标进行分析,Hive和ClickHouseSQL真不一样

二、部分业务指标表

2.1 统计数据流量表1天周期

2.1.1 Hive中原有代码

2.1.1.1 Hive中建表语句
  1. --1、统计数据流量表——动态分区——1天周期
  2. create table if not exists hurys_db.dws_statistics_volume_1day(
  3. device_no string comment '设备编号',
  4. scene_name string comment '场景名称',
  5. lane_no int comment '车道编号',
  6. lane_direction string comment '车道流向',
  7. section_no int comment '断面编号',
  8. device_direction string comment '雷达朝向',
  9. sum_volume_day int comment '每天总流量',
  10. week_day string comment '周几',
  11. month string comment '月份'
  12. )
  13. comment '统计数据流量表——动态分区——1天周期'
  14. partitioned by (day string)
  15. stored as orc
  16. ;
2.1.1.2 Hive中SQL语句
  1. --动态加载数据
  2. insert overwrite table hurys_db.dws_statistics_volume_1day partition(day)
  3. select
  4. dwd_st.device_no,
  5. dwd_sc.scene_name,
  6. dwd_st.lane_no,
  7. dwd_rl.lane_direction,
  8. dwd_st.section_no,
  9. dwd_rc.device_direction,
  10. sum(volume_sum) sum_volume_day,
  11. case when pmod(datediff(create_time,'2023-11-27') + 1,7) = 1 then '周一'
  12. when pmod(datediff(create_time,'2023-11-27') + 1,7) = 2 then '周二'
  13. when pmod(datediff(create_time,'2023-11-27') + 1,7) = 3 then '周三'
  14. when pmod(datediff(create_time,'2023-11-27') + 1,7) = 4 then '周四'
  15. when pmod(datediff(create_time,'2023-11-27') + 1,7) = 5 then '周五'
  16. when pmod(datediff(create_time,'2023-11-27') + 1,7) = 6 then '周六'
  17. else '周日' end as week_day,
  18. substr(day,1,7) month,
  19. day
  20. from hurys_db.dwd_statistics as dwd_st
  21. right join hurys_db.dwd_radar_lane as dwd_rl
  22. on dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_no
  23. right join hurys_db.dwd_device_scene as dwd_ds
  24. on dwd_ds.device_no=dwd_st.device_no
  25. right join hurys_db.dwd_scene as dwd_sc
  26. on dwd_sc.scene_id = dwd_ds.scene_id
  27. right join hurys_db.dwd_radar_config as dwd_rc
  28. on dwd_rc.device_no=dwd_st.device_no
  29. where dwd_st.create_time is not null and dwd_st.day='2024-09-05'
  30. group by dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, case when pmod(datediff(create_time,'2023-11-27') + 1,7) = 1 then '周一'
  31. when pmod(datediff(create_time,'2023-11-27') + 1,7) = 2 then '周二'
  32. when pmod(datediff(create_time,'2023-11-27') + 1,7) = 3 then '周三'
  33. when pmod(datediff(create_time,'2023-11-27') + 1,7) = 4 then '周四'
  34. when pmod(datediff(create_time,'2023-11-27') + 1,7) = 5 then '周五'
  35. when pmod(datediff(create_time,'2023-11-27') + 1,7) = 6 then '周六'
  36. else '周日' end, day
  37. ;

2.1.2 ClickHouse中现有代码

2.1.2.1 ClickHouse中表结构
  1. --1、统计数据流量表——动态分区——1天周期
  2. create table if not exists hurys_jw.dws_statistics_volume_1day(
  3. device_no String comment '设备编号',
  4. scene_name String comment '场景名称',
  5. lane_no Nullable(Int32) comment '车道编号',
  6. lane_direction Nullable(String) comment '车道流向',
  7. section_no Nullable(Int32) comment '断面编号',
  8. device_direction Nullable(String) comment '雷达朝向',
  9. sum_volume_day Nullable(Int32) comment '每天总流量',
  10. week_day Nullable(String) comment '周几',
  11. month Nullable(String) comment '月份',
  12. day Date comment '日期'
  13. )
  14. ENGINE = MergeTree
  15. PARTITION BY day
  16. PRIMARY KEY day
  17. ORDER BY day
  18. SETTINGS index_granularity = 8192;
2.1.2.2 ClickHouse中SQL语句
  1. --动态加载数据
  2. select
  3. dwd_st.device_no,
  4. dwd_sc.scene_name,
  5. dwd_st.lane_no,
  6. dwd_rl.lane_direction,
  7. dwd_st.section_no,
  8. dwd_rc.device_direction,
  9. sum(volume_sum) sum_volume_day,
  10. case when toDayOfWeek(create_time) = 1 then '周一'
  11. when toDayOfWeek(create_time) = 2 then '周二'
  12. when toDayOfWeek(create_time) = 3 then '周三'
  13. when toDayOfWeek(create_time) = 4 then '周四'
  14. when toDayOfWeek(create_time) = 5 then '周五'
  15. when toDayOfWeek(create_time) = 6 then '周六'
  16. when toDayOfWeek(create_time) = 7 then '周日'
  17. end as week_day, concat(toString(toYear(dwd_st.day)), '-', lpad(toString(toMonth(dwd_st.day)), 2, '0')) AS month,
  18. cast(dwd_st.day as String) day
  19. from hurys_jw.dwd_statistics as dwd_st
  20. right join hurys_jw.dwd_radar_lane as dwd_rl
  21. on dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_no
  22. right join hurys_jw.dwd_device_scene as dwd_ds
  23. on dwd_ds.device_no=dwd_st.device_no
  24. right join hurys_jw.dwd_scene as dwd_sc
  25. on dwd_sc.scene_id = dwd_ds.scene_id
  26. right join hurys_jw.dwd_radar_config as dwd_rc
  27. on dwd_rc.device_no=dwd_st.device_no
  28. where dwd_st.create_time is not null and dwd_st.lane_no is not null and dwd_st.day >= ?
  29. group by dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, case when toDayOfWeek(create_time) = 1 then '周一'
  30. when toDayOfWeek(create_time) = 2 then '周二'
  31. when toDayOfWeek(create_time) = 3 then '周三'
  32. when toDayOfWeek(create_time) = 4 then '周四'
  33. when toDayOfWeek(create_time) = 5 then '周五'
  34. when toDayOfWeek(create_time) = 6 then '周六'
  35. when toDayOfWeek(create_time) = 7 then '周日'
  36. end, dwd_st.day
  37. ;

2.2 统计数据流量表5分钟周期

2.2.1 Hive中原有代码

2.2.1.1 Hive中建表语句
  1. --5、统计数据流量表——动态分区——5分钟周期
  2. create table if not exists hurys_db.dws_statistics_volume_5min(
  3. device_no string comment '设备编号',
  4. scene_name string comment '场景名称',
  5. lane_no int comment '车道编号',
  6. lane_direction string comment '车道流向',
  7. section_no int comment '断面编号',
  8. device_direction string comment '雷达朝向',
  9. sum_volume_5min int comment '每5分钟总流量',
  10. start_time timestamp comment '开始时间'
  11. )
  12. comment '统计数据流量表——动态分区——5分钟周期'
  13. partitioned by (day string)
  14. stored as orc
  15. ;
2.2.1.2 Hive中SQL语句
  1. --动态加载数据
  2. insert overwrite table hurys_db.dws_statistics_volume_5min partition(day)
  3. select
  4. dwd_st.device_no,
  5. dwd_sc.scene_name,
  6. dwd_st.lane_no,
  7. dwd_rl.lane_direction,
  8. dwd_st.section_no,
  9. dwd_rc.device_direction,
  10. sum(volume_sum) sum_volume_5min,
  11. case when minute(create_time ) < 5 then
  12. concat(substr(create_time, 1, 14), '00:00')
  13. when minute(create_time) >=5 and minute(create_time) <10 then
  14. concat(substr(create_time, 1, 14), '05:00')
  15. when minute(create_time) >=10 and minute(create_time) <15 then
  16. concat(substr(create_time, 1, 14), '10:00')
  17. when minute(create_time) >=15 and minute(create_time) <20 then
  18. concat(substr(create_time, 1, 14), '15:00')
  19. when minute(create_time) >=20 and minute(create_time) <25 then
  20. concat(substr(create_time, 1, 14), '20:00')
  21. when minute(create_time) >=25 and minute(create_time) <30 then
  22. concat(substr(create_time, 1, 14), '25:00')
  23. when minute(create_time) >=30 and minute(create_time) <35 then
  24. concat(substr(create_time, 1, 14), '30:00')
  25. when minute(create_time) >=35 and minute(create_time) <40 then
  26. concat(substr(create_time, 1, 14), '35:00')
  27. when minute(create_time) >=40 and minute(create_time) <45 then
  28. concat(substr(create_time, 1, 14), '40:00')
  29. when minute(create_time) >=45 and minute(create_time) <50 then
  30. concat(substr(create_time, 1, 14), '45:00')
  31. when minute(create_time) >=50 and minute(create_time) <55 then
  32. concat(substr(create_time, 1, 14), '50:00')
  33. else
  34. concat(substr(create_time, 1, 14), '55:00') end as start_time,
  35. day
  36. from hurys_db.dwd_statistics as dwd_st
  37. right join hurys_db.dwd_radar_lane as dwd_rl
  38. on dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_no
  39. right join hurys_db.dwd_device_scene as dwd_ds
  40. on dwd_ds.device_no=dwd_st.device_no
  41. right join hurys_db.dwd_scene as dwd_sc
  42. on dwd_sc.scene_id = dwd_ds.scene_id
  43. right join hurys_db.dwd_radar_config as dwd_rc
  44. on dwd_rc.device_no=dwd_st.device_no
  45. where dwd_st.create_time is not null and dwd_st.day='2024-09-05'
  46. group by dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, case when minute(create_time ) < 5 then
  47. concat(substr(create_time, 1, 14), '00:00')
  48. when minute(create_time) >=5 and minute(create_time) <10 then
  49. concat(substr(create_time, 1, 14), '05:00')
  50. when minute(create_time) >=10 and minute(create_time) <15 then
  51. concat(substr(create_time, 1, 14), '10:00')
  52. when minute(create_time) >=15 and minute(create_time) <20 then
  53. concat(substr(create_time, 1, 14), '15:00')
  54. when minute(create_time) >=20 and minute(create_time) <25 then
  55. concat(substr(create_time, 1, 14), '20:00')
  56. when minute(create_time) >=25 and minute(create_time) <30 then
  57. concat(substr(create_time, 1, 14), '25:00')
  58. when minute(create_time) >=30 and minute(create_time) <35 then
  59. concat(substr(create_time, 1, 14), '30:00')
  60. when minute(create_time) >=35 and minute(create_time) <40 then
  61. concat(substr(create_time, 1, 14), '35:00')
  62. when minute(create_time) >=40 and minute(create_time) <45 then
  63. concat(substr(create_time, 1, 14), '40:00')
  64. when minute(create_time) >=45 and minute(create_time) <50 then
  65. concat(substr(create_time, 1, 14), '45:00')
  66. when minute(create_time) >=50 and minute(create_time) <55 then
  67. concat(substr(create_time, 1, 14), '50:00')
  68. else
  69. concat(substr(create_time, 1, 14), '55:00') end, day
  70. ;

2.2.2 ClickHouse中现有代码

2.2.2.1 ClickHouse中表结构
  1. --5、统计数据流量表——动态分区——5分钟周期
  2. create table if not exists hurys_jw.dws_statistics_volume_5min(
  3. device_no String comment '设备编号',
  4. scene_name String comment '场景名称',
  5. lane_no Nullable(Int32) comment '车道编号',
  6. lane_direction Nullable(String) comment '车道流向',
  7. section_no Nullable(Int32) comment '断面编号',
  8. device_direction Nullable(String) comment '雷达朝向',
  9. sum_volume_5min Nullable(Int32) comment '每5分钟总流量',
  10. start_time DateTime comment '开始时间',
  11. day Date comment '日期'
  12. )
  13. ENGINE = MergeTree
  14. PARTITION BY day
  15. PRIMARY KEY day
  16. ORDER BY day
  17. SETTINGS index_granularity = 8192;
2.2.2.2 ClickHouse中SQL语句
  1. --动态加载数据
  2. select
  3. dwd_st.device_no,
  4. dwd_sc.scene_name,
  5. dwd_st.lane_no,
  6. dwd_rl.lane_direction,
  7. dwd_st.section_no,
  8. dwd_rc.device_direction,
  9. sum(volume_sum) sum_volume_5min,
  10. toDateTime(concat(toString(toDate(create_time)),' ',lpad(toString(extract(hour FROM create_time)), 2, '0'),':',
  11. CASE
  12. WHEN extract(minute FROM create_time) < 5 THEN '00'
  13. WHEN extract(minute FROM create_time) >= 5 AND extract(minute FROM create_time) < 10 THEN '05'
  14. WHEN extract(minute FROM create_time) >= 10 AND extract(minute FROM create_time) < 15 THEN '10'
  15. WHEN extract(minute FROM create_time) >= 15 AND extract(minute FROM create_time) < 20 THEN '15'
  16. WHEN extract(minute FROM create_time) >= 20 AND extract(minute FROM create_time) < 25 THEN '20'
  17. WHEN extract(minute FROM create_time) >= 25 AND extract(minute FROM create_time) < 30 THEN '25'
  18. WHEN extract(minute FROM create_time) >= 30 AND extract(minute FROM create_time) < 35 THEN '30'
  19. WHEN extract(minute FROM create_time) >= 35 AND extract(minute FROM create_time) < 40 THEN '35'
  20. WHEN extract(minute FROM create_time) >= 40 AND extract(minute FROM create_time) < 45 THEN '40'
  21. WHEN extract(minute FROM create_time) >= 45 AND extract(minute FROM create_time) < 50 THEN '45'
  22. WHEN extract(minute FROM create_time) >= 50 AND extract(minute FROM create_time) < 55 THEN '50'
  23. ELSE '55'
  24. END,
  25. ':00'
  26. )) as start_time,
  27. cast(dwd_st.day as String) day
  28. from hurys_jw.dwd_statistics as dwd_st
  29. right join hurys_jw.dwd_radar_lane as dwd_rl
  30. on dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_no
  31. right join hurys_jw.dwd_device_scene as dwd_ds
  32. on dwd_ds.device_no=dwd_st.device_no
  33. right join hurys_jw.dwd_scene as dwd_sc
  34. on dwd_sc.scene_id = dwd_ds.scene_id
  35. right join hurys_jw.dwd_radar_config as dwd_rc
  36. on dwd_rc.device_no=dwd_st.device_no
  37. where dwd_st.create_time is not null and dwd_st.lane_no is not null and dwd_st.day >= ?
  38. group by dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, toDateTime(concat(toString(toDate(create_time)),' ',lpad(toString(extract(hour FROM create_time)), 2, '0'),':',
  39. CASE
  40. WHEN extract(minute FROM create_time) < 5 THEN '00'
  41. WHEN extract(minute FROM create_time) >= 5 AND extract(minute FROM create_time) < 10 THEN '05'
  42. WHEN extract(minute FROM create_time) >= 10 AND extract(minute FROM create_time) < 15 THEN '10'
  43. WHEN extract(minute FROM create_time) >= 15 AND extract(minute FROM create_time) < 20 THEN '15'
  44. WHEN extract(minute FROM create_time) >= 20 AND extract(minute FROM create_time) < 25 THEN '20'
  45. WHEN extract(minute FROM create_time) >= 25 AND extract(minute FROM create_time) < 30 THEN '25'
  46. WHEN extract(minute FROM create_time) >= 30 AND extract(minute FROM create_time) < 35 THEN '30'
  47. WHEN extract(minute FROM create_time) >= 35 AND extract(minute FROM create_time) < 40 THEN '35'
  48. WHEN extract(minute FROM create_time) >= 40 AND extract(minute FROM create_time) < 45 THEN '40'
  49. WHEN extract(minute FROM create_time) >= 45 AND extract(minute FROM create_time) < 50 THEN '45'
  50. WHEN extract(minute FROM create_time) >= 50 AND extract(minute FROM create_time) < 55 THEN '50'
  51. ELSE '55'
  52. END,
  53. ':00'
  54. )), cast(dwd_st.day as String)
  55. ;

就先这样,反正ClickHouse和Hive的SQL语句非常非常不一样!!!

标签: clickhouse hive

本文转载自: https://blog.csdn.net/tiantang2renjian/article/details/143407102
版权归原作者 天地风雷水火山泽 所有, 如有侵权,请联系我们删除。

“二百七十六、ClickHouse——Hive和ClickHouse非常不同的DWS指标数据SQL语句”的评论:

还没有评论