拉链表的Hive环境下的详细操作
目录
1.原始表操作
[root@linux01 ~]# cd /opt/tmp/lalian
[root@linux01 lalian]# vi ./orders.txt
vi ./orders.txt 原始表
1 2021-12-20 2021-12-20 创建
2 2021-12-20 2021-12-20 创建
3 2021-12-20 2021-12-20 创建
hive> drop database lalian cascade;
hive> create database lalian;
hive> use lalian;
hive> drop table orders;
hive> create table orders(
orderid int,
createdate string,
modifiedtime string,
status string
) row format delimited fields terminated by 't';
OK
Time taken: 0.129 seconds
hive> show tables;
OK
orders
[root@linux01 lalian]# hdfs dfs -put ./orders.txt /user/hive/warehouse/lalian.db/orders
hive> select * from orders;
OK
1 2021-12-20 2021-12-20 创建
2 2021-12-20 2021-12-20 创建
3 2021-12-20 2021-12-20 创建
Time taken: 0.157 seconds, Fetched: 3 row(s)
------------------------------------------------------------
hive> create table ods_orders_inc(
> orderid int,
> createtime string,
> modifiedtime string,
> status string
> ) partitioned by (day string)
> row format delimited fields terminated by 't';
hive> insert overwrite table ods_orders_inc partition(day='2021-12-20')
> select orderid,createdate,modifiedtime,status from orders;
hive> show partitions ods_orders_inc;
OK
day=2021-12-20
hive> select * from ods_orders_inc;
OK
1 2021-12-20 2021-12-20 创建 2021-12-20
2 2021-12-20 2021-12-20 创建 2021-12-20
3 2021-12-20 2021-12-20 创建 2021-12-20
Time taken: 0.116 seconds, Fetched: 3 row(s)
------------------------------------------------------------
hive> create table dws_orders_his(
> orderid int,
> createtime string,
> modifiedtime string,
> status string,
> start_time string,
> end_time string
> ) row format delimited fields terminated by 't';
hive> insert overwrite table dws_orders_his
> select orderid,createtime,modifiedtime,status,modifiedtime,'9999-12-31' from ods_orders_inc
> where day='2021-12-20';
hive> select * from dws_orders_his;
OK
1 2021-12-20 2021-12-20 创建 2021-12-20 9999-12-31
2 2021-12-20 2021-12-20 创建 2021-12-20 9999-12-31
3 2021-12-20 2021-12-20 创建 2021-12-20 9999-12-31
Time taken: 0.092 seconds, Fetched: 3 row(s)
2.新增三条数据操作
1 2021-12-20 2021-12-20 创建
2 2021-12-20 2021-12-20 创建
3 2021-12-20 2021-12-20 创建
1 2021-12-20 2021-12-21 支付
2 2021-12-20 2021-12-21 支付
4 2021-12-21 2021-12-21 创建
[root@linux01 lalian]# hdfs dfs -put -f ./orders.txt /user/hive/warehouse/lalian.db/orders
未使用动态分区 此为静态分区
---------将21日orders表里的状态修改及新增的数据导入到 ods_orders_inc 分区为2021-12-21---------
hive> insert overwrite table ods_orders_inc partition(day='2021-12-21')
> select orderid,createdate,modifiedtime,status from orders
> where (createdate='2021-12-21' and modifiedtime='2021-12-21') or modifiedtime='2021-12-21';
可只写modifiedtime='2021-12-21' 此处这样写是为增强可读性
hive> select * from ods_orders_inc;
OK
1 2021-12-20 2021-12-20 创建 2021-12-20
2 2021-12-20 2021-12-20 创建 2021-12-20
3 2021-12-20 2021-12-20 创建 2021-12-20
1 2021-12-20 2021-12-21 支付 2021-12-21
2 2021-12-20 2021-12-21 支付 2021-12-21
4 2021-12-21 2021-12-21 创建 2021-12-21
Time taken: 0.082 seconds, Fetched: 6 row(s)
-- 合并查询 UNION ALL --
-- 通过his left join inc 找出订单状态有更新的数据 --
-- 将inc 2021-12-21的数据添加end_time为9999-12-31后插入his表 --
create table dws_his_20212121_tmp as
select t.orderid,t.createtime,t.modifiedtime,
t.status,t.start_time,t.end_time
from
(
select
t1.orderid,
t1.createtime,
t1.modifiedtime,
t1.status,
t1.start_time,
case when t2.orderid is not null and t1.end_time > '2021-12-21' then '2021-12-21' else t1.end_time end end_time
from dws_orders_his t1
left join
(select orderid from ods_orders_inc where day='2021-12-21') t2
on t1.orderid=t2.orderid
union all
select
orderid,
createtime,
modifiedtime,
status,
modifiedtime as start_time,
'9999-12-31' as end_time
from ods_orders_inc where day='2021-12-21'
) t
order by t.orderid,t.start_time;
hive> select * from dws_orders_his;
OK
1 2021-12-20 2021-12-20 创建 2021-12-20 9999-12-31
2 2021-12-20 2021-12-20 创建 2021-12-20 9999-12-31
3 2021-12-20 2021-12-20 创建 2021-12-20 9999-12-31
hive> insert overwrite table dws_orders_his select * from dws_his_20212121_tmp;
hive> select * from dws_orders_his;
1 2021-12-20 2021-12-20 创建 2021-12-20 2021-12-21
1 2021-12-20 2021-12-21 支付 2021-12-21 9999-12-31
2 2021-12-20 2021-12-20 创建 2021-12-20 2021-12-21
2 2021-12-20 2021-12-21 支付 2021-12-21 9999-12-31
3 2021-12-20 2021-12-20 创建 2021-12-20 9999-12-31
4 2021-12-21 2021-12-21 创建 2021-12-21 9999-12-31
3.新增四条数据
vi ./orders.txt 新增四条数据
1 2021-12-20 2021-12-20 创建
2 2021-12-20 2021-12-20 创建
3 2021-12-20 2021-12-20 创建
1 2021-12-20 2021-12-21 支付
2 2021-12-20 2021-12-21 支付
4 2021-12-21 2021-12-21 创建
1 2021-12-20 2021-12-22 完成
3 2021-12-20 2021-12-22 支付
4 2021-12-21 2021-12-22 支付
5 2021-12-22 2021-12-22 创建
[root@linux01 lalian]# hdfs dfs -put -f ./orders.txt /user/hive/warehouse/lalian.db/orders
hive> insert overwrite table ods_orders_inc partition(day='2021-12-22')
> select orderid,createdate,modifiedtime,status from orders
> where (createdate='2021-12-22' and modifiedtime='2021-12-22') or modifiedtime='2021-12-22';
hive> select * from ods_orders_inc;
OK
1 2021-12-20 2021-12-20 创建 2021-12-20
2 2021-12-20 2021-12-20 创建 2021-12-20
3 2021-12-20 2021-12-20 创建 2021-12-20
1 2021-12-20 2021-12-21 支付 2021-12-21
2 2021-12-20 2021-12-21 支付 2021-12-21
4 2021-12-21 2021-12-21 创建 2021-12-21
Time taken: 0.075 seconds, Fetched: 6 row(s)
create table dws_his_20212122_tmp as
select t.orderid,t.createtime,t.modifiedtime,
t.status,t.start_time,t.end_time
from
(
select
t1.orderid,
t1.createtime,
t1.modifiedtime,
t1.status,
t1.start_time,
case when t2.orderid is not null and t1.end_time > '2021-12-22' then '2021-12-22' else t1.end_time end end_time
from dws_orders_his t1
left join
(select orderid from ods_orders_inc where day='2021-12-22') t2
on t1.orderid=t2.orderid
union all
select
orderid,
createtime,
modifiedtime,
status,
modifiedtime as start_time,
'9999-12-31' as end_time
from ods_orders_inc where day='2021-12-22'
) t
order by t.orderid,t.start_time;
hive> insert overwrite table dws_orders_his select * from dws_his_20212122_tmp;
hive> select * from dws_orders_his;
1 2021-12-20 2021-12-20 创建 2021-12-20 2021-12-21
1 2021-12-20 2021-12-21 支付 2021-12-21 2021-12-22
1 2021-12-20 2021-12-22 完成 2021-12-22 9999-12-31
2 2021-12-20 2021-12-20 创建 2021-12-20 2021-12-21
2 2021-12-20 2021-12-21 支付 2021-12-21 9999-12-31
3 2021-12-20 2021-12-20 创建 2021-12-20 2021-12-22
3 2021-12-20 2021-12-22 支付 2021-12-22 9999-12-31
4 2021-12-21 2021-12-21 创建 2021-12-21 2021-12-22
4 2021-12-21 2021-12-22 支付 2021-12-22 9999-12-31
5 2021-12-22 2021-12-22 创建 2021-12-22 9999-12-31
本图文内容来源于网友网络收集整理提供,作为学习参考使用,版权属于原作者。
THE END
二维码