拉链表的Hive环境下的详细操作

目录

1.原始表操作

2.新增三条数据操作

3.新增四条数据


 

1.原始表操作

[root@linux01 ~]# cd /opt/tmp/lalian

[root@linux01 lalian]# vi ./orders.txt 

vi ./orders.txt 原始表
1	2021-12-20	2021-12-20	创建
2	2021-12-20	2021-12-20	创建
3	2021-12-20	2021-12-20	创建

hive> drop database lalian cascade;

hive> create database lalian;

hive> use lalian;

hive> drop table orders;

hive> create table orders(
      orderid int,
      createdate string,
      modifiedtime string,
      status string
      ) row format delimited fields terminated by 't';
OK
Time taken: 0.129 seconds

hive> show tables;
OK
orders

[root@linux01 lalian]# hdfs dfs -put ./orders.txt /user/hive/warehouse/lalian.db/orders

hive> select * from orders;
OK
1	2021-12-20	2021-12-20	创建
2	2021-12-20	2021-12-20	创建
3	2021-12-20	2021-12-20	创建
Time taken: 0.157 seconds, Fetched: 3 row(s)

------------------------------------------------------------

hive> create table ods_orders_inc(
    > orderid int,
    > createtime string,
    > modifiedtime string,
    > status string
    > ) partitioned by (day string)
    > row format delimited fields terminated by 't';

hive> insert overwrite table ods_orders_inc partition(day='2021-12-20') 
    > select orderid,createdate,modifiedtime,status from orders;

hive> show partitions ods_orders_inc;
OK
day=2021-12-20

hive> select * from ods_orders_inc;
OK
1	2021-12-20	2021-12-20	创建	2021-12-20
2	2021-12-20	2021-12-20	创建	2021-12-20
3	2021-12-20	2021-12-20	创建	2021-12-20
Time taken: 0.116 seconds, Fetched: 3 row(s)

------------------------------------------------------------

hive> create table dws_orders_his(
    > orderid int,
    > createtime string,
    > modifiedtime string,
    > status string,
    > start_time string,
    > end_time string
    > ) row format delimited fields terminated by 't';

hive> insert overwrite table dws_orders_his
    > select orderid,createtime,modifiedtime,status,modifiedtime,'9999-12-31' from ods_orders_inc
    > where day='2021-12-20';
	
hive> select * from dws_orders_his;
OK
1	2021-12-20	2021-12-20	创建	2021-12-20	9999-12-31
2	2021-12-20	2021-12-20	创建	2021-12-20	9999-12-31
3	2021-12-20	2021-12-20	创建	2021-12-20	9999-12-31
Time taken: 0.092 seconds, Fetched: 3 row(s)

2.新增三条数据操作

1	2021-12-20	2021-12-20	创建
2	2021-12-20	2021-12-20	创建
3	2021-12-20	2021-12-20	创建
1	2021-12-20	2021-12-21	支付
2	2021-12-20	2021-12-21	支付
4	2021-12-21	2021-12-21	创建

[root@linux01 lalian]# hdfs dfs -put -f ./orders.txt /user/hive/warehouse/lalian.db/orders
	
未使用动态分区 此为静态分区
---------将21日orders表里的状态修改及新增的数据导入到 ods_orders_inc 分区为2021-12-21---------
hive> insert overwrite table ods_orders_inc partition(day='2021-12-21')
    > select orderid,createdate,modifiedtime,status from orders 
    > where (createdate='2021-12-21' and modifiedtime='2021-12-21') or modifiedtime='2021-12-21';
可只写modifiedtime='2021-12-21' 此处这样写是为增强可读性

hive> select * from ods_orders_inc;
OK
1	2021-12-20	2021-12-20	创建	2021-12-20
2	2021-12-20	2021-12-20	创建	2021-12-20
3	2021-12-20	2021-12-20	创建	2021-12-20
1	2021-12-20	2021-12-21	支付	2021-12-21
2	2021-12-20	2021-12-21	支付	2021-12-21
4	2021-12-21	2021-12-21	创建	2021-12-21
Time taken: 0.082 seconds, Fetched: 6 row(s)

--  合并查询 UNION ALL --
--  通过his  left join inc  找出订单状态有更新的数据  --
--  将inc 2021-12-21的数据添加end_time为9999-12-31后插入his表 --
create table dws_his_20212121_tmp as
select t.orderid,t.createtime,t.modifiedtime,
t.status,t.start_time,t.end_time 
from 
(
select 
t1.orderid, 
t1.createtime, 
t1.modifiedtime, 
t1.status,
t1.start_time, 
case when t2.orderid is not null and t1.end_time > '2021-12-21' then  '2021-12-21' else t1.end_time end end_time
from dws_orders_his t1 
left join 
(select orderid from ods_orders_inc  where day='2021-12-21') t2 
on t1.orderid=t2.orderid
union all
select
orderid, 
createtime, 
modifiedtime, 
status,
modifiedtime as start_time,
'9999-12-31' as end_time
from ods_orders_inc  where day='2021-12-21'
) t 
order by t.orderid,t.start_time;

hive>  select * from dws_orders_his;
OK
1	2021-12-20	2021-12-20	创建	2021-12-20	9999-12-31
2	2021-12-20	2021-12-20	创建	2021-12-20	9999-12-31
3	2021-12-20	2021-12-20	创建	2021-12-20	9999-12-31

hive> insert overwrite table dws_orders_his select * from dws_his_20212121_tmp;

hive> select * from dws_orders_his;
1	2021-12-20	2021-12-20	创建	2021-12-20	2021-12-21
1	2021-12-20	2021-12-21	支付	2021-12-21	9999-12-31
2	2021-12-20	2021-12-20	创建	2021-12-20	2021-12-21
2	2021-12-20	2021-12-21	支付	2021-12-21	9999-12-31
3	2021-12-20	2021-12-20	创建	2021-12-20	9999-12-31
4	2021-12-21	2021-12-21	创建	2021-12-21	9999-12-31

3.新增四条数据

vi ./orders.txt 新增四条数据
1	2021-12-20	2021-12-20	创建
2	2021-12-20	2021-12-20	创建
3	2021-12-20	2021-12-20	创建
1	2021-12-20	2021-12-21	支付
2	2021-12-20	2021-12-21	支付
4	2021-12-21	2021-12-21	创建
1	2021-12-20	2021-12-22	完成
3	2021-12-20	2021-12-22	支付
4	2021-12-21	2021-12-22	支付
5	2021-12-22	2021-12-22	创建        

[root@linux01 lalian]# hdfs dfs -put -f ./orders.txt /user/hive/warehouse/lalian.db/orders              

hive> insert overwrite table ods_orders_inc partition(day='2021-12-22')
    > select orderid,createdate,modifiedtime,status from orders 
    > where (createdate='2021-12-22' and modifiedtime='2021-12-22') or modifiedtime='2021-12-22';
	
hive> select * from ods_orders_inc;
OK
1	2021-12-20	2021-12-20	创建	2021-12-20
2	2021-12-20	2021-12-20	创建	2021-12-20
3	2021-12-20	2021-12-20	创建	2021-12-20
1	2021-12-20	2021-12-21	支付	2021-12-21
2	2021-12-20	2021-12-21	支付	2021-12-21
4	2021-12-21	2021-12-21	创建	2021-12-21
Time taken: 0.075 seconds, Fetched: 6 row(s)

create table dws_his_20212122_tmp as
select t.orderid,t.createtime,t.modifiedtime,
t.status,t.start_time,t.end_time 
from 
(
select 
t1.orderid, 
t1.createtime, 
t1.modifiedtime, 
t1.status,
t1.start_time, 
case when t2.orderid is not null and t1.end_time > '2021-12-22' then  '2021-12-22' else t1.end_time end end_time
from dws_orders_his t1 
left join 
(select orderid from ods_orders_inc  where day='2021-12-22') t2 
on t1.orderid=t2.orderid
union all
select
orderid, 
createtime, 
modifiedtime, 
status,
modifiedtime as start_time,
'9999-12-31' as end_time
from ods_orders_inc  where day='2021-12-22'
) t 
order by t.orderid,t.start_time;

hive> insert overwrite table dws_orders_his select * from dws_his_20212122_tmp;

hive> select * from dws_orders_his;

1	2021-12-20	2021-12-20	创建	2021-12-20	2021-12-21
1	2021-12-20	2021-12-21	支付	2021-12-21	2021-12-22
1	2021-12-20	2021-12-22	完成	2021-12-22	9999-12-31
2	2021-12-20	2021-12-20	创建	2021-12-20	2021-12-21
2	2021-12-20	2021-12-21	支付	2021-12-21	9999-12-31
3	2021-12-20	2021-12-20	创建	2021-12-20	2021-12-22
3	2021-12-20	2021-12-22	支付	2021-12-22	9999-12-31
4	2021-12-21	2021-12-21	创建	2021-12-21	2021-12-22
4	2021-12-21	2021-12-22	支付	2021-12-22	9999-12-31
5	2021-12-22	2021-12-22	创建	2021-12-22	9999-12-31

 

 

 

本图文内容来源于网友网络收集整理提供,作为学习参考使用,版权属于原作者。
THE END
分享
二维码
< <上一篇
下一篇>>