HiveSQL中复杂数据类型操作
?
HiveSQL運行優化參數配置
HiveSQL常用數據處理語句
HiveSQL中復雜數據類型操作
?
Hive有三種復雜數據類型ARRAY、MAP和STRUCT,復雜數據類型允許任意層次的嵌套。
目錄
array類型
map類型
struct類型
array類型
name與locations之間制表符分隔,locations中元素之間逗號分隔,數據樣本集為:
zhangsan	beijing,shanghai,tianjin,hangzhou
lisi	changchu,chengdu,wuhan,beijing
建表語句
-- Table pairing a person's name with an ARRAY<STRING> of work locations.
-- Columns are separated by a tab; array elements are separated by a comma.
CREATE TABLE hive_array (
    name           STRING,
    work_locations ARRAY<STRING>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY ',';
導入數據
-- Load the local sample file into hive_array, replacing any existing rows.
LOAD DATA LOCAL INPATH '/home/zhangfei/data/work_locations.txt'
OVERWRITE INTO TABLE hive_array;
常用查詢
-- Show every row and column of hive_array.
SELECT *
FROM hive_array;
zhangsan	["beijing","shanghai","tianjin","hangzhou"]
lisi	["changchu","chengdu","wuhan","beijing"]
array_contains常與where子句連用
-- First element of the array: work_locations[0]. Subscripts start at 0.
SELECT
    name,
    work_locations[0] AS location
FROM hive_array;

-- Length of the array, via size(work_locations).
SELECT
    name,
    size(work_locations) AS location
FROM hive_array;

-- Keep only the rows where array_contains(work_locations, 'tianjin') is true.
SELECT *
FROM hive_array
WHERE array_contains(work_locations, 'tianjin');

-- explode() expands the array elements for display.
SELECT explode(work_locations)
FROM hive_array;
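注意:explode() 是表生成函數(UDTF),直接寫在 select 中時不能與其他列並列,即 select name, explode(work_locations) from hive_array 會報錯;若要同時取出 name 與展開後的元素,需使用 LATERAL VIEW,例如:select name, loc from hive_array lateral view explode(work_locations) t as loc;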
map類型
數據格式
字段與字段之間的分隔符:",";map 各條目(k-v 對)之間的分隔符:"#";map 內部 key 與 value 之間的分隔符:":"
zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
lisi,father:mayun#mother:huangyi#brother:guanyu,22
wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
mayun,father:mayongzhen#mother:angelababy,26
建表語句
-- Table with a MAP<STRING,STRING> column holding family members.
-- Delimiters:
--   ','  separates columns
--   '#'  separates map entries (collection items)
--   ':'  separates a map key from its value
-- NOTE(review): this schema declares four columns (id, name, members, age),
-- but the sample rows and the printed query output in this article carry only
-- three fields (name, members, age). Loaded as-is the fields would be
-- misaligned; prepend an id to each data row or drop the id column. Confirm
-- which was intended.
CREATE TABLE hive_map (
    id      INT,
    name    STRING,
    members MAP<STRING,STRING>,
    age     INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '#'
    MAP KEYS TERMINATED BY ':';
導入數據
-- Load the local sample file into hive_map, replacing any existing rows.
LOAD DATA LOCAL INPATH '/home/zhangfei/data/hive_map.txt'
OVERWRITE INTO TABLE hive_map;
常用查詢
-- Show every row and column of hive_map.
SELECT *
FROM hive_map;
zhangsan	{"father":"xiaoming","mother":"xiaohuang","brother":"xiaoxu"}	28
lisi	{"father":"mayun","mother":"huangyi","brother":"guanyu"}	22
wangwu	{"father":"wangjianlin","mother":"ruhua","sister":"jingtian"}	29
mayun	{"father":"mayongzhen","mother":"angelababy"}	26
查詢語句
-- Look up individual map values by key with members['key'].
SELECT
    id,
    name,
    members['father'] AS father,
    members['mother'] AS mother,
    age
FROM hive_map;

-- All keys of the map, via map_keys().
SELECT
    id,
    name,
    map_keys(members) AS relation
FROM hive_map;

-- All values of the map, via map_values().
SELECT
    id,
    name,
    map_values(members) AS relation
FROM hive_map;

-- Number of entries in the map, via size().
SELECT
    id,
    name,
    size(members) AS num
FROM hive_map;

-- Rows whose map has a 'brother' key: test membership in map_keys().
SELECT *
FROM hive_map
WHERE array_contains(map_keys(members), 'brother');

-- Same filter, returning only the brother's value.
SELECT
    id,
    name,
    members['brother'] AS brother
FROM hive_map
WHERE array_contains(map_keys(members), 'brother');
struct類型
數據格式
說明:字段之間以 "#" 分隔,第二個字段(struct)內部各成員之間以冒號 ":" 分隔
192.168.1.1#zhangsan:40
192.168.1.2#lisi:50
192.168.1.3#wangwu:60
192.168.1.4#zhaoliu:70
建表語句
-- Table with a STRUCT column carrying a name and an age.
-- '#' separates columns; ':' separates the members of the struct.
CREATE TABLE hive_struct (
    ip   STRING,
    info STRUCT<name:STRING, age:INT>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '#'
    COLLECTION ITEMS TERMINATED BY ':';
導入數據
-- Load the local sample file into hive_struct.
-- No OVERWRITE here, unlike the other loads in this article.
LOAD DATA LOCAL INPATH '/home/zhangfei/data/hive_struct.txt'
INTO TABLE hive_struct;
常用查詢
-- Show every row and column of hive_struct.
SELECT *
FROM hive_struct;
ip	info
192.168.1.1	{"name":"zhangsan","age":40}
192.168.1.2	{"name":"lisi","age":50}
192.168.1.3	{"name":"wangwu","age":60}
192.168.1.4	{"name":"zhaoliu","age":70}
可直接通過.訪問數據
-- Struct members are read with dot notation: info.name.
SELECT
    ip,
    info.name
FROM hive_struct;
192.168.1.1	zhangsan
192.168.1.2	lisi
192.168.1.3	wangwu
192.168.1.4	zhaoliu
?
《新程序員》:云原生和全面數字化實踐50位技術專家共同創作,文字、視頻、音頻交互閱讀總結
以上是生活随笔為你收集整理的HiveSQL中复杂数据类型操作的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: HiveSQL常用数据处理语句
- 下一篇: 风控业务中的信用与欺诈的定义区别