-- Load the local file into two month partitions of emp_part (file is uploaded to HDFS).
LOAD DATA LOCAL INPATH '/opt/test/emp.txt' INTO TABLE emp_part PARTITION (year='2016', month='3');
LOAD DATA LOCAL INPATH '/opt/test/emp.txt' INTO TABLE emp_part PARTITION (year='2016', month='4');
用hdfs中指定位置的數據,增加分區表中數據,此操作不會移動數據文件到分區表目錄中
-- Register an existing HDFS directory as a new partition; no data files are moved.
ALTER TABLE emp_part ADD PARTITION (year='2016', month='5') LOCATION '/data';
把hdfs中指定位置的數據移動到分區表目錄中,增加數據
-- LOAD without LOCAL moves the HDFS file into the partition directory.
LOAD DATA INPATH '/emp.txt' INTO TABLE emp_part PARTITION (year='2016', month='6');
5.其他創建表的方式 (1) create-as
-- CTAS: create emp3 with emp's schema and copy all of its rows.
CREATE TABLE emp3
AS
SELECT * FROM emp;
(2) create-like
-- Clone emp's schema only (no data), then load and replace contents.
CREATE TABLE emp4 LIKE emp;
LOAD DATA LOCAL INPATH '/opt/test/emp.txt' OVERWRITE INTO TABLE emp4;
-- Clone the ORC table's schema and enable Snappy compression.
-- FIX: the ORC table property is "orc.compress" with value NONE|ZLIB|SNAPPY;
-- the original "orc.compression"="snappy" is not a recognized property and is ignored.
CREATE TABLE emp_orc_snappy2 LIKE emp_orc TBLPROPERTIES ("orc.compress"="SNAPPY");
INSERT OVERWRITE TABLE emp_orc_snappy2 SELECT * FROM emp;
c)利用非壓縮表直接創建orc+snappy表并導入數據
-- Create an ORC+Snappy table directly from an uncompressed table via CTAS.
-- FIX: use "orc.compress"="SNAPPY" — "orc.compression" is not a valid ORC table property.
CREATE TABLE emp_orc_snappy3
STORED AS ORC TBLPROPERTIES ("orc.compress"="SNAPPY")
AS SELECT * FROM emp;
7.hive執行參數-e,-f,--hiveconf (1)命令行直接執行hql語句
# Run an inline HiveQL statement directly from the command line.
hive -e "select * from db_hive01.emp"
(2)執行hql文件中的語句
# Execute the HiveQL statements contained in a script file.
hive -f emp.hql
(3)打開調試模式
# Start the Hive CLI with the root logger at DEBUG level, printed to the console.
hive --hiveconf hive.root.logger=DEBUG,console
8.數據導出 (1)導出數據到本地 a)insert
-- Export query results to a local directory, tab-delimited.
INSERT OVERWRITE LOCAL DIRECTORY '/opt/test/local'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
SELECT * FROM emp;
如果不指定row format delimited fields terminated by '\t',字段間默認沒有分隔符 b)
# Redirect query output into a local file (appends to emp_export.txt).
hive -e 'select * from testdb2.emp' >> ./emp_export.txt
(2)導出到hdfs a)
-- Export query results to an HDFS directory.
INSERT OVERWRITE DIRECTORY '/export_data'
SELECT * FROM emp;
hive 0.13.1版本還不支持導出數據到hdfs時指定分隔符row format delimited fields terminated by '\t' b)
-- EXPORT writes the table's data and metadata under /export_data.
EXPORT TABLE emp TO '/export_data';
導出後會生成/export_data/data目錄,emp.txt存放在此目錄中,即/export_data/data/emp.txt 9. 排序 (1)order by 全局排序
-- ORDER BY gives one total ordering over all output (forces a single reducer).
INSERT OVERWRITE LOCAL DIRECTORY '/opt/test/local'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
SELECT * FROM emp ORDER BY empno;
(2)sort by 與 distribute by 類似MR中partition,進行分區,結合sort by使用,每個reduce內部進行排序,不是全局排序。distribute by 一定要放在sort by 前面,且必須指定mapreduce.job.reduces數量,否則導出結果還是在一個文件中
-- DISTRIBUTE BY spreads rows across reducers by deptno; SORT BY orders rows
-- within each reducer only (not globally). Reducer count must be set explicitly.
SET mapreduce.job.reduces=3;
INSERT OVERWRITE LOCAL DIRECTORY '/opt/test/local'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
SELECT * FROM emp DISTRIBUTE BY deptno SORT BY empno;
(3)cluster by 當distribute by和sort by字段一樣的時候,直接使用cluster by 10.常用函數
-- Examples of common built-in functions.
SELECT upper(empname) FROM emp;
SELECT unix_timestamp(trackTime) FROM bflog LIMIT 3;
SELECT year(hiredate) FROM emp;
SELECT month(hiredate) FROM emp;
SELECT hour(hiredate) FROM emp;
SELECT substr(hiredate, 1, 4) FROM emp;  -- FIX: original read "from .emp" (stray dot, syntax error)
SELECT split(hiredate, '-')[1] FROM emp;
SELECT reverse(hiredate) FROM emp;
SELECT concat(empno, '-', empname) FROM emp;
-- Conditional expression form (was fused onto the line above as plain text):
--   CASE WHEN <cond1> THEN ... WHEN <cond2> THEN ... ELSE ... END
可以使用desc function substr 查看函數說明,substr第二個參數為index,從1計數,第三個參數為length 11. 自定義UDF
-- Register the jar and expose its UDF class as a temporary function for this session.
ADD JAR /opt/test/mylower.jar;
CREATE TEMPORARY FUNCTION mylower AS 'org.gh.hadoop.hive.MyLower';
12. 使用正則表達式加載數據字段
-- Parse raw log lines into columns with RegexSerDe: each regex capture group
-- maps to one column, in order (the contrib RegexSerDe requires all STRING columns).
create table beifenglog(
remote_addr string,
remote_user string,
time_local string,
request string,
status string,
body_bytes_sent string,
request_body string,
http_referer string,
http_user_agent string,
http_x_forwarded_for string,
host string)
row format serde 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
with serdeproperties(
"input.regex" = "(\\\"[\\d\\.]+\\\") (\\\"[^ ]+\\\") (\\\".*?\\\") (\\\".*?\\\") (\\\"\\d+\\\") (\\\"\\d+\\\") ([^ ]+) (\\\"[^ ]+\\\") (\\\".*?\\\") (\\\"[^ ]+\\\") (\\\"[^ ]+\\\")"
)
stored as textfile;
-- Load the raw source data into the table (prose was fused onto the line above).
-- Load the raw log file, replacing any existing data in the table.
LOAD DATA LOCAL INPATH '/opt/test/beifenglog.data' OVERWRITE INTO TABLE beifenglog;
可以使用工具調試正則:http://tool.chinaz.com/regex 13.注意點 (1)在創建表(無論管理表還是外部表)時,如果沒有指定location,可以使用load data加載數據 a) 指定本地目錄中的數據,會上傳數據文件到hdfs中 b) 指定hdfs中數據文件,如果指定的路徑與表所在的目錄不一致,則移動數據文件到表目錄中
-- LOAD ... INPATH (no LOCAL) moves the HDFS file into the table directory,
-- for external and managed tables alike.
CREATE EXTERNAL TABLE emp_ext2 LIKE emp;
LOAD DATA INPATH '/emp.txt' INTO TABLE emp_ext2;
-- /emp.txt is moved to /user/hive/warehouse/testdb2.db/emp_ext2/
-- (the next CREATE statement was fused onto the prose line above in the original)
CREATE TABLE emp2 LIKE emp;
LOAD DATA INPATH '/emp.txt' INTO TABLE emp2;
-- /emp.txt is moved to /user/hive/warehouse/testdb2.db/emp2/
? (2)create-like時不能指定stored as為其他格式,否則報錯 以下操作會報錯 FAILED: ParseException line 1:31 missing EOF at 'stored' near 'emp'