odps结合mysql统计
生活随笔
收集整理的這篇文章主要介紹了
odps结合mysql统计
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
需求:結合odps對日志就行統計分析并將結果導入到mysql中
解決方法:結合odps命令行odpscmd和mysqldump、mysql以及contab完成該工作。
Shell:
#!/bin/bashPORT="3306" #端口號 USERNAME="biuser" #用戶名 PASSWORD="!#123date" #密碼 DBNAME="bitest" #數據庫名稱 RUNDATE=`date +%Y-%m-%d` period=`date +"%Y%m" -d "-1days"`PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/shenl/odpscmd_public/bin:/root/jdk1.7.0_75/bin####1 同步其它庫的相關數據 mysqldump -umdshop -p'xdata123' -h'192.168.128.33' testdb tb_carry tb_city --set-gtid-purged=OFF>testdb.sql mysql -u$USERNAME -p$PASSWORD -D$DBNAME < '/root/testdb.sql'1>/root/syscODPS.log###2 刪除現有分類 時段 渠道 商品數據 rm -rf /tmp/timespan.txt rm -rf /tmp/timearea.txt rm -rf /tmp/seekarea.txt rm -rf /tmp/tb_good.txt###3 mysql里加工分類 時段 渠道 商品數據 mysql -u$USERNAME -p$PASSWORD -D$DBNAME <'/root/shenl/timespan.sql' 1>/root/shenl/synctimespan.log###4 odps里刪除分類 時段 渠道 商品數據 odpscmd -e "DROP TABLE tb_bi_meta_timespan;" odpscmd -e "DROP TABLE tb_bi_meta_timearea;" odpscmd -e "DROP TABLE tb_bi_meta_seekarea;" odpscmd -e "DROP TABLE tb_good;"###5 odps里創建分類 時段 渠道 商品數據以便后期加工 odpscmd -e "CREATE TABLE tb_bi_meta_timespan(catalogid int,catalogname string,span int, channel int,action int);" odpscmd -e "CREATE TABLE tb_bi_meta_timearea(catalogid int,catalogname string,province string,province_id int,channel int,action int);" odpscmd -e "CREATE TABLE tb_bi_meta_seekarea(province string,province_id int,channel int);" odpscmd -e "create table tb_good(gid string,g_title string,g_catalog_id string,g_status string);"###6 上傳分類 時段 渠道 商品數據到odps對應的表中 odpscmd -e "tunnel upload /tmp/timespan.txt tb_bi_meta_timespan;" odpscmd -e "tunnel upload /tmp/timearea.txt tb_bi_meta_timearea;" odpscmd -e "tunnel upload /tmp/seekarea.txt tb_bi_meta_seekarea;" odpscmd -e "tunnel upload /tmp/tb_good.txt tb_good;"###7 按照分區插入日志數據odpscmd -e "INSERT into table tb_bi_goodbrowse partition(periodsplit="$period")SELECT channel,province,city,itemid,'' as goodclassify,datetrunc(requesttime,'DD') AS period,geid,userid,requesttime,action,getdate() as inserttime FROM tb_bi_marketlog A WHERE isdate(requesttime,'yyyy-mm-dd hh:mi:ss') AND A.period=to_char(dateadd(getdate(), -1, 'dd'),'yyyymm') AND datetrunc(A.requesttime,'DD') = datetrunc(dateadd(getdate(), -1,'dd'),'dd');"###8 odps計算生成出報表數據和導出 odpscmd -f "/root/shenl/odpsstat0320.sql"###9 刪除現有的報表數據 rm -rf /var/log/mysql/timespanout.txt rm -rf /var/log/mysql/timeareaout.txt rm -rf /var/log/mysql/seekareaout.txt###10 odps的報表數據下載到/var/log/mysql目錄內odpscmd -e "tunnel download max_compute.tb_bi_report_timespans /var/log/mysql/timespanout.txt" chown mysql:mysql /var/log/mysql/timespanout.txt odpscmd -e "tunnel downloadmax_compute.tb_bi_report_timeareas /var/log/mysql/timeareaout.txt" chown mysql:mysql /var/log/mysql/timeareaout.txt odpscmd -e "tunnel download -ni ''max_compute.tb_bi_report_seekareas /var/log/mysql/seekareaout.txt" chown mysql:mysql /var/log/mysql/seekareaout.txt###11 mysql導入odps里統計后的數據 mysql -u$USERNAME -p$PASSWORD -D$DBNAME <'/root/shenl/load2mysql.sql' 1>/root/shenl/load2mysql.log1)?其中/root/shenl/timespan.sql的腳本內容:
use bi; SELECT A.id AS catalogid,A.c_name catalogname,B.i AS span,C.i AS channel,D.i AS action INTO OUTFILE '/tmp/timespan.txt' FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' FROM (SELECT id,c_name FROM shenl_catalog WHERE c_parent_id = 0) A CROSS JOIN tb_incr B CROSS JOIN (SELECT * FROM tb_incr WHERE i>0 AND i<4)C CROSS JOIN (SELECT * FROM tb_incr WHERE i>0 AND i<5)D;SELECT A.id AS catalogid,A.c_name catalogname,B.province,B.province_id,C.i AS channel,D.i AS action into outfile '/tmp/timearea.txt' FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' FROM (SELECT id,c_name FROM shenl_catalog WHERE c_parent_id = 0)A CROSS JOIN (SELECT DISTINCT province,province_id FROM tb_bi_area)B CROSS JOIN(SELECT * FROM tb_incr WHERE i>0 AND i<4)C CROSS JOIN(SELECT * FROM tb_incr WHERE i>0 AND i<5)D;SELECT B.province,B.province_id,C.i AS channel into outfile '/tmp/seekarea.txt' FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'FROM (SELECT DISTINCT province, province_id FROM tb_bi_area) B CROSS JOIN (SELECT * FROM tb_incr WHERE i > 0 AND i < 4) C;SELECT gid,REPLACE(REPLACE(REPLACE(g_title,char(10),''),CHAR(13),''),',','') AS g_title,g_catalog_id,g_status into outfile '/tmp/shenl_goods.txt' FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' FROM shenl_goods;2)?/root/shenl/odpsstat0320.sql 內統計腳本內容:
insert OVERWRITE table tb_bi_report_timespans SELECT datetrunc(dateadd(getdate(), -1, 'dd'),'dd') as period,A.catalogname,A.span,case A.channel WHEN 1 THEN "tel" WHEN 2 THEN "smell" WHEN 3 THEN "bigscreen" END AS channel, A.action,case when D.catalogid IS NULL then 0 else browsetimes END as stattimes FROM tb_bi_meta_timespan ALEFT OUTER JOIN(SELECT C.g_catalog_id as catalogid,channel,datepart(browsetime,'hh') as spanid,COUNT(DISTINCT browsetime) as browsetimes,action from tb_bi_goodbrowse AJOIN shenl_goods CON A.good = C.gid AND A.period=datetrunc(dateadd(getdate(), -1, 'dd'),'dd') AND A.periodsplit=to_char(dateadd(getdate(), -1, 'dd'),'yyyymm')GROUP BY C.g_catalog_id,datepart(browsetime,'hh'),channel,action)DON A.catalogid = D.catalogid AND A.span = D.spanid AND A.channel = D.channel AND A.action = D.action;insert OVERWRITE table tb_bi_report_timeareas SELECT datetrunc(dateadd(getdate(), -1, 'dd'),'dd') as period,A.catalogname,A.province,case A.channel WHEN 1 THEN "tel" WHEN 2 THEN "smell" WHEN 3 THEN "bigscreen" END AS channel, A.action,case when D.catalogid IS NULL then 0 else browsetimes END as stattimes FROM tb_bi_meta_timearea ALEFT OUTER JOIN(SELECT B.province_id,period,channel,COUNT(1) as browsetimes,C.g_catalog_id as catalogid,actionfrom tb_bi_goodbrowse AJOIN (SELECT distinct province,province_id FROM tb_bi_area)BON A.province = B.province_idJOIN shenl_goods CON A.good = C.gid AND A.period=datetrunc(dateadd(getdate(), -1, 'dd'),'dd') AND A.periodsplit=to_char(dateadd(getdate(), -1, 'dd'),'yyyymm')GROUP BY B.province_id,C.g_catalog_id,period,channel,action)DON A.catalogid = D.catalogid AND A.province_id = D.province_id AND A.channel=D.channel AND A.action = D.action;insert overwrite table tb_bi_report_seekareas SELECT datetrunc(dateadd(getdate(), -1, 'dd'),'dd') as period,A.province,D.item as item,case A.channel WHEN 1 THEN "tel" WHEN 2 THEN "smell" WHEN 3 THEN "bigscreen" END AS channel,case when D.province_id IS NULL then 0 else browsetimes END as stattimes FROM tb_bi_meta_seekarea ALEFT OUTER JOIN(SELECT B.province_id,datetrunc(dateadd(getdate(), -1, 'dd'),'dd') as period,channel,item,COUNT(DISTINCT requesttime) as browsetimesfrom tb_bi_marketlog AJOIN (SELECT distinct province,province_id FROM tb_bi_area)BON A.province = B.province_id AND A.action =2 AND isdate(A.requesttime,'yyyy-mm-dd hh:mi:ss') AND A.period=to_char(dateadd(getdate(), -1, 'dd'),'yyyymm') AND datetrunc(A.requesttime,'DD') = datetrunc(dateadd(getdate(), -1, 'dd'),'dd') GROUP BY B.province_id,period,channel,item)DON A.province_id = D.province_id AND A.channel=D.channel3)?/root/shenl/load2mysql.sql腳本中的內容是:
load data infile '/var/log/mysql/timespanout.txt' into tabletb_bi_report_timespans fields terminated by ',' lines terminated by '\n'; load data infile '/var/log/mysql/timeareaout.txt' into tabletb_bi_report_timeareas fields terminated by ',' lines terminated by '\n'; load data infile '/var/log/mysql/seekareaout.txt' into tabletb_bi_report_seekareas fields terminated by ',' lines terminated by '\n';代碼解讀:詳見注釋部分
總結
以上是生活随笔為你收集整理的odps结合mysql统计的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: win7怎么确保显卡打开 Win7如何正
- 下一篇: winpe怎么安装win7 WinPE下