vi /softw/ssb-tools/build-ssb-dbgen.sh
# Modified as follows: skip the download and extract the already-downloaded
# package directly.
# download ssb-dbgen first
if [[ -d "$SSB_DBGEN_DIR" ]]; then
    echo "Dir $CURDIR/ssb-dbgen/ already exists. No need to download."
    echo "If you want to download ssb-dbgen again, please delete this dir first."
else
    # Original download step, disabled; the tarball is expected to already
    # be present in $CURDIR.
    #curl https://palo-cloud-repo-bd.bd.bcebos.com/baidu-doris-release/ssb-dbgen-linux.tar.gz | tar xz -C $CURDIR/
    tar -zxvf "$CURDIR/ssb-dbgen-linux.tar.gz" -C "$CURDIR/"
fi
# Cluster connection settings, exported as environment variables.
# Any of FE host
export FE_HOST='lsyk01'
# http_port in fe.conf
export FE_HTTP_PORT=8030
# query_port in fe.conf
export FE_QUERY_PORT=9030
# Doris username
export USER='root'
# Doris password
export PASSWORD='fa'
# The database where SSB tables located
export DB='ssb'
建表
sh ./create-ssb-tables.sh
sh ./create-ssb-flat-table.sh
導入數據
# Load the dimension tables first, then the fact table.
sh ./load-ssb-dimension-data.sh
# NOTE(review): -c 5 is presumably the load concurrency; confirm against the
# script's usage text.
sh ./load-ssb-fact-data.sh -c 5
導入過程很吃內存:
用時 8分鐘,大小大概6.8G,原文件是:24G
mysql> select count(1) from ssb.lineorder;
由此可見,Apache Doris 的緩存了得啊……
導入flat寬表
sh ./load-ssb-flat-data.sh
報錯:
查看代碼,發現沒有指定密碼:
增加-p密碼
耗時25分鐘,還報錯了,是內存不足了:
把語句單獨拿出來執行,按半年一批導入,耗時約 100 秒,比官方的腳本快
掛了
-- Populate the flat table by joining lineorder with the customer, supplier
-- and part tables.
INSERT INTO ssb.lineorder_flat
SELECT
    LO_ORDERDATE,
    LO_ORDERKEY,
    LO_LINENUMBER,
    LO_CUSTKEY,
    LO_PARTKEY,
    LO_SUPPKEY,
    LO_ORDERPRIORITY,
    LO_SHIPPRIORITY,
    LO_QUANTITY,
    LO_EXTENDEDPRICE,
    LO_ORDTOTALPRICE,
    LO_DISCOUNT,
    LO_REVENUE,
    LO_SUPPLYCOST,
    LO_TAX,
    LO_COMMITDATE,
    LO_SHIPMODE,
    C_NAME,
    C_ADDRESS,
    C_CITY,
    C_NATION,
    C_REGION,
    C_PHONE,
    C_MKTSEGMENT,
    S_NAME,
    S_ADDRESS,
    S_CITY,
    S_NATION,
    S_REGION,
    S_PHONE,
    P_NAME,
    P_MFGR,
    P_CATEGORY,
    P_BRAND,
    P_COLOR,
    P_TYPE,
    P_SIZE,
    P_CONTAINER
FROM (
    SELECT
        lo_orderkey,
        lo_linenumber,
        lo_custkey,
        lo_partkey,
        lo_suppkey,
        lo_orderdate,
        lo_orderpriority,
        lo_shippriority,
        lo_quantity,
        lo_extendedprice,
        lo_ordtotalprice,
        lo_discount,
        lo_revenue,
        lo_supplycost,
        lo_tax,
        lo_commitdate,
        lo_shipmode
    FROM ssb.lineorder
    -- Disabled filter, kept on its own line so the comment does not swallow
    -- the closing parenthesis. NOTE(review): ${con} looks like a placeholder
    -- substituted by the load script; confirm before re-enabling.
    -- WHERE ${con}
) l
INNER JOIN ssb.customer c
    ON (c.c_custkey = l.lo_custkey)
INNER JOIN ssb.supplier s
    ON (s.s_suppkey = l.lo_suppkey)
INNER JOIN ssb.part p
    ON (p.p_partkey = l.lo_partkey);

-- Row count of each SSB table, one labelled row per table.
select 'part', count(*) from ssb.part
union all
select 'customer', count(*) from ssb.customer
union all
select 'supplier', count(*) from ssb.supplier
union all
select 'date', count(*) from ssb.dates
union all
select 'lineorder', count(*) from ssb.lineorder
union all
select 'lineorder_flat', count(*) from ssb.lineorder_flat;