HBase API 详细例子(封装的DAO类)
生活随笔
收集整理的這篇文章主要介紹了
HBase API 详细例子(封装的DAO类)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
HBase中沒有庫的概念
將 HBase 的 lib 目錄下所有 JAR 包複製到項目中;本文使用的 HBase 版本為 0.98.5。
package com.zxing.imgQRCode;

import java.io.Closeable;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Small DAO-style wrapper around the HBase 0.98 client API: create a table, write rows, read a
 * row, and run a few demonstration scans (full scan, row filter, paging).
 *
 * <p>One shared {@link HConnection} is opened in the constructor; call {@link #close()} when done.
 * Table handles, scanners and the admin client are opened per call and always released.
 */
public class HbaseConnection implements Closeable {

    /** Number of rows printed per page by {@link #pageFilterTest(String)}. */
    private static final int PAGE_SIZE = 3;

    private final String rootDir;
    private final String zkServer;
    private final String port;
    private final Configuration conf;
    private final HConnection hConn;

    /**
     * Builds the client configuration and opens the shared connection.
     *
     * @param rootDir  value for {@code hbase.rootdir}
     * @param zkServer value for {@code hbase.zookeeper.quorum}
     * @param port     value for {@code hbase.zookeeper.property.clientPort}
     * @throws IllegalStateException if the connection cannot be created (previously the error was
     *         only printed and every later call failed with a NullPointerException)
     */
    public HbaseConnection(String rootDir, String zkServer, String port) {
        this.rootDir = rootDir;
        this.zkServer = zkServer;
        this.port = port;
        conf = HBaseConfiguration.create();
        conf.set("hbase.rootdir", rootDir);
        // The key must not carry trailing whitespace, otherwise the quorum setting is ignored.
        conf.set("hbase.zookeeper.quorum", zkServer);
        conf.set("hbase.zookeeper.property.clientPort", port);
        try {
            hConn = HConnectionManager.createConnection(conf);
        } catch (IOException e) {
            throw new IllegalStateException("Unable to connect to HBase via ZooKeeper " + zkServer + ":" + port, e);
        }
    }

    /**
     * Creates a table with one GZ-compressed, DIFF-encoded column family per entry of {@code cols}.
     * If the table already exists a message is printed and nothing is changed. I/O failures are
     * printed, not rethrown. (The method name keeps its original spelling for existing callers.)
     *
     * @param tableName name of the table to create
     * @param cols      column family names
     */
    public void crateTable(String tableName, List<String> cols) {
        HBaseAdmin admin = null;
        try {
            admin = new HBaseAdmin(conf);
            if (admin.tableExists(tableName)) {
                System.err.println("table exists: " + tableName);
                return;
            }
            HTableDescriptor tableDesc = new HTableDescriptor(tableName);
            for (String col : cols) {
                HColumnDescriptor colDesc = new HColumnDescriptor(col);
                colDesc.setCompressionType(Algorithm.GZ);
                colDesc.setDataBlockEncoding(DataBlockEncoding.DIFF);
                tableDesc.addFamily(colDesc);
            }
            admin.createTable(tableDesc);
        } catch (IOException e) {
            // NOTE(review): assumes MasterNotRunningException and ZooKeeperConnectionException are
            // IOException subclasses (as in HBase 0.98), so one handler covers all three — confirm.
            e.printStackTrace();
        } finally {
            closeQuietly(admin);
        }
    }

    /**
     * Writes a batch of puts to the given table.
     *
     * @param tableName target table
     * @param puts      rows to write
     */
    public void saveData(String tableName, List<Put> puts) {
        HTableInterface table = null;
        try {
            table = hConn.getTable(tableName);
            // Auto-flush has to be disabled before put() for the client-side buffer to be used.
            table.setAutoFlush(false);
            table.put(puts);
            table.flushCommits();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(table);
        }
    }

    /**
     * Reads one row.
     *
     * @param tableName table to read from
     * @param rowkey    row key
     * @return the result for the row, or {@code null} if the read failed
     */
    public Result getData(String tableName, String rowkey) {
        HTableInterface table = null;
        try {
            table = hConn.getTable(tableName);
            return table.get(new Get(Bytes.toBytes(rowkey)));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            closeQuietly(table);
        }
    }

    /**
     * Prints row key, family and qualifier of every cell in the result.
     *
     * @param result the result to print; {@code null} or empty results print nothing
     */
    public void format(Result result) {
        if (result == null || result.isEmpty()) {
            return;
        }
        String rowkey = Bytes.toString(result.getRow());
        for (KeyValue kv : result.raw()) {
            String family = Bytes.toString(kv.getFamily());
            String qualifier = Bytes.toString(kv.getQualifier());
            System.out.println("rowkey->" + rowkey + "family->" + family + "qualifier->" + qualifier);
        }
    }

    /**
     * Scans the whole table and prints every row.
     *
     * @param tableName table to scan
     */
    public void hbaseScan(String tableName) {
        Scan scan = new Scan();
        scan.setCaching(1000); // scanner caching: number of rows requested per fetch
        printScan(tableName, scan);
    }

    /**
     * Row-filter demo: prints the rows whose key equals {@code "Jack"}.
     *
     * <p>To match by pattern instead, pass {@code new RegexStringComparator("J\\w+")} as the
     * comparator of the {@link RowFilter}.
     *
     * @param tableName table to scan
     */
    public void filterTest(String tableName) {
        Scan scan = new Scan();
        scan.setCaching(1000); // scanner caching: number of rows requested per fetch
        scan.setFilter(new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("Jack"))));
        printScan(tableName, scan);
    }

    /**
     * Paging demo built on {@link PageFilter}: prints the table {@value #PAGE_SIZE} rows at a time.
     *
     * <p>Each scan asks for one row more than the page size. That extra row is not printed; its
     * key is the inclusive start row of the next page, and its absence marks the last page.
     *
     * @param tableName table to page through
     */
    public void pageFilterTest(String tableName) {
        PageFilter filter = new PageFilter(PAGE_SIZE + 1);
        byte[] lastRow = null;
        int pageCount = 0; // current page number
        HTableInterface table = null;
        try {
            table = hConn.getTable(tableName);
            boolean hasMore = true;
            while (hasMore) {
                pageCount++;
                System.out.println("pageCount = " + pageCount);
                Scan scan = new Scan();
                scan.setFilter(filter);
                if (lastRow != null) {
                    scan.setStartRow(lastRow);
                }
                int count = 0; // rows seen on this page, including the look-ahead row
                ResultScanner scanner = table.getScanner(scan);
                try {
                    for (Result res : scanner) {
                        lastRow = res.getRow();
                        if (++count > PAGE_SIZE) {
                            break; // look-ahead row: first row of the next page
                        }
                        format(res);
                    }
                } finally {
                    scanner.close();
                }
                hasMore = count > PAGE_SIZE;
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(table);
        }
    }

    /** Closes the shared connection. */
    @Override
    public void close() throws IOException {
        hConn.close();
    }

    /** Runs the scan against the table and prints each row, releasing scanner and table after. */
    private void printScan(String tableName, Scan scan) {
        HTableInterface table = null;
        ResultScanner scanner = null;
        try {
            table = hConn.getTable(tableName);
            scanner = table.getScanner(scan);
            for (Result res : scanner) {
                format(res);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(scanner);
            closeQuietly(table);
        }
    }

    /** Closes a resource if it is non-null; a failure to close is printed and otherwise ignored. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Demo: creates the "students" table, writes two rows, reads one back and runs the scans. */
    public static void main(String[] args) {
        String rootDir = "hdfs://ns1/hbase";
        String zkServer = "10.128.129.230"; // cluster-internal IP
        String port = "2181";

        HbaseConnection conn = new HbaseConnection(rootDir, zkServer, port);
        try {
            List<String> cols = new LinkedList<String>();
            cols.add("basicInfo");
            cols.add("moreInfo");
            conn.crateTable("students", cols);

            List<Put> puts = new LinkedList<Put>();
            Put put1 = new Put(Bytes.toBytes("Tom")); // row key
            put1.add(Bytes.toBytes("basicInfo"), Bytes.toBytes("age"), Bytes.toBytes("27"));
            put1.add(Bytes.toBytes("moreInfo"), Bytes.toBytes("tel"), Bytes.toBytes("110"));
            Put put2 = new Put(Bytes.toBytes("Jim"));
            put2.add(Bytes.toBytes("basicInfo"), Bytes.toBytes("age"), Bytes.toBytes("28"));
            put2.add(Bytes.toBytes("moreInfo"), Bytes.toBytes("tel"), Bytes.toBytes("111"));
            puts.add(put1);
            puts.add(put2);
            conn.saveData("students", puts);

            Result result = conn.getData("students", "Tom");
            conn.format(result);

            conn.hbaseScan("students");

            conn.filterTest("students");

            conn.pageFilterTest("students");
        } finally {
            closeQuietly(conn);
        }
    }
}
// Article section heading: 常用接口 (commonly used interfaces)
package test;

import hbase.HbaseUtils;

import java.io.IOException;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;

import net.sf.json.JSONObject;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import com.xd.iis.se.common.Constants;
import com.xd.iis.se.hbase.CommHbaseUtils;
import com.xd.iis.se.hbutils.MeUtils;

import commn.CommonConstants;

/**
 * Scratch JUnit tests exercising common HBase client calls (paged scans, row-key range scans,
 * single-row lookups, token-key maintenance) against project tables, interleaved with the
 * author's operational notes.
 */
public class SyncTestUtils {

    // HBase table names. The Constants class in the hbaseapi package defines the mapping from
    // table name to numeric code; the trailing number on each line is that code.
    private final static String wz_content = "wz_content"; // 1
    private final static String lt_content = "lt_content"; // 4
    private final static String wb_content = "wb_content"; // 2
    private static final String wb_comment = "wb_comment"; // 45
    private static final String sinawb_user = "sinawb_user"; // Weibo user table

    /** Maximum number of rows requested from each scan through the page filter. */
    private static final int PAGE_SIZE = 5;

    /*
     * TitleHs is defined in the jsonToDocument method of SwitchBeanAndJsonUtils (hbaseapi
     * package), starting at line 326.
     *
     * HBase table fields are defined in the HIContentCommon class of the hbaseapi package and in
     * the IContentCommon class of the pfsearch package.
     */

    /** Prints the table-name to numeric-code mapping held in {@code Constants.rstypemp}. */
    @Test
    public void hbaseTableNameToDigitalMapping() {
        for (Entry<String, String> entry : Constants.rstypemp.entrySet()) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }
    }

    /** Prints the current time from several clocks and one time-unit conversion. */
    @Test
    public void seconds() {
        System.out.println(new Date().getTime());
        System.out.println(System.nanoTime());
        System.out.println(System.currentTimeMillis());
        // time-unit conversion: 10 ms expressed in nanoseconds
        System.out.println(TimeUnit.NANOSECONDS.convert(10L, TimeUnit.MILLISECONDS));
    }

    /** Converts a millisecond timestamp string to a {@link Date} and prints both forms. */
    @Test
    public void millsToDate() {
        String mills = "1460459403324";
        Date date = new Date(Long.parseLong(mills));
        System.out.println(date);
        System.out.println(date.getTime());
    }

    // Manually generates the 19-digit token key for network-wide Weibo comments:
    // tokenKey (key, rowkey) = wbcomment_key
    // TokenTable = hotmanwb_token
    /**
     * Sample content of the token table:
     * <pre>
     * hbase(main):003:0> scan 'hotmanwb_token',LIMIT=>2
     * ROW                   COLUMN+CELL
     * hotmanwb_key          column=content:date, timestamp=1459310036375, value=1459310031972331086
     * hotmanwbcomment_key   column=content:date, timestamp=1460600117890, value=1460600079542140091
     * ltcomment_key         column=content:date, timestamp=1460600483717, value=1460600441668719114
     * wzcomment_key         column=content:date, timestamp=1460599817280, value=1460599777817713930
     * </pre>
     */
    @Test
    public void generateTokenKeyForWeiboComment() {
        Calendar calendar = Calendar.getInstance();
        // thirty days before now
        calendar.add(Calendar.DATE, -30);
        Date date = calendar.getTime();
        // the first field of the printed date is the day of the week
        System.out.println(date);
        // 13-digit millisecond timestamp + 6 zero digits = 19 digits
        String startTime = date.getTime() + "000000";
        System.out.println("startTime:" + startTime);
        // insert or update the time, e.g. 1458109165143000000 (19 digits)
        HbaseUtils.insertData("hotmanwb_token", "wbcomment_key", startTime);
    }

    /** Looks up one value (the token key) by table name and row key. */
    @Test
    public void findByRowKey() {
        String startTime = HbaseUtils.QueryByCondition1("hotmanwb_token", "wbcomment_key");
        System.out.println(startTime);
    }

    /*
     * Returns the first few rows of a table. Sample output:
     * key:014604646505869913352145
     * key:014604646505954866550445
     * key:014604988869079841915645
     * key:014605014502935460283945
     * key:014605014503712711041745
     */
    @Test
    public void scanTopRowComment() {
        ResultScanner resultScanner = null;
        HTableInterface table = HbaseUtils.pool.getTable(wz_content);
        try {
            resultScanner = table.getScanner(newPagedScan());
            printRowsWithJsonColumn(resultScanner);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // always release the scanner and the table handle when done
            release(resultScanner, table);
        }
    }

    /** Scans a row-key range of the Weibo comment table. */
    @Test
    public void scanByRowKeyRangeComment() {
        ResultScanner resultScanner = null;
        HTableInterface table = HbaseUtils.pool.getTable(wb_comment);
        String startRow = "01420459403324297147";
        // NOTE(review): the original comment calls this a 20-digit key, but the literal has 18
        // characters — confirm the intended stop row.
        String stopRow = "014605014503712711";
        try {
            Scan scan = newPagedScan();
            scan.setStartRow(Bytes.toBytes(startRow));
            scan.setStopRow(Bytes.toBytes(stopRow));
            resultScanner = table.getScanner(scan);
            printRowsWithJsonColumn(resultScanner);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // always release the scanner and the table handle when done
            release(resultScanner, table);
        }
    }

    /** Generates a row key via the hbaseApi package; the first (url) argument is unused. */
    @Test
    public void createRowKey() {
        // typemp.put("wb_comment", "45"): the Weibo comment table maps to the last two digits
        String newRowKey = MeUtils.createKeyCode("", "wb_comment");
        System.out.println(newRowKey);
        // The older variant is MeUtils.createKeyCode_oid("http://www.baidu.com", "wb_comment").
        // Example: rowkey=114606860784008157170445 (24 digits)
        // = 1 + 19-digit timestamp + 2 random digits + 2-digit table code
    }

    /*
     * Timestamp
     * HBase identifies a piece of data by row and column. That data may have several versions;
     * versions are ordered by time, newest first, and a query returns the newest version by
     * default. In the example above, author:nickname of row key=1 has two versions:
     * 1317180070811 -> "一葉渡江" and 1317180718830 -> "yedu" (in business terms: the nickname
     * was changed to yedu at some moment, but the old value still exists). The timestamp defaults
     * to the current system time (millisecond precision) and can also be given when writing.
     *
     * Value
     * Each value is uniquely indexed by four keys:
     * tableName+RowKey+ColumnKey+Timestamp=>value. In the example above,
     * {tableName='blog',RowKey='1',ColumnName='author:nickname',Timestamp='1317180718830'}
     * addresses the single value "yedu".
     */

    /*
     * Big Solr (192.168.20.190 serves three domain names)
     * # 24 index
     * solr_24h=http://solr-24h.wyq.cn/solr
     * # month index
     * solr_month=http://solr-month.wyq.cn/solr
     * # week index
     * solr_week=http://solr-week.wyq.cn/solr
     */

    /** Prints the first rows of the content table cell by cell. */
    @Test
    public void scanTopRowContent() {
        ResultScanner resultScanner = null;
        HTableInterface table = HbaseUtils.pool.getTable(wz_content);
        try {
            resultScanner = table.getScanner(newPagedScan());
            Iterator<Result> res = resultScanner.iterator();
            while (res.hasNext()) {
                System.out.println("--------------行分割線-------------");
                Result result = res.next();
                System.out.println("\n" + "------單個result--------");
                System.out.println(result);
                System.out.println("\n" + "------result中Cells--------");
                // A cell is uniquely identified by {row key, Family:Qualifier, version}. Cell data
                // is untyped: everything is stored as bytes.
                for (Cell cell : result.rawCells()) {
                    // row key
                    System.out.println("Rowkey : " + Bytes.toString(CellUtil.cloneRow(cell)));
                    // column family + column (family is the first level, qualifier the second)
                    System.out.println("Familiy:Quilifier : " + Bytes.toString(CellUtil.cloneFamily(cell))
                            + ":" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                    // value
                    System.out.println("Value : " + Bytes.toString(CellUtil.cloneValue(cell)));
                    System.out.println("TimeStamp : " + cell.getTimestamp());
                }
                // The older API iterates result.list() as KeyValue objects and reads getRow(),
                // getFamily(), getQualifier(), getValue() and getTimestamp() from each.
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // always release the scanner and the table handle when done
            release(resultScanner, table);
        }
    }

    /*
     * SecureCRT file upload/download
     * sz      download command
     * rz -be  upload command. Plain rz has two problems: uploads get interrupted, and the
     *         uploaded file is altered (md5 differs). The remedy is to upload with rz -be and to
     *         untick "Upload files as ASCII" in the dialog that pops up.
     * -a, --ascii
     * -b, --binary  transfer in binary mode; characters are not interpreted as ASCII
     * -e, --escape  force escaping of all control characters, e.g. Ctrl+x, DEL
     * Use -b (binary) for rar, gif and similar files. If a large file fails to upload, add -e.
     */

    /** Looks up one record by row key and prints it raw and as parsed JSON. */
    @Test
    public void select() {
        String ID = "114615497672016941968326";
        try {
            String json = CommHbaseUtils.select(ID);
            System.out.println(json);
            JSONObject js = JSONObject.fromObject(json);
            System.out.println(js);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /*
     * Hard-won lessons (author's notes):
     * - ArrayList is not thread-safe. Even with
     *   Collections.synchronizedList(new ArrayList<SolrInputDocument>()) the size reported by
     *   size() was observed to be wrong; Vector is recommended instead.
     * - The solrserver URL configuration file pfs.properties was not packaged, so the URL could
     *   not be found and a NullPointerException occurred.
     * - ScheduledExecutorService could not capture the HTTP-service exceptions raised inside its
     *   threads; jstack -l printed
     *     java.lang.Thread.State: WAITING (parking)
     *     at sun.misc.Unsafe.park(Native Method)
     *     - parking to wait for <0x0000000712e8e7e8> (a
     *   Remedy: use a Timer instead so that the exception can be caught.
     */

    /*
     * Changing the encoding of properties files (Eclipse)
     * Globally: Window -> Preferences -> General -> Content Types, select "Java Properties File"
     * on the right and change its encoding to UTF-8.
     * Single file: right-click the properties file -> Properties -> Resource -> Text file
     * encoding, choose Other and pick an encoding such as UTF-8 or GBK. Chinese text can then be
     * typed into the properties file without being converted to Unicode escapes automatically.
     */

    /**
     * Builds the scan shared by the scan tests: at most {@link #PAGE_SIZE} rows through a
     * {@link PageFilter}, block cache on, scanner caching of 100 rows, one version per cell.
     *
     * <p>Author's notes on batch versus caching: batch works at qualifier (column) level, caching
     * at row level. Batch is the number of records fetched from the server per iteration; too
     * small means frequent round trips, too large puts pressure on the client. It can normally be
     * left alone and is worth considering for bulk reads. The batch count refers to qualifiers,
     * not rows: with 17 qualifiers in a row and setBatch(5), the row is spread over four Results
     * holding 5, 5, 5 and 2 qualifiers (by default all qualifiers of a row arrive in one Result).
     * setFilter and setBatch must not both be enabled because they conflict, so setBatch is not
     * called here.
     */
    private static Scan newPagedScan() throws IOException {
        Scan scan = new Scan();
        Filter filter = new PageFilter(PAGE_SIZE);
        scan.setFilter(filter);
        // Whether the RegionServer should cache the data blocks this client touched; for random
        // gets this is best set to false.
        scan.setCacheBlocks(true);
        // number of rows handed to the scanner per fetch
        scan.setCaching(100);
        scan.setMaxVersions(1);
        return scan;
    }

    /**
     * Prints every row of the scanner: the Result itself, its row key, and the JSON string stored
     * in the column named by the CommonConstants family/qualifier constants, raw and parsed.
     * Rows without that column are skipped after their key is printed, instead of aborting the
     * scan with a NullPointerException.
     */
    private static void printRowsWithJsonColumn(ResultScanner scanner) throws IOException {
        for (Result result : scanner) {
            System.out.println(result);
            System.out.println("key:" + Bytes.toString(result.getRow()));
            // this column stores a JSON string
            byte[] raw = result.getValue(
                    CommonConstants.CRAWLERCONTENT_TABLE_COLUMNS.getBytes(),
                    CommonConstants.CRAWLERCONTENT_TABLE_COLUMN2.getBytes());
            if (raw == null) {
                continue;
            }
            String value = new String(raw, "ISO8859-1");
            System.out.println("value:" + value);
            JSONObject js = JSONObject.fromObject(value);
            System.out.println(js);
        }
    }

    /** Closes the scanner and the table handle; a failure to close the table is only printed. */
    private static void release(ResultScanner scanner, HTableInterface table) {
        if (scanner != null) {
            scanner.close();
        }
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
// Original attribution, kept as requested by the author:
// 本文出自 “點滴積累” 博客,請務必保留此出處http://tianxingzhe.blog.51cto.com/3390077/1698822
總結
以上是生活随笔為你收集整理的HBase API 详细例子(封装的DAO类)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Linux下的tr编辑器命令详解
- 下一篇: js事件监听器用法实例详解