1、搭建環境
新建JAVA項目,添加的包有:
  有關Hadoop的hadoop-core-0.20.204.0.jar
  有關Hbase的hbase-0.90.4.jar、hbase-0.90.4-tests.jar以及Hbase資源包中lib目錄下的所有jar包
?
2、主要程序
?
Java代碼??
package com.wujintao.hbase.test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Demonstrates basic HBase client operations: creating and dropping a table,
 * inserting a row, deleting a row by row key, and querying by row key or by
 * column-value filters.
 *
 * <p>All methods print their results to standard output and print the stack
 * trace of any {@link IOException} instead of propagating it.
 */
public class JinTaoTest {

    /** Client configuration shared by every operation in this class. */
    public static Configuration configuration;

    /** Upper bound on the number of table handles kept per table name. */
    private static final int MAX_POOLED_TABLES = 1000;

    /**
     * Single table pool shared by all operations, so that handles returned
     * after use can actually be reused by later calls.
     */
    private static final HTablePool TABLE_POOL;

    static {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
        configuration.set("hbase.zookeeper.quorum", "192.168.1.100");
        // A TCP port cannot exceed 65535; the original value "600000" was invalid.
        configuration.set("hbase.master", "192.168.1.100:60000");
        TABLE_POOL = new HTablePool(configuration, MAX_POOLED_TABLES);
    }

    /**
     * Entry point. Runs one demo operation; the other demos are listed below
     * and can be enabled by uncommenting them.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // createTable("wujintao");
        // insertData("wujintao");
        // QueryAll("wujintao");
        // QueryByCondition1("wujintao");
        // QueryByCondition2("wujintao");
        // QueryByCondition3("wujintao");
        // deleteRow("wujintao", "abcdef");
        deleteByCondition("wujintao", "abcdef");
    }

    /**
     * Creates a table with the column families {@code column1},
     * {@code column2} and {@code column3}. If a table with the same name
     * already exists it is disabled and deleted first.
     *
     * @param tableName name of the table to create
     */
    public static void createTable(String tableName) {
        System.out.println("start create table ......");
        try {
            HBaseAdmin hBaseAdmin = new HBaseAdmin(configuration);
            if (hBaseAdmin.tableExists(tableName)) {
                // A table must be disabled before it can be deleted.
                hBaseAdmin.disableTable(tableName);
                hBaseAdmin.deleteTable(tableName);
                System.out.println(tableName + " is exist,detele....");
            }
            HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
            tableDescriptor.addFamily(new HColumnDescriptor("column1"));
            tableDescriptor.addFamily(new HColumnDescriptor("column2"));
            tableDescriptor.addFamily(new HColumnDescriptor("column3"));
            hBaseAdmin.createTable(tableDescriptor);
        } catch (IOException e) {
            // MasterNotRunningException and ZooKeeperConnectionException are
            // IOException subclasses and were handled identically.
            e.printStackTrace();
        }
        System.out.println("end create table ......");
    }

    /**
     * Inserts one row with row key {@code 112233bbbcccc}, writing one value
     * into each of the three column families.
     *
     * <p>Updating uses exactly the same call: a {@link Put} for an existing
     * row key overwrites the stored cells.
     *
     * @param tableName name of the table to write to
     */
    public static void insertData(String tableName) {
        System.out.println("start insert data ......");
        HTable table = null;
        try {
            table = (HTable) TABLE_POOL.getTable(tableName);
            // One Put represents one row; the constructor argument is its row key.
            Put put = new Put(Bytes.toBytes("112233bbbcccc"));
            put.add(Bytes.toBytes("column1"), null, Bytes.toBytes("aaa")); // first column
            put.add(Bytes.toBytes("column2"), null, Bytes.toBytes("bbb")); // second column
            put.add(Bytes.toBytes("column3"), null, Bytes.toBytes("ccc")); // third column
            table.put(put);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            releaseTable(table);
        }
        System.out.println("end insert data ......");
    }

    /**
     * Disables and then deletes a table.
     *
     * @param tableName name of the table to drop
     */
    public static void dropTable(String tableName) {
        try {
            HBaseAdmin admin = new HBaseAdmin(configuration);
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes a single row identified by its row key.
     *
     * @param tablename name of the table to delete from
     * @param rowkey    row key of the row to delete
     */
    public static void deleteRow(String tablename, String rowkey) {
        HTable table = null;
        try {
            table = (HTable) TABLE_POOL.getTable(tablename);
            // The list form is kept to show that several deletes can be batched.
            List<Delete> deletes = new ArrayList<Delete>();
            deletes.add(new Delete(Bytes.toBytes(rowkey)));
            table.delete(deletes);
            System.out.println("刪除行成功!");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            releaseTable(table);
        }
    }

    /**
     * Placeholder for deleting by a combination of conditions; currently does
     * nothing.
     *
     * @param tablename name of the table to delete from
     * @param rowkey    row key (unused)
     */
    public static void deleteByCondition(String tablename, String rowkey) {
        // The author had not found an API for deleting by a non-row-key
        // condition, nor one for clearing all data in a table.
    }

    /**
     * Scans the whole table and prints every row.
     *
     * @param tableName name of the table to scan
     */
    public static void QueryAll(String tableName) {
        scanAndPrint(tableName, new Scan());
    }

    /**
     * Looks up the single row with row key {@code abcdef} and prints it, or
     * prints a notice when no such row exists.
     *
     * @param tableName name of the table to read from
     */
    public static void QueryByCondition1(String tableName) {
        final String rowKey = "abcdef";
        HTable table = null;
        try {
            table = (HTable) TABLE_POOL.getTable(tableName);
            Result r = table.get(new Get(Bytes.toBytes(rowKey)));
            if (r == null || r.isEmpty()) {
                // A missing row yields an empty Result whose getRow() is null.
                System.out.println("未找到rowkey:" + rowKey);
                return;
            }
            printResult(r);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            releaseTable(table);
        }
    }

    /**
     * Single-condition query returning multiple rows: prints the rows for
     * which {@code column1} equals {@code aaa}.
     *
     * @param tableName name of the table to scan
     */
    public static void QueryByCondition2(String tableName) {
        Scan scan = new Scan();
        scan.setFilter(equalsFilter("column1", "aaa"));
        scanAndPrint(tableName, scan);
    }

    /**
     * Combined-condition query: prints the rows for which {@code column1},
     * {@code column2} and {@code column3} equal {@code aaa}, {@code bbb} and
     * {@code ccc} respectively.
     *
     * @param tableName name of the table to scan
     */
    public static void QueryByCondition3(String tableName) {
        List<Filter> filters = new ArrayList<Filter>();
        filters.add(equalsFilter("column1", "aaa"));
        filters.add(equalsFilter("column2", "bbb"));
        filters.add(equalsFilter("column3", "ccc"));

        Scan scan = new Scan();
        scan.setFilter(new FilterList(filters));
        scanAndPrint(tableName, scan);
    }

    /** Builds a filter matching rows whose given family (null qualifier) equals the value. */
    private static Filter equalsFilter(String family, String value) {
        return new SingleColumnValueFilter(
                Bytes.toBytes(family), null, CompareOp.EQUAL, Bytes.toBytes(value));
    }

    /**
     * Runs the scan against the table and prints every returned row. The
     * scanner is always closed and the table handle always returned to the
     * pool. Only {@link IOException} is caught; runtime failures propagate.
     */
    private static void scanAndPrint(String tableName, Scan scan) {
        HTable table = null;
        ResultScanner scanner = null;
        try {
            table = (HTable) TABLE_POOL.getTable(tableName);
            scanner = table.getScanner(scan);
            for (Result r : scanner) {
                printResult(r);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (scanner != null) {
                scanner.close();
            }
            releaseTable(table);
        }
    }

    /** Prints the row key of a result followed by the family and value of each cell. */
    private static void printResult(Result r) {
        System.out.println("獲得到rowkey:" + Bytes.toString(r.getRow()));
        for (KeyValue keyValue : r.raw()) {
            System.out.println("列:" + Bytes.toString(keyValue.getFamily())
                    + "====值:" + Bytes.toString(keyValue.getValue()));
        }
    }

    /** Returns a table handle to the shared pool; does nothing for {@code null}. */
    private static void releaseTable(HTable table) {
        if (table != null) {
            TABLE_POOL.putTable(table);
        }
    }

}
注意:可能大家沒看到更新數據的操作,其實更新的操作跟添加完全一致:rowkey不存在時是添加,rowkey已經存在(并且timestamp相同)時則是更新。另外,目前好像還沒辦法實現hbase數據的分頁查詢,不知道有沒有人知道怎么做
?
HBase性能優化建議:
? 針對前面的代碼,有很多不足之處,在此我就不修改上面的代碼了,只是提出建議的地方,大家自己加上
?? 1)配置
當你調用create方法時將會加載兩個配置文件:hbase-default.xml和hbase-site.xml,利用的是當前的java類路徑。代碼中configuration設置的這些配置將會覆蓋hbase-default.xml和hbase-site.xml中相同的配置;如果兩個配置文件都存在并且都已設置好相應參數,則代碼中無需再重復設置上面這些屬性
?
?2)關于建表
??
public void createTable(HTableDescriptor desc)
?
HTableDescriptor 代表的是表的schema,?提供的方法中比較有用的有
setMaxFileSize,指定最大的region size
setMemStoreFlushSize?指定memstore flush到HDFS上的文件大小
增加family通過?addFamily方法
?
public void addFamily(final HColumnDescriptor family)
?
HColumnDescriptor代表的是column的schema,提供的方法比較常用的有
setTimeToLive:指定最大的TTL,單位是秒,過期數據會被自動刪除。
setInMemory:指定是否放在內存中,對小表有用,可用于提高效率。默認關閉
setBloomFilter:指定是否使用BloomFilter,可提高隨機查詢效率。默認關閉
setCompressionType:設定數據壓縮類型。默認無壓縮。
setMaxVersions:指定數據最大保存的版本個數。默認為3。
?
注意的是,一般我們不去setInMemory為true,默認是關閉的
?
3)關于入庫
?? 官方建議
?table.setAutoFlush(false); //數據入庫之前先設置此項為false
 table.flushCommits();//入庫完成后,手動刷入數據
注意:
  在入庫過程中,put.setWriteToWAL(true/false);
? 關于這一項如果不希望大量數據在存儲過程中丟失,建議設置為true,如果僅是在測試演練階段,為了節省入庫時間建議設置為false
?
4)關于獲取表實例
HTablePool pool = new HTablePool(configuration, Integer.MAX_VALUE);
HTable table = (HTable) pool.getTable(tableName);
建議用表連接池的方式獲取表,具體池有什么作用,我想用過數據庫連接池的同學都知道,我就不再重復
不建議使用new HTable(configuration,tableName);的方式獲取表
?
5)關于查詢
建議每個查詢語句都放入try catch語句塊,并且在finally中關閉ResultScanner實例,同時將不再使用的表重新放回HTablePool,具體做法如下
Java代碼??
public ?static ?void ?QueryAll(String?tableName)?{??????????HTablePool?pool?=?new ?HTablePool(configuration,?Integer.MAX_VALUE);?? ????????HTable?table?=?null ;?? ????????ResultScanner?rs?=?null ;?? ????????try ?{?? ????????????Scan?scan?=?new ?Scan();?? ????????????table?=?(HTable)?pool.getTable(tableName);?? ????????????rs?=?table.getScanner(scan);?? ????????????for ?(Result?r?:?rs)?{?? ????????????????System.out.println("獲得到rowkey:" ?+?new ?String(r.getRow()));?? ????????????????for ?(KeyValue?keyValue?:?r.raw())?{?? ????????????????????System.out.println("列:" ?+?new ?String(keyValue.getFamily())?? ????????????????????????????+?"====值:" ?+?new ?String(keyValue.getValue()));?? ????????????????}?? ????????????}?? ????????}?catch ?(IOException?e)?{?? ????????????e.printStackTrace();?? ????????}finally {?? ????????????rs.close();//?最后還得關閉 ?? ????????????pool.putTable(table);?//實際應用過程中,pool獲取實例的方式應該抽取為單例模式的,不應在每個方法都重新獲取一次(單例明白?就是抽取到專門獲取pool的邏輯類中,具體邏輯為如果pool存在著直接使用,如果不存在則new) ?? ????????}?? ????}??
?
?所以,以上代碼有缺陷的地方,感興趣的同學可以針對優化建議作出相應修改
總結
以上是生活随笔 為你收集整理的Java操作Hbase进行建表、删表以及对数据进行增删改查,条件查询 的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔 網站內容還不錯,歡迎將生活随笔 推薦給好友。