第1关:使用过滤器查询指定行的数据
知识点
1.使用过滤器的步骤:
(1)创建过滤器:RowFilter(CompareOperator op,ByteArrayComparable rowComparator),第一个参数接收的是比较操作对象,第二个参数接收的是条件。
第一个参数有很多种取值以匹配多种场景,取值表格如下:
| 操作 | 描述 |
| --- | --- |
| CompareOperator.LESS | 匹配小于设定值的值 |
| CompareOperator.LESS_OR_EQUAL | 匹配小于或等于设定值的值 |
| CompareOperator.EQUAL | 匹配等于设定值的值 |
| CompareOperator.NOT_EQUAL | 匹配与设定值不相等的值 |
| CompareOperator.GREATER_OR_EQUAL | 匹配大于或等于设定值的值 |
| CompareOperator.GREATER | 匹配大于设定值的值 |
| CompareOperator.NO_OP | 排除一切值 |
(2)设置过滤器。
编程要求
请补全函数 `query(String tName)`,需要你查询的数据如下,表名会作为方法的参数传入:
- 查询 `basic_info` 列族 `gender` 列,且行键为 `2018` 的值;
- 查询 `school_info` 列族 `college` 列,且行键大于 `2018` 的值;
- 查询 `basic_info` 列族 `name` 列,且行键小于等于 `2020` 的值。
package step1;
import java.io.IOException;
import org.apache.hadoop.cli.util.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.*;
/**
 * Step 1: query values by row key using {@link RowFilter}.
 *
 * <p>Row keys are compared with {@link BinaryComparator}, i.e. as raw bytes, so
 * "greater than" / "less or equal" are lexicographic comparisons on the key bytes.
 */
public class Task {

    /**
     * Runs three row-key-filtered scans against the given table and prints the results
     * to standard output:
     * <ol>
     *   <li>{@code basic_info:gender} where the row key equals {@code 2018};</li>
     *   <li>{@code school_info:college} where the row key is greater than {@code 2018};</li>
     *   <li>{@code basic_info:name} where the row key is less than or equal to {@code 2020}.</li>
     * </ol>
     *
     * @param tName name of the HBase table to query
     * @throws Exception if the connection cannot be established or a scan fails
     */
    public void query(String tName) throws Exception {
        /********* Begin *********/
        // HBaseConfiguration.create() also loads hbase-default.xml / hbase-site.xml,
        // which a plain Hadoop Configuration does not.
        Configuration config = HBaseConfiguration.create();

        // try-with-resources closes the table and connection even when a scan throws.
        try (Connection conn = ConnectionFactory.createConnection(config);
             Table table = conn.getTable(TableName.valueOf(tName))) {
            // The first query targets one known row, so its header is a fixed string
            // that is printed even when the row does not match anything.
            printColumn(table, "basic_info", "gender", CompareOperator.EQUAL, "2018", "row:2018");
            printColumn(table, "school_info", "college", CompareOperator.GREATER, "2018", null);
            printColumn(table, "basic_info", "name", CompareOperator.LESS_OR_EQUAL, "2020", null);
        }
        /********* End *********/
    }

    /**
     * Scans a single column, keeping only rows whose key satisfies {@code rowOp} against
     * {@code rowKey}, and prints every returned cell as {@code family:qualifier value}.
     *
     * @param table       table to scan
     * @param family      column family to read
     * @param qualifier   column qualifier to read
     * @param rowOp       comparison applied to each row key
     * @param rowKey      row key the comparison is made against
     * @param fixedHeader header printed once before the cells; when {@code null}, a
     *                    {@code row:<key>} header is printed for every returned row instead
     * @throws IOException if the scan fails
     */
    private static void printColumn(Table table, String family, String qualifier,
            CompareOperator rowOp, String rowKey, String fixedHeader) throws IOException {
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
        scan.setFilter(new RowFilter(rowOp, new BinaryComparator(Bytes.toBytes(rowKey))));

        String label = family + ":" + qualifier + " ";
        try (ResultScanner scanner = table.getScanner(scan)) {
            if (fixedHeader != null) {
                System.out.println(fixedHeader);
            }
            for (Result result : scanner) {
                // An empty Result has no row key and no cells; skip it instead of
                // dereferencing null.
                if (result.isEmpty()) {
                    continue;
                }
                if (fixedHeader == null) {
                    System.out.println("row:" + Bytes.toString(result.getRow()));
                }
                for (Cell cell : result.rawCells()) {
                    System.out.println(label + Bytes.toString(CellUtil.cloneValue(cell)));
                }
            }
        }
    }
}
第2关:使用正则表达式与子字符串匹配行键
知识点
比较器的子类
| 比较器 | 描述 |
| --- | --- |
| BinaryComparator | 使用 Bytes.compareTo() 比较当前值与阈值 |
| BinaryPrefixComparator | 与上面类似,但是是从左端开始前缀匹配 |
| NullComparator | 不做匹配,只判断当前值是不是 null |
| BitComparator | 通过 BitwiseOp 类提供的按位与(AND)、或(OR)、异或(XOR)操作执行位级比较 |
| RegexStringComparator(正则比较器) | 在实例化这个比较器时指定一个正则表达式,用它去匹配表中的数据 |
| SubstringComparator(子串比较器) | 把阈值和表中数据当做 String 实例,同时通过 contains() 操作匹配字符串 |
编程要求
查询表 `t2_student_table` 中的数据。要求如下:
- 查询以 `1` 开头,并以 `9` 结尾的行键,并输出该行所有列的值;
- 查询包含 `231` 的行键,并输出该行所有列的值。
package step2;
import java.io.IOException;
import org.apache.hadoop.cli.util.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.*;
import org.apache.hadoop.hbase.filter.SubstringComparator;
/**
 * Step 2: match row keys with a regular expression and with a substring.
 */
public class Task {

    /**
     * Scans {@code t2_student_table} twice and prints every column of each matching row:
     * first the rows whose key starts with {@code 1} and ends with {@code 9}, then the
     * rows whose key contains {@code 231}.
     *
     * @throws Exception if the connection cannot be established or a scan fails
     */
    public void query() throws Exception {
        /********* Begin *********/
        // HBaseConfiguration.create() also loads hbase-default.xml / hbase-site.xml,
        // which a plain Hadoop Configuration does not.
        Configuration config = HBaseConfiguration.create();

        // try-with-resources closes the table and connection even when a scan throws.
        try (Connection conn = ConnectionFactory.createConnection(config);
             Table table = conn.getTable(TableName.valueOf("t2_student_table"))) {
            // The pattern is anchored at both ends: the comparator searches for the
            // pattern anywhere in the key, so without '^' a key such as "2019" would
            // also match through its "19" suffix.
            printMatchingRows(table, new RegexStringComparator("^1.*9$"));
            // Row keys containing "231".
            printMatchingRows(table, new SubstringComparator("231"));
        }
        /********* End *********/
    }

    /**
     * Scans the whole table with a {@link RowFilter} built from the given comparator and
     * prints, for each matching row, a {@code row:<key>} header followed by one
     * {@code family:qualifier value} line per cell.
     *
     * @param table            table to scan
     * @param rowKeyComparator comparator that decides which row keys match
     * @throws IOException if the scan fails
     */
    private static void printMatchingRows(Table table, ByteArrayComparable rowKeyComparator)
            throws IOException {
        Scan scan = new Scan();
        scan.setFilter(new RowFilter(CompareOperator.EQUAL, rowKeyComparator));

        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                // An empty Result has no row key and no cells; skip it instead of
                // dereferencing null.
                if (result.isEmpty()) {
                    continue;
                }
                System.out.println("row:" + Bytes.toString(result.getRow()));
                for (Cell cell : result.rawCells()) {
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(family + ":" + qualifier + " " + value);
                }
            }
        }
    }
}
第3关:列族过滤器、值过滤器、列名过滤器
编程要求
使用过滤器对表 `t3_student_table` 完成如下查询操作:
- 查询行键 `1019` 中列族 `school_info` 所有列,输出值;
- 查询行键 `2020` 中,列名包含字母 `c` 的所有列,输出值;
- 查询表所有行中包含 `张` 的值,并输出该值。
package step3;
import java.io.IOException;
import org.apache.hadoop.cli.util.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.*;
import org.apache.hadoop.hbase.filter.SubstringComparator;
/**
 * Step 3: column-family filter, qualifier filter and value filter.
 */
public class Task {

    /**
     * Runs three filtered reads against {@code t3_student_table} and prints the results:
     * <ol>
     *   <li>row {@code 1019}, all columns of family {@code school_info};</li>
     *   <li>row {@code 2020}, all columns whose qualifier contains {@code c};</li>
     *   <li>every cell in the table whose value contains {@code 张}.</li>
     * </ol>
     * A read that returns no cells prints nothing.
     *
     * @throws Exception if the connection cannot be established or a read fails
     */
    public void query() throws Exception {
        /********* Begin *********/
        // HBaseConfiguration.create() also loads hbase-default.xml / hbase-site.xml,
        // which a plain Hadoop Configuration does not.
        Configuration config = HBaseConfiguration.create();

        // try-with-resources closes the table and connection even when a read throws.
        try (Connection conn = ConnectionFactory.createConnection(config);
             Table table = conn.getTable(TableName.valueOf("t3_student_table"))) {

            // Column-family filter on a single row.
            Get familyGet = new Get(Bytes.toBytes("1019"));
            familyGet.setFilter(new FamilyFilter(CompareOperator.EQUAL,
                    new BinaryComparator(Bytes.toBytes("school_info"))));
            printResult(table.get(familyGet));

            // Qualifier filter on a single row.
            Get qualifierGet = new Get(Bytes.toBytes("2020"));
            qualifierGet.setFilter(new QualifierFilter(CompareOperator.EQUAL,
                    new SubstringComparator("c")));
            printResult(table.get(qualifierGet));

            // Value filter across the whole table.
            Scan valueScan = new Scan();
            valueScan.setFilter(new ValueFilter(CompareOperator.EQUAL,
                    new SubstringComparator("张")));
            try (ResultScanner scanner = table.getScanner(valueScan)) {
                for (Result result : scanner) {
                    printResult(result);
                }
            }
        }
        /********* End *********/
    }

    /**
     * Prints a {@code row:<key>} header followed by one {@code family:qualifier value}
     * line per cell. Prints nothing for an empty result.
     *
     * @param result result of a get or one row of a scan
     */
    private static void printResult(Result result) {
        // A Get on a missing row, or one whose cells were all filtered out, yields an
        // empty Result whose getRow() and listCells() are null.
        if (result == null || result.isEmpty()) {
            return;
        }
        System.out.println("row:" + Bytes.toString(result.getRow()));
        for (Cell cell : result.rawCells()) {
            String family = Bytes.toString(CellUtil.cloneFamily(cell));
            String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            String value = Bytes.toString(CellUtil.cloneValue(cell));
            System.out.println(family + ":" + qualifier + " " + value);
        }
    }
}
版权归原作者 咖啡不提神 所有, 如有侵权,请联系我们删除。