Level 1: UDF (one row in, one row out)
package myudf;

import org.apache.hadoop.hive.ql.exec.UDF;

public class AvgCost extends UDF {
    public String evaluate(String raw) {
        /************** Begin **************/
        // Check for null before anything else, then for the empty string.
        if (raw != null && !"".equals(raw)) {
            if (raw.contains("省")) {
                // Province names: strip the trailing "省".
                raw = raw.substring(0, raw.length() - 1);
            } else {
                // Other names: append "市".
                raw = raw + "市";
            }
        }
        // Null or empty input passes through unchanged.
        return raw;
        /************** End **************/
    }
}
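Before packaging, the string logic can be sanity-checked outside Hive with a plain main method. This is only a minimal local sketch: the AvgCostTest class is not part of the exercise, and the sample inputs are made up rather than taken from data.txt.

package myudf;

// Hypothetical local check for AvgCost.evaluate(); not part of the exercise.
public class AvgCostTest {
    public static void main(String[] args) {
        AvgCost udf = new AvgCost();
        System.out.println(udf.evaluate("广东省")); // -> "广东"  (trailing "省" stripped)
        System.out.println(udf.evaluate("北京"));   // -> "北京市" (no "省", so "市" appended)
        System.out.println(udf.evaluate(""));       // -> ""      (empty input passes through)
    }
}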
After clicking Evaluate, open the command line.
Linux steps:
# Change into the working directory.
cd /data/workspace/myshixun/step1
# Package with Maven (packaging may take some time; please be patient).
mvn clean package
Hive steps:
# Start the Hive CLI.
hive --service cli
# Create the source table.
create table comment(com_no string, com_food string, com_province string, com_price string, com_content string) row format delimited fields terminated by "," stored as textfile;
# Load the data.
load data local inpath "/data/workspace/myshixun/step1/data.txt" into table comment;
# Add the jar in Hive.
add jar /data/workspace/myshixun/step1/target/step1-1.0-SNAPSHOT.jar;
# Create the temporary custom function.
create temporary function procost as 'myudf.AvgCost';
# Create a new Hive table newdata.
create table newdata(com_no string, com_food string, new_province string, com_price string, com_content string) row format delimited fields terminated by "," stored as textfile;
# Insert the rows produced by the custom function into table newdata.
insert overwrite table newdata select com_no, com_food, procost(com_province), com_price, com_content from comment;
Level 2: UDAF (many rows in, one row out)
package myudaf;

import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.io.IntWritable;

public class FindMax extends UDAF {
    public static class FindMaxUDAFEvaluator implements UDAFEvaluator {
        private IntWritable result;

        /*
         * init() acts like a constructor and initializes the UDAF state.
         */
        @Override
        public void init() {
            result = null;
        }

        /*
         * iterate() receives each incoming value and folds it into the
         * running state. It returns boolean.
         */
        public boolean iterate(IntWritable value) {
            /*********** Begin ***********/
            // Ignore null values.
            if (value == null)
                return false;
            // If result is still empty, seed it with value; otherwise keep
            // the larger of the two.
            if (result == null)
                result = new IntWritable(value.get());
            else
                result.set(Math.max(result.get(), value.get()));
            /*********** End ***********/
            return true;
        }

        /*
         * terminatePartial() takes no arguments; after iterate() has run over
         * a chunk of rows, it returns the partial aggregation state.
         */
        public IntWritable terminatePartial() {
            return result;
        }

        /*
         * merge() receives the output of terminatePartial() and merges it
         * into this evaluator's state. It returns boolean.
         */
        public boolean merge(IntWritable other) {
            return iterate(other);
        }

        // Hive calls terminate() to produce the final aggregated result.
        public IntWritable terminate() {
            return result;
        }
    }
}
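Hive drives this evaluator in stages: iterate() per row on the map side, terminatePartial() to ship the partial state, merge() on the reduce side, and terminate() for the final value. The sketch below replays that lifecycle locally under assumptions: the FindMaxTest class and the scores are made up for illustration.

package myudaf;

import org.apache.hadoop.io.IntWritable;

// Hypothetical driver that replays the UDAF lifecycle outside Hive.
public class FindMaxTest {
    public static void main(String[] args) {
        // One "map-side" evaluator folds a few rows.
        FindMax.FindMaxUDAFEvaluator partial = new FindMax.FindMaxUDAFEvaluator();
        partial.init();
        for (int score : new int[]{72, 95, 88}) {     // made-up scores
            partial.iterate(new IntWritable(score));
        }
        // A "reduce-side" evaluator merges the partial result.
        FindMax.FindMaxUDAFEvaluator finalEval = new FindMax.FindMaxUDAFEvaluator();
        finalEval.init();
        finalEval.merge(partial.terminatePartial());
        System.out.println(finalEval.terminate());    // prints 95
    }
}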
After clicking Evaluate, open the command line.
Linux steps:
# Change into the working directory.
cd /data/workspace/myshixun/step2
# Package with Maven (packaging may take some time; please be patient).
mvn clean package
Hive steps:
# Start the Hive CLI.
hive --service cli
# Create the source table.
create table studentscore(stu_no int, stu_name string, course_name string, scores int) row format delimited fields terminated by "," stored as textfile;
# Load the data.
load data local inpath "/data/workspace/myshixun/step2/data.txt" into table studentscore;
# Create the result table newdata2.
create table newdata2(course_name string, max_score string) row format delimited fields terminated by "," stored as textfile;
# Add the jar in Hive.
add jar /data/workspace/myshixun/step2/target/step2-1.0-SNAPSHOT.jar;
# Create the temporary custom function.
create temporary function findmax as 'myudaf.FindMax';
# Insert the rows produced by the custom function into table newdata2.
insert overwrite table newdata2 select course_name, findmax(scores) from studentscore group by course_name;
Level 3: UDTF (one row in, many rows out)
package myudtf;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;

public class CostUDTF extends GenericUDTF {
    @Override
    public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
        // Argument validation.
        if (args.length != 1) {
            throw new UDFArgumentException("NameParserGenericUDTF() takes exactly one argument");
        }
        // Reject anything that is not a primitive string. Note the ||: a
        // non-primitive argument must fail here, before the cast is attempted.
        if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE
                || ((PrimitiveObjectInspector) args[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
            throw new UDFArgumentException("NameParserGenericUDTF() takes a string as a parameter");
        }
        ArrayList<String> fieldNames = new ArrayList<String>();
        // The ObjectInspectors tell Hive how to serialize each output field.
        ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
        fieldNames.add("name");
        // Use the factory's Java string inspector for this field.
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldNames.add("sex");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    @Override
    public void process(Object[] args) throws HiveException {
        // Parse the input field: ";" separates entries, ":" separates key and value.
        String input = args[0].toString();
        String[] test = input.split(";");
        for (int i = 0; i < test.length; i++) {
            try {
                String[] result = test[i].split(":");
                // Emit one output row; forward() is provided by GenericUDTF.
                forward(result);
            } catch (Exception e) {
                continue;
            }
        }
    }

    @Override
    public void close() throws HiveException {
    }
}
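A GenericUDTF is hard to run standalone because forward() needs Hive's collector wiring, but the parsing inside process() is plain string work and can be replayed locally. A sketch under an assumption: the "name:Tom;sex:male" sample is inferred from the ";" and ":" delimiters used above, not taken from data.txt.

package myudtf;

// Hypothetical demo of the split logic inside CostUDTF.process().
public class CostUDTFParseDemo {
    public static void main(String[] args) {
        String input = "name:Tom;sex:male";   // assumed user_info format
        for (String pair : input.split(";")) {
            String[] result = pair.split(":");
            // In Hive, forward(result) would emit this pair as one output row.
            System.out.println(result[0] + " -> " + result[1]);
        }
    }
}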
After clicking Evaluate, open the command line.
Linux steps:
# Change into the working directory.
cd /data/workspace/myshixun/step3
# Package with Maven (packaging may take some time; please be patient).
mvn clean package
Hive steps:
# Start the Hive CLI.
hive --service cli
# Create the source table.
create table usertable(user_no int, user_info string) row format delimited fields terminated by "," stored as textfile;
# Load the data.
load data local inpath "/data/workspace/myshixun/step3/data.txt" into table usertable;
# Add the jar in Hive.
add jar /data/workspace/myshixun/step3/target/step3-1.0-SNAPSHOT.jar;
# Create the temporary custom function.
create temporary function usercost as 'myudtf.CostUDTF';
# Create the new table.
create table newuser(user_field string, user_info string) row format delimited fields terminated by "," stored as textfile;
# Insert user 3's information, exploded by the custom function, into the new table.
insert overwrite table newuser select usercost(user_info) from usertable where user_no = 3;
Reposted from: https://blog.csdn.net/qq_56857828/article/details/127770853
Copyright belongs to the original author 咕噜咕噜咚~~. In case of infringement, please contact us for removal.