编译
从源码编译安装最新版本
git clone https://github.com/apache/incubator-hivemall.git
cd incubator-hivemall;
ubuntu@id-cdh-manager:/usr/local/datadisk/incubator-hivemall$ git branch -a
* master
remotes/origin/HEAD -> origin/master
remotes/origin/dependabot/maven/hive.version-3.1.2
remotes/origin/dev/v0.4.2
remotes/origin/master
remotes/origin/v0.5.0
remotes/origin/v0.5.2
remotes/origin/v0.6.0
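ubuntu@id-cdh-manager:/usr/local/datadisk/incubator-hivemall$ git checkout -b remotes/origin/v0.6.0
Switched to a new branch 'remotes/origin/v0.6.0'
(注意:上面的命令只是基于当前的 master 新建了一个名为 remotes/origin/v0.6.0 的本地分支,并没有真正检出 v0.6.0 的代码;要检出 v0.6.0 分支应使用 git checkout -b v0.6.0 origin/v0.6.0)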
ubuntu@id-cdh-manager:/usr/local/datadisk/incubator-hivemall$
bin/build.sh
[INFO] Scanning for projects...
[INFO] ------------------------------------------------------------------------
[INFO] Reactor Build Order:
[INFO]
[INFO] Apache Hivemall [pom]
[INFO] Hivemall Core [jar]
[INFO] Hivemall NLP [jar]
[INFO] Hivemall XGBoost Integration [jar]
[INFO] Hivemall Mix Server [jar]
[INFO] Hivemall on Apache Spark [pom]
[INFO] Hivemall on Spark Common [jar]
[INFO] Hivemall on Spark 2.2 [jar]
[INFO] Hivemall on Spark 2.3 [jar]
[INFO] Hivemall Distribution [jar]
[INFO] Hivemall Tools [pom]
[INFO] Hivemall Documentation Tool [maven-plugin]
[INFO]
[INFO] --------------------< org.apache.hivemall:hivemall >--------------------
[INFO] Building Apache Hivemall 0.6.0-incubating-SNAPSHOT [1/12]
hdfs dfs -put target ./hivemall
hdfs dfs -ls ./hivemall
安装
连接hive:beeline -u "jdbc:hive2://172.31.17.252:10000/" -n username
CREATE DATABASE IF NOT EXISTS hivemall;
USE hivemall;
set hivevar:hivemall_jar=hdfs:///user/hive/hivemall/hivemall-all-0.6.0-incubating-SNAPSHOT.jar;
--set hivevar:hivemall_jar=hdfs:///user/hive/hivemall/hivemall-all-0.6.2-incubating-SNAPSHOT.jar
source /home/ubuntu/data/xuefeng/incubator-hivemall/resources/ddl/define-all-as-permanent.hive;
问题 CatalogServiceCatalog.java:517 Ignoring load of incompatible Java function
当重启 impala 后,这些不兼容的 udf 本身不影响 impala 的使用;但是函数太多的时候,impala daemon 进程的日志会显示一直在等待 catalog 初始化。
暂时没有考虑怎么解决不兼容的问题。define-all-as-permanent.hive 文件包含所有的函数,这里改用 define-need-premanent.hive 文件,只创建需要用到的函数:
use hivemall;
-- 用source来执行SQL的时候,SQL里面可以引用前面定义的变量
set hivevar:hivemall_jar=hdfs:///user/hive/hivemall/hivemall-all-0.6.0-incubating-SNAPSHOT.jar;
source /home/ubuntu/data/xuefeng/incubator-hivemall/define-need-premanent.hive;
我第二次升级的时候,只替换了 HDFS 上的 jar 包(把新版本的 jar 改名为原来的文件名,已创建的函数定义不用改动):
hdfs dfs -mv hivemall/hivemall-all-0.6.2-incubating-SNAPSHOT.jar ./hivemall/hivemall-all-0.6.0-incubating-SNAPSHOT.jar
测试
show functions "hivemall.*";
> hivemall.adadelta
> hivemall.adagrad
SELECT hivemall_version();
select
haversine_distance(35.6833, 139.7667, 34.6603, 135.5232) as km,
haversine_distance(35.6833, 139.7667, 34.6603, 135.5232, true) as mile;