  • 1. Connecting to HiveServer2 from Java (create a table, load data, and query it)

    This example requires you to first log in to the master1 node of your managed Hadoop cluster; the operations below are assumed to run on master1.

    org.apache.hive.jdbc.HiveDriver is the JDBC driver class name for HiveServer2, and the HiveServer2 connection URL takes the form "jdbc:hive2://ip:10000/default".
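
    Before the full example, a quick connectivity check can confirm the driver class and URL. This is a minimal sketch (HiveConnTest is an illustrative name; it assumes a default HiveServer2 setup that accepts an empty user and password):

    import java.sql.Connection;
    import java.sql.DriverManager;

    public class HiveConnTest {
        public static void main(String[] args) throws Exception {
            // Register the HiveServer2 JDBC driver
            Class.forName("org.apache.hive.jdbc.HiveDriver");
            // Replace the host with your cluster's HiveServer2 node
            Connection con = DriverManager.getConnection(
                    "jdbc:hive2://uhadoop-******-master2:10000/default", "", "");
            System.out.println("Connected: " + !con.isClosed());
            con.close();
        }
    }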

    ● Write the sample code

    The sample code, Hive2JdbcClient.java, is as follows:

     
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;

    public class Hive2JdbcClient {
        private static String driverName = "org.apache.hive.jdbc.HiveDriver";

        /**
         * @param args
         * @throws SQLException
         */
        public static void main(String[] args) throws SQLException {
            try {
                Class.forName(driverName);
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
                System.exit(1);
            }
            // Pass the name of the user the queries should run as
            // (empty strings use the default user)
            Connection con = DriverManager.getConnection(
                    "jdbc:hive2://uhadoop-******-master2:10000/default", "", "");
            Statement stmt = con.createStatement();
            String tableName = "testHive2DriverTable";
            stmt.execute("drop table if exists " + tableName);
            stmt.execute("create table " + tableName + " (key int, value string)");

            // show tables
            String sql = "show tables '" + tableName + "'";
            System.out.println("Running: " + sql);
            ResultSet res = stmt.executeQuery(sql);
            if (res.next()) {
                System.out.println(res.getString(1));
            }

            // describe table
            sql = "describe " + tableName;
            System.out.println("Running: " + sql);
            res = stmt.executeQuery(sql);
            while (res.next()) {
                System.out.println(res.getString(1) + "\t" + res.getString(2));
            }

            // load data into table
            // NOTE: with "load data inpath" the filepath refers to a location in HDFS
            // NOTE: b.txt is a ctrl-A separated file with two fields per line
            //       (test.sh below creates it and uploads it to HDFS)
            String filepath = "/user/hive/warehouse/b.txt";
            sql = "load data inpath '" + filepath + "' into table " + tableName;
            System.out.println("Running: " + sql);
            stmt.execute(sql);

            // select * query
            sql = "select * from " + tableName;
            System.out.println("Running: " + sql);
            res = stmt.executeQuery(sql);
            while (res.next()) {
                System.out.println(String.valueOf(res.getInt(1)) + "\t" + res.getString(2));
            }

            // regular hive query
            sql = "select count(1) from " + tableName;
            System.out.println("Running: " + sql);
            res = stmt.executeQuery(sql);
            while (res.next()) {
                System.out.println(res.getString(1));
            }
        }
    }

    Note:

    Connection con = DriverManager.getConnection("jdbc:hive2://uhadoop-******-master2:10000/default", "", "");

    Replace uhadoop-******-master2 with the hostname or IP of your cluster's master2 node.

    ● Compile

       javac Hive2JdbcClient.java

    Compilation itself needs only the JDK; the Hive JDBC driver and Hadoop jars are loaded reflectively at runtime, so they only have to be on the classpath when the program is run (test.sh below takes care of this).

    ● Run the program

    The test.sh script is as follows:

     
    #!/bin/bash
    # Generate a ctrl-A separated data file and upload it to HDFS
    hdfs dfs -rm -f /user/hive/warehouse/b.txt
    echo -e '1\x01foo' > /tmp/b.txt
    echo -e '2\x01bar' >> /tmp/b.txt
    hdfs dfs -put /tmp/b.txt /user/hive/warehouse/

    # Build the classpath from the Hadoop and Hive installations.
    # NOTE: the Hive path below is an assumption; adjust HIVE_HOME if Hive
    # is installed elsewhere on your cluster.
    HADOOP_HOME=/home/hadoop/
    HIVE_HOME=${HIVE_HOME:-/home/hadoop/hive}
    CLASSPATH=.:$HIVE_HOME/conf
    for i in ${HADOOP_HOME}/share/hadoop/mapreduce/lib/hadoop-*.jar ; do
        CLASSPATH=$CLASSPATH:$i
    done
    for i in ${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-*.jar ; do
        CLASSPATH=$CLASSPATH:$i
    done
    for i in ${HADOOP_HOME}/share/hadoop/common/lib/hadoop-*.jar ; do
        CLASSPATH=$CLASSPATH:$i
    done
    for i in ${HADOOP_HOME}/share/hadoop/common/hadoop-*.jar ; do
        CLASSPATH=$CLASSPATH:$i
    done
    for i in ${HIVE_HOME}/lib/*.jar ; do
        CLASSPATH=$CLASSPATH:$i
    done

    java -cp $CLASSPATH Hive2JdbcClient
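
    Running sh test.sh should print output roughly like the following (the exact "show tables" result may vary, since Hive stores table names in lowercase; the data rows come from the b.txt file created above):

    Running: show tables 'testHive2DriverTable'
    testhive2drivertable
    Running: describe testHive2DriverTable
    key     int
    value   string
    Running: load data inpath '/user/hive/warehouse/b.txt' into table testHive2DriverTable
    Running: select * from testHive2DriverTable
    1       foo
    2       bar
    Running: select count(1) from testHive2DriverTable
    2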

    2. Connecting to HiveServer2 from Python (create a table, load data, and query it)

    The process for using a Python client with HiveServer2 is as follows:

    ● Download pyhs2:

    git clone https://github.com/BradRuderman/pyhs2.git

    ● Install dependencies: yum install gcc-c++ cyrus-sasl-* python-devel

    ● Install setuptools:

    wget -q http://peak.telecommunity.com/dist/ez_setup.py && python ez_setup.py

    If this fails, manually download the setuptools-0.6c11.tar.gz package and install it from the tarball.

    ● Build and install pyhs2

    Enter the pyhs2 directory and install:

     
    cd pyhs2 
    python setup.py build 
    python setup.py install
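
    A quick way to sanity-check the installation is to import the module, which should succeed silently:

    python -c "import pyhs2"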

    ● Write the sample code

    The sample code below is example.py from the pyhs2 repository:

     
    import pyhs2

    with pyhs2.connect(host='uhadoop-******-master2',
                       port=10000,
                       authMechanism="PLAIN",
                       user='root',
                       password='test',
                       database='default') as conn:
        with conn.cursor() as cur:
            # Show databases
            print cur.getDatabases()

            # Execute query
            cur.execute("select * from test_hive")

            # Return column info from query
            print cur.getSchema()

            # Fetch table results
            for i in cur.fetch():
                print i
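
    The example above only queries an existing test_hive table. Below is a minimal sketch of creating that table and loading data through the same pyhs2 cursor, mirroring the Java example (the table layout and HDFS path are illustrative assumptions):

    import pyhs2

    # Sketch: create test_hive and load data via pyhs2 (illustrative; replace
    # host/user/password with your cluster's values, as in the example above).
    with pyhs2.connect(host='uhadoop-******-master2',
                       port=10000,
                       authMechanism="PLAIN",
                       user='root',
                       password='test',
                       database='default') as conn:
        with conn.cursor() as cur:
            cur.execute("drop table if exists test_hive")
            cur.execute("create table test_hive (key int, value string)")
            # Load a ctrl-A separated file from HDFS, as in the Java example
            cur.execute("load data inpath '/user/hive/warehouse/b.txt' into table test_hive")
            cur.execute("select * from test_hive")
            for row in cur.fetch():
                print row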

    3. Reading HBase data through a Hive external table

    By creating an HBase external table in Hive, you can analyze unstructured HBase data with simple SQL statements.

    Open the HBase shell and create table t1:

     
    create 't1',{NAME => 'f1',VERSIONS => 2} 
    put 't1','rowkey001','f1:col1','value01' 
    put 't1','rowkey001','f1:col2','value02' 
    put 't1','rowkey001','f1:colf','value03' 
    scan 't1'

    The resulting t1 table looks like this:

     
    hbase(main):013:0> scan 't1'
    ROW                 COLUMN+CELL
    rowkey001           column=f1:col1, timestamp=1481075364575, value=value01
    rowkey001           column=f1:col2, timestamp=1481075364607, value=value02
    rowkey001           column=f1:colf, timestamp=1481075364641, value=value03

    Open the Hive CLI and create the external table. In hbase.columns.mapping, ":key" maps the HBase row key to the rowkey column, and "f1:" maps the entire f1 column family into the cf map:

     
    hive> CREATE EXTERNAL TABLE t_hive_hbase(     
      > rowkey string,     
      > cf map<STRING,STRING>     
      > )     
      > STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'     
      > WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f1:")     
      > TBLPROPERTIES ("hbase.table.name" = "t1");

    Query the HBase data with SQL; the result is as follows:

     
    hive> select * from  t_hive_hbase; 
    OK 
    rowkey001   {"col1":"value01","col2":"value02","colf":"value03"}
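
    Individual HBase columns can also be addressed through the map column. For example, given the data above, the following should return value01 for rowkey001:

    hive> select rowkey, cf['col1'] from t_hive_hbase;
    OK
    rowkey001   value01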
