SparkSQL操作Hiveテーブルデータ

3875 ワード

  • Hadoop:./sbin/start-all.sh
  • Spark-Schell:./bin/spark-shell --master local[2]
  • scala> spark.sql("show tables").show
    +--------+---------+-----------+
    |database|tableName|isTemporary|
    +--------+---------+-----------+
    | default|     dept|      false|
    | default|      emp|      false|
    +--------+---------+-----------+
    scala> spark.sql("select * from emp").show
    +-----+------+---------+----+----------+-------+------+------+
    |empno| ename|      job| mgr|  hiredate|    sal|  comm|deptno|
    +-----+------+---------+----+----------+-------+------+------+
    | 7369| SMITH|    CLERK|7902|1980-12-17|  800.0|  null|    20|
    | 7499| ALLEN| SALESMAN|7698| 1981-2-20| 1600.0| 300.0|    30|
    | 7521|  WARD| SALESMAN|7698| 1981-2-22| 1250.0| 500.0|    30|
    | 7566| JONES|  MANAGER|7839|  1981-4-2| 2975.0|  null|    20|
    | 7654|MARTIN| SALESMAN|7698| 1981-9-28| 1250.0|1400.0|    30|
    | 7698| BLAKE|  MANAGER|7839|  1981-5-1| 2850.0|  null|    30|
    | 7782| CLARK|  MANAGER|7839|  1981-6-9| 2450.0|  null|    10|
    | 7788| SCOTT|  ANALYST|7566| 1987-4-19| 3000.0|  null|    20|
    | 7839|  KING|PRESIDENT|null|1981-11-17| 5000.0|  null|    10|
    | 7844|TURNER| SALESMAN|7698|  1981-9-8| 1500.0|   0.0|    30|
    | 7876| ADAMS|    CLERK|7788| 1987-5-23| 1100.0|  null|    20|
    | 7900| JAMES|    CLERK|7698| 1981-12-3|  950.0|  null|    30|
    | 7902|  FORD|  ANALYST|7566| 1981-12-3| 3000.0|  null|    20|
    | 7934|MILLER|    CLERK|7782| 1982-1-23| 1300.0|  null|    10|
    | 8888|  HIVE|  PROGRAM|7839| 1988-1-23|10300.0|  null|  null|
    +-----+------+---------+----+----------+-------+------+------+
    
    
    
    
    scala> spark.sql("select deptno,count(1) from emp group by deptno").show
    +------+--------+
    |deptno|count(1)|
    +------+--------+
    |  null|       1|
    |    20|       5|
    |    10|       3|
    |    30|       6|
    +------+--------+
    
    
    
    scala> spark.sql("select deptno,count(1) from emp group by deptno").filter("deptno is not null").show
    +------+--------+
    |deptno|count(1)|
    +------+--------+
    |    20|       5|
    |    10|       3|
    |    30|       6|
    +------+--------+
    
    spark.sql("select deptno,count(1) from emp group by deptno").filter("deptno is not null").write.saveAsTable("hive_table")
    
    org.apache.spark.sql.AnalysisException: Attribute name "count(1)" contains invalid character(s) among " ,;{}()
    \t=". Please use alias to rename it.;
    scala> spark.sql("select deptno,count(1) as mount from emp group by deptno").filter("deptno is not null").write.saveAsTable("hive_table")
    Warning: fs.defaultFS is not set when running "chgrp" command.                  
    Warning: fs.defaultFS is not set when running "chmod" command.
    
    
    scala> spark.sql("show tables")
    res8: org.apache.spark.sql.DataFrame = [database: string, tableName: string ... 1 more field]
    
    
    scala> spark.sql("show tables").show
    +--------+----------+-----------+
    |database| tableName|isTemporary|
    +--------+----------+-----------+
    | default|      dept|      false|
    | default|       emp|      false|
    | default|hive_table|      false|
    +--------+----------+-----------+
    
    scala> spark.sql("select * from hive_table").show
    +------+-----+
    |deptno|mount|
    +------+-----+
    |    20|    5|
    |    10|    3|
    |    30|    6|
    +------+-----+
    
    
    
    
    scala> spark.table("hive_table").show
    +------+-----+
    |deptno|mount|
    +------+-----+
    |    20|    5|
    |    10|    3|
    |    30|    6|
    +------+-----+
    
    spark.sqlContext.setConf("spark.sql.shuffle.partitions","10")
    
    
                 spark.sql.shuffle.partitions,   200