Updated: 2021-07-04 23:22:54 CST +08

AWS Athena Create Table Manually

  1. Create Table

    CREATE EXTERNAL TABLE `20210121_test_log`(
      `uuid` string,
      `hostid` string,
      `hostname` string,
      `osname` string)
    PARTITIONED BY (
      `x` string,
      `year` string,
      `month` string,
      `day` string)
    ROW FORMAT SERDE
      'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
    STORED AS INPUTFORMAT
      'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT
      'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
    LOCATION
      's3://dinolai-test-log/'
    TBLPROPERTIES (
      'has_encrypted_data'='false',
      'transient_lastDdlTime'='1611144510')
    
  2. Add Partition

    ALTER TABLE `20210121_test_log` ADD
      PARTITION (x = '0', year = '2021', month = '01', day = '20')
      PARTITION (x = '1', year = '2021', month = '01', day = '20')
      PARTITION (x = '2', year = '2021', month = '01', day = '20')
      PARTITION (x = '3', year = '2021', month = '01', day = '20')
      PARTITION (x = '4', year = '2021', month = '01', day = '20')
      PARTITION (x = '5', year = '2021', month = '01', day = '20')
      PARTITION (x = '6', year = '2021', month = '01', day = '20')
      PARTITION (x = '7', year = '2021', month = '01', day = '20')
      PARTITION (x = '8', year = '2021', month = '01', day = '20')
      PARTITION (x = '9', year = '2021', month = '01', day = '20')
      PARTITION (x = 'a', year = '2021', month = '01', day = '20')
      PARTITION (x = 'b', year = '2021', month = '01', day = '20')
      PARTITION (x = 'c', year = '2021', month = '01', day = '20')
      PARTITION (x = 'd', year = '2021', month = '01', day = '20')
      PARTITION (x = 'e', year = '2021', month = '01', day = '20')
      PARTITION (x = 'f', year = '2021', month = '01', day = '20')
    ;
    
  3. Query

    SELECT osname, hostname, count(distinct(hostid)) as host_count, count(*) as log_count
    FROM "default"."20210121_test_log"
    WHERE osname = 'linux'
    GROUP BY osname;