- Kiran Dalvi
- 15 Dec, 2021
- 0 Comments
- 2 Mins Read
Hive Bucketing
Hive Bucketing
Click on below link to download dataset
Create Bucketing table with partition column
CREATE TABLE Temperature( Country string, City string, Month string, Day int, year int, AvgTemperature int) PARTITIONED BY(Region string) CLUSTERED BY (City) INTO 20 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
Create Non Bucket Table as temporary
CREATE TABLE temp_Temperature( Region string, Country string, City string, Month string, Day int, year int, AvgTemperature int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
Set hive bucketing properties
set hive.exec.dynamic.partition=true; set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.max.dynamic.partitions.pernode=1000; set hive.enforce.bucketing = true;
Load data from non bucket to bucket table
insert overwrite table Temperature partition(Region) select Country,City,Month,Day,year,AvgTemperature,region from temp_temperature;
Verify data from select query
select * from Temperature where region='Africa' and city='Lusaka'
Verify Bucketing data from Hdfs location
hdoop@hadoop:~$ hdfs dfs -ls /user/hive/warehouse/bucket.db/temperature/ Found 6 items drwxr-xr-x - hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Africa drwxr-xr-x - hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Asia drwxr-xr-x - hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Australia%2FSouth Pacific drwxr-xr-x - hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Europe drwxr-xr-x - hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Middle East drwxr-xr-x - hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Region hdoop@hadoop:~$ hdoop@hadoop:~$ hdoop@hadoop:~$ hdfs dfs -ls /user/hive/warehouse/bucket.db/temperature/region=Africa Found 20 items -rw-r--r-- 1 hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Africa/000000_0 -rw-r--r-- 1 hdoop supergroup 438268 2021-12-16 00:25 /user/hive/warehouse/bucket.db/temperature/region=Africa/000001_0 -rw-r--r-- 1 hdoop supergroup 752713 2021-12-16 00:25 /user/hive/warehouse/bucket.db/temperature/region=Africa/000002_0 -rw-r--r-- 1 hdoop supergroup 509013 2021-12-16 00:25 /user/hive/warehouse/bucket.db/temperature/region=Africa/000003_0 -rw-r--r-- 1 hdoop supergroup 231246 2021-12-16 00:25 /user/hive/warehouse/bucket.db/temperature/region=Africa/000004_0 -rw-r--r-- 1 hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Africa/000005_0 -rw-r--r-- 1 hdoop supergroup 334932 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000006_0 -rw-r--r-- 1 hdoop supergroup 518782 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000007_0 -rw-r--r-- 1 hdoop supergroup 417141 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000008_0 -rw-r--r-- 1 hdoop supergroup 260511 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000009_0 -rw-r--r-- 1 hdoop supergroup 537009 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000010_0 -rw-r--r-- 1 hdoop supergroup 249772 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000011_0 -rw-r--r-- 1 hdoop supergroup 582428 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000012_0 -rw-r--r-- 1 hdoop supergroup 746306 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000013_0 -rw-r--r-- 1 hdoop supergroup 732124 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000014_0 -rw-r--r-- 1 hdoop supergroup 579341 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000015_0 -rw-r--r-- 1 hdoop supergroup 342511 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000016_0 -rw-r--r-- 1 hdoop supergroup 268305 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000017_0 -rw-r--r-- 1 hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Africa/000018_0 -rw-r--r-- 1 hdoop supergroup 167846 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Africa/000019_0
Bucketing in Hive : Querying from a particular bucket
SELECT col_name FROM table_name TABLESAMPLE(BUCKET x out of n on bucket_col_name)