Blog

Hive Bucketing

Click on below link to download dataset

Create Bucketing table with partition column

CREATE TABLE Temperature(
Country string,
City string,
Month string,
Day int,
year int,
AvgTemperature int)
PARTITIONED BY(Region string)
CLUSTERED BY (City) INTO 20 BUCKETS
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

Create Non Bucket Table as temporary

CREATE TABLE temp_Temperature(
Region string,
Country string,
City string,
Month string,
Day int,
year int,
AvgTemperature int)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

Set hive bucketing properties

set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
set hive.enforce.bucketing = true;

Load data from non bucket to bucket table

 
insert overwrite table Temperature partition(Region) select Country,City,Month,Day,year,AvgTemperature,region from temp_temperature;

Verify data from select query

select * from Temperature where region='Africa' and city='Lusaka'

Verify Bucketing data from Hdfs location


hdoop@hadoop:~$ hdfs dfs -ls /user/hive/warehouse/bucket.db/temperature/
Found 6 items
drwxr-xr-x - hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Africa
drwxr-xr-x - hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Asia
drwxr-xr-x - hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Australia%2FSouth Pacific
drwxr-xr-x - hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Europe
drwxr-xr-x - hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Middle East
drwxr-xr-x - hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Region
hdoop@hadoop:~$
hdoop@hadoop:~$
hdoop@hadoop:~$ hdfs dfs -ls /user/hive/warehouse/bucket.db/temperature/region=Africa
Found 20 items
-rw-r--r-- 1 hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Africa/000000_0
-rw-r--r-- 1 hdoop supergroup 438268 2021-12-16 00:25 /user/hive/warehouse/bucket.db/temperature/region=Africa/000001_0
-rw-r--r-- 1 hdoop supergroup 752713 2021-12-16 00:25 /user/hive/warehouse/bucket.db/temperature/region=Africa/000002_0
-rw-r--r-- 1 hdoop supergroup 509013 2021-12-16 00:25 /user/hive/warehouse/bucket.db/temperature/region=Africa/000003_0
-rw-r--r-- 1 hdoop supergroup 231246 2021-12-16 00:25 /user/hive/warehouse/bucket.db/temperature/region=Africa/000004_0
-rw-r--r-- 1 hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Africa/000005_0
-rw-r--r-- 1 hdoop supergroup 334932 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000006_0
-rw-r--r-- 1 hdoop supergroup 518782 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000007_0
-rw-r--r-- 1 hdoop supergroup 417141 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000008_0
-rw-r--r-- 1 hdoop supergroup 260511 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000009_0
-rw-r--r-- 1 hdoop supergroup 537009 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000010_0
-rw-r--r-- 1 hdoop supergroup 249772 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000011_0
-rw-r--r-- 1 hdoop supergroup 582428 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000012_0
-rw-r--r-- 1 hdoop supergroup 746306 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000013_0
-rw-r--r-- 1 hdoop supergroup 732124 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000014_0
-rw-r--r-- 1 hdoop supergroup 579341 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000015_0
-rw-r--r-- 1 hdoop supergroup 342511 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000016_0
-rw-r--r-- 1 hdoop supergroup 268305 2021-12-16 00:26 /user/hive/warehouse/bucket.db/temperature/region=Africa/000017_0
-rw-r--r-- 1 hdoop supergroup 0 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Africa/000018_0
-rw-r--r-- 1 hdoop supergroup 167846 2021-12-16 00:27 /user/hive/warehouse/bucket.db/temperature/region=Africa/000019_0

Bucketing in Hive : Querying from a particular bucket

SELECT col_name FROM table_name TABLESAMPLE(BUCKET x out of n on bucket_col_name)

Follow me

Contact us for Training/ Job Support

Caution: Your use of any information or materials on this website is entirely at your own risk. It is provided for educational purposes only. It has been tested internally, however, we do not guarantee that it will work for you. Ensure that you run it in your test environment before using.