hive通过元数据库删除分区操作步骤

发布于:2025-04-08 ⋅ 阅读:(38) ⋅ 点赞:(0)

删除分区失败:
-- The Hive DDL that failed and motivated the manual metastore cleanup below.
-- NOTE(review): month and type look like string partition values; Hive normally expects
-- them quoted (month='0-3', type='ADJ') — confirm whether the missing quotes contributed to the failure.
alter table proj_60_finance.dwd_fm_ma_kpi_di_mm drop partition(year=2025,month=0-3,type=ADJ);

1、查询分区所属库、表的DB_ID、TBL_ID
-- Get the database ID (26110 in this example)
SELECT DB_ID FROM DBS WHERE NAME = 'proj_60_finance';
-- Get the table ID (307194 in this example), using the DB_ID returned above
SELECT TBL_ID FROM TBLS WHERE TBL_NAME = 'dwd_fm_ma_kpi_di_mm' AND DB_ID = 26110;

2、 查询分区的PART_ID和SD_ID
-- Find the partition's PART_ID and SD_ID from its partition key values.
-- PARTITION_KEY_VALS is joined once per partition key; INTEGER_IDX selects the key position:
--   0 = first partition key  (year)
--   1 = second partition key (month)
--   2 = third partition key  (type)
SELECT
    p.PART_ID,
    p.SD_ID,
    pk1.PART_KEY_VAL AS year_val,
    pk2.PART_KEY_VAL AS month_val,
    pk3.PART_KEY_VAL AS type_val
FROM PARTITIONS p
INNER JOIN PARTITION_KEY_VALS pk1 ON p.PART_ID = pk1.PART_ID
INNER JOIN PARTITION_KEY_VALS pk2 ON p.PART_ID = pk2.PART_ID
INNER JOIN PARTITION_KEY_VALS pk3 ON p.PART_ID = pk3.PART_ID
WHERE p.TBL_ID = 307194
  AND pk1.INTEGER_IDX = 0 AND pk1.PART_KEY_VAL = '2025'
  AND pk2.INTEGER_IDX = 1 AND pk2.PART_KEY_VAL = '0-3'
  AND pk3.INTEGER_IDX = 2 AND pk3.PART_KEY_VAL = 'ADJ';

3、 删除相关元数据
-- Remove the partition's metastore rows: rows keyed by PART_ID first, then the
-- PARTITIONS row itself, then its storage descriptor in SDS.
-- Each DELETE is preceded by a SELECT so the affected rows can be reviewed first.
-- 18620706 / 23951880 are the PART_ID / SD_ID found in step 2; substitute your own.
-- NOTE(review): back up the metastore database first; transaction syntax may differ
-- depending on the database that hosts the metastore — confirm.
START TRANSACTION;

-- Partition key values (PARTITION_KEY_VALS)
SELECT * FROM PARTITION_KEY_VALS WHERE PART_ID = 18620706;
DELETE FROM PARTITION_KEY_VALS WHERE PART_ID = 18620706;

-- Partition parameters (PARTITION_PARAMS, if any)
SELECT * FROM PARTITION_PARAMS WHERE PART_ID = 18620706;
DELETE FROM PARTITION_PARAMS WHERE PART_ID = 18620706;

-- Optional: column statistics (PART_COL_STATS).
-- Done before the PARTITIONS delete because these rows are keyed by PART_ID;
-- presumably a foreign key would otherwise block the parent delete — verify against your schema.
SELECT * FROM PART_COL_STATS WHERE PART_ID = 18620706;
DELETE FROM PART_COL_STATS WHERE PART_ID = 18620706;

-- Partition record (PARTITIONS)
SELECT * FROM PARTITIONS WHERE PART_ID = 18620706;
DELETE FROM PARTITIONS WHERE PART_ID = 18620706;

-- Storage descriptor (SDS); make sure nothing else references this SD_ID.
-- NOTE(review): other tables keyed by SD_ID (e.g. SD_PARAMS) may hold rows that must be
-- removed first — confirm against your metastore schema.
SELECT * FROM SDS WHERE SD_ID = 23951880;
DELETE FROM SDS WHERE SD_ID = 23951880;

-- Commit only after reviewing the SELECT output above; use ROLLBACK to abandon the change.
COMMIT;

4、 手动清理HDFS数据
hadoop fs -rm -r /hive/path/to/partition # replace with the partition's actual HDFS path

5、刷新hive元缓存
-- Re-sync the table's partition metadata after the manual cleanup.
-- NOTE(review): whether MSCK REPAIR also drops stale partitions depends on the Hive
-- version and options used — confirm for your cluster.
MSCK REPAIR TABLE proj_60_finance.dwd_fm_ma_kpi_di_mm;