From 6426e78e71136d8324520c2d0bd9da118c1fc796 Mon Sep 17 00:00:00 2001 From: wangmingjin163 <529673697@qq.com> Date: Tue, 20 Aug 2024 17:17:10 +0800 Subject: [PATCH] Fix: Corrected issue with merging Parquet files without Field ID --- .../GenericCombinedIcebergDataReader.java | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/amoro-core/src/main/java/org/apache/amoro/io/reader/GenericCombinedIcebergDataReader.java b/amoro-core/src/main/java/org/apache/amoro/io/reader/GenericCombinedIcebergDataReader.java index c16d233941..2e928f87ed 100644 --- a/amoro-core/src/main/java/org/apache/amoro/io/reader/GenericCombinedIcebergDataReader.java +++ b/amoro-core/src/main/java/org/apache/amoro/io/reader/GenericCombinedIcebergDataReader.java @@ -42,6 +42,7 @@ import org.apache.iceberg.encryption.EncryptionManager; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.mapping.NameMappingParser; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.TypeUtil; @@ -191,13 +192,15 @@ private CloseableIterable openFile( return avro.build(); case PARQUET: - Parquet.ReadBuilder parquet = - Parquet.read(input) - .project(fileProjection) - .createReaderFunc( - fileSchema -> - GenericParquetReaders.buildReader( - fileProjection, fileSchema, idToConstant)); + Parquet.ReadBuilder parquet = Parquet.read(input) + .project(fileProjection) + .createReaderFunc(fileSchema -> + GenericParquetReaders.buildReader(fileProjection, fileSchema, idToConstant)); + + //Fix the issue that parquet file schema without field ID ,then compact misaligned columns issue + if (nameMapping != null && !nameMapping.isEmpty()) { + parquet.withNameMapping(NameMappingParser.fromJson(nameMapping)); + } if (reuseContainer) { parquet.reuseContainers();