ApacheHudi表⽬录结构
Apache Hudi 表⽬录结构
记录**⼀部分** 表⽬录结构和⽂件格式。便于对 Hudi 的设计理念和表的组织格式进⾏直观的理解。
数据由运⾏后产⽣。
⼀、COW 表类型组织逻辑
表名字为 stock_ticks_cow
Permission Owner Group Size Last Modified Replication Block Size Name
drwxr-xr-x root supergroup 0 B Oct 15 16:20 0 0 B .hoodie
drwxr-xr-x root supergroup 0 B Oct 15 15:45 0 0 B 2018
1.1 ?./tablename/.hoodie
Permission Owner Group Size Last Modified Replication Block Size Name
-rw-r–r--root supergroup968 B Oct 15 15:llback
-rw-r–r--root supergroup0 B Oct 15 15:llback.inflight
-rw-r–r--root supergroup 2.2 KB Oct 15 15:it
-rw-r–r--root supergroup0 B Oct 15 15:it.requested
-rw-r–r--root supergroup350 B Oct 15 15:453128 MB20201015074529.inflight
-rw-r–r--root supergroup 2.21 KB Oct 15 16:it
-rw-r–r--root supergroup0 B Oct 15 16:it.requested
-rw-r–r--root supergroup 1.01 KB Oct 15 16:203128 MB20201015082021.inflight
-rw-r–r--root supergroup213 B Oct 15 15:363128 MB hoodie.properties
drwxr-xr-x root supergroup0 B Oct 15 15:3600 B.aux
drwxr-xr-x root supergroup0 B Oct 15 16:p
drwxr-xr-x root supergroup0 B Oct 15 15:3600 B archived
1.1.1 ?.rollback ⽂件格式
Objavro.schemaÚ
{"type":"record","name":"HoodieRollbackMetadata","namespace":"org.apache.del","fields":[{"name":"startRollbackTime","type":{"type":"string"," avro.java.string":"String"}},{"name":"timeTakenInMillis","type":"long"},{"name":"totalFilesDeleted","type":"int"},{"name":"commitsRollback","type":{"type":"arra y","items":{"type":"string","avro.java.string":"String"}}},{"name":"partitionMetadata","type":{"type":"map","values":{"type":"record","name":"HoodieRollbackPar titionMetadata","fields":[{"name":"partitionPath","type":{"type":"string","avro.java.string":"String"}},{"name":"successDeleteFiles","type":{"type":"array","items": {"type":"string","avro.java.string":"String"}}},{"name":"failedDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}}]},"avro.java. string":"String"}},{"name":"version","type":["int","null"],"default":1}]}
1.1.2 ?.rollback.inflight ⽂件格式
暂⽆复现场景
1.1.3 ?.commit ⽂件格式
"path":"2018/08/31/8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-22_20201015074529.parquet",
"prevCommit":"null",
"numWrites":197,
"numDeletes":0,
"numUpdateWrites":0,take care of
"numInrts":197,
"totalWriteBytes":443701,
"totalWriteErrors":0,
"tempPath":null,
"partitionPath":"2018/08/31",
"totalLogRecords":0,
"totalLogFilesCompacted":0,
"totalLogSizeCompacted":0,
"totalUpdatedRecordsCompacted":0,
"totalLogBlocks":0,
"totalCorruptLogBlock":0,
"totalRollbackBlocks":0,
"fileSizeInBytes":443701
}]
},
"compacted":fal,
"extraMetadata":{
"ROLLING_STAT":"{\n \"partitionToRollingStats\" : {\n \"2018/08/31\" : {\n \"8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0\" : {\n \"fileId\" : \"8c8d b0e3-964d-4a2c-b0fe-f4306d08b5c8-0\",\n \"inrts\" : 197,\n \"uprts\" : 0,\n \"deletes\" : 0,\n \"totalInputWriteBytesToDisk\" : 0,\n \" totalInputWriteBytesOnDisk\" : 443701\n }\n }\n },\n \"actionType\" : \"commit\"\n}",
"schema":"{\"type\":\"record\",\"name\":\"stock_ticks\",\"fields\":[{\"name\":\"volume\",\"type\":\"long\"},{\"name\":\"ts\",\"type\":\"string\"},{\"name\":\"symbol\ ",\"type\":\"string\"},{\"name\":\"year\",\"type\":\"int\"},{\"name\":\"month\",\"type\":\"string\"},{\"name\":\"high\",\"type\":\"double\"},{\"name\":\"low\",\"type\":\"dou ble\"},{\"name\":\"key\",\"type\":\"string\"},{\"name\":\"date\",\"type\":\"string\"},{\"name\":\"clo\",\"type\":\"double\"},{\"name\":\"open\",\"type\":\"double\"},{\"n ame\":\"day\",\"type\":\"string\"}]}",
"deltastreamer.checkpoint.key":"stock_ticks,0:3482"
},
"fileIdAndRelativePaths":{
"8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0":"2018/08/31/8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-22_20201015074529.parquet"
},
"totalRecordsDeleted":0,
"totalLogRecordsCompacted":0,
"totalScanTime":0,
"totalCreateTime":793,
"totalUprtTime":0,
"totalCompactedRecordsUpdated":0,
"totalLogFilesCompacted":0,
"totalLogFilesSize":0
}
1.1.4 ?.requested ⽂件格式
暂⽆复现场景
1.1.5 ?.inflight ⽂件格式
"path":null,
"prevCommit":"20201015074529",
"numWrites":0,
"numDeletes":0,
"numUpdateWrites":99,
"numInrts":0,
"totalWriteBytes":0,
"totalWriteErrors":0,
"tempPath":null,
"partitionPath":null,
"totalLogRecords":0,
"totalLogFilesCompacted":0,
"totalLogSizeCompacted":0,
"totalUpdatedRecordsCompacted":0,
"totalLogBlocks":0,
"totalCorruptLogBlock":0,
"totalRollbackBlocks":0,
"fileSizeInBytes":0
}]
},
"compacted":fal,
"extraMetadata":{},
"totalScanTime":0,
哈佛大学录取分"totalCreateTime":0,
"totalUprtTime":0, "totalCompactedRecordsUpdated":0, "totalLogFilesCompacted":0, "totalLogFilesSize":0, "fileIdAndRelativePaths":{
"8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0":null },
"totalRecordsDeleted":0, "totalLogRecordsCompacted":0
}
1.1.6 hoodie.properties
#Properties saved on Thu Oct 15 07:36:26 UTC 2020
#Thu Oct 15 07:36:26 UTC 2020
hoodie.table.name=stock_ticks_cow
hoodie.archivelog.folder=archived
hoodie.table.type=COPY_ON_WRITE
hoodie.timeline.layout.version=1
1.2 ?/tablename/part-n/pn-n/pn-n-n…
Permission Owner Group Size Last Modified Replication Block Size Name
-rw-r--r-- root supergroup 93 B Oct 15 15:45 3 128 MB .hoodie_partition_metadata
-rw-r--r-- root supergroup 433.3 KB Oct 15 15:45 3 128 MB 8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-22_20201015074529.parquet
-rw-r--r-- root supergroup 433.01 KB Oct 15 16:20 3 128 MB 8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-25_20201015082021.parquet
1.2.1 .hoodie_partition_metadata ⽂件格式
#partition metadata
#Thu Oct 15 07:45:31 UTC 2020
commitTime=20201015074529
partitionDepth=3
1.2.2 ?.parquet ⽂件格式
5041 5231 1504 1524 154c 4c15 0215 0400
001f 8b08 0000 0000 0000 00e3 6360 6030
3230 3230 3430 3435 3037 3135 b204 0078
5454 0112 0000 0015 0015 1415 382c 158a
0315 0415 0615 081c 180e 3230 3230 3130
3135 3037 3435 3239 180e 3230 3230 3130
3135 3037 3435 3239 1600 280e 3230 3230
3130 3135 3037 3435 3239 180e 3230 3230
......
⼆、MOR 类型表⽬录结构
表名字为 stock_ticks_mor
Permission Owner Group Size Last Modified Replication Block Size Name
drwxr-xr-x root supergroup 0 B Oct 15 16:20 0 0 B .hoodie
drwxr-xr-x root supergroup 0 B Oct 15 15:45 0 0 B 2018
2.1 ?/tablename/.hoodie
Permission Owner Group Size Last Modified Replication Block Size Name
-rw-r–r--root supergroup968 B Oct 15 15:llback
-rw-r–r--root supergroup0 B Oct 15 15:llback.inflight
-rw-r–r--root supergroup 2.21 KB Oct 15 15:453128 MB20201015074554.deltacommit
-rw-r–r--root supergroup350 B Oct 15 15:453128 MB20201015074554.deltacommit.inflight
-rw-r--r-- root supergroup 0 B Oct 15 15:45 3 128 MB 20201015074554.deltacommit.requested
-rw-r--r-- root supergroup 2.26 KB Oct 15 16:20 3 128 MB 20201015082051.deltacommit
-rw-r–r--root supergroup 1.01 KB Oct 15 16:203128 MB20201015082051.deltacommit.inflight
-rw-r--r-- root supergroup 0 B Oct 15 16:20 3 128 MB 20201015082051.deltacommit.requested
-rw-r--r-- root supergroup 305 B Oct 15 15:37 3 128 MB hoodie.properties
drwxr-xr-x root supergroup0 B Oct 15 15:3700 B.aux
drwxr-xr-x root supergroup0 B Oct 15 15:p
drwxr-xr-x root supergroup0 B Oct 15 15:3700 B archived
2.1.1 *.rollback ⽂件格式
Objavro.schemaÚ
{"type":"record","name":"HoodieRollbackMetadata","namespace":"org.apache.del","fields":[{"name":"startRollbackTime","type":{"type":"string"," avro.java.string":"String"}},{"name":"timeTakenInMillis","type":"long"},{"name":"totalFilesDeleted","type":"int"},{"name":"commitsRollback","type":{"type":"arra y","items":{"type":"string","avro.java.string":"String"}}},{"name":"partitionMetadata","type":{"type":"map","values":{"type":"record","name":"HoodieRollbackPar titionMetadata","fields":[{"name":"partitionPath","type":{"type":"string","avro.java.string":"String"}},{"name":"successDeleteFiles","type":{"type":"array","items": {"type":"string","avro.java.string":"String"}}},{"name":"failedDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}}]},"avro.java. string":"String"}},{"name":"version","type":["int","null"],"default":1}]}
2.1.2 *.rollback.inflight ⽂件格式
暂⽆场景复现
2.1.3 *.deltacommit ⽂件格式
1232
{
"partitionToWriteStats":{
"2018/08/31":[{
"fileId":"c7922a25-5d97-4add-8580-127fd14aa494-0",
"path":"2018/08/31/c7922a25-5d97-4add-8580-127fd14aa494-0_0-22-22_20201015074554.parquet",
"prevCommit":"null",
"numWrites":197,
"numDeletes":0,
"numUpdateWrites":0,
"numInrts":197,
"totalWriteBytes":443699,
"totalWriteErrors":0,
"tempPath":null,
"partitionPath":"2018/08/31",
"totalLogRecords":0,
"totalLogFilesCompacted":0,
"totalLogSizeCompacted":0,
"totalUpdatedRecordsCompacted":0,
"totalLogBlocks":0,
"totalCorruptLogBlock":0,
"totalRollbackBlocks":0,
"fileSizeInBytes":443699
}]
},
"compacted":fal,
"extraMetadata":{
"ROLLING_STAT":"{\n \"partitionToRollingStats\" : {\n \"2018/08/31\" : {\n \"c7922a25-5d97-4add-8580-127fd14aa494-0\" : {\n \"fileId\" : \"c79 22a25-5d97-4add-8580-127fd14aa494-0\",\n \"inrts\" : 197,\n \"uprts\" : 0,\n \"deletes\" : 0,\n \"totalInputWriteBytesToDisk\" : 0,\n \"totalInputWriteBytesOnDisk\" : 443699\n }\n }\n },\n \"actionType\" : \"deltacommit\"\n}",
"schema":"{\"type\":\"record\",\"name\":\"stock_ticks\",\"fields\":[{\"name\":\"volume\",\"type\":\"long\"},{\"name\":\"ts\",\"type\":\"string\"},{\"name\":\"symbol\ ",\"type\":\"string\"},{\"name\":\"year\",\"type\":\"int\"},{\"name\":\"month\",\"type\":\"string\"},{\"name\":\"high\",\"type\":\"double\"},{\"name\":\"low\",\"type\":\"dou ble\"},{\"name\":\"key\",\"type\":\"st
ring\"},{\"name\":\"date\",\"type\":\"string\"},{\"name\":\"clo\",\"type\":\"double\"},{\"name\":\"open\",\"type\":\"double\"},{\"n ame\":\"day\",\"type\":\"string\"}]}",
"deltastreamer.checkpoint.key":"stock_ticks,0:3482"
},
"fileIdAndRelativePaths":{
"c7922a25-5d97-4add-8580-127fd14aa494-0":"2018/08/31/c7922a25-5d97-4add-8580-127fd14aa494-0_0-22-22_20201015074554.parquet"
},
"totalRecordsDeleted":0,
"totalLogRecordsCompacted":0,
"totalScanTime":0,
"totalCreateTime":1280,
"totalUprtTime":0,
"totalCompactedRecordsUpdated":0,
"totalLogFilesCompacted":0,
"totalLogFilesSize":0
}
2.1.4 *.deltacommit.inflight ⽂件格式