Apache Arrow读写文件(parquet,arrow文件)
目标:在C++层实现将Arrow Table的表数据写入到文件;在GO层实现读取文件完成表数据的提取。
实现流程如下:
C++层:
// Build the table data.
/* Table layout:
a | b
0 | 9
1 | 8
2 | 7
3 | 6
4 | 5
*/
/// @brief Builds a two-column int64 example table (columns "a" and "b", five rows).
///
/// Column "a" holds 0..4 and column "b" holds 9..5, as in the layout above.
/// Failures are routed through ABORT_ON_FAILURE, which is defined elsewhere
/// (presumably it aborts on a non-OK arrow::Status -- confirm against its definition).
///
/// @return The table created by arrow::Table::Make from the schema and both arrays.
std::shared_ptr<arrow::Table> CreateTable1() {
  auto schema = arrow::schema(
      {arrow::field("a", arrow::int64()), arrow::field("b", arrow::int64())});

  std::shared_ptr<arrow::Array> array_a;
  std::shared_ptr<arrow::Array> array_b;

  // A single builder is reused for both columns; it is reset after each Finish().
  arrow::NumericBuilder<arrow::Int64Type> builder;

  ABORT_ON_FAILURE(builder.AppendValues({0, 1, 2, 3, 4}));
  ABORT_ON_FAILURE(builder.Finish(&array_a));
  builder.Reset();

  ABORT_ON_FAILURE(builder.AppendValues({9, 8, 7, 6, 5}));
  ABORT_ON_FAILURE(builder.Finish(&array_b));
  builder.Reset();

  return arrow::Table::Make(schema, {array_a, array_b});
}
// Write the table data to a file.
/// @brief Writes the table from CreateTable1() to an Arrow IPC file under root_path.
///
/// Creates the directory `<root_path>/feather_datat` and writes the table to
/// `data1.arrow` inside it. Failures are routed through ABORT_ON_FAILURE /
/// ValueOrDie().
///
/// @param filesystem Filesystem used to create the directory and open the output stream.
/// @param root_path  Parent directory under which `feather_datat` is created.
/// @return The path of the created directory (`<root_path>/feather_datat`).
std::string CreateExampleFeatherDatat(const std::shared_ptr<fs::FileSystem>& filesystem,
                                      const std::string& root_path) {
  auto base_path = root_path + "/feather_datat";  // output directory
  ABORT_ON_FAILURE(filesystem->CreateDir(base_path));

  // Create an Arrow Table
  auto table = CreateTable1();

  // Write it into a single Arrow IPC file: <base_path>/data1.arrow
  auto output = filesystem->OpenOutputStream(base_path + "/data1.arrow").ValueOrDie();

  // Create a new IPC file writer from the stream sink and the table's schema.
  auto writer = arrow::ipc::MakeFileWriter(output.get(), table->schema()).ValueOrDie();

  // Write the (possibly chunked) table as a sequence of record batches.
  ABORT_ON_FAILURE(writer->WriteTable(*table));

  // Close the writer once everything has been written.
  ABORT_ON_FAILURE(writer->Close());

  return base_path;
}
GO层实现:(GO版本编译需要GO1.17以上)
//读C++写⼊的⽂件
func TestReadFile(t *testing.T) {
schema := arrow.NewSchema(
[]arrow.Field{
arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int64,Nullable:true},
情侣名字配对arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
}, nil,
)
mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
defer mem.AsrtSize(t, 0)
f, err := os.OpenFile("/home/sss/CLionProjects/ExampleDataSet/feather_datat/data1.arrow", os.O_RDWR, 0600) if err != nil {
政府职能有哪些t.Fatal(err)
}
defer f.Clo()
_, err1 := f.Seek(0, io.SeekStart)
舌苔厚黄是什么原因引起的
if err1 != nil {
长信宫
t.Fatal(err1)
}
r, err2 := ipc.NewFileReader(f, ipc.WithSchema(schema), ipc.WithAllocator(mem))
if err2 != nil {
t.Fatal(err2)
}
defer r.Clo()
for i := 0; i < r.NumRecords(); i++ {
rec, err := r.Record(i)
if err != nil {
t.Fatalf("could not read record %d: %v", i, err)
}
fmt.Printf("该表⾏数: %d\n",rec.NumRows())
fmt.Printf("该表列数: %d\n",rec.NumCols())
fmt.Printf("该表第1列名字: %s\n",rec.ColumnName(0))
fmt.Printf("该表第2列名字: %s\n",rec.ColumnName(1))
fmt.Printf("该表第1列数据: ")
敬松
fmt.Println(rec.Column(0))
fmt.Printf("该表第2列数据: ")
万圣节给糖
fmt.Println(rec.Column(1))
}
}
输出结果:
结论:与C++层写入的信息一致。