forked from apache/arrow-go
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample_csv_test.go
More file actions
112 lines (100 loc) · 3.73 KB
/
example_csv_test.go
File metadata and controls
112 lines (100 loc) · 3.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package csv_test
import (
"fmt"
"os"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
arrowcsv "github.com/apache/arrow-go/v18/arrow/csv"
)
// Example_reader shows how to read a CSV file into an Arrow record using an
// explicit schema and then compute the mean of each numeric column.
//
// It opens the aggregate_test_100.csv fixture, takes the first record produced
// by the CSV reader, prints the record's dimensions, and prints the average of
// the Int64 columns (c2..c10) followed by the Float64 columns (c11, c12).
// Any failure is reported on stdout and ends the example early.
func Example_reader() {
	const csvPath = "../../arrow-testing/data/csv/aggregate_test_100.csv" // Test csv file

	src, err := os.Open(csvPath)
	if err != nil {
		fmt.Printf("Failed to open file: %v\n", err)
		return
	}
	defer src.Close()

	// Column layout of the csv file: one string column, nine Int64 columns,
	// two Float64 columns and a trailing string column, all nullable.
	fields := []arrow.Field{
		{Name: "c1", Type: arrow.BinaryTypes.String, Nullable: true},
		{Name: "c2", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		{Name: "c3", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		{Name: "c4", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		{Name: "c5", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		{Name: "c6", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		{Name: "c7", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		{Name: "c8", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		{Name: "c9", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		{Name: "c10", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		{Name: "c11", Type: arrow.PrimitiveTypes.Float64, Nullable: true},
		{Name: "c12", Type: arrow.PrimitiveTypes.Float64, Nullable: true},
		{Name: "c13", Type: arrow.BinaryTypes.String, Nullable: true},
	}
	schema := arrow.NewSchema(fields, nil)

	// The file has a header row; a chunk size of -1 is passed so that the
	// first record carries every row (the expected output reports 100).
	rdr := arrowcsv.NewReader(
		src,
		schema,
		arrowcsv.WithHeader(true),
		arrowcsv.WithChunk(-1),
	)
	defer rdr.Release()

	// Advance to the first record, telling a read error apart from an
	// input that simply produced nothing.
	if !rdr.Next() {
		switch readErr := rdr.Err(); {
		case readErr != nil:
			fmt.Printf("Error reading CSV: %v\n", readErr)
		default:
			fmt.Println("No records found")
		}
		return
	}

	// The record is intentionally not released here - the reader will handle that.
	rec := rdr.Record()
	rows, cols := rec.NumRows(), rec.NumCols()
	fmt.Printf("Number of rows: %d\n", rows)
	fmt.Printf("Number of columns: %d\n", cols)
	fmt.Println()

	fmt.Println("Basic statistics for numeric columns:")

	// Zero-based column index ranges, matching the schema declared above.
	const (
		firstIntCol   = 1  // c2
		firstFloatCol = 10 // c11; also one past the last Int64 column (c10)
		endNumericCol = 12 // one past the last Float64 column (c12)
	)

	// Int64 columns: accumulate as int64, convert to float64 only for the mean.
	for idx := firstIntCol; idx < firstFloatCol; idx++ {
		ints := rec.Column(idx).(*array.Int64)
		count := ints.Len()
		var total int64
		for row := 0; row < count; row++ {
			total += ints.Value(row)
		}
		mean := float64(total) / float64(count)
		fmt.Printf("Column c%d: Average = %.2f\n", idx+1, mean)
	}

	// Float64 columns: same computation, printed with four decimals.
	for idx := firstFloatCol; idx < endNumericCol; idx++ {
		floats := rec.Column(idx).(*array.Float64)
		count := floats.Len()
		var total float64
		for row := 0; row < count; row++ {
			total += floats.Value(row)
		}
		mean := total / float64(count)
		fmt.Printf("Column c%d: Average = %.4f\n", idx+1, mean)
	}

	// Output:
	// Number of rows: 100
	// Number of columns: 13
	//
	// Basic statistics for numeric columns:
	// Column c2: Average = 2.85
	// Column c3: Average = 7.81
	// Column c4: Average = 2319.97
	// Column c5: Average = 158626279.61
	// Column c6: Average = 59276376114661656.00
	// Column c7: Average = 130.60
	// Column c8: Average = 30176.41
	// Column c9: Average = 2220897700.60
	// Column c10: Average = -86834033398685392.00
	// Column c11: Average = 0.4793
	// Column c12: Average = 0.5090
}