Skip to content

Commit a02e54e

Browse files
author
zhangli20
committed
Support parallel Parquet reading.
Use a BufReader to read the page header, avoiding fragmented reads. Add the Hadoop shim.
1 parent 15751e9 commit a02e54e

15 files changed

Lines changed: 389 additions & 234 deletions

File tree

Cargo.lock

Lines changed: 34 additions & 34 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -66,27 +66,27 @@ serde_json = { version = "1.0.96" }
6666

6767
[patch.crates-io]
6868
# datafusion: branch=v42-blaze
69-
datafusion = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "9a09e14"}
70-
datafusion-common = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "9a09e14"}
71-
datafusion-expr = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "9a09e14"}
72-
datafusion-execution = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "9a09e14"}
73-
datafusion-optimizer = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "9a09e14"}
74-
datafusion-physical-expr = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "9a09e14"}
75-
orc-rust = { git = "https://github.com/blaze-init/datafusion-orc.git", rev = "9c74ac3"}
69+
datafusion = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "2bc42ea73"}
70+
datafusion-common = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "2bc42ea73"}
71+
datafusion-expr = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "2bc42ea73"}
72+
datafusion-execution = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "2bc42ea73"}
73+
datafusion-optimizer = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "2bc42ea73"}
74+
datafusion-physical-expr = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "2bc42ea73"}
75+
orc-rust = { git = "https://github.com/blaze-init/datafusion-orc.git", rev = "7833d7d"}
7676

7777
# arrow: branch=v53-blaze
78-
arrow = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
79-
arrow-arith = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
80-
arrow-array = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
81-
arrow-buffer = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
82-
arrow-cast = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
83-
arrow-data = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
84-
arrow-ord = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
85-
arrow-row = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
86-
arrow-schema = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
87-
arrow-select = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
88-
arrow-string = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
89-
parquet = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "9dbfd9018e"}
78+
arrow = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
79+
arrow-arith = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
80+
arrow-array = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
81+
arrow-buffer = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
82+
arrow-cast = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
83+
arrow-data = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
84+
arrow-ord = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
85+
arrow-row = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
86+
arrow-schema = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
87+
arrow-select = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
88+
arrow-string = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
89+
parquet = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "91dc27dedf"}
9090

9191
# serde_json: branch=v1.0.96-blaze
9292
serde_json = { git = "https://github.com/blaze-init/json", branch = "v1.0.96-blaze" }

hadoop-shim/pom.xml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3+
<modelVersion>4.0.0</modelVersion>
4+
5+
<parent>
6+
<groupId>org.blaze</groupId>
7+
<artifactId>blaze-engine</artifactId>
8+
<version>${revision}</version>
9+
<relativePath>../</relativePath>
10+
</parent>
11+
<groupId>org.blaze</groupId>
12+
<artifactId>hadoop-shim</artifactId>
13+
<packaging>jar</packaging>
14+
15+
<dependencies>
16+
<dependency>
17+
<groupId>org.scala-lang</groupId>
18+
<artifactId>scala-library</artifactId>
19+
<scope>provided</scope>
20+
</dependency>
21+
<dependency>
22+
<groupId>org.scala-lang.modules</groupId>
23+
<artifactId>scala-java8-compat_2.12</artifactId>
24+
<version>0.9.1</version>
25+
</dependency>
26+
<dependency>
27+
<groupId>org.scalatest</groupId>
28+
<artifactId>scalatest_${scalaVersion}</artifactId>
29+
<scope>test</scope>
30+
</dependency>
31+
<dependency>
32+
<groupId>org.apache.hadoop</groupId>
33+
<artifactId>hadoop-client-api</artifactId>
34+
<version>3.4.0</version>
35+
<scope>provided</scope>
36+
</dependency>
37+
</dependencies>
38+
</project>

0 commit comments

Comments
 (0)