Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tpcds/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ If benchmarking with Blaze, ensure that the Blaze jar package is correctly built
# use correct SPARK_HOME and data location
SPARK_HOME=$HOME/software/spark ./bin/run \
--data-location /user/hive/data/tpcds-1000 \
--format parquet \
--output-dir ./benchmark-result
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class TPCDSBenchmarkArgs(val args: Array[String]) {
var outputDir: String = _
var queryFilter: Set[String] = Set.empty
var round: Int = 2
var format: String = "parquet"

parseArgs(args.toList)
validateArguments()
Expand Down Expand Up @@ -53,6 +54,10 @@ class TPCDSBenchmarkArgs(val args: Array[String]) {
round = value.toInt
args = tail

case optName :: value :: tail if optionMatch("--format", optName) =>
format = value
args = tail

case _ =>
System.err.println("Unknown/unsupported param " + args)
printUsageAndExit(1)
Expand All @@ -68,6 +73,7 @@ class TPCDSBenchmarkArgs(val args: Array[String]) {
| --output-dir Output directory for results
| --query-filter Queries to filter, e.g., q3,q5,q13
| --round Run each query for a specified number of rounds, default: 2
| --format Data format, one of: orc, parquet, default: parquet
| """.stripMargin)
System.exit(exitCode)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ import java.util.Date

import org.apache.commons.io.IOUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.Row
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

object TPCDSBenchmarkRunner {
def main(args: Array[String]): Unit = {
Expand Down Expand Up @@ -79,7 +78,13 @@ object TPCDSBenchmarkRunner {
"web_returns", "web_site", "reason", "call_center", "warehouse", "ship_mode", "income_band",
"time_dim", "web_page")
tables.par.foreach { tableName =>
spark.read.parquet(s"$dataLocation/$tableName").createOrReplaceTempView(tableName)
val df: DataFrame = benchmarkArgs.format match {
case "orc" => spark.read.orc(s"$dataLocation/$tableName")
case "parquet" => spark.read.parquet(s"$dataLocation/$tableName")
case _ => throw new RuntimeException(
s"Unknown format, available formats: orc, parquet; current input: ${benchmarkArgs.format}")
}
df.createOrReplaceTempView(tableName)
tableName -> spark.table(tableName).count()
}

Expand Down