Skip to content

Commit 05e53d0

Browse files
committed
add S3 dep to Play server and fix TPC-H generation script
1 parent 8a7cf3f commit 05e53d0

3 files changed

Lines changed: 29 additions & 25 deletions

File tree

deps.edn

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
;; XTDB dependencies
77
com.xtdb/xtdb-api {:mvn/version "2.x-SNAPSHOT"}
88
com.xtdb/xtdb-core {:mvn/version "2.x-SNAPSHOT"}
9+
com.xtdb/xtdb-aws {:mvn/version "2.x-SNAPSHOT"}
910

1011
org.postgresql/postgresql {:mvn/version "42.7.4"}
1112
com.github.seancorfield/next.jdbc {:mvn/version "1.3.939"}

scripts/README.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,12 +42,13 @@ The action will generate the dataset and upload it to S3 automatically.
4242
To generate a TPC-H dataset locally at scale factor 0.01:
4343

4444
```bash
45-
clojure -M:generate-tpch xtdb-play-datasets 0.01
45+
clojure -M:generate-tpch xtdb-play-datasets 0.01 eu-west-1
4646
```
4747

4848
**Arguments:**
4949
- `bucket-name`: The S3 bucket to upload to (e.g., `xtdb-play-datasets`)
5050
- `scale-factor`: The TPC-H scale factor (0.01 = ~10MB, 1.0 = ~1GB, etc.)
51+
- `region`: AWS region where the bucket is located (e.g., `eu-west-1`)
5152

5253
**Note:** Local generation requires AWS credentials configured in your environment (see Prerequisites).
5354

scripts/generate_tpch.clj

Lines changed: 26 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -8,63 +8,65 @@
88
(:require [clojure.tools.logging :as log]
99
[xtdb.api :as xt]
1010
[xtdb.datasets.tpch :as tpch]
11-
[xtdb.node :as xtn]))
11+
[xtdb.node :as xtn]
12+
[xtdb.db-catalog :as db-catalog])
13+
(:import [xtdb.aws.s3 S3Configurator]
14+
[software.amazon.awssdk.regions Region]))
1215

1316
(defn generate-dataset
1417
"Generates TPC-H dataset at given scale factor and stores in S3 bucket."
15-
[bucket-name scale-factor]
16-
(log/info "Generating TPC-H dataset at scale factor" scale-factor "to bucket" bucket-name)
18+
[bucket-name scale-factor region]
19+
(log/info "Generating TPC-H dataset at scale factor" scale-factor "to bucket" bucket-name "in region" region)
1720

18-
(let [node-config {:storage
19-
{:object-store
20-
{:module 'xtdb.s3/s3-object-store
21-
:bucket bucket-name
22-
:prefix (str "tpch-sf" scale-factor "/")
23-
;; Public bucket - no credentials needed for read
24-
;; Write requires AWS credentials from environment
25-
}}
26-
:server {:port 5432}}]
21+
(let [node-config {:storage [:remote {:object-store [:s3 {:bucket bucket-name
22+
:prefix (str "tpch-sf" scale-factor "/")
23+
:configurator (reify S3Configurator
24+
(configureClient [_ builder]
25+
(.region builder (Region/of region))))}]}]
26+
:disk-cache {:path "/tmp/xtdb-tpch-cache"}}]
2727

28-
(log/info "Starting XTDB node with config:" node-config)
28+
(log/info "Starting XTDB node with S3 storage in region:" region)
2929

3030
(with-open [node (xtn/start-node node-config)]
3131
(log/info "Node started, generating TPC-H data...")
3232

33-
;; Generate TPC-H dataset
34-
(tpch/submit-tpch! node {:scale-factor scale-factor})
33+
;; Generate TPC-H dataset using DML (INSERT statements)
34+
(tpch/submit-dml! node scale-factor)
3535

3636
(log/info "TPC-H data submitted, waiting for ingest...")
3737

3838
;; Wait for data to be ingested
3939
(Thread/sleep 5000)
4040

41-
;; Finish chunk to ensure all data is written
42-
(log/info "Calling finish-chunk to ensure data is persisted...")
43-
(.finishChunk node)
41+
;; Finish block to ensure all data is written
42+
(log/info "Calling finish-block to ensure data is persisted...")
43+
(.finishBlock (.getLogProcessor (db-catalog/primary-db node)))
4444

4545
(log/info "Dataset generation complete!"))))
4646

4747
(defn -main [& args]
48-
(when (not= (count args) 2)
49-
(println "Usage: clojure -M:generate-tpch <bucket-name> <scale-factor>")
50-
(println "Example: clojure -M:generate-tpch xtdb-play-datasets 0.01")
48+
(when (not= (count args) 3)
49+
(println "Usage: clojure -M:generate-tpch <bucket-name> <scale-factor> <region>")
50+
(println "Example: clojure -M:generate-tpch xtdb-play-datasets 0.01 eu-west-1")
5151
(System/exit 1))
5252

5353
(let [bucket-name (first args)
54-
scale-factor (Double/parseDouble (second args))]
54+
scale-factor (Double/parseDouble (second args))
55+
region (nth args 2)]
5556

5657
(println "=" (repeat 60 "="))
5758
(println "TPC-H Dataset Generator")
5859
(println "=" (repeat 60 "="))
5960
(println "Bucket:" bucket-name)
6061
(println "Scale Factor:" scale-factor)
62+
(println "Region:" region)
6163
(println)
6264

6365
(try
64-
(generate-dataset bucket-name scale-factor)
66+
(generate-dataset bucket-name scale-factor region)
6567
(println)
6668
(println "✓ Success! Dataset is now available in S3:")
67-
(println " s3://" bucket-name "/tpch-sf" scale-factor "/")
69+
(println (str " s3://" bucket-name "/tpch-sf" scale-factor "/"))
6870
(catch Exception e
6971
(println "✗ Error generating dataset:")
7072
(println (.getMessage e))

0 commit comments

Comments
 (0)