Skip to content

Commit 7c506db

Browse files
authored
Merge pull request #223 from ldbc/minimize-with-duckdb
Minimize edge files using DuckDB
2 parents 8f99fff + c0f1da7 commit 7c506db

File tree

1 file changed

+19
-14
lines changed

1 file changed

+19
-14
lines changed

graphalytics-core/src/main/java/science/atlarge/graphalytics/util/GraphFileManager.java

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,22 @@
1717
*/
1818
package science.atlarge.graphalytics.util;
1919

20+
import org.duckdb.DuckDBConnection;
2021
import science.atlarge.graphalytics.domain.graph.FormattedGraph;
2122
import science.atlarge.graphalytics.domain.graph.PropertyList;
2223
import org.apache.logging.log4j.LogManager;
2324
import org.apache.logging.log4j.Logger;
2425
import science.atlarge.graphalytics.util.io.*;
2526

27+
import java.io.File;
2628
import java.io.FileInputStream;
2729
import java.io.FileOutputStream;
2830
import java.io.IOException;
2931
import java.nio.file.Files;
3032
import java.nio.file.Paths;
33+
import java.sql.DriverManager;
34+
import java.sql.SQLException;
35+
import java.sql.Statement;
3136

3237
/**
3338
* Utility class for managing graph files. Responsible for generating additional graph files from a source dataset
@@ -52,7 +57,7 @@ private GraphFileManager() {
5257
* @param formattedGraph the graph to check the vertex and edge file for
5358
* @throws IOException iff the vertex or edge file can not be generated
5459
*/
55-
public static void ensureGraphFilesExist(FormattedGraph formattedGraph) throws IOException {
60+
public static void ensureGraphFilesExist(FormattedGraph formattedGraph) throws IOException, SQLException {
5661
ensureVertexFileExists(formattedGraph);
5762
ensureEdgeFileExists(formattedGraph);
5863
}
@@ -74,7 +79,7 @@ private static void ensureVertexFileExists(FormattedGraph formattedGraph) throws
7479
LOG.info("Done generating vertex file for graph \"{}\".", formattedGraph.getGraph().getName());
7580
}
7681

77-
private static void ensureEdgeFileExists(FormattedGraph formattedGraph) throws IOException {
82+
private static void ensureEdgeFileExists(FormattedGraph formattedGraph) throws IOException, SQLException {
7883
if (Paths.get(formattedGraph.getEdgeFilePath()).toFile().exists()) {
7984
LOG.info("Found edge file for graph \"{}\" at \"{}\".", formattedGraph.getName(), formattedGraph.getEdgeFilePath());
8085
return;
@@ -109,21 +114,21 @@ private static void generateVertexFile(FormattedGraph formattedGraph) throws IOE
109114
}
110115
}
111116

112-
private static void generateEdgeFile(FormattedGraph formattedGraph) throws IOException {
117+
private static void generateEdgeFile(FormattedGraph formattedGraph) throws IOException, SQLException {
113118
// Ensure that the output directory exists
114119
Files.createDirectories(Paths.get(formattedGraph.getEdgeFilePath()).getParent());
115120

116-
// Generate the edge file
117-
int[] propertyIndices = findPropertyIndices(formattedGraph.getGraph().getSourceGraph().getEdgeProperties(),
118-
formattedGraph.getEdgeProperties());
119-
try (EdgeListStreamWriter writer = new EdgeListStreamWriter(
120-
new EdgeListPropertyFilter(
121-
new EdgeListInputStreamReader(
122-
new FileInputStream(formattedGraph.getGraph().getSourceGraph().getEdgeFilePath())
123-
),
124-
propertyIndices),
125-
new FileOutputStream(formattedGraph.getEdgeFilePath()))) {
126-
writer.writeAll();
121+
String dbFile = String.format("%s/edge_file.duckdb", Paths.get(formattedGraph.getEdgeFilePath()).toFile().getParent());
122+
new File(dbFile).delete();
123+
124+
try (DuckDBConnection conn = (DuckDBConnection) DriverManager.getConnection(
125+
String.format("jdbc:duckdb:%s", dbFile)
126+
)) {
127+
Statement stmt = conn.createStatement();
128+
stmt.execute(String.format("CREATE OR REPLACE TABLE e(source BIGINT NOT NULL, target BIGINT NOT NULL, weight DOUBLE);"));
129+
stmt.execute(String.format("COPY e FROM '%s' (DELIMITER ' ', FORMAT csv)", formattedGraph.getGraph().getSourceGraph().getEdgeFilePath()));
130+
// Drop a lot of weight with this one weird trick
131+
stmt.execute(String.format("COPY e (source, target) TO '%s' (DELIMITER ' ', FORMAT csv)", formattedGraph.getEdgeFilePath()));
127132
}
128133
}
129134

0 commit comments

Comments
 (0)