1717 */
1818package science .atlarge .graphalytics .util ;
1919
20+ import org .duckdb .DuckDBConnection ;
2021import science .atlarge .graphalytics .domain .graph .FormattedGraph ;
2122import science .atlarge .graphalytics .domain .graph .PropertyList ;
2223import org .apache .logging .log4j .LogManager ;
2324import org .apache .logging .log4j .Logger ;
2425import science .atlarge .graphalytics .util .io .*;
2526
27+ import java .io .File ;
2628import java .io .FileInputStream ;
2729import java .io .FileOutputStream ;
2830import java .io .IOException ;
2931import java .nio .file .Files ;
3032import java .nio .file .Paths ;
33+ import java .sql .DriverManager ;
34+ import java .sql .SQLException ;
35+ import java .sql .Statement ;
3136
3237/**
3338 * Utility class for managing graph files. Responsible for generating additional graph files from a source dataset
@@ -52,7 +57,7 @@ private GraphFileManager() {
5257 * @param formattedGraph the graph to check the vertex and edge file for
 * @throws IOException iff the vertex or edge file cannot be generated
 * @throws SQLException iff the database statements used to generate the edge file fail
5459 */
55- public static void ensureGraphFilesExist (FormattedGraph formattedGraph ) throws IOException {
60+ public static void ensureGraphFilesExist (FormattedGraph formattedGraph ) throws IOException , SQLException {
5661 ensureVertexFileExists (formattedGraph );
5762 ensureEdgeFileExists (formattedGraph );
5863 }
@@ -74,7 +79,7 @@ private static void ensureVertexFileExists(FormattedGraph formattedGraph) throws
7479 LOG .info ("Done generating vertex file for graph \" {}\" ." , formattedGraph .getGraph ().getName ());
7580 }
7681
77- private static void ensureEdgeFileExists (FormattedGraph formattedGraph ) throws IOException {
82+ private static void ensureEdgeFileExists (FormattedGraph formattedGraph ) throws IOException , SQLException {
7883 if (Paths .get (formattedGraph .getEdgeFilePath ()).toFile ().exists ()) {
7984 LOG .info ("Found edge file for graph \" {}\" at \" {}\" ." , formattedGraph .getName (), formattedGraph .getEdgeFilePath ());
8085 return ;
@@ -109,21 +114,21 @@ private static void generateVertexFile(FormattedGraph formattedGraph) throws IOE
109114 }
110115 }
111116
112- private static void generateEdgeFile (FormattedGraph formattedGraph ) throws IOException {
117+ private static void generateEdgeFile (FormattedGraph formattedGraph ) throws IOException , SQLException {
113118 // Ensure that the output directory exists
114119 Files .createDirectories (Paths .get (formattedGraph .getEdgeFilePath ()).getParent ());
115120
116- // Generate the edge file
117- int [] propertyIndices = findPropertyIndices ( formattedGraph . getGraph (). getSourceGraph (). getEdgeProperties (),
118- formattedGraph . getEdgeProperties ());
119- try (EdgeListStreamWriter writer = new EdgeListStreamWriter (
120- new EdgeListPropertyFilter (
121- new EdgeListInputStreamReader (
122- new FileInputStream ( formattedGraph . getGraph (). getSourceGraph (). getEdgeFilePath ())
123- ),
124- propertyIndices ),
125- new FileOutputStream ( formattedGraph . getEdgeFilePath ()))) {
126- writer . writeAll ( );
121+ String dbFile = String . format ( "%s/edge_file.duckdb" , Paths . get ( formattedGraph . getEdgeFilePath ()). toFile (). getParent ());
122+ new File ( dbFile ). delete ();
123+
124+ try (DuckDBConnection conn = ( DuckDBConnection ) DriverManager . getConnection (
125+ String . format ( "jdbc:duckdb:%s" , dbFile )
126+ )) {
127+ Statement stmt = conn . createStatement ();
128+ stmt . execute ( String . format ( "CREATE OR REPLACE TABLE e(source BIGINT NOT NULL, target BIGINT NOT NULL, weight DOUBLE);" ));
129+ stmt . execute ( String . format ( "COPY e FROM '%s' (DELIMITER ' ', FORMAT csv)" , formattedGraph . getGraph (). getSourceGraph (). getEdgeFilePath ()));
130+ // Drop a lot of weight with this one weird trick
131+ stmt . execute ( String . format ( "COPY e (source, target) TO '%s' (DELIMITER ' ', FORMAT csv)" , formattedGraph . getEdgeFilePath ()) );
127132 }
128133 }
129134
0 commit comments