Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
18c46e5
datastore: add facet support in mongodb datastore, #TASK-7151, #TASK-…
jtarraga Oct 24, 2024
5495d80
datastore: improve code, #TASK-7151, #TASK-7134
jtarraga Oct 25, 2024
304603e
datastore: implement the MongoDB to FacetField converter, #TASK-7151,…
jtarraga Oct 25, 2024
b537e6f
datastore: fix MongoDB document to FacetField converter, #TASK-7151, …
jtarraga Oct 25, 2024
17f83b2
datastore: change long to Long in FacetField, #TASK-7151, #TASK-7134
jtarraga Oct 28, 2024
865b94a
datastore: set range format to field[start..end]:step, #TASK-7151, #T…
jtarraga Oct 28, 2024
880f2c6
datastore: use JsonInclude.Include.NON_NULL, #TASK-7151, #TASK-7134
jtarraga Oct 28, 2024
75dc002
datastore: fix pom.xml, #TASK-7151, #TASK-7134
jtarraga Oct 28, 2024
25cbd91
datastore: restore FacetField to previous change, #TASK-7151, #TASK-7134
jtarraga Oct 28, 2024
9f0d9b9
datastore: change count to Number, #TASK-7151, #TASK-7134
jtarraga Oct 29, 2024
0f3a24d
test: add JUnit tests for facets, #TASK-7151, #TASK-7134
jtarraga Oct 29, 2024
f2b080c
mongodb: rename converter, use Long instead Number, #TASK-7151, #TASL…
jtarraga Nov 11, 2024
ea3906c
mongodb: support lists using accumulators, #TASK-7151, #TASK-7134
jtarraga Nov 12, 2024
e68c30e
mongodb: fix sonnar issues, #TASK-7151, #TASK-7134
jtarraga Nov 13, 2024
84d1f92
mondodb: add 'sum' to aggregation operators enum
imedina Dec 12, 2024
26c9628
mondodb: fix 'sum' aggregation operator
imedina Dec 12, 2024
10a7f0c
mondodb: fix 'sum' aggregation operator
imedina Dec 12, 2024
7943e1b
mondodb: fix 'sum' aggregation operator
imedina Dec 12, 2024
e8159f3
mondodb: fix check style
imedina Dec 12, 2024
005c45e
datastore: fix the accumulator 'sum' in MongoDB facets, #TASK-7151, #…
jtarraga Dec 12, 2024
31424d8
mongodb: aggregation test. To be reverted.
imedina Dec 13, 2024
57f2138
mongodb: aggregation test 2. To be reverted.
imedina Dec 13, 2024
3cca26f
mongodb: aggregation test 3. To be reverted.
imedina Dec 13, 2024
e177dd7
mongodb: aggregation test 4. To be reverted.
imedina Dec 13, 2024
b15ed9a
mongodb: revert all tests
imedina Dec 13, 2024
a7c86e0
mongodb: fix aggregation regex
imedina Dec 13, 2024
eb5b519
mongodb: aggregation style improvement
imedina Dec 13, 2024
a073e84
mongodb: fix aggregation regex
imedina Dec 13, 2024
3f9386f
mongodb: fix aggregation
imedina Dec 13, 2024
13b3e59
mongodb: fix aggregation parse
imedina Dec 13, 2024
dd39812
datastore: implement the facet following the example:bioformat:sum(si…
jtarraga Dec 13, 2024
de98cda
Merge branch 'TASK-7134' of https://github.com/opencb/java-common-lib…
jtarraga Dec 13, 2024
ac66d66
datastore: fix facet 'format:count(size)' to behave as 'count(form…
jtarraga Dec 14, 2024
421d5ce
datastore: improve MongoDB facets for arrays by using unwind, #TASK-7…
jtarraga Jan 16, 2025
1184be3
datastore: fix MongoDB facet parser, #TASK-7151, #TASK-7134
jtarraga Jan 17, 2025
7255b42
datastore: fix the converter by replacing the escaped dot (TO_REPLACE_DOTS) by '.' in the facet…
jtarraga Jan 20, 2025
0d6f430
datastore: support facets for 'dates' in MongoDB, #TASK-7151, #TASK-7134
jtarraga Jan 30, 2025
0dde11a
datastore: improve MongoDB facet exception message, #TASK-7151, #TASK…
jtarraga Jan 30, 2025
fc5dd92
datastore: fix checkstyle, #TASK-7151, #TASK-7134
jtarraga Jan 30, 2025
8010403
datastore: improve facets for dates, #TASK-7151, #TASK-7134
jtarraga Jan 30, 2025
9de45bb
datastore: rename the separator '_and_' to '_' in MongoDB facet resul…
jtarraga Jan 31, 2025
6413bed
datastore: use '__' as separator, #TASK-7151, #TASK-7134
jtarraga Jan 31, 2025
52ae7ee
datastore: fix sonnar issues, #TASK-7151, #TASK-7134
jtarraga Jan 31, 2025
9243ebb
datastore: fix MongoDB facets when combining multiple fields, #TASK-7…
jtarraga Jan 31, 2025
d3666fc
datastore: add more JUnit tests for MongoDB facets, #TASK-7151, #TASK…
jtarraga Jan 31, 2025
dc4fc6d
datastore: sort dates facets, #TASK-7151, #TASK-7134
jtarraga Feb 3, 2025
35a6e20
datastore: fix checkstyle, #TASK-7151, #TASK-7134
jtarraga Feb 3, 2025
6373935
datastore: use date format '01 Jan 2025', #TASK-7151, #TASK-7134
jtarraga Feb 3, 2025
7185175
Merge branch 'develop' into TASK-7134
jtarraga Feb 3, 2025
5da23b3
datastore: sort facets results in descending order (counts), #TASK
jtarraga Feb 3, 2025
6278507
datastore: improve MongoDB facets for range, #TASK-7151, #TASK-7134
jtarraga Feb 5, 2025
7b69623
datastore: improve MongoDB facets for ranges by filling with zeros, #…
jtarraga Feb 6, 2025
e0b5d3f
datastore: change Long to long, and fix JUnit tests, #TASK-7151, #TAS…
jtarraga Feb 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@
/**
* Created by jtarraga on 09/03/17.
*/

public class FacetField {
private String name;
private long count;
private Long count;
private List<Bucket> buckets;
private String aggregationName;
private List<Double> aggregationValues;
Expand Down Expand Up @@ -68,17 +67,20 @@ public FacetField setName(String name) {
return this;
}

public long getCount() {
public Long getCount() {
return count;
}

public FacetField setCount(long count) {
public FacetField setCount(Long count) {
this.count = count;
return this;
}

public FacetField addCount(long delta) {
this.count += delta;
if (this.count == null) {
this.count = 0L;
}
this.count = this.count.longValue() + delta;
return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -331,31 +331,25 @@ public <T> DataResult<T> aggregate(List<? extends Bson> operations, ComplexTypeC
QueryOptions options) {

long start = startQuery();

DataResult<T> queryResult;
MongoDBIterator<T> iterator = mongoDBNativeQuery.aggregate(operations, converter, options);
// MongoCursor<Document> iterator = output.iterator();
List<T> list = new LinkedList<>();
if (queryResultWriter != null) {
try {
queryResultWriter.open();
if (operations != null && operations.size() > 0) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe use CollectionUtils.isNotEmpty() ??

MongoDBIterator<T> iterator = mongoDBNativeQuery.aggregate(operations, converter, options);
if (queryResultWriter != null) {
try {
queryResultWriter.open();
while (iterator.hasNext()) {
queryResultWriter.write(iterator.next());
}
queryResultWriter.close();
} catch (IOException e) {
throw new RuntimeException(e.getMessage(), e);
}
} else {
while (iterator.hasNext()) {
queryResultWriter.write(iterator.next());
list.add((T) iterator.next());
}
queryResultWriter.close();
} catch (IOException e) {
throw new RuntimeException(e.getMessage(), e);
}
} else {
// if (converter != null) {
// while (iterator.hasNext()) {
// list.add(converter.convertToDataModelType(iterator.next()));
// }
// } else {
while (iterator.hasNext()) {
list.add((T) iterator.next());
}
// }
}
queryResult = endQuery(list, start);
return queryResult;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
package org.opencb.commons.datastore.mongodb;

import org.apache.commons.lang3.StringUtils;
import org.bson.Document;
import org.opencb.commons.datastore.core.ComplexTypeConverter;
import org.opencb.commons.datastore.core.FacetField;

import java.util.*;

import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.*;
import static org.opencb.commons.datastore.mongodb.MongoDBQueryUtils.Accumulator.*;

/**
 * Converts the document produced by a MongoDB facet aggregation into a list of {@link FacetField}.
 *
 * <p>Each entry of the input document is expected to hold a list of sub-documents. The entry key decides how
 * the list is interpreted:
 * <ul>
 *   <li>keys ending with {@code COUNTS_SUFFIX}: one bucket per sub-document, plus the total of all bucket counts;</li>
 *   <li>keys ending with {@code RANGES_SUFFIX}: one count per range boundary, with start, end and step derived
 *       from the boundaries;</li>
 *   <li>any other key: the first sub-document carries the result of a single accumulator
 *       (max, min, avg, stdDevPop or stdDevSamp).</li>
 * </ul>
 */
public class MongoDBDocumentToFacetFieldsConverter implements ComplexTypeConverter<List<FacetField>, Document> {

    /**
     * Builds the facet fields described by the given aggregation result.
     *
     * @param document aggregation result; may be {@code null} or empty
     * @return the converted facets, or an empty list when there is nothing to convert
     * @throws IllegalArgumentException if a bucket has no numeric count, or an accumulator entry contains no
     *                                  known accumulator
     */
    @Override
    public List<FacetField> convertToDataModelType(Document document) {
        if (document == null || document.isEmpty()) {
            return Collections.emptyList();
        }

        List<FacetField> facets = new ArrayList<>();
        for (Map.Entry<String, Object> entry : document.entrySet()) {
            @SuppressWarnings("unchecked")
            List<Document> documentValues = (List<Document>) entry.getValue();
            if (documentValues == null) {
                // Nothing was computed for this facet, so there is nothing to convert
                continue;
            }

            String key = entry.getKey();
            if (key.endsWith(COUNTS_SUFFIX)) {
                facets.add(toCountsFacet(key, documentValues));
            } else if (key.endsWith(RANGES_SUFFIX)) {
                facets.add(toRangeFacet(key, documentValues));
            } else if (!documentValues.isEmpty()) {
                // Guarded: an empty result list used to fail with IndexOutOfBoundsException on get(0)
                FacetField facet = toAccumulatorFacet(documentValues.get(0));
                if (facet != null) {
                    facets.add(facet);
                }
            }
        }
        return facets;
    }

    /** Builds a counts facet: one bucket per sub-document and the sum of all bucket counts as total. */
    private FacetField toCountsFacet(String key, List<Document> documentValues) {
        List<FacetField.Bucket> buckets = new ArrayList<>(documentValues.size());
        long total = 0;
        for (Document documentValue : documentValues) {
            long counter = countOf(documentValue);
            buckets.add(new FacetField.Bucket(bucketLabel(documentValue.get(INTERNAL_ID)), counter, null));
            total += counter;
        }
        String name = key.substring(0, key.length() - COUNTS_SUFFIX.length());
        return new FacetField(name, total, buckets);
    }

    /** Renders a bucket identifier as text; identifiers of unsupported types yield an empty string. */
    private static String bucketLabel(Object internalIdValue) {
        if (internalIdValue instanceof String) {
            return (String) internalIdValue;
        }
        // Number covers Integer and Double as before, and also Long ids, which used to be rendered as ""
        if (internalIdValue instanceof Boolean || internalIdValue instanceof Number) {
            return internalIdValue.toString();
        }
        if (internalIdValue instanceof Document) {
            // Compound identifier: join the values of all the grouped fields
            return StringUtils.join(((Document) internalIdValue).values(), AND_SEPARATOR);
        }
        return "";
    }

    /** Builds a range facet: one count per boundary, with start, end and step taken from the boundaries. */
    private FacetField toRangeFacet(String key, List<Document> documentValues) {
        List<Double> facetFieldValues = new ArrayList<>(documentValues.size());
        Number start = null;
        Number end = null;
        Number step = null;
        Double other = null;
        for (Document value : documentValues) {
            Object internalIdValue = value.get(INTERNAL_ID);
            if (internalIdValue instanceof String && OTHER.equals(internalIdValue)) {
                // Bucket collecting the values that fall outside the requested range
                other = 1.0d * countOf(value);
            } else {
                // Boundaries may be stored as Integer, Long or Double depending on the field type
                Double range = internalIdValue == null ? null : Double.valueOf(((Number) internalIdValue).doubleValue());
                facetFieldValues.add(1.0d * countOf(value));
                if (start == null) {
                    start = range;
                }
                end = range;
                // Compare by value: '!=' on boxed numbers only compares object identity
                if (step == null && start != null && end != null && start.doubleValue() != end.doubleValue()) {
                    step = end.doubleValue() - start.doubleValue();
                }
            }
        }

        String name = key.substring(0, key.length() - RANGES_SUFFIX.length())
                .replace(GenericDocumentComplexConverter.TO_REPLACE_DOTS, ".");
        if (other != null) {
            name += " (counts out of range: " + other + ")";
        }
        return new FacetField(name, "range", facetFieldValues)
                .setStart(start)
                .setEnd(end)
                .setStep(step);
    }

    /**
     * Builds an accumulator facet from the given sub-document.
     *
     * @return the facet, or {@code null} when the accumulator found is not one of the converted ones
     */
    private FacetField toAccumulatorFacet(Document documentValue) {
        MongoDBQueryUtils.Accumulator accumulator = getAccumulator(documentValue);
        switch (accumulator) {
            case max:
            case min:
            case avg:
            case stdDevPop:
            case stdDevSamp: {
                List<Double> fieldValues = new ArrayList<>();
                Object accumulated = documentValue.get(accumulator.name());
                if (accumulated instanceof List) {
                    @SuppressWarnings("unchecked")
                    List<Number> numbers = (List<Number>) accumulated;
                    for (Number number : numbers) {
                        fieldValues.add(number == null ? null : Double.valueOf(number.doubleValue()));
                    }
                } else if (accumulated instanceof Number) {
                    fieldValues.add(((Number) accumulated).doubleValue());
                } else {
                    // Keeps the previous contract: a missing value is stored as null, any other type fails the cast
                    fieldValues.add(documentValue.getDouble(accumulator.name()));
                }
                return new FacetField(documentValue.getString(INTERNAL_ID), accumulator.name(), fieldValues);
            }
            default: {
                // Other accumulators (e.g. count) are not converted here: the entry is skipped, nothing is thrown
                return null;
            }
        }
    }

    /**
     * Reads the count of a bucket as a long, whatever numeric type it is stored with.
     *
     * @throws IllegalArgumentException if the count is missing or is not a number
     */
    private static long countOf(Document bucket) {
        Object value = bucket.get(count.name());
        if (!(value instanceof Number)) {
            throw new IllegalArgumentException("Missing or non-numeric '" + count.name() + "' in facet bucket: "
                    + StringUtils.join(bucket.keySet(), ","));
        }
        return ((Number) value).longValue();
    }

    /**
     * Finds the first key of the document that is the name of an accumulator.
     *
     * @throws IllegalArgumentException if no key of the document is an accumulator name
     */
    private MongoDBQueryUtils.Accumulator getAccumulator(Document document) {
        for (Map.Entry<String, Object> entry : document.entrySet()) {
            try {
                return MongoDBQueryUtils.Accumulator.valueOf(entry.getKey());
            } catch (IllegalArgumentException ignored) {
                // This key is not an accumulator name, try the next one
            }
        }
        throw new IllegalArgumentException("No accumulators found in facet document: " + StringUtils.join(document.keySet(), ",")
                + " Valid accumulator functions: " + StringUtils.join(Arrays.asList(count, max, min, avg, stdDevPop, stdDevSamp), ","));
    }

    /**
     * Not supported: facet fields are never written back to MongoDB by this converter.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Document convertToStorageType(List<FacetField> facetFields) {
        throw new UnsupportedOperationException("Not yet implemented");
    }
}
Loading