Skip to content

Commit 4996aa0

Browse files
committed
Merge remote-tracking branch 'origin/main' into feat-mongodb
# Conflicts:
#   composer.lock
#   src/Database/Database.php
#   src/Database/Validator/Index.php
#   src/Database/Validator/Sequence.php
#   tests/e2e/Adapter/Base.php
#   tests/e2e/Adapter/Scopes/DocumentTests.php
#   tests/e2e/Adapter/Scopes/IndexTests.php
#   tests/unit/Validator/StructureTest.php
2 parents b4e3d55 + d8fc1bb commit 4996aa0

32 files changed

+4778 -419 lines changed

composer.lock

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

postgres.dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ RUN apt-get update \
44
&& apt-get install -y --no-install-recommends \
55
postgresql-16-postgis-3 \
66
postgresql-16-postgis-3-scripts \
7+
postgresql-16-pgvector \
78
&& rm -rf /var/lib/apt/lists/*

src/Database/Adapter.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,13 @@ abstract public function getSupportForGetConnectionId(): bool;
10281028
*/
10291029
abstract public function getSupportForUpserts(): bool;
10301030

1031+
/**
1032+
* Is vector type supported?
1033+
*
1034+
* @return bool
1035+
*/
1036+
abstract public function getSupportForVectors(): bool;
1037+
10311038
/**
10321039
* Is Cache Fallback supported?
10331040
*

src/Database/Adapter/MariaDB.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ public function createCollection(string $name, array $attributes = [], array $in
139139
$indexAttributes[$nested] = "`{$indexAttribute}`{$indexLength} {$indexOrder}";
140140

141141
if (!empty($hash[$indexAttribute]['array']) && $this->getSupportForCastIndexArray()) {
142-
$indexAttributes[$nested] = '(CAST(`' . $indexAttribute . '` AS char(' . Database::ARRAY_INDEX_LENGTH . ') ARRAY))';
142+
$indexAttributes[$nested] = '(CAST(`' . $indexAttribute . '` AS char(' . Database::MAX_ARRAY_INDEX_LENGTH . ') ARRAY))';
143143
}
144144
}
145145

@@ -746,7 +746,7 @@ public function createIndex(string $collection, string $id, string $type, array
746746
$attributes[$i] = "`{$attr}`{$length} {$order}";
747747

748748
if ($this->getSupportForCastIndexArray() && !empty($attribute['array'])) {
749-
$attributes[$i] = '(CAST(`' . $attr . '` AS char(' . Database::ARRAY_INDEX_LENGTH . ') ARRAY))';
749+
$attributes[$i] = '(CAST(`' . $attr . '` AS char(' . Database::MAX_ARRAY_INDEX_LENGTH . ') ARRAY))';
750750
}
751751
}
752752

@@ -1890,7 +1890,7 @@ public function getSupportForDistanceBetweenMultiDimensionGeometryInMeters(): bo
18901890

18911891
public function getSpatialSQLType(string $type, bool $required): string
18921892
{
1893-
$srid = Database::SRID;
1893+
$srid = Database::DEFAULT_SRID;
18941894
$nullability = '';
18951895

18961896
if (!$this->getSupportForSpatialIndexNull()) {

src/Database/Adapter/Mongo.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2966,6 +2966,11 @@ public function getSupportForOrderRandom(): bool
29662966
return false;
29672967
}
29682968

2969+
public function getSupportForVectors(): bool
2970+
{
2971+
return false;
2972+
}
2973+
29692974
/**
29702975
* Flattens the array.
29712976
*

src/Database/Adapter/MySQL.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ protected function handleDistanceSpatialQueries(Query $query, array &$binds, str
117117
}
118118

119119
if ($useMeters) {
120-
$attr = "ST_SRID({$alias}.{$attribute}, " . Database::SRID . ")";
120+
$attr = "ST_SRID({$alias}.{$attribute}, " . Database::DEFAULT_SRID . ")";
121121
$geom = $this->getSpatialGeomFromText(":{$placeholder}_0", null);
122122
return "ST_Distance({$attr}, {$geom}, 'metre') {$operator} :{$placeholder}_1";
123123
}

src/Database/Adapter/Pool.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,11 @@ public function getSupportForUpserts(): bool
410410
return $this->delegate(__FUNCTION__, \func_get_args());
411411
}
412412

413+
public function getSupportForVectors(): bool
414+
{
415+
return $this->delegate(__FUNCTION__, \func_get_args());
416+
}
417+
413418
public function getSupportForCacheSkipOnFailure(): bool
414419
{
415420
return $this->delegate(__FUNCTION__, \func_get_args());

src/Database/Adapter/Postgres.php

Lines changed: 106 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -147,15 +147,16 @@ public function create(string $name): bool
147147
->prepare($sql)
148148
->execute();
149149

150-
// extension for supporting spatial types
151-
$this->getPDO()->prepare('CREATE EXTENSION IF NOT EXISTS postgis;')->execute();
150+
// Enable extensions
151+
$this->getPDO()->prepare('CREATE EXTENSION IF NOT EXISTS postgis')->execute();
152+
$this->getPDO()->prepare('CREATE EXTENSION IF NOT EXISTS vector')->execute();
152153

153154
$collation = "
154-
CREATE COLLATION IF NOT EXISTS utf8_ci (
155+
CREATE COLLATION IF NOT EXISTS utf8_ci_ai (
155156
provider = icu,
156-
locale = 'und-u-ks-primary',
157+
locale = 'und-u-ks-level1',
157158
deterministic = false
158-
);
159+
)
159160
";
160161
$this->getPDO()->prepare($collation)->execute();
161162
return $dbCreation;
@@ -193,9 +194,6 @@ public function createCollection(string $name, array $attributes = [], array $in
193194
$namespace = $this->getNamespace();
194195
$id = $this->filter($name);
195196

196-
/** @var array<string> $attributeStrings */
197-
$attributeStrings = [];
198-
199197
/** @var array<string> $attributeStrings */
200198
$attributeStrings = [];
201199
foreach ($attributes as $attribute) {
@@ -244,14 +242,14 @@ public function createCollection(string $name, array $attributes = [], array $in
244242

245243
if ($this->sharedTables) {
246244
$collection .= "
247-
CREATE UNIQUE INDEX \"{$namespace}_{$this->tenant}_{$id}_uid\" ON {$this->getSQLTable($id)} (\"_uid\", \"_tenant\");
245+
CREATE UNIQUE INDEX \"{$namespace}_{$this->tenant}_{$id}_uid\" ON {$this->getSQLTable($id)} (\"_uid\" COLLATE utf8_ci_ai, \"_tenant\");
248246
CREATE INDEX \"{$namespace}_{$this->tenant}_{$id}_created\" ON {$this->getSQLTable($id)} (_tenant, \"_createdAt\");
249247
CREATE INDEX \"{$namespace}_{$this->tenant}_{$id}_updated\" ON {$this->getSQLTable($id)} (_tenant, \"_updatedAt\");
250248
CREATE INDEX \"{$namespace}_{$this->tenant}_{$id}_tenant_id\" ON {$this->getSQLTable($id)} (_tenant, _id);
251249
";
252250
} else {
253251
$collection .= "
254-
CREATE UNIQUE INDEX \"{$namespace}_{$id}_uid\" ON {$this->getSQLTable($id)} (\"_uid\");
252+
CREATE UNIQUE INDEX \"{$namespace}_{$id}_uid\" ON {$this->getSQLTable($id)} (\"_uid\" COLLATE utf8_ci_ai);
255253
CREATE INDEX \"{$namespace}_{$id}_created\" ON {$this->getSQLTable($id)} (\"_createdAt\");
256254
CREATE INDEX \"{$namespace}_{$id}_updated\" ON {$this->getSQLTable($id)} (\"_updatedAt\");
257255
";
@@ -279,7 +277,7 @@ public function createCollection(string $name, array $attributes = [], array $in
279277
} else {
280278
$permissions .= "
281279
CREATE UNIQUE INDEX \"{$namespace}_{$id}_ukey\"
282-
ON {$this->getSQLTable($id . '_perms')} USING btree (_document,_type,_permission);
280+
ON {$this->getSQLTable($id . '_perms')} USING btree (_document COLLATE utf8_ci_ai,_type,_permission);
283281
CREATE INDEX \"{$namespace}_{$id}_permission\"
284282
ON {$this->getSQLTable($id . '_perms')} USING btree (_permission,_type);
285283
";
@@ -443,6 +441,16 @@ public function analyzeCollection(string $collection): bool
443441
*/
444442
public function createAttribute(string $collection, string $id, string $type, int $size, bool $signed = true, bool $array = false, bool $required = false): bool
445443
{
444+
// Ensure pgvector extension is installed for vector types
445+
if ($type === Database::VAR_VECTOR) {
446+
if ($size <= 0) {
447+
throw new DatabaseException('Vector dimensions must be a positive integer');
448+
}
449+
if ($size > Database::MAX_VECTOR_DIMENSIONS) {
450+
throw new DatabaseException('Vector dimensions cannot exceed ' . Database::MAX_VECTOR_DIMENSIONS);
451+
}
452+
}
453+
446454
$name = $this->filter($collection);
447455
$id = $this->filter($id);
448456
$type = $this->getSQLType($type, $size, $signed, $array, $required);
@@ -543,7 +551,23 @@ public function updateAttribute(string $collection, string $id, string $type, in
543551
$name = $this->filter($collection);
544552
$id = $this->filter($id);
545553
$newKey = empty($newKey) ? null : $this->filter($newKey);
546-
$type = $this->getSQLType($type, $size, $signed, $array, $required);
554+
555+
if ($type === Database::VAR_VECTOR) {
556+
if ($size <= 0) {
557+
throw new DatabaseException('Vector dimensions must be a positive integer');
558+
}
559+
if ($size > Database::MAX_VECTOR_DIMENSIONS) {
560+
throw new DatabaseException('Vector dimensions cannot exceed ' . Database::MAX_VECTOR_DIMENSIONS);
561+
}
562+
}
563+
564+
$type = $this->getSQLType(
565+
$type,
566+
$size,
567+
$signed,
568+
$array,
569+
$required,
570+
);
547571

548572
if ($type == 'TIMESTAMP(3)') {
549573
$type = "TIMESTAMP(3) without time zone USING TO_TIMESTAMP(\"$id\", 'YYYY-MM-DD HH24:MI:SS.MS')";
@@ -841,7 +865,6 @@ public function createIndex(string $collection, string $id, string $type, array
841865
$collection = $this->filter($collection);
842866
$id = $this->filter($id);
843867

844-
845868
foreach ($attributes as $i => $attr) {
846869
$order = empty($orders[$i]) || Database::INDEX_FULLTEXT === $type ? '' : $orders[$i];
847870

@@ -852,42 +875,38 @@ public function createIndex(string $collection, string $id, string $type, array
852875
default => $this->filter($attr),
853876
};
854877

855-
if (Database::INDEX_UNIQUE === $type) {
856-
if (isset($indexAttributeTypes[$attr]) && $indexAttributeTypes[$attr] === Database::VAR_STRING) {
857-
$attributes[$i] = "\"{$attr}\" COLLATE utf8_ci {$order}";
858-
} else {
859-
$attributes[$i] = "\"{$attr}\" {$order}";
860-
}
861-
} else {
862-
$attributes[$i] = "\"{$attr}\" {$order}";
863-
}
878+
$attributes[$i] = "\"{$attr}\" {$order}";
864879
}
865880

866881
$sqlType = match ($type) {
867882
Database::INDEX_KEY,
868-
Database::INDEX_FULLTEXT => 'INDEX',
883+
Database::INDEX_FULLTEXT,
884+
Database::INDEX_SPATIAL,
885+
Database::INDEX_HNSW_EUCLIDEAN,
886+
Database::INDEX_HNSW_COSINE,
887+
Database::INDEX_HNSW_DOT => 'INDEX',
869888
Database::INDEX_UNIQUE => 'UNIQUE INDEX',
870-
Database::INDEX_SPATIAL => 'INDEX',
871-
default => throw new DatabaseException('Unknown index type: ' . $type . '. Must be one of ' . Database::INDEX_KEY . ', ' . Database::INDEX_UNIQUE . ', ' . Database::INDEX_FULLTEXT . ', ' . Database::INDEX_SPATIAL),
889+
default => throw new DatabaseException('Unknown index type: ' . $type . '. Must be one of ' . Database::INDEX_KEY . ', ' . Database::INDEX_UNIQUE . ', ' . Database::INDEX_FULLTEXT . ', ' . Database::INDEX_SPATIAL . ', ' . Database::INDEX_HNSW_EUCLIDEAN . ', ' . Database::INDEX_HNSW_COSINE . ', ' . Database::INDEX_HNSW_DOT),
872890
};
873891

874892
$key = "\"{$this->getNamespace()}_{$this->tenant}_{$collection}_{$id}\"";
875893
$attributes = \implode(', ', $attributes);
876894

877-
// Spatial indexes can't include _tenant because GIST indexes require all columns to have compatible operator classes
878-
if ($this->sharedTables && $type !== Database::INDEX_FULLTEXT && $type !== Database::INDEX_SPATIAL) {
895+
if ($this->sharedTables && \in_array($type, [Database::INDEX_KEY, Database::INDEX_UNIQUE])) {
879896
// Add tenant as first index column for best performance
880897
$attributes = "_tenant, {$attributes}";
881898
}
882899

883900
$sql = "CREATE {$sqlType} {$key} ON {$this->getSQLTable($collection)}";
884901

885-
// Add USING GIST for spatial indexes
886-
if ($type === Database::INDEX_SPATIAL) {
887-
$sql .= " USING GIST";
888-
}
889-
890-
$sql .= " ({$attributes});";
902+
// Add USING clause for special index types
903+
$sql .= match ($type) {
904+
Database::INDEX_SPATIAL => " USING GIST ({$attributes})",
905+
Database::INDEX_HNSW_EUCLIDEAN => " USING HNSW ({$attributes} vector_l2_ops)",
906+
Database::INDEX_HNSW_COSINE => " USING HNSW ({$attributes} vector_cosine_ops)",
907+
Database::INDEX_HNSW_DOT => " USING HNSW ({$attributes} vector_ip_ops)",
908+
default => " ({$attributes})",
909+
};
891910

892911
$sql = $this->trigger(Database::EVENT_INDEX_CREATE, $sql);
893912

@@ -908,7 +927,7 @@ public function createIndex(string $collection, string $id, string $type, array
908927
*/
909928
public function deleteIndex(string $collection, string $id): bool
910929
{
911-
$name = $this->filter($collection);
930+
$collection = $this->filter($collection);
912931
$id = $this->filter($id);
913932
$schemaName = $this->getDatabase();
914933

@@ -1488,7 +1507,7 @@ protected function handleDistanceSpatialQueries(Query $query, array &$binds, str
14881507

14891508
if ($meters) {
14901509
$attr = "({$alias}.{$attribute}::geography)";
1491-
$geom = "ST_SetSRID(" . $this->getSpatialGeomFromText(":{$placeholder}_0", null) . ", " . Database::SRID . ")::geography";
1510+
$geom = "ST_SetSRID(" . $this->getSpatialGeomFromText(":{$placeholder}_0", null) . ", " . Database::DEFAULT_SRID . ")::geography";
14921511
return "ST_Distance({$attr}, {$geom}) {$operator} :{$placeholder}_1";
14931512
}
14941513

@@ -1613,6 +1632,11 @@ protected function getSQLCondition(Query $query, array &$binds): string
16131632
$binds[":{$placeholder}_0"] = $this->getFulltextValue($query->getValue());
16141633
return "NOT (to_tsvector(regexp_replace({$attribute}, '[^\w]+',' ','g')) @@ websearch_to_tsquery(:{$placeholder}_0))";
16151634

1635+
case Query::TYPE_VECTOR_DOT:
1636+
case Query::TYPE_VECTOR_COSINE:
1637+
case Query::TYPE_VECTOR_EUCLIDEAN:
1638+
return ''; // Handled in ORDER BY clause
1639+
16161640
case Query::TYPE_BETWEEN:
16171641
$binds[":{$placeholder}_0"] = $query->getValues()[0];
16181642
$binds[":{$placeholder}_1"] = $query->getValues()[1];
@@ -1631,8 +1655,6 @@ protected function getSQLCondition(Query $query, array &$binds): string
16311655
case Query::TYPE_NOT_CONTAINS:
16321656
if ($query->onArray()) {
16331657
$operator = '@>';
1634-
} else {
1635-
$operator = null;
16361658
}
16371659

16381660
// no break
@@ -1673,6 +1695,37 @@ protected function getSQLCondition(Query $query, array &$binds): string
16731695
}
16741696
}
16751697

1698+
/**
1699+
* Get vector distance calculation for ORDER BY clause
1700+
*
1701+
* @param Query $query
1702+
* @param array<string, mixed> $binds
1703+
* @param string $alias
1704+
* @return string|null
1705+
* @throws DatabaseException
1706+
*/
1707+
protected function getVectorDistanceOrder(Query $query, array &$binds, string $alias): ?string
1708+
{
1709+
$query->setAttribute($this->getInternalKeyForAttribute($query->getAttribute()));
1710+
1711+
$attribute = $this->filter($query->getAttribute());
1712+
$attribute = $this->quote($attribute);
1713+
$alias = $this->quote($alias);
1714+
$placeholder = ID::unique();
1715+
1716+
$values = $query->getValues();
1717+
$vectorArray = $values[0] ?? [];
1718+
$vector = \json_encode(\array_map(\floatval(...), $vectorArray));
1719+
$binds[":vector_{$placeholder}"] = $vector;
1720+
1721+
return match ($query->getMethod()) {
1722+
Query::TYPE_VECTOR_DOT => "({$alias}.{$attribute} <#> :vector_{$placeholder}::vector)",
1723+
Query::TYPE_VECTOR_COSINE => "({$alias}.{$attribute} <=> :vector_{$placeholder}::vector)",
1724+
Query::TYPE_VECTOR_EUCLIDEAN => "({$alias}.{$attribute} <-> :vector_{$placeholder}::vector)",
1725+
default => null,
1726+
};
1727+
}
1728+
16761729
/**
16771730
* @param string $value
16781731
* @return string
@@ -1740,15 +1793,17 @@ protected function getSQLType(string $type, int $size, bool $signed = true, bool
17401793
case Database::VAR_DATETIME:
17411794
return 'TIMESTAMP(3)';
17421795

1743-
// in all other DB engines, 4326 is the default SRID
17441796
case Database::VAR_POINT:
1745-
return 'GEOMETRY(POINT,' . Database::SRID . ')';
1797+
return 'GEOMETRY(POINT,' . Database::DEFAULT_SRID . ')';
17461798

17471799
case Database::VAR_LINESTRING:
1748-
return 'GEOMETRY(LINESTRING,' . Database::SRID . ')';
1800+
return 'GEOMETRY(LINESTRING,' . Database::DEFAULT_SRID . ')';
17491801

17501802
case Database::VAR_POLYGON:
1751-
return 'GEOMETRY(POLYGON,' . Database::SRID . ')';
1803+
return 'GEOMETRY(POLYGON,' . Database::DEFAULT_SRID . ')';
1804+
1805+
case Database::VAR_VECTOR:
1806+
return "VECTOR({$size})";
17521807

17531808
default:
17541809
throw new DatabaseException('Unknown Type: ' . $type . '. Must be one of ' . Database::VAR_STRING . ', ' . Database::VAR_INTEGER . ', ' . Database::VAR_FLOAT . ', ' . Database::VAR_BOOLEAN . ', ' . Database::VAR_DATETIME . ', ' . Database::VAR_RELATIONSHIP . ', ' . Database::VAR_POINT . ', ' . Database::VAR_LINESTRING . ', ' . Database::VAR_POLYGON);
@@ -1897,6 +1952,16 @@ public function getSupportForUpserts(): bool
18971952
return true;
18981953
}
18991954

1955+
/**
1956+
* Is vector type supported?
1957+
*
1958+
* @return bool
1959+
*/
1960+
public function getSupportForVectors(): bool
1961+
{
1962+
return true;
1963+
}
1964+
19001965
/**
19011966
* @return string
19021967
*/

0 commit comments

Comments
 (0)