Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 98 additions & 7 deletions src/backend/distributed/deparser/deparse_statistics_stmts.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,15 @@
#include "catalog/namespace.h"
#include "lib/stringinfo.h"
#include "nodes/nodes.h"
#include "parser/parse_expr.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/ruleutils.h"

#include "pg_version_constants.h"

#include "distributed/citus_ruleutils.h"
#include "distributed/commands.h"
#include "distributed/deparser.h"
#include "distributed/listutils.h"
#include "distributed/relay_utility.h"
Expand All @@ -35,6 +39,8 @@ static void AppendStatTypes(StringInfo buf, CreateStatsStmt *stmt);
static void AppendColumnNames(StringInfo buf, CreateStatsStmt *stmt);
static void AppendTableName(StringInfo buf, CreateStatsStmt *stmt);

bool EnableUnsafeStatisticsExpressions = false;

char *
DeparseCreateStatisticsStmt(Node *node)
{
Expand Down Expand Up @@ -231,6 +237,42 @@ AppendStatTypes(StringInfo buf, CreateStatsStmt *stmt)
}


/* See ruleutils.c in postgres for the logic here. */
static bool
looks_like_function(Node *node)
{
if (node == NULL)
{
return false; /* probably shouldn't happen */
}
switch (nodeTag(node))
{
case T_FuncExpr:
{
/* OK, unless it's going to deparse as a cast */
return (((FuncExpr *) node)->funcformat == COERCE_EXPLICIT_CALL ||
((FuncExpr *) node)->funcformat == COERCE_SQL_SYNTAX);
}

case T_NullIfExpr:
case T_CoalesceExpr:
case T_MinMaxExpr:
case T_SQLValueFunction:
case T_XmlExpr:
{
/* these are all accepted by func_expr_common_subexpr */
return true;
}

default:
{
break;
}
}
return false;
}


static void
AppendColumnNames(StringInfo buf, CreateStatsStmt *stmt)
{
Expand All @@ -240,15 +282,64 @@ AppendColumnNames(StringInfo buf, CreateStatsStmt *stmt)
{
if (!column->name)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("only simple column references are allowed "
"in CREATE STATISTICS")));
if (EnableUnsafeStatisticsExpressions)
{
/*
* Since these expressions are parser statements, we first call
* transform to get the transformed Expr tree, and then deparse
* the transformed tree. This is similar to the logic found in
* deparse_table_stmts for check constraints.
*/
if (list_length(stmt->relations) != 1)
{
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg(
"cannot use expressions in CREATE STATISTICS with multiple relations")));
}

RangeVar *rel = (RangeVar *) linitial(stmt->relations);
bool missingOk = false;
Oid relOid = RangeVarGetRelid(rel, AccessShareLock, missingOk);

/* Add table name to the name space in parse state. Otherwise column names
* cannot be found.
*/
Relation relation = table_open(relOid, AccessShareLock);
ParseState *pstate = make_parsestate(NULL);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A take-it-or-leave-it suggestion: acquire the table lock and create the ParseState once at the top of the function so they're done once regardless of number of expressions in the CREATE STATISTICS. Not essential as we're in DDL, and it may not be worth complicating the code for, but just putting forward for consideration.

AddRangeTableEntryToQueryCompat(pstate, relation);
Node *exprCooked = transformExpr(pstate, column->expr,
EXPR_KIND_STATS_EXPRESSION);

char *relationName = get_rel_name(relOid);
List *relationCtx = deparse_context_for(relationName, relOid);

char *exprSql = deparse_expression(exprCooked, relationCtx, false, false);
relation_close(relation, NoLock);

/* Need parens if it's not a bare function call */
if (looks_like_function(exprCooked))
{
appendStringInfoString(buf, exprSql);
}
else
{
appendStringInfo(buf, "(%s)", exprSql);
}
}
else
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("only simple column references are allowed "
"in CREATE STATISTICS")));
}
}
else
{
const char *columnName = quote_identifier(column->name);

const char *columnName = quote_identifier(column->name);

appendStringInfoString(buf, columnName);
appendStringInfoString(buf, columnName);
}

if (column != llast(stmt->exprs))
{
Expand Down
14 changes: 14 additions & 0 deletions src/backend/distributed/shared_library_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -1605,6 +1605,20 @@ RegisterCitusConfigVariables(void)
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);

DefineCustomBoolVariable(
"citus.enable_unsafe_statistics_expressions",
gettext_noop("Enables the use of expressions in CREATE STATISTICS calls"),
gettext_noop(
"CREATE STATISTICS in citus currently only supports column name references."
"Enabling this GUC allows the use of expressions (introduced in PG14),"
"but the additional constraint validation on the expression to fail invalid expressions"
"is not validated, and so it is advised to use with caution."),
&EnableUnsafeStatisticsExpressions,
false,
PGC_USERSET,
GUC_STANDARD,
NULL, NULL, NULL);

DefineCustomBoolVariable(
"citus.enable_unsafe_triggers",
gettext_noop("Enables arbitrary triggers on distributed tables which may cause "
Expand Down
1 change: 1 addition & 0 deletions src/include/distributed/commands.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ extern bool EnableLocalReferenceForeignKeys;
extern bool AllowUnsafeConstraints;

extern bool EnableUnsafeTriggers;
extern bool EnableUnsafeStatisticsExpressions;

extern int MaxMatViewSizeToAutoRecreate;

Expand Down
122 changes: 88 additions & 34 deletions src/test/regress/expected/propagate_statistics.out
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,28 @@ SELECT create_distributed_table ('test_stats3','a');

(1 row)

-- test creating custom stats with expressions and distributing it.
CREATE TABLE sc2.test_stats_expr (
a int,
b int,
c float8
);
CREATE STATISTICS s_expr ON (a + b / 2) FROM sc2.test_stats_expr;
-- succeeds since we replicate it into the shards.
SELECT create_distributed_table('sc2.test_stats_expr', 'a');
create_distributed_table
---------------------------------------------------------------------

(1 row)

-- fails
CREATE STATISTICS s_expr_post ON (a - (b * 2)),round(c) FROM sc2.test_stats_expr;
ERROR: only simple column references are allowed in CREATE STATISTICS
-- succeeds.
set citus.enable_unsafe_statistics_expressions TO on;
-- add another expression stats on the distributed table should work.
CREATE STATISTICS s_expr_post ON (a - (b * 2)), round(c) FROM sc2.test_stats_expr;
reset citus.enable_unsafe_statistics_expressions;
-- test dropping statistics
CREATE TABLE test_stats4 (
a int,
Expand Down Expand Up @@ -112,7 +134,7 @@ WHERE stxnamespace IN (
)
AND stxname SIMILAR TO '%\_\d+'
ORDER BY stxname ASC;
stxname
stxname
---------------------------------------------------------------------
neW'Stat_980096
neW'Stat_980098
Expand Down Expand Up @@ -162,22 +184,54 @@ ORDER BY stxname ASC;
s2_980058
s2_980060
s2_980062
s9_980129
s9_980131
s9_980133
s9_980135
s9_980137
s9_980139
s9_980141
s9_980143
s9_980145
s9_980147
s9_980149
s9_980151
s9_980153
s9_980155
s9_980157
s9_980159
s9_980161
s9_980163
s9_980165
s9_980167
s9_980169
s9_980171
s9_980173
s9_980175
s9_980177
s9_980179
s9_980181
s9_980183
s9_980185
s9_980187
s9_980189
s9_980191
s_expr_980128
s_expr_980130
s_expr_980132
s_expr_980134
s_expr_980136
s_expr_980138
s_expr_980140
s_expr_980142
s_expr_980144
s_expr_980146
s_expr_980148
s_expr_980150
s_expr_980152
s_expr_980154
s_expr_980156
s_expr_980158
s_expr_post_980128
s_expr_post_980130
s_expr_post_980132
s_expr_post_980134
s_expr_post_980136
s_expr_post_980138
s_expr_post_980140
s_expr_post_980142
s_expr_post_980144
s_expr_post_980146
s_expr_post_980148
s_expr_post_980150
s_expr_post_980152
s_expr_post_980154
s_expr_post_980156
s_expr_post_980158
st1_new_980064
st1_new_980066
st1_new_980068
Expand All @@ -194,23 +248,23 @@ ORDER BY stxname ASC;
st1_new_980090
st1_new_980092
st1_new_980094
stats_xy_980161
stats_xy_980163
stats_xy_980165
stats_xy_980167
stats_xy_980169
stats_xy_980171
stats_xy_980173
stats_xy_980175
stats_xy_980177
stats_xy_980179
stats_xy_980181
stats_xy_980183
stats_xy_980185
stats_xy_980187
stats_xy_980189
stats_xy_980191
(96 rows)
stats_xy_980193
stats_xy_980195
stats_xy_980197
stats_xy_980199
stats_xy_980201
stats_xy_980203
stats_xy_980205
stats_xy_980207
stats_xy_980209
stats_xy_980211
stats_xy_980213
stats_xy_980215
stats_xy_980217
stats_xy_980219
stats_xy_980221
stats_xy_980223
(128 rows)

SELECT count(DISTINCT stxnamespace)
FROM pg_statistic_ext
Expand Down
21 changes: 21 additions & 0 deletions src/test/regress/sql/propagate_statistics.sql
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,27 @@ CREATE SCHEMA sc2;
CREATE STATISTICS sc2."neW'Stat" ON a,b FROM test_stats3;
SELECT create_distributed_table ('test_stats3','a');

-- test creating custom stats with expressions and distributing it.
CREATE TABLE sc2.test_stats_expr (
a int,
b int,
c float8
);
CREATE STATISTICS s_expr ON (a + b / 2) FROM sc2.test_stats_expr;

-- succeeds since we replicate it into the shards.
SELECT create_distributed_table('sc2.test_stats_expr', 'a');

-- fails
CREATE STATISTICS s_expr_post ON (a - (b * 2)),round(c) FROM sc2.test_stats_expr;

-- succeeds.
set citus.enable_unsafe_statistics_expressions TO on;

-- add another expression stats on the distributed table should work.
CREATE STATISTICS s_expr_post ON (a - (b * 2)), round(c) FROM sc2.test_stats_expr;
reset citus.enable_unsafe_statistics_expressions;

Comment thread
visridha marked this conversation as resolved.
-- test dropping statistics
CREATE TABLE test_stats4 (
a int,
Expand Down
Loading