Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion src/backend/gpopt/gpdbwrappers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1613,7 +1613,24 @@ gpdb::IsOpHashJoinable(Oid opno, Oid inputtype)
GP_WRAP_START;
{
/* catalog tables: pg_operator */
return op_hashjoinable(opno, inputtype);
if (!op_hashjoinable(opno, inputtype))
return false;

/*
* Even if oprcanhash is true, we need to verify that hash functions
* actually exist for this operator. This is because oprcanhash can be
* set to true while the operator is only registered in a btree opfamily
* and not in a hash opfamily, which would cause execution-time errors
* when trying to build hash tables.
*
* See get_op_hash_functions() in lsyscache.c which requires operators
* to be registered in a hash opfamily (amopmethod == HASH_AM_OID).
*/
RegProcedure hash_proc;
if (!get_op_hash_functions(opno, &hash_proc, NULL))
return false;

return true;
}
GP_WRAP_END;
return false;
Expand Down
166 changes: 166 additions & 0 deletions src/test/regress/expected/gp_hashagg.out
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,169 @@ EXPLAIN (COSTS OFF, VERBOSE) :qry;

(1 row)

drop schema if exists tinc_base_types cascade;
NOTICE: schema "tinc_base_types" does not exist, skipping
create schema tinc_base_types;
set search_path=tinc_base_types;
CREATE FUNCTION int42_in(cstring)
RETURNS int42
AS 'int4in'
LANGUAGE internal IMMUTABLE STRICT;
NOTICE: type "int42" is not yet defined
DETAIL: Creating a shell type definition.
CREATE FUNCTION int42_out(int42)
RETURNS cstring
AS 'int4out'
LANGUAGE internal IMMUTABLE STRICT;
NOTICE: argument type int42 is only a shell
CREATE TYPE int42 (
internallength = 4,
input = int42_in,
output = int42_out,
alignment = int4,
default = 42,
passedbyvalue
);
CREATE TABLE aTABLE(k int, a int42);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'k' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO aTABLE VALUES(1, '21');
INSERT INTO aTABLE VALUES(2, '22');
INSERT INTO aTABLE VALUES(3, '23');
INSERT INTO aTABLE VALUES(4, '24');
SELECT * FROM aTABLE;
k | a
---+----
2 | 22
3 | 23
4 | 24
1 | 21
(4 rows)

CREATE OR REPLACE FUNCTION my_lt(int42, int42)
RETURNS boolean AS
'int4lt'
language internal;
CREATE OR REPLACE FUNCTION my_lteq(int42, int42)
RETURNS boolean AS
'int4le'
language internal;
CREATE OR REPLACE FUNCTION my_gt(int42, int42)
RETURNS boolean AS
'int4gt'
language internal;
CREATE OR REPLACE FUNCTION my_gteq(int42, int42)
RETURNS boolean AS
'int4ge'
language internal;
CREATE OR REPLACE FUNCTION my_eq(int42, int42)
RETURNS boolean AS
'int4eq'
language internal;
CREATE OPERATOR < (
leftarg = int42,
rightarg = int42,
procedure = my_lt,
hashes
);
CREATE OPERATOR <= (
leftarg = int42,
rightarg = int42,
procedure = my_lteq,
hashes
);
CREATE OPERATOR = (
leftarg = int42,
rightarg = int42,
procedure = my_eq,
hashes
);
CREATE OPERATOR > (
leftarg = int42,
rightarg = int42,
procedure = my_gt,
hashes
);
CREATE OPERATOR >= (
leftarg = int42,
rightarg = int42,
procedure = my_gteq,
hashes
);
CREATE OR REPLACE FUNCTION my_comp_func(int42, int42)
RETURNS int AS
$$
BEGIN
IF $1 < $2 THEN
return -1;
ELSIF $1 = $2 THEN
return 0;
ELSE
return 1;
END IF;
END;
$$ LANGUAGE PLPGSQL;
CREATE OPERATOR CLASS my_operator_class_comp_type
DEFAULT
FOR TYPE int42 USING btree
AS
OPERATOR 1 <,
OPERATOR 2 <=,
OPERATOR 3 =,
OPERATOR 4 >=,
OPERATOR 5 >,
FUNCTION 1 my_comp_func(int42, int42);
CREATE OPERATOR CLASS my_operator_class_comp_type_hash_lt
DEFAULT
FOR TYPE int42 USING hash
AS
OPERATOR 1 <,
FUNCTION 1 my_comp_func(int42, int42);
ERROR: hash function 1 must have one argument
CREATE OPERATOR CLASS my_operator_class_comp_type_hash_eq
FOR TYPE int42 USING hash
AS
OPERATOR 1 =,
FUNCTION 1 my_comp_func(int42, int42);
ERROR: hash function 1 must have one argument
select * from atable group by a,k;
k | a
---+----
1 | 21
2 | 22
3 | 23
4 | 24
(4 rows)

-- Before fix: This would fail at runtime with:
-- ERROR: could not find hash function for hash operator XXXXX (execGrouping.c:118)
-- After fix: ORCA should detect missing hash function at planning time
-- and either use GroupAgg or report a clear planning error
SET optimizer_enable_groupagg=off;
select * from atable group by a,k;
k | a
---+----
1 | 21
2 | 22
3 | 23
4 | 24
(4 rows)

drop schema if exists tinc_base_types cascade;
NOTICE: drop cascades to 16 other objects
DETAIL: drop cascades to function int42_out(int42)
drop cascades to type int42
drop cascades to function int42_in(cstring)
drop cascades to table atable
drop cascades to function my_lt(int42,int42)
drop cascades to function my_lteq(int42,int42)
drop cascades to function my_gt(int42,int42)
drop cascades to function my_gteq(int42,int42)
drop cascades to function my_eq(int42,int42)
drop cascades to operator <(int42,int42)
drop cascades to operator <=(int42,int42)
drop cascades to operator =(int42,int42)
drop cascades to operator >(int42,int42)
drop cascades to operator >=(int42,int42)
drop cascades to function my_comp_func(int42,int42)
drop cascades to operator family my_operator_class_comp_type for access method btree
138 changes: 138 additions & 0 deletions src/test/regress/sql/gp_hashagg.sql
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,141 @@ HAVING max(c) = '31'
$$ AS qry \gset
EXPLAIN (COSTS OFF, VERBOSE) :qry;
:qry;

drop schema if exists tinc_base_types cascade;

create schema tinc_base_types;
set search_path=tinc_base_types;


CREATE FUNCTION int42_in(cstring)
RETURNS int42
AS 'int4in'
LANGUAGE internal IMMUTABLE STRICT;
CREATE FUNCTION int42_out(int42)
RETURNS cstring
AS 'int4out'
LANGUAGE internal IMMUTABLE STRICT;
CREATE TYPE int42 (
internallength = 4,
input = int42_in,
output = int42_out,
alignment = int4,
default = 42,
passedbyvalue
);

CREATE TABLE aTABLE(k int, a int42);
INSERT INTO aTABLE VALUES(1, '21');
INSERT INTO aTABLE VALUES(2, '22');
INSERT INTO aTABLE VALUES(3, '23');
INSERT INTO aTABLE VALUES(4, '24');
SELECT * FROM aTABLE;

CREATE OR REPLACE FUNCTION my_lt(int42, int42)
RETURNS boolean AS
'int4lt'
language internal;

CREATE OR REPLACE FUNCTION my_lteq(int42, int42)
RETURNS boolean AS
'int4le'
language internal;

CREATE OR REPLACE FUNCTION my_gt(int42, int42)
RETURNS boolean AS
'int4gt'
language internal;

CREATE OR REPLACE FUNCTION my_gteq(int42, int42)
RETURNS boolean AS
'int4ge'
language internal;

CREATE OR REPLACE FUNCTION my_eq(int42, int42)
RETURNS boolean AS
'int4eq'
language internal;

CREATE OPERATOR < (
leftarg = int42,
rightarg = int42,
procedure = my_lt,
hashes
);

CREATE OPERATOR <= (
leftarg = int42,
rightarg = int42,
procedure = my_lteq,
hashes
);

CREATE OPERATOR = (
leftarg = int42,
rightarg = int42,
procedure = my_eq,
hashes
);

CREATE OPERATOR > (
leftarg = int42,
rightarg = int42,
procedure = my_gt,
hashes
);

CREATE OPERATOR >= (
leftarg = int42,
rightarg = int42,
procedure = my_gteq,
hashes
);

CREATE OR REPLACE FUNCTION my_comp_func(int42, int42)
RETURNS int AS
$$
BEGIN
IF $1 < $2 THEN
return -1;
ELSIF $1 = $2 THEN
return 0;
ELSE
return 1;
END IF;
END;
$$ LANGUAGE PLPGSQL;

CREATE OPERATOR CLASS my_operator_class_comp_type
DEFAULT
FOR TYPE int42 USING btree
AS
OPERATOR 1 <,
OPERATOR 2 <=,
OPERATOR 3 =,
OPERATOR 4 >=,
OPERATOR 5 >,
FUNCTION 1 my_comp_func(int42, int42);

CREATE OPERATOR CLASS my_operator_class_comp_type_hash_lt
DEFAULT
FOR TYPE int42 USING hash
AS
OPERATOR 1 <,
FUNCTION 1 my_comp_func(int42, int42);

CREATE OPERATOR CLASS my_operator_class_comp_type_hash_eq
FOR TYPE int42 USING hash
AS
OPERATOR 1 =,
FUNCTION 1 my_comp_func(int42, int42);

select * from atable group by a,k;
-- Before fix: This would fail at runtime with:
-- ERROR: could not find hash function for hash operator XXXXX (execGrouping.c:118)
-- After fix: ORCA should detect missing hash function at planning time
-- and either use GroupAgg or report a clear planning error
SET optimizer_enable_groupagg=off;
select * from atable group by a,k;

drop schema if exists tinc_base_types cascade;
Loading