Skip to content

Spring data relational generates incorrect single query for PostgreSQL with two one-to-many relationships present #2122

@const

Description

@const

The following test fails on PostgreSQL 17.6:

spring-data-jdbc-bug.zip

The script src/main/resources/db.sql in the file is a database schema and test data.

There are two one-to-many relationships in the test. And one is larger than another. The incorrect query is generated:

SELECT 
	case when rn_company_1 = rn THEN c_name_4 else null end as c_name_4, 
	case when rn_company_1 = rn THEN c_url_5 else null end as c_url_5, 
	case when rn_company_1 = rn THEN c_industry_6 else null end as c_industry_6, 
	case when rn_company_1 = rn THEN c_description_7 else null end as c_description_7, 
	case when rn_company_office_9 = rn THEN c_company_id_14 else null end as c_company_id_14, 
	case when rn_company_office_9 = rn THEN c_name_15 else null end as c_name_15, 
	case when rn_company_office_9 = rn THEN c_city_16 else null end as c_city_16, 
	case when rn_company_office_9 = rn THEN c_address_17 else null end as c_address_17, 
	c_office_id_13, 
	key_company_office_12, 
	case when rn_contact_person_19 = rn THEN c_company_id_24 else null end as c_company_id_24, 
	case when rn_contact_person_19 = rn THEN c_name_25 else null end as c_name_25, 
	case when rn_contact_person_19 = rn THEN c_position_26 else null end as c_position_26, 
	c_contact_person_id_23, 
	key_contact_person_22, 
	c_company_id_3 
FROM (
	SELECT c_name_4, c_url_5, c_industry_6, c_description_7, rn_company_1, c_company_id_3, c_company_id_14, 
		c_name_15, c_city_16, c_address_17, rn_company_office_9, c_office_id_13, br_company_office_11, 
		key_company_office_12, c_company_id_24, c_name_25, c_position_26, rn_contact_person_19, 
		c_contact_person_id_23, br_contact_person_21, key_contact_person_22, 
	       GREATEST(COALESCE(rn_company_1, 1), COALESCE(rn_company_office_9, 1), COALESCE(rn_contact_person_19, 1)) AS rn 
	FROM (
		SELECT 1 AS rn_company_1, 1 AS rc_company_2, "company"."company_id" AS c_company_id_3, "company"."name" AS c_name_4, 
			"company"."url" AS c_url_5, "company"."industry" AS c_industry_6, "company"."description" AS c_description_7 
		FROM "company" WHERE "company"."company_id" IN (
		--?, ?, ?, ?, ?, ?, ?, ?, ?, ?
 '0198b795-ce94-78b3-a2bf-847992d3fb68'
)) t_company_8 
			LEFT OUTER JOIN (
				SELECT row_number() OVER(
						PARTITION BY "company_office"."company_id" 
					ORDER BY "company_office"."company_id"
				) AS rn_company_office_9, 
				count(*) OVER(
					PARTITION BY "company_office"."company_id"
				) AS rc_company_office_10, 
				"company_office"."company_id" AS br_company_office_11, 
				row_number() OVER(
					PARTITION BY "company_office"."company_id" 
					ORDER BY "company_office"."company_id"
				) AS key_company_office_12, 
				"company_office"."office_id" AS c_office_id_13, 
				"company_office"."company_id" AS c_company_id_14, "company_office"."name" AS c_name_15, 
				"company_office"."city" AS c_city_16, "company_office"."address" AS c_address_17 
			FROM "company_office") t_company_office_18 ON c_company_id_3 = br_company_office_11
		LEFT OUTER JOIN (
			SELECT 
				row_number() OVER(
					PARTITION BY "contact_person"."company_id" 
					ORDER BY "contact_person"."company_id"
				) AS rn_contact_person_19, 
				count(*) OVER(PARTITION BY "contact_person"."company_id") AS rc_contact_person_20,
				"contact_person"."company_id" AS br_contact_person_21,
				row_number() OVER(
					PARTITION BY "contact_person"."company_id" 
					ORDER BY "contact_person"."company_id"
				) AS key_contact_person_22, 
				"contact_person"."contact_person_id" AS c_contact_person_id_23, 
				"contact_person"."company_id" AS c_company_id_24, "contact_person"."name" AS c_name_25, "contact_person"."position" AS c_position_26 
			FROM "contact_person"
		) t_contact_person_27 ON c_company_id_3 = br_contact_person_21 
		WHERE (rn_company_office_9 = rn_contact_person_19 OR rn_company_office_9 IS NULL OR rn_contact_person_19 IS NULL 
		   OR (rn_company_office_9 > rc_contact_person_20 AND rn_contact_person_19 = 1) 
		   OR (rn_contact_person_19 > rc_company_office_10 AND rn_company_office_9 = 1))
) main 
ORDER BY c_company_id_3, rn         

The problem seems to be that c_office_id_13 is not null when office is missing.

c_name_4 c_url_5 c_industry_6 c_description_7 c_company_id_14 c_name_15 c_city_16 c_address_17 c_office_id_13 key_company_office_12 c_company_id_24 c_name_25 c_position_26 c_contact_person_id_23 key_contact_person_22 c_company_id_3
Farrell-Roob R928229 http://www.colby-wisozk.biz:54185/odit?facilis=provident&labore=incidunt#a Legislative Office Upgradable leading edge project 0198b795-ce94-78b3-a2bf-847992d3fb68 Chanda Lear East Bellaton 37060 Farrell Drives, Strosinburgh, WV 41639 0198b795-ce94-7f16-8496-902393f58e70 1 0198b795-ce94-78b3-a2bf-847992d3fb68 Mrs. Len Homenick Orchestrator 0198b795-ce95-771e-893f-30663574852e 1 0198b795-ce94-78b3-a2bf-847992d3fb68
0198b795-ce94-78b3-a2bf-847992d3fb68 Sal A. Mander New Marlana Apt. 962 19078 Pete Parkways, Rutherfordmouth, SD 14193 0198b795-ce94-7ac9-80fb-1be5fa6890a2 2 0198b795-ce94-78b3-a2bf-847992d3fb68 Miss Carlos Lakin Analyst 0198b795-ce95-7074-b56d-a555705a3a9f 2 0198b795-ce94-78b3-a2bf-847992d3fb68
0198b795-ce94-78b3-a2bf-847992d3fb68 Brandon Cattell Turcotteshire 8019 Murazik Plains, Hanhmouth, NV 59964 0198b795-ce94-72f0-863b-ca44d7447629 3 0198b795-ce94-78b3-a2bf-847992d3fb68 Jacquelynn Kozey Architect 0198b795-ce95-7adf-bbec-f66bcfbf1943 3 0198b795-ce94-78b3-a2bf-847992d3fb68
0198b795-ce94-7f16-8496-902393f58e70 1 0198b795-ce94-78b3-a2bf-847992d3fb68 Laverna Turcotte I Designer 0198b795-ce95-7ff8-891b-a2ae54cc7d6c 4 0198b795-ce94-78b3-a2bf-847992d3fb68

This causes overriding data with nulls.

[ {
  "id" : "0198b795-ce94-78b3-a2bf-847992d3fb68",
  "name" : "Farrell-Roob R928229",
  "url" : "http://www.colby-wisozk.biz:54185/odit?facilis=provident&labore=incidunt#a",
  "industry" : "Legislative Office",
  "description" : "Upgradable leading edge project",
  "contactPersons" : [ {
    "id" : "0198b795-ce95-771e-893f-30663574852e",
    "companyId" : "0198b795-ce94-78b3-a2bf-847992d3fb68",
    "name" : "Mrs. Len Homenick",
    "position" : "Orchestrator"
  }, {
    "id" : "0198b795-ce95-7074-b56d-a555705a3a9f",
    "companyId" : "0198b795-ce94-78b3-a2bf-847992d3fb68",
    "name" : "Miss Carlos Lakin",
    "position" : "Analyst"
  }, {
    "id" : "0198b795-ce95-7adf-bbec-f66bcfbf1943",
    "companyId" : "0198b795-ce94-78b3-a2bf-847992d3fb68",
    "name" : "Jacquelynn Kozey",
    "position" : "Architect"
  }, {
    "id" : "0198b795-ce95-7ff8-891b-a2ae54cc7d6c",
    "companyId" : "0198b795-ce94-78b3-a2bf-847992d3fb68",
    "name" : "Laverna Turcotte I",
    "position" : "Designer"
  } ],
  "offices" : [ {
    "id" : "0198b795-ce94-7f16-8496-902393f58e70",
    "companyId" : null,
    "name" : null,
    "city" : null,
    "address" : null
  }, {
    "id" : "0198b795-ce94-7ac9-80fb-1be5fa6890a2",
    "companyId" : "0198b795-ce94-78b3-a2bf-847992d3fb68",
    "name" : "Sal A. Mander",
    "city" : "New Marlana",
    "address" : "Apt. 962 19078 Pete Parkways, Rutherfordmouth, SD 14193"
  }, {
    "id" : "0198b795-ce94-72f0-863b-ca44d7447629",
    "companyId" : "0198b795-ce94-78b3-a2bf-847992d3fb68",
    "name" : "Brandon Cattell",
    "city" : "Turcotteshire",
    "address" : "8019 Murazik Plains, Hanhmouth, NV 59964"
  } ]
} ]

The query also contains useless order by in fragments like:

				row_number() OVER(
					PARTITION BY "contact_person"."company_id" 
					ORDER BY "contact_person"."company_id"
				) AS key_contact_person_22, 

In the window "contact_person"."company_id" has the same value, so it makes no sense to sort by it.

I also think that on PostgreSQL the simpler strategy with CTE would have worked that would have produced more readable queries, I think other database should support it as well:

with company_data as (
    select
        row_number() over (order by c.name, c.company_id) level1,
        1 level2,
        c.*
    from company c
    where company_id in ('0198b795-ce94-78b3-a2bf-847992d3fb68')
), company_office_data as (
    select
        level1,
        row_number() over (
            partition by cd.level1
            order by co.name desc, co.office_id desc
        ) as level2,
        co.*
    from company_data cd
    join company_office co on co.company_id = cd.company_id
), contact_person_data as (
    select
        level1,
        row_number() over (
            partition by cd.level1
            order by cp.name, cp.contact_person_id
        ) as level2,
        cp.*
    from company_data cd
    join contact_person cp on cp.company_id = cd.company_id
)    
select
    cd.company_id as c_company_id,
    cd.description as c_description,
    cd.industry as c_industry,
    cd."name" as c_name,
    cd.url as c_url,
    cod.office_id as o_office_id,
    cod.address as o_address,
    cod.city as o_city,
    cod."name" as o_name,
    cpd.contact_person_id as p_contact_person_id,
    cpd.name as p_name,
    cpd.position as p_position
from company_data cd
full outer join company_office_data cod
    on cd.level1 = cod.level1 and cod.level2 = cd.level2
full outer join contact_person_data cpd
    on coalesce(cd.level1, cod.level1) = cpd.level1
    and coalesce(cod.level2, cd.level2) = cpd.level2
order by coalesce(cd.level1, cod.level1, cpd.level1), coalesce(cd.level2, cod.level2, cpd.level2, 1)

This strategy could be naturally extended to the nested one-to-many relationships and and one-to-one relationships.

If schema from test is extended as:

-- public.verification_info definition

-- Drop table

-- DROP TABLE public.verification_info;

CREATE TABLE IF NOT EXISTS public.verification_info (
	"comment" varchar(255) NULL,
	status varchar(255) NOT NULL,
	"timestamp" timestamptz(6) NOT NULL,
	username varchar(255) NOT NULL,
	company_id uuid NOT NULL,
	CONSTRAINT verification_info_pkey PRIMARY KEY (company_id),
	CONSTRAINT verification_info_status_check CHECK (((status)::text = ANY ((ARRAY['VERIFIED'::character varying, 'INVALID'::character varying])::text[]))),
	CONSTRAINT fk2631d1desupjf5fo8mtgd7srv FOREIGN KEY (company_id) REFERENCES public.company(company_id)
);
CREATE INDEX IF NOT EXISTS verification_info_by_company_id_idx ON public.verification_info USING btree (company_id);


-- public.contact_detail definition

-- Drop table

-- DROP TABLE public.contact_detail;

CREATE TABLE IF NOT EXISTS public.contact_detail (
	contact_person_id uuid NOT NULL,
	contact_type varchar(255) NOT NULL,
	value varchar(255) NOT NULL,
	CONSTRAINT contact_detail_contact_type_check CHECK (((contact_type)::text = ANY ((ARRAY['EMAIL'::character varying, 'PHONE'::character varying, 'TELEGRAM'::character varying])::text[]))),
	CONSTRAINT contact_detail_pkey PRIMARY KEY (contact_person_id, contact_type, value),
	CONSTRAINT fkh7yxxcj9rqwapyld8ll4kswqs FOREIGN KEY (contact_person_id) REFERENCES public.contact_person(contact_person_id)
);
CREATE INDEX IF NOT EXISTS contact_detail_by_contact_person_id_idx ON public.contact_detail USING btree (contact_person_id);
CREATE INDEX IF NOT EXISTS contact_detail_by_value_contact_person_id_idx ON public.contact_detail USING btree (value, contact_person_id);


INSERT INTO contact_detail (contact_person_id,contact_type,value) VALUES
	 ('0198b795-ce95-7074-b56d-a555705a3a9f'::uuid,'EMAIL','[email protected]'),
	 ('0198b795-ce95-771e-893f-30663574852e'::uuid,'PHONE','(983) 409-5386'),
	 ('0198b795-ce95-771e-893f-30663574852e'::uuid,'EMAIL','[email protected]'),
	 ('0198b795-ce95-7adf-bbec-f66bcfbf1943'::uuid,'PHONE','(936) 459-0173'),
	 ('0198b795-ce95-7ff8-891b-a2ae54cc7d6c'::uuid,'PHONE','(730) 214-9396'),
	 ('0198b795-ce95-7ff8-891b-a2ae54cc7d6c'::uuid,'EMAIL','[email protected]') ON CONFLICT DO NOTHING;
INSERT INTO verification_info
(company_id, "comment", status, "timestamp", username)
VALUES( '0198b795-ce94-78b3-a2bf-847992d3fb68'::uuid, NULL, 'VERIFIED', '2024-09-01 18:12:23.528', 'maira.schimmel') ON CONFLICT DO NOTHING;

The full query is only incrementally more complex, as the level 3 is needed to be reduced to the level 2.

with company_data as (
    select
        row_number() over (order by c.name, c.company_id) level1,
        1 level2,
        c.*
    from company c
    where company_id in ('0198b795-ce94-78b3-a2bf-847992d3fb68')
), company_office_data as (
    select
        level1,
        row_number() over (
            partition by cd.level1
            order by co.name, co.office_id
        ) as level2,
        co.*
    from company_data cd
    join company_office co on co.company_id = cd.company_id
), contact_person_data as (
    select
        level1,
        row_number() over (
            partition by cd.level1
            order by cp.name, cp.contact_person_id
        ) as level2,
        1 level3,
        cp.*
    from company_data cd
    join contact_person cp on cp.company_id = cd.company_id
), contact_detail_data as (
    select
        level1,
        level2,
        row_number() over (
            partition by cpd.level1, cpd.level2
            order by cd.contact_type, cd.value
        ) level3,
        cd.*
    from contact_person_data cpd
    join contact_detail cd on cpd.contact_person_id = cd.contact_person_id
), contact_person_detail_data as (
    select
        coalesce(cpd.level1, cdd.level1) as level1,
        row_number() over (
            partition by coalesce(cpd.level1, cdd.level1)
            order by coalesce(cpd.level2, cdd.level2), coalesce(cdd.level3, 1)
        ) as level2,
        cpd.contact_person_id as p_contact_person_id,
        cpd."name" as p_name,
    		cpd."position" as p_position,
	    cdd.contact_type as d_contact_type,
        cdd.value as d_value
    from contact_person_data cpd
    full outer join contact_detail_data cdd
      on cpd.level1 = cdd.level1 and cpd.level2 = cdd.level2 and cdd.level3 = cpd.level3
)
select
	cd.company_id as c_company_id,
	cd.description as c_description,
	cd.industry as c_industry,
	cd."name" as c_name,
	vi.company_id as vi_company_id,
	vi."comment" as vi_comment,
	vi.status as vi_status,
	vi."timestamp" as vi_timestamp,
	vi.username as vi_username,
	cd.url as c_url,
	cod.office_id as o_office_id,
	cod.address as o_address,
	cod.city as o_city,
	cod."name" as o_name,
	cpd.p_contact_person_id,
	cpd.p_name,
	cpd.p_position,
	cpd.d_contact_type,
	cpd.d_value
from company_data cd
left join verification_info vi on cd.company_id = vi.company_id
full outer join company_office_data cod
    on cd.level1 = cod.level1 and cod.level2 = cd.level2
full outer join contact_person_detail_data cpd
    on coalesce(cd.level1, cod.level1) = cpd.level1
    and coalesce(cod.level2, cd.level2) = cpd.level2
order by coalesce(cd.level1, cod.level1, cpd.level1), coalesce(cd.level2, cod.level2, cpd.level2, 1)

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions