RANK() query fails with two derived tables

These first two queries both work:

select *
FROM
(
    -- leaf_node(a, fact_sales)
    SELECT fact_sales."store_id" AS "store__id", max_dim_date."the_month" AS "month_of_year__id",
        SUM(fact_sales."sales_amt") AS "fact_sales_sales_amt", SUM(fact_sales."tax_amt") AS "fact_sales_tax_amt"
    FROM fact_sales
    JOIN max_dim_date
      ON max_dim_date.the_date = fact_sales.the_date
    GROUP BY fact_sales."store_id", max_dim_date."the_month"
) AS r2_1
LEFT OUTER JOIN
(
    -- leaf_node(a, fact_discounts)
    SELECT fact_discounts."store_id" AS "store__id", max_dim_date."the_month" AS "month_of_year__id",
        SUM(fact_discounts."discount_amt") AS "fact_discounts_discount_amt"
    FROM fact_discounts
    JOIN max_dim_date
      ON max_dim_date.the_date = fact_discounts.the_date
    GROUP BY fact_discounts."store_id", max_dim_date."the_month"
) AS r2_2
  ON r2_2."store__id" = r2_1."store__id"
 AND r2_2."month_of_year__id" = r2_1."month_of_year__id"
select *,
    RANK() OVER (PARTITION BY r2_1."store__id" ORDER BY r2_1."fact_sales_sales_amt" DESC) AS "__window_pred1"
FROM
(
    -- leaf_node(a, fact_sales)
    SELECT fact_sales."store_id" AS "store__id", max_dim_date."the_month" AS "month_of_year__id",
        SUM(fact_sales."sales_amt") AS "fact_sales_sales_amt", SUM(fact_sales."tax_amt") AS "fact_sales_tax_amt"
    FROM fact_sales
    JOIN max_dim_date
      ON max_dim_date.the_date = fact_sales.the_date
    GROUP BY fact_sales."store_id", max_dim_date."the_month"
) AS r2_1

But this next one fails with an error (I’ll try to get the server log info in the future):

(TIOStreamTransport.java:read:130:org.apache.thrift.transport.TTransportException: Socket is closed by peer.

select *,
    RANK() OVER (PARTITION BY r2_1."store__id" ORDER BY r2_1."fact_sales_sales_amt" DESC) AS "__window_pred1"
FROM
(
    -- leaf_node(a, fact_sales)
    SELECT fact_sales."store_id" AS "store__id", max_dim_date."the_month" AS "month_of_year__id",
        SUM(fact_sales."sales_amt") AS "fact_sales_sales_amt", SUM(fact_sales."tax_amt") AS "fact_sales_tax_amt"
    FROM fact_sales
    JOIN max_dim_date
      ON max_dim_date.the_date = fact_sales.the_date
    GROUP BY fact_sales."store_id", max_dim_date."the_month"
) AS r2_1
LEFT OUTER JOIN
(
    -- leaf_node(a, fact_discounts)
    SELECT fact_discounts."store_id" AS "store__id", max_dim_date."the_month" AS "month_of_year__id",
        SUM(fact_discounts."discount_amt") AS "fact_discounts_discount_amt"
    FROM fact_discounts
    JOIN max_dim_date
      ON max_dim_date.the_date = fact_discounts.the_date
    GROUP BY fact_discounts."store_id", max_dim_date."the_month"
) AS r2_2
  ON r2_2."store__id" = r2_1."store__id"
 AND r2_2."month_of_year__id" = r2_1."month_of_year__id"

FWIW, the following query uses partitioned RANK() but only against a single derived table, and it fails with a generic server error.

-- dataset_join_node(out, [out_out])
SELECT r2_1."month__id" AS "month__id", r2_1."store__id" AS "store__id",
    r2_1."sales_rating" AS "sales_rating",
    RANK() OVER (PARTITION BY r2_1."month__id" ORDER BY r2_1."sales_rating" DESC) AS "__window_pred1"
FROM
(
    -- dataset_join_node(out_out, [out_t])
    SELECT r3_1."month__id" AS "month__id", r3_1."store__id" AS "store__id",
        (CASE WHEN 6 = 0 THEN NULL ELSE (r3_1."a" + r3_1."b") / (6 * 1.0) END) * 100 AS "sales_rating"
    FROM
    (
        -- leaf_join_node(out_t, [fact_sales, fact_discounts])
        SELECT COALESCE(r4_1."month__id", r4_2."month__id") AS "month__id", COALESCE(r4_1."store__id", r4_2."store__id") AS "store__id",
            (r4_1."fact_sales_sales_amt" - .3) * 5 AS "a", (CASE WHEN (r4_1."fact_sales_tax_amt" - r4_2."fact_discounts_discount_amt") = 0 THEN NULL ELSE r4_1."fact_sales_sales_amt" / ((r4_1."fact_sales_tax_amt" - r4_2."fact_discounts_discount_amt") * 1.0) END - 3) * .25 AS "b"
        FROM
        (
            -- leaf_node(out_t, fact_sales)
            SELECT max_dim_date."month_year_name" AS "month__id", fact_sales."store_id" AS "store__id",
                SUM(fact_sales."sales_amt") AS "fact_sales_sales_amt", SUM(fact_sales."tax_amt") AS "fact_sales_tax_amt"
            FROM fact_sales
            JOIN max_dim_date
              ON max_dim_date.the_date = fact_sales.the_date
             AND (max_dim_date.the_year = 2015)
            GROUP BY max_dim_date."month_year_name", fact_sales."store_id"
        ) AS r4_1
        LEFT OUTER JOIN
        (
            -- leaf_node(out_t, fact_discounts)
            SELECT max_dim_date."month_year_name" AS "month__id", fact_discounts."store_id" AS "store__id",
                SUM(fact_discounts."discount_amt") AS "fact_discounts_discount_amt"
            FROM fact_discounts
            JOIN max_dim_date
              ON max_dim_date.the_date = fact_discounts.the_date
             AND (max_dim_date.the_year = 2015)
            GROUP BY max_dim_date."month_year_name", fact_discounts."store_id"
        ) AS r4_2
          ON r4_2."month__id" = r4_1."month__id"
         AND r4_2."store__id" = r4_1."store__id"
    ) AS r3_1
) AS r2_1

-Larry

Seeing something similar with LAG() when there’s a join.

SELECT r1_1."month__id" AS "month__id", r1_1."region__id" AS "region__id",
    LAG(r1_1."fact_sales_sales_amt", 2) OVER(PARTITION BY r1_1."region__id" ORDER BY max_dim_date_seq."month_year_sequence") AS "metric1"
FROM
(
    -- leaf_node(out, fact_sales)
    SELECT max_dim_date."month_year_name" AS "month__id", dim_store."region_id" AS "region__id",
        SUM(fact_sales."sales_amt") AS "fact_sales_sales_amt"
    FROM fact_sales
    JOIN max_dim_date
      ON max_dim_date.the_date = fact_sales.the_date
     AND (max_dim_date.the_year = 2015)
    JOIN dim_store
      ON dim_store.store_id = fact_sales.store_id
    GROUP BY max_dim_date."month_year_name", dim_store."region_id"
) AS r1_1
JOIN
(
    SELECT DISTINCT month_year_name, month_year_sequence
    FROM max_dim_date
) AS max_dim_date_seq
  ON max_dim_date_seq.month_year_name = r1_1.month__id