Custom expression: use an improved field resolver (metabase#19100)

Field references in custom expression must be correctly resolved as dimensions, segments, or metrics. For instance, `[Rating] > 4 AND [Expensive]` means that [Rating] is a dimension and [Expensive] is a segment. Before this change, this process was carried out inside the parser. This led to various corner cases which were not handled well, e.g. `COALESCE(CASE([X], [Y]))` apparently resolved both [X] and [Y] as dimensions. This was incorrect, as [X] has to be a segment, since it's part of the conditional clause in CASE(). With this change, all field references are resolved as an additional pass. Thus, the resolver has the full visibility of the operand types for both unary and binary operators, as well as argument types of all functions, including variadic ones such as CONCAT, COALESCE, and CASE.
driphub · Nov 30, 2021 · 8ea40b6 · 8ea40b6
1 parent 5455029
commit 8ea40b6
Show file tree

Hide file tree

Showing 3 changed files with 276 additions and 3 deletions.
diff --git a/frontend/src/metabase/lib/expressions/compile.js b/frontend/src/metabase/lib/expressions/compile.js
@@ -10,6 +10,7 @@ import {
 
 import { MBQL_CLAUSES } from "./config";
 import { ExpressionCstVisitor, parse } from "./parser";
+import { resolve } from "./resolver";
 
 const NEGATIVE_FILTER_SHORTHANDS = {
   contains: "does-not-contain",
@@ -171,6 +172,15 @@ export function compile({ cst, ...options }) {
   if (!cst) {
     ({ cst } = parse(options));
   }
+  const { startRule } = options;
+
+  const stubResolve = (kind, name) => [kind || "dimension", name];
+  const vistor = new ExpressionMBQLCompilerVisitor({
+    ...options,
+    resolve: stubResolve,
+  });
+  const expr = vistor.visit(cst);
+
   function resolveMBQLField(kind, name) {
     if (kind === "metric") {
       const metric = parseMetric(name, options);
@@ -193,9 +203,12 @@ export function compile({ cst, ...options }) {
       return dimension.mbql();
     }
   }
-  const resolve = options.resolve ? options.resolve : resolveMBQLField;
-  const vistor = new ExpressionMBQLCompilerVisitor({ ...options, resolve });
-  return vistor.visit(cst);
+
+  return resolve(
+    expr,
+    startRule,
+    options.resolve ? options.resolve : resolveMBQLField,
+  );
 }
 
 export function parseOperators(operands, operators) {

diff --git a/frontend/src/metabase/lib/expressions/resolver.js b/frontend/src/metabase/lib/expressions/resolver.js
@@ -0,0 +1,92 @@
+import { OPERATOR as OP } from "./tokenizer";
+import { MBQL_CLAUSES } from "./index";
+
+const FIELD_MARKERS = ["dimension", "segment", "metric"];
+const LOGICAL_OPS = [OP.Not, OP.And, OP.Or];
+const NUMBER_OPS = [OP.Plus, OP.Minus, OP.Star, OP.Slash];
+const COMPARISON_OPS = [
+  OP.Equal,
+  OP.NotEqual,
+  OP.GreaterThan,
+  OP.LessThan,
+  OP.GreaterThanEqual,
+  OP.LessThanEqual,
+];
+
+const MAP_TYPE = {
+  boolean: "segment",
+  aggregation: "metric",
+};
+
+const EQUIVALENT_FILTERS = {
+  "does-not-contain": "contains",
+  "not-null": "is-null",
+  "not-empty": "is-empty",
+};
+
+function findMBQL(op) {
+  let clause = MBQL_CLAUSES[op];
+  if (!clause) {
+    const alt = EQUIVALENT_FILTERS[op];
+    if (alt) {
+      clause = MBQL_CLAUSES[alt];
+    }
+  }
+  return clause;
+}
+
+export function resolve(expression, type, fn) {
+  if (Array.isArray(expression)) {
+    const [op, ...operands] = expression;
+
+    if (FIELD_MARKERS.includes(op)) {
+      const kind = MAP_TYPE[type] || "dimension";
+      const [name] = operands;
+      return fn ? fn(kind, name) : [kind, name];
+    }
+
+    let operandType = null;
+    if (LOGICAL_OPS.includes(op)) {
+      operandType = "boolean";
+    } else if (NUMBER_OPS.includes(op) || op === "coalesce") {
+      operandType = type;
+    } else if (COMPARISON_OPS.includes(op)) {
+      operandType = "expression";
+    } else if (op === "concat") {
+      operandType = "string";
+    } else if (op === "case") {
+      const [pairs, options] = operands;
+
+      const resolvedPairs = pairs.map(([tst, val]) => [
+        resolve(tst, "boolean", fn),
+        resolve(val, type, fn),
+      ]);
+
+      if (options && "default" in options) {
+        const resolvedOptions = {
+          default: resolve(options.default, type, fn),
+        };
+        return [op, resolvedPairs, resolvedOptions];
+      }
+
+      return [op, resolvedPairs];
+    }
+
+    if (operandType) {
+      return [
+        op,
+        ...operands.map(operand => resolve(operand, operandType, fn)),
+      ];
+    }
+
+    const clause = findMBQL(op);
+    if (clause) {
+      const { args } = clause;
+      return [
+        op,
+        ...operands.map((operand, i) => resolve(operand, args[i], fn)),
+      ];
+    }
+  }
+  return expression;
+}
diff --git a/frontend/test/metabase/lib/expressions/resolver.unit.spec.js b/frontend/test/metabase/lib/expressions/resolver.unit.spec.js
@@ -0,0 +1,168 @@
+import { resolve } from "metabase/lib/expressions/resolver";
+
+describe("metabase/lib/expressions/resolve", () => {
+  function collect(expr, startRule = "expression") {
+    const dimensions = [];
+    const segments = [];
+    const metrics = [];
+
+    resolve(expr, startRule, (kind, name) => {
+      switch (kind) {
+        case "dimension":
+          dimensions.push(name);
+          break;
+        case "segment":
+          segments.push(name);
+          break;
+        case "metric":
+          metrics.push(name);
+          break;
+      }
+      return [kind, name];
+    });
+
+    return { dimensions, segments, metrics };
+  }
+
+  // handy references
+  const A = ["dimension", "A"];
+  const B = ["dimension", "B"];
+  const C = ["dimension", "C"];
+  const P = ["dimension", "P"];
+  const Q = ["dimension", "Q"];
+  const R = ["dimension", "R"];
+  const S = ["dimension", "S"];
+
+  describe("for filters", () => {
+    const filter = e => collect(e, "boolean");
+
+    it("should resolve segments correctly", () => {
+      expect(filter(A).segments).toEqual(["A"]);
+      expect(filter(["not", B]).segments).toEqual(["B"]);
+      expect(filter(["not", ["not", C]]).segments).toEqual(["C"]);
+      expect(filter([">", P, 3]).segments).toEqual([]);
+      expect(filter(["and", ["<", Q, 1], R]).segments).toEqual(["R"]);
+      expect(filter(["is-null", S]).segments).toEqual([]);
+      expect(filter(["not-empty", S]).segments).toEqual([]);
+      expect(filter(["lower", A]).segments).toEqual([]);
+      expect(filter(["sqrt", B]).segments).toEqual([]);
+      expect(filter(["contains", C, "SomeString"]).segments).toEqual([]);
+      expect(filter(["or", P, [">", Q, 3]]).segments).toEqual(["P"]);
+    });
+
+    it("should resolve dimensions correctly", () => {
+      expect(filter(A).dimensions).toEqual([]);
+      expect(filter(["not", B]).dimensions).toEqual([]);
+      expect(filter(["not", ["not", C]]).dimensions).toEqual([]);
+      expect(filter([">", P, 3]).dimensions).toEqual(["P"]);
+      expect(filter(["and", ["<", Q, 1], R]).dimensions).toEqual(["Q"]);
+      expect(filter(["is-null", Q]).dimensions).toEqual(["Q"]);
+      expect(filter(["not-empty", S]).dimensions).toEqual(["S"]);
+      expect(filter(["lower", A]).dimensions).toEqual(["A"]);
+      expect(filter(["sqrt", B]).dimensions).toEqual(["B"]);
+      expect(filter(["contains", C, "SomeString"]).dimensions).toEqual(["C"]);
+      expect(filter(["or", P, [">", Q, 3]]).dimensions).toEqual(["Q"]);
+    });
+  });
+
+  describe("for expressions (for custom columns)", () => {
+    const expr = e => collect(e, "expression");
+
+    it("should resolve segments correctly", () => {
+      expect(expr(["trim", A]).segments).toEqual([]);
+      expect(expr(["round", B]).segments).toEqual([]);
+      expect(expr(["concat", S]).segments).toEqual([]);
+      expect(expr(["concat", A, B]).segments).toEqual([]);
+      expect(expr(["coalesce", P]).segments).toEqual([]);
+      expect(expr(["coalesce", P, Q, R]).segments).toEqual([]);
+    });
+
+    it("should resolve dimensions correctly", () => {
+      expect(expr(["trim", A]).dimensions).toEqual(["A"]);
+      expect(expr(["round", B]).dimensions).toEqual(["B"]);
+      expect(expr(["concat", S]).dimensions).toEqual(["S"]);
+      expect(expr(["concat", A, B]).dimensions).toEqual(["A", "B"]);
+      expect(expr(["coalesce", P]).dimensions).toEqual(["P"]);
+      expect(expr(["coalesce", P, Q, R]).dimensions).toEqual(["P", "Q", "R"]);
+    });
+  });
+
+  describe("for aggregations", () => {
+    const aggregation = e => collect(e, "aggregation");
+
+    it("should resolve dimensions correctly", () => {
+      expect(aggregation(A).dimensions).toEqual([]);
+      expect(aggregation(["cum-sum", B]).dimensions).toEqual(["B"]);
+      expect(aggregation(["-", 5, ["avg", C]]).dimensions).toEqual(["C"]);
+      expect(aggregation(["share", [">", P, 3]]).dimensions).toEqual(["P"]);
+      expect(aggregation(["max", ["*", 4, Q]]).dimensions).toEqual(["Q"]);
+      expect(aggregation(["+", R, ["median", S]]).dimensions).toEqual(["S"]);
+    });
+
+    it("should resolve metrics correctly", () => {
+      expect(aggregation(A).metrics).toEqual(["A"]);
+      expect(aggregation(["cum-sum", B]).metrics).toEqual([]);
+      expect(aggregation(["-", 5, ["avg", C]]).metrics).toEqual([]);
+      expect(aggregation(["share", [">", P, 3]]).metrics).toEqual([]);
+      expect(aggregation(["max", ["*", 4, Q]]).metrics).toEqual([]);
+      expect(aggregation(["+", R, ["median", S]]).metrics).toEqual(["R"]);
+    });
+  });
+
+  describe("for CASE expressions", () => {
+    const expr = e => collect(e, "expression");
+    it("should handle CASE with two arguments", () => {
+      // CASE(A,B)
+      expect(expr(["case", [[A, B]]]).segments).toEqual(["A"]);
+      expect(expr(["case", [[A, B]]]).dimensions).toEqual(["B"]);
+    });
+    it("should handle CASE with three arguments", () => {
+      // CASE(P, Q, R)
+      const opt = { default: R };
+      expect(expr(["case", [[P, Q]], opt]).segments).toEqual(["P"]);
+      expect(expr(["case", [[P, Q]], opt]).dimensions).toEqual(["Q", "R"]);
+    });
+    it("should handle CASE with four arguments", () => {
+      // CASE(A, B, P, Q)
+      const ab = [A, B];
+      const pq = [P, Q];
+      expect(expr(["case", [ab, pq]]).segments).toEqual(["A", "P"]);
+      expect(expr(["case", [ab, pq]]).dimensions).toEqual(["B", "Q"]);
+    });
+    it("should handle CASE with five arguments", () => {
+      // CASE(A, B, P, Q, R)
+      const ab = [A, B];
+      const pq = [P, Q];
+      const opt = { default: R };
+      expect(expr(["case", [ab, pq], opt]).segments).toEqual(["A", "P"]);
+      expect(expr(["case", [ab, pq], opt]).dimensions).toEqual(["B", "Q", "R"]);
+    });
+    it("should handle CASE with two complex arguments", () => {
+      // CASE(P < 2, Q)
+      expect(expr(["case", [[["<", P, 2], Q]]]).segments).toEqual([]);
+      expect(expr(["case", [[["<", P, 2], Q]]]).dimensions).toEqual(["P", "Q"]);
+    });
+    it("should handle nested CASE", () => {
+      // CASE(P, Q, CASE(A, B))
+      const opt = { default: ["case", [[A, B]]] };
+      expect(expr(["case", [[P, Q]], opt]).segments).toEqual(["P", "A"]);
+      expect(expr(["case", [[P, Q]], opt]).dimensions).toEqual(["Q", "B"]);
+    });
+    it("should handle CASE inside COALESCE", () => {
+      // COALESCE(CASE(A, B))
+      expect(expr(["coalesce", ["case", [[A, B]]]]).segments).toEqual(["A"]);
+      expect(expr(["coalesce", ["case", [[A, B]]]]).dimensions).toEqual(["B"]);
+    });
+  });
+
+  it("should handle unknown MBQL gracefully", () => {
+    expect(() => collect(["abc-xyz", B])).not.toThrow();
+  });
+
+  it("should not fail on literal 0", () => {
+    const opt = { default: 0 };
+    expect(resolve(["case", [[1, 0]]])).toEqual(["case", [[1, 0]]]);
+    expect(resolve(["case", [[1, 0]], opt])).toEqual(["case", [[1, 0]], opt]);
+    expect(resolve(["case", [[1, 2]], opt])).toEqual(["case", [[1, 2]], opt]);
+  });
+});