From bd8beadba14c52b68ab6a4bbe8b3d7a91867227e Mon Sep 17 00:00:00 2001 From: Cam Saul <1455846+camsaul@users.noreply.github.com> Date: Thu, 26 Mar 2020 11:17:01 -1000 Subject: [PATCH] Attempt to infer better base type than type/* for native queries; fix H2 NPE (#12197) * If a driver returns base type `:type/*` (i.e., unknown type) in column metadata for native query results, attempt to infer better type by scanning values sample * Fix NPE in H2 native query results for columns where JDBC metadata `.getColumnClassName` returned nil * Rename `annotate/column-info*` to `annotate/merged-column-info` (makes the purpose of the function clearer) --- .../src/metabase/driver/druid/execute.clj | 2 +- src/metabase/driver/h2.clj | 3 +- src/metabase/query_processor.clj | 2 +- .../query_processor/middleware/annotate.clj | 78 ++++++++++++------- test/metabase/driver/h2_test.clj | 10 +++ .../middleware/annotate_test.clj | 39 ++++++---- 6 files changed, 85 insertions(+), 49 deletions(-) diff --git a/modules/drivers/druid/src/metabase/driver/druid/execute.clj b/modules/drivers/druid/src/metabase/driver/druid/execute.clj index 8f23c4b521085..d7cf740566463 100644 --- a/modules/drivers/druid/src/metabase/driver/druid/execute.clj +++ b/modules/drivers/druid/src/metabase/driver/druid/execute.clj @@ -132,7 +132,7 @@ vec) (-> result :results first keys)) metadata (result-metadata col-names) - annotate-col-names (map (comp keyword :name) (annotate/column-info* outer-query metadata))] + annotate-col-names (map (comp keyword :name) (annotate/merged-column-info outer-query metadata))] (respond metadata (result-rows result col-names annotate-col-names)))) (defn execute-reducible-query diff --git a/src/metabase/driver/h2.clj b/src/metabase/driver/h2.clj index 8ee61b68d73b5..2453a52ece1ab 100644 --- a/src/metabase/driver/h2.clj +++ b/src/metabase/driver/h2.clj @@ -297,7 +297,8 @@ ;; de-CLOB any CLOB values that come back (defmethod sql-jdbc.execute/read-column-thunk :h2 [_ ^ResultSet rs ^ResultSetMetaData rsmeta ^Integer i] - (let [classname (Class/forName (.getColumnClassName rsmeta i) true (classloader/the-classloader))] + (let [classname (some-> (.getColumnClassName rsmeta i) + (Class/forName true (classloader/the-classloader)))] (if (isa? classname Clob) (fn [] (jdbc-protocols/clob->str (.getObject rs i))) diff --git a/src/metabase/query_processor.clj b/src/metabase/query_processor.clj index 7c75d95f55366..9546c679b2d13 100644 --- a/src/metabase/query_processor.clj +++ b/src/metabase/query_processor.clj @@ -162,7 +162,7 @@ (qp.store/with-store (let [preprocessed (query->preprocessed query)] (driver/with-driver (driver.u/database->driver (:database preprocessed)) - (seq (annotate/column-info* preprocessed nil)))))) + (seq (annotate/merged-column-info preprocessed nil)))))) (defn query->native "Return the native form for `query` (e.g. for a MBQL query on Postgres this would return a map containing the compiled diff --git a/src/metabase/query_processor/middleware/annotate.clj b/src/metabase/query_processor/middleware/annotate.clj index 898d53a904cef..d4d8e259c30c9 100644 --- a/src/metabase/query_processor/middleware/annotate.clj +++ b/src/metabase/query_processor/middleware/annotate.clj @@ -73,30 +73,39 @@ :first-row (first rows) :type error-type/qp})))))) +(defn- native-column-info-for-a-single-column + "Determine column metadata for a single column for native query results." + [{col-name :name, driver-base-type :base_type, :as col} values-sample] + ;; Native queries don't have the type information from the original `Field` objects used in the query. + ;; + ;; If the driver returned a base type more specific than :type/*, use that; otherwise look at the sample + ;; of rows and infer the base type based on the classes of the values + (let [col-name (name col-name) + base-type (or (when-not (= driver-base-type :type/*) + driver-base-type) + (driver.common/values->base-type values-sample) + :type/*)] + (merge + {:display_name (u/qualified-name col-name) + :base_type base-type + :source :native} + ;; It is perfectly legal for a driver to return a column with a blank name; for example, SQL Server does this + ;; for aggregations like `count(*)` if no alias is used. However, it is *not* legal to use blank names in MBQL + ;; `:field-literal` clauses, because `SELECT ""` doesn't make any sense. So if we can't return a valid + ;; `:field-literal`, omit the `:field_ref`. + (when (seq col-name) + {:field_ref [:field-literal col-name base-type]}) + col + {:base_type base-type}))) + (defmethod column-info :native [_ {:keys [cols rows]}] (check-driver-native-columns cols rows) - ;; Infer the types of columns by looking at the first value for each in the results. Native queries don't have the - ;; type information from the original `Field` objects used in the query. - (vec - (for [i (range (count cols)) - :let [{col-name :name, :as col} (nth cols i) - col-name (name col-name) - base-type (or (:base_type col) - (driver.common/values->base-type (for [row rows] - (nth row i))) - :type/*)]] - (merge - {:display_name (u/qualified-name col-name) - :base_type base-type - :source :native} - ;; It is perfectly legal for a driver to return a column with a blank name; for example, SQL Server does this - ;; for aggregations like `count(*)` if no alias is used. However, it is *not* legal to use blank names in MBQL - ;; `:field-literal` clauses, because `SELECT ""` doesn't make any sense. So if we can't return a valid - ;; `:field-literal`, omit the `:field_ref`. - (when (seq col-name) - {:field_ref [:field-literal col-name base-type]}) - col)))) + (mapv native-column-info-for-a-single-column + cols + (for [i (range (count cols))] + (for [row rows] + (nth row i))))) ;;; +----------------------------------------------------------------------------------------------------------------+ @@ -517,19 +526,28 @@ It's the responsibility of the driver to make sure the `:cols` are returned in the correct number and order." [cols cols-returned-by-driver] (if (seq cols-returned-by-driver) - (map (fn [col driver-col] + (map (fn [our-col-metadata driver-col-metadata] ;; 1. Prefer our `:name` if it's something different that what's returned by the driver ;; (e.g. for named aggregations) - ;; 2. Then, prefer any non-nil keys returned by the driver - ;; 3. Finally, merge in any of our other keys - (merge col (m/filter-vals some? driver-col) (u/select-non-nil-keys col [:name]))) + ;; 2. Prefer our inferred base type if the driver returned `:type/*` and ours is more specific + ;; 3. Then, prefer any non-nil keys returned by the driver + ;; 4. Finally, merge in any of our other keys + (let [non-nil-driver-col-metadata (m/filter-vals some? driver-col-metadata) + our-base-type (when (= (:base_type driver-col-metadata) :type/*) + (u/select-non-nil-keys our-col-metadata [:base_type])) + our-name (u/select-non-nil-keys our-col-metadata [:name])] + (merge our-col-metadata + non-nil-driver-col-metadata + our-base-type + our-name))) cols cols-returned-by-driver) cols)) -(s/defn column-info* :- ColsWithUniqueNames - "Returns deduplicated `:cols` metadata given a query and the initial results metadata returned by the driver's impl - of `execute-reducible-query`." +(s/defn merged-column-info :- ColsWithUniqueNames + "Returns deduplicated and merged column metadata (`:cols`) for query results by combining (a) the initial results + metadata returned by the driver's impl of `execute-reducible-query` and (b) column metadata inferred by logic in + this namespace." [query {cols-returned-by-driver :cols, :as result}] ;; merge in `:cols` if returned by the driver, then make sure the `:name` of each map in `:cols` is unique, since ;; the FE uses it as a key for stuff like column settings @@ -547,7 +565,7 @@ (fn combine [result sampled-rows] (rf (cond-> result (map? result) - (assoc-in [:data :cols] (column-info* query (assoc metadata :rows sampled-rows)))))))) + (assoc-in [:data :cols] (merged-column-info query (assoc metadata :rows sampled-rows)))))))) (defn add-column-info "Middleware for adding type information about the columns in the query results (the `:cols` key)." @@ -557,7 +575,7 @@ query (fn [metadata] (if (= query-type :query) - (rff (assoc metadata :cols (column-info* query metadata))) + (rff (assoc metadata :cols (merged-column-info query metadata))) ;; rows sampling is only needed for native queries! TODO ­ not sure we really even need to do for native ;; queries... (add-column-info-xform query metadata (rff metadata)))) diff --git a/test/metabase/driver/h2_test.clj b/test/metabase/driver/h2_test.clj index 350601e9fdc4a..f843dbf565c81 100644 --- a/test/metabase/driver/h2_test.clj +++ b/test/metabase/driver/h2_test.clj @@ -97,3 +97,13 @@ :field_ref [:field-literal "NAME" :type/Text] :name "NAME"}] (mt/cols results)))))))) + +(deftest native-query-date-trunc-test + (mt/test-driver :h2 + (testing "A native query that doesn't return a column class name metadata should work correctly (#12150)" + (is (= [{:display_name "D" + :base_type :type/DateTime + :source :native + :field_ref [:field-literal "D" :type/DateTime] + :name "D"}] + (mt/cols (qp/process-query (mt/native-query {:query "SELECT date_trunc('day', DATE) AS D FROM CHECKINS LIMIT 5;"})))))))) diff --git a/test/metabase/query_processor/middleware/annotate_test.clj b/test/metabase/query_processor/middleware/annotate_test.clj index 97ab9982c1308..4545ce2c44a3d 100644 --- a/test/metabase/query_processor/middleware/annotate_test.clj +++ b/test/metabase/query_processor/middleware/annotate_test.clj @@ -21,22 +21,29 @@ ;;; +----------------------------------------------------------------------------------------------------------------+ (deftest native-column-info-test - (testing (str "make sure that `column-info` for `:native` queries can still infer types even if the initial value(s) " - "are `nil` (#4256)") - (is (= [{:name "a", :display_name "a", :base_type :type/Integer, :source :native, :field_ref [:field-literal "a" :type/Integer]} - {:name "b", :display_name "b", :base_type :type/Integer, :source :native, :field_ref [:field-literal "b" :type/Integer]}] - (annotate/column-info - {:type :native} - {:cols [{:name "a"} {:name "b"}] - :rows [[1 nil] [2 nil] [3 nil] [4 5] [6 7]]})))) - - (testing (str "make sure that `column-info` for `:native` queries defaults `base_type` to `type/*` if there are no " - "non-nil values when we peek.") - (is (= [{:name "a", :display_name "a", :base_type :type/*, :source :native, :field_ref [:field-literal "a" :type/*]}] - (annotate/column-info - {:type :native} - {:cols [{:name "a"}] - :rows [[nil]]}))))) + (testing "native column info" + (testing "should still infer types even if the initial value(s) are `nil` (#4256)" + (is (= [{:name "a", :display_name "a", :base_type :type/Integer, :source :native, :field_ref [:field-literal "a" :type/Integer]} + {:name "b", :display_name "b", :base_type :type/Integer, :source :native, :field_ref [:field-literal "b" :type/Integer]}] + (annotate/column-info + {:type :native} + {:cols [{:name "a"} {:name "b"}] + :rows [[1 nil] [2 nil] [3 nil] [4 5] [6 7]]})))) + + (testing "should use default `base_type` of `type/*` if there are no non-nil values in the sample" + (is (= [{:name "a", :display_name "a", :base_type :type/*, :source :native, :field_ref [:field-literal "a" :type/*]}] + (annotate/column-info + {:type :native} + {:cols [{:name "a"}] + :rows [[nil]]})))) + + (testing "should attempt to infer better base type if driver returns :type/* (#12150)" + ;; `merged-column-info` handles merging info returned by driver & inferred by annotate + (is (= [{:name "a", :display_name "a", :base_type :type/Integer, :source :native, :field_ref [:field-literal "a" :type/Integer]}] + (annotate/merged-column-info + {:type :native} + {:cols [{:name "a", :base_type :type/*}] + :rows [[1] [2] [nil] [3]]})))))) ;;; +----------------------------------------------------------------------------------------------------------------+