Skip to content

Commit

Permalink
Attempt to infer better base type than type/* for native queries; fix…
Browse files Browse the repository at this point in the history
… H2 NPE (metabase#12197)

*  If a driver returns base type `:type/*` (i.e., unknown type) in column metadata for native query results, attempt to infer better type by scanning values sample
*  Fix NPE in H2 native query results for columns where JDBC metadata `.getColumnClassName` returned nil
*  Rename `annotate/column-info*` to `annotate/merged-column-info` (makes the purpose of the function clearer)
  • Loading branch information
camsaul authored Mar 26, 2020
1 parent 10d4e04 commit bd8bead
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@
vec)
(-> result :results first keys))
metadata (result-metadata col-names)
annotate-col-names (map (comp keyword :name) (annotate/column-info* outer-query metadata))]
annotate-col-names (map (comp keyword :name) (annotate/merged-column-info outer-query metadata))]
(respond metadata (result-rows result col-names annotate-col-names))))

(defn execute-reducible-query
Expand Down
3 changes: 2 additions & 1 deletion src/metabase/driver/h2.clj
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,8 @@
;; de-CLOB any CLOB values that come back
(defmethod sql-jdbc.execute/read-column-thunk :h2
[_ ^ResultSet rs ^ResultSetMetaData rsmeta ^Integer i]
(let [classname (Class/forName (.getColumnClassName rsmeta i) true (classloader/the-classloader))]
(let [classname (some-> (.getColumnClassName rsmeta i)
(Class/forName true (classloader/the-classloader)))]
(if (isa? classname Clob)
(fn []
(jdbc-protocols/clob->str (.getObject rs i)))
Expand Down
2 changes: 1 addition & 1 deletion src/metabase/query_processor.clj
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@
(qp.store/with-store
(let [preprocessed (query->preprocessed query)]
(driver/with-driver (driver.u/database->driver (:database preprocessed))
(seq (annotate/column-info* preprocessed nil))))))
(seq (annotate/merged-column-info preprocessed nil))))))

(defn query->native
"Return the native form for `query` (e.g. for a MBQL query on Postgres this would return a map containing the compiled
Expand Down
78 changes: 48 additions & 30 deletions src/metabase/query_processor/middleware/annotate.clj
Original file line number Diff line number Diff line change
Expand Up @@ -73,30 +73,39 @@
:first-row (first rows)
:type error-type/qp}))))))

(defn- native-column-info-for-a-single-column
"Determine column metadata for a single column for native query results."
[{col-name :name, driver-base-type :base_type, :as col} values-sample]
;; Native queries don't have the type information from the original `Field` objects used in the query.
;;
;; If the driver returned a base type more specific than :type/*, use that; otherwise look at the sample
;; of rows and infer the base type based on the classes of the values
(let [col-name (name col-name)
base-type (or (when-not (= driver-base-type :type/*)
driver-base-type)
(driver.common/values->base-type values-sample)
:type/*)]
(merge
{:display_name (u/qualified-name col-name)
:base_type base-type
:source :native}
;; It is perfectly legal for a driver to return a column with a blank name; for example, SQL Server does this
;; for aggregations like `count(*)` if no alias is used. However, it is *not* legal to use blank names in MBQL
;; `:field-literal` clauses, because `SELECT ""` doesn't make any sense. So if we can't return a valid
;; `:field-literal`, omit the `:field_ref`.
(when (seq col-name)
{:field_ref [:field-literal col-name base-type]})
col
{:base_type base-type})))

(defmethod column-info :native
[_ {:keys [cols rows]}]
(check-driver-native-columns cols rows)
;; Infer the types of columns by looking at the first value for each in the results. Native queries don't have the
;; type information from the original `Field` objects used in the query.
(vec
(for [i (range (count cols))
:let [{col-name :name, :as col} (nth cols i)
col-name (name col-name)
base-type (or (:base_type col)
(driver.common/values->base-type (for [row rows]
(nth row i)))
:type/*)]]
(merge
{:display_name (u/qualified-name col-name)
:base_type base-type
:source :native}
;; It is perfectly legal for a driver to return a column with a blank name; for example, SQL Server does this
;; for aggregations like `count(*)` if no alias is used. However, it is *not* legal to use blank names in MBQL
;; `:field-literal` clauses, because `SELECT ""` doesn't make any sense. So if we can't return a valid
;; `:field-literal`, omit the `:field_ref`.
(when (seq col-name)
{:field_ref [:field-literal col-name base-type]})
col))))
(mapv native-column-info-for-a-single-column
cols
(for [i (range (count cols))]
(for [row rows]
(nth row i)))))


;;; +----------------------------------------------------------------------------------------------------------------+
Expand Down Expand Up @@ -517,19 +526,28 @@
It's the responsibility of the driver to make sure the `:cols` are returned in the correct number and order."
[cols cols-returned-by-driver]
(if (seq cols-returned-by-driver)
(map (fn [col driver-col]
(map (fn [our-col-metadata driver-col-metadata]
;; 1. Prefer our `:name` if it's something different that what's returned by the driver
;; (e.g. for named aggregations)
;; 2. Then, prefer any non-nil keys returned by the driver
;; 3. Finally, merge in any of our other keys
(merge col (m/filter-vals some? driver-col) (u/select-non-nil-keys col [:name])))
;; 2. Prefer our inferred base type if the driver returned `:type/*` and ours is more specific
;; 3. Then, prefer any non-nil keys returned by the driver
;; 4. Finally, merge in any of our other keys
(let [non-nil-driver-col-metadata (m/filter-vals some? driver-col-metadata)
our-base-type (when (= (:base_type driver-col-metadata) :type/*)
(u/select-non-nil-keys our-col-metadata [:base_type]))
our-name (u/select-non-nil-keys our-col-metadata [:name])]
(merge our-col-metadata
non-nil-driver-col-metadata
our-base-type
our-name)))
cols
cols-returned-by-driver)
cols))

(s/defn column-info* :- ColsWithUniqueNames
"Returns deduplicated `:cols` metadata given a query and the initial results metadata returned by the driver's impl
of `execute-reducible-query`."
(s/defn merged-column-info :- ColsWithUniqueNames
"Returns deduplicated and merged column metadata (`:cols`) for query results by combining (a) the initial results
metadata returned by the driver's impl of `execute-reducible-query` and (b) column metadata inferred by logic in
this namespace."
[query {cols-returned-by-driver :cols, :as result}]
;; merge in `:cols` if returned by the driver, then make sure the `:name` of each map in `:cols` is unique, since
;; the FE uses it as a key for stuff like column settings
Expand All @@ -547,7 +565,7 @@
(fn combine [result sampled-rows]
(rf (cond-> result
(map? result)
(assoc-in [:data :cols] (column-info* query (assoc metadata :rows sampled-rows))))))))
(assoc-in [:data :cols] (merged-column-info query (assoc metadata :rows sampled-rows))))))))

(defn add-column-info
"Middleware for adding type information about the columns in the query results (the `:cols` key)."
Expand All @@ -557,7 +575,7 @@
query
(fn [metadata]
(if (= query-type :query)
(rff (assoc metadata :cols (column-info* query metadata)))
(rff (assoc metadata :cols (merged-column-info query metadata)))
;; rows sampling is only needed for native queries! TODO ­ not sure we really even need to do for native
;; queries...
(add-column-info-xform query metadata (rff metadata))))
Expand Down
10 changes: 10 additions & 0 deletions test/metabase/driver/h2_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,13 @@
:field_ref [:field-literal "NAME" :type/Text]
:name "NAME"}]
(mt/cols results))))))))

(deftest native-query-date-trunc-test
(mt/test-driver :h2
(testing "A native query that doesn't return a column class name metadata should work correctly (#12150)"
(is (= [{:display_name "D"
:base_type :type/DateTime
:source :native
:field_ref [:field-literal "D" :type/DateTime]
:name "D"}]
(mt/cols (qp/process-query (mt/native-query {:query "SELECT date_trunc('day', DATE) AS D FROM CHECKINS LIMIT 5;"}))))))))
39 changes: 23 additions & 16 deletions test/metabase/query_processor/middleware/annotate_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,29 @@
;;; +----------------------------------------------------------------------------------------------------------------+

(deftest native-column-info-test
(testing (str "make sure that `column-info` for `:native` queries can still infer types even if the initial value(s) "
"are `nil` (#4256)")
(is (= [{:name "a", :display_name "a", :base_type :type/Integer, :source :native, :field_ref [:field-literal "a" :type/Integer]}
{:name "b", :display_name "b", :base_type :type/Integer, :source :native, :field_ref [:field-literal "b" :type/Integer]}]
(annotate/column-info
{:type :native}
{:cols [{:name "a"} {:name "b"}]
:rows [[1 nil] [2 nil] [3 nil] [4 5] [6 7]]}))))

(testing (str "make sure that `column-info` for `:native` queries defaults `base_type` to `type/*` if there are no "
"non-nil values when we peek.")
(is (= [{:name "a", :display_name "a", :base_type :type/*, :source :native, :field_ref [:field-literal "a" :type/*]}]
(annotate/column-info
{:type :native}
{:cols [{:name "a"}]
:rows [[nil]]})))))
(testing "native column info"
(testing "should still infer types even if the initial value(s) are `nil` (#4256)"
(is (= [{:name "a", :display_name "a", :base_type :type/Integer, :source :native, :field_ref [:field-literal "a" :type/Integer]}
{:name "b", :display_name "b", :base_type :type/Integer, :source :native, :field_ref [:field-literal "b" :type/Integer]}]
(annotate/column-info
{:type :native}
{:cols [{:name "a"} {:name "b"}]
:rows [[1 nil] [2 nil] [3 nil] [4 5] [6 7]]}))))

(testing "should use default `base_type` of `type/*` if there are no non-nil values in the sample"
(is (= [{:name "a", :display_name "a", :base_type :type/*, :source :native, :field_ref [:field-literal "a" :type/*]}]
(annotate/column-info
{:type :native}
{:cols [{:name "a"}]
:rows [[nil]]}))))

(testing "should attempt to infer better base type if driver returns :type/* (#12150)"
;; `merged-column-info` handles merging info returned by driver & inferred by annotate
(is (= [{:name "a", :display_name "a", :base_type :type/Integer, :source :native, :field_ref [:field-literal "a" :type/Integer]}]
(annotate/merged-column-info
{:type :native}
{:cols [{:name "a", :base_type :type/*}]
:rows [[1] [2] [nil] [3]]}))))))


;;; +----------------------------------------------------------------------------------------------------------------+
Expand Down

0 comments on commit bd8bead

Please sign in to comment.