Skip to content

Commit

Permalink
Implement user-defined casting rules support for DB3.
Browse files Browse the repository at this point in the history
The casting support for DB3 was hand-crafted and didn't get upgraded to
using the current CAST grammar and facilities, for no other reason than
lack of time and interest. It so happens that implementing it now fixes two
bug reports.

Bug dimitri#938 is about conversion defaulting to "not null" columns, and that's
due to the usage of the internal pgloader catalogs where the target column's
nullable field is NIL by default, which doesn't make much sense. With
support for user-defined casting rules, the default is nullable columns, so
that's kind of a free fix.

Fixes dimitri#927.
Fixes dimitri#938.
  • Loading branch information
dimitri committed Apr 14, 2019
1 parent efe70ba commit 39fc78e
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 59 deletions.
1 change: 1 addition & 0 deletions pgloader.asd
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@
:depends-on ("common" "csv")
:components
((:file "db3-schema")
(:file "db3-cast-rules")
(:file "db3" :depends-on ("db3-schema"))))

(:module "ixf"
Expand Down
4 changes: 3 additions & 1 deletion src/package.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,7 @@
#:create-tables
#:format-vector-row)
(:export #:dbf-connection
#:*db3-default-cast-rules*
#:copy-db3
#:map-rows
#:copy-to
Expand Down Expand Up @@ -850,7 +851,8 @@
#:*sqlite-default-cast-rules*)
(:import-from #:pgloader.source.db3
#:copy-db3
#:dbf-connection)
#:dbf-connection
#:*db3-default-cast-rules*)
(:import-from #:pgloader.source.ixf
#:copy-ixf
#:ixf-connection)
Expand Down
10 changes: 7 additions & 3 deletions src/parsers/command-dbf.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@

(defrule load-dbf-optional-clauses (* (or dbf-options
gucs
casts
before-load
after-load))
(:lambda (clauses-list)
Expand Down Expand Up @@ -93,10 +94,12 @@
&key
target-table-name
(encoding :ascii)
gucs before after options
gucs casts before after options
&allow-other-keys)
`(lambda ()
(let* (,@(pgsql-connection-bindings pg-db-conn gucs)
(let* ((*default-cast-rules* ',*db3-default-cast-rules*)
(*cast-rules* ',casts)
,@(pgsql-connection-bindings pg-db-conn gucs)
,@(batch-control-bindings options)
,@(identifier-case-binding options)
(on-error-stop (getf ',options :on-error-stop))
Expand Down Expand Up @@ -124,14 +127,15 @@
;; Grammar rule for a whole LOAD DBF command: whatever LOAD-DBF-COMMAND
;; produced is destructured and turned into Lisp code.
;;
;; When *DRY-RUN* is true the rule only calls LISP-CODE-FOR-DBF-DRY-RUN with
;; the source and the PostgreSQL URI; otherwise it calls
;; LISP-CODE-FOR-LOADING-FROM-DBF, forwarding the table name, the encoding
;; and the optional clauses (gucs, casts, before, after, options).
;;
;; NOTE(review): the two consecutive `&key ...' lines below are the removed
;; and the added side of the same diff line; only the one that lists CASTS
;; belongs in the real source file.
(defrule load-dbf-file load-dbf-command
(:lambda (command)
(bind (((source encoding pg-db-uri table-name
&key options gucs before after) command))
&key options gucs casts before after) command))
(cond (*dry-run*
(lisp-code-for-dbf-dry-run source pg-db-uri))
(t
(lisp-code-for-loading-from-dbf source pg-db-uri
:target-table-name table-name
:encoding encoding
:gucs gucs
:casts casts
:before before
:after after
:options options))))))
72 changes: 72 additions & 0 deletions src/sources/db3/db3-cast-rules.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
;;;
;;; Tools to handle DB3 data type casting rules
;;;

(in-package :pgloader.source.db3)

;;;
;;; The default DB3 Type Casting Rules
;;;
(defparameter *db3-default-cast-rules*
  ;; One row per DB3 field type: the DB3 type letter, the PostgreSQL type
  ;; name, and the symbol naming the transformation function to use.
  (loop :for (db3-type pgsql-type transform)
          :in '(("C" "text"    db3-trim-string)
                ("N" "numeric" db3-numeric-to-pgsql-numeric)
                ("L" "boolean" logical-to-boolean)
                ("D" "date"    db3-date-to-pgsql-date)
                ("M" "text"    db3-trim-string))
        :collect (list :source (list :type db3-type)
                       :target (list :type pgsql-type)
                       :using transform))
  "Data Type Casting rules to migrate from DB3 to PostgreSQL")

(defstruct (db3-field
             (:constructor make-db3-field (name type length)))
  "Description of a single DB3 field, as handed over to the casting rules.

The positional constructor MAKE-DB3-FIELD only sets NAME, TYPE and LENGTH;
every other slot keeps the initial value given here."
  name
  type
  length
  ;; DEFAULT and EXTRA are not constructor arguments but are read by the CAST
  ;; method: give them an explicit NIL so that reading them is well defined
  ;; (reading a slot that has no initform before assigning it is undefined).
  (default nil)
  ;; Columns are nullable unless something explicitly says otherwise.
  (nullable t)
  (extra nil))

(defmethod cast ((field db3-field) &key table)
  "Compute the PostgreSQL definition of the DB3 FIELD found in TABLE.

The work is delegated to APPLY-CASTING-RULES, which receives the name of
TABLE followed by the field's name, type, default, nullable and extra slots."
  ;; The field type is deliberately passed twice, exactly as the casting
  ;; entry point is called here -- NOTE(review): presumably once as the data
  ;; type and once as the full column type; confirm against
  ;; APPLY-CASTING-RULES.
  (apply-casting-rules (table-name table)
                       (db3-field-name field)
                       (db3-field-type field)
                       (db3-field-type field)
                       (db3-field-default field)
                       (db3-field-nullable field)
                       (db3-field-extra field)))

;;;
;;; Transformation functions
;;;
;; Request inlining of the four value transformation functions defined
;; below. The declamation has to come before the definitions it names.
(declaim (inline logical-to-boolean
db3-trim-string
db3-numeric-to-pgsql-numeric
db3-date-to-pgsql-date))

(defun logical-to-boolean (value)
  "Convert a DB3 logical VALUE to a PostgreSQL boolean.

Returns NIL (an SQL NULL) when VALUE is one of the markers of an
uninitialized logical field, \"?\" or a single space; any other VALUE is
returned unchanged."
  ;; A blank logical field is as uninitialized as a \"?\" one, and a single
  ;; space is not valid input for a PostgreSQL boolean.
  (unless (member value '("?" " ") :test #'string=)
    value))

(defun db3-trim-string (value)
  "DB3 strings are right padded with spaces: remove that padding.

Returns VALUE without its trailing spaces, or NIL when VALUE is NIL."
  ;; NIL is a string designator for \"NIL\": without this guard a missing
  ;; value would be loaded as the three letter text NIL.
  (when value
    (string-right-trim '(#\Space) value)))

(defun db3-numeric-to-pgsql-numeric (value)
  "DB3 numerics should be good to go, but might contain spaces.

Returns VALUE without its trailing spaces, or NIL when VALUE is NIL or
contains nothing but spaces."
  ;; NIL is a string designator for \"NIL\": without this guard a missing
  ;; value would be sent to PostgreSQL as the text NIL.
  (when value
    (let ((trimmed-string (string-right-trim '(#\Space) value)))
      (unless (string= "" trimmed-string)
        trimmed-string))))

(defun db3-date-to-pgsql-date (value)
  "Turn an eight character DB3 date (YYYYMMDD) into a YYYY-MM-DD string.

Returns NIL when VALUE is NIL, empty, not exactly eight characters long, or
when one of its three components cannot be parsed as an integer."
  (flet ((component (start end)
           ;; :junk-allowed makes PARSE-INTEGER return NIL rather than
           ;; signal an error when the component is not a number.
           (parse-integer value :start start :end end :junk-allowed t)))
    (when (and value (string/= "" value) (= 8 (length value)))
      (let ((yyyy (component 0 4))
            (mm   (component 4 6))
            (dd   (component 6 8)))
        (when (and yyyy mm dd)
          (format nil "~4,'0d-~2,'0d-~2,'0d" yyyy mm dd))))))

55 changes: 1 addition & 54 deletions src/sources/db3/db3-schema.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -33,63 +33,10 @@
(setf (fd-db3 clone) (fd-db3 c))
clone))

(defvar *db3-pgsql-type-mapping*
  (list (cons "C" "text")               ; the field length is ignored
        (cons "N" "numeric")            ; covers both integers and floats
        (cons "L" "boolean")            ; PostgreSQL compatible representation
        (cons "D" "date")               ; DB3 files carry no time zone
        (cons "M" "text"))              ; not handled yet
  "Alist from DB3 field type letter to PostgreSQL type name.")

(defstruct (db3-field
             (:constructor make-db3-field (name type length)))
  "A DB3 field, built positionally from its NAME, TYPE and LENGTH."
  name
  type
  length)

(defun list-all-columns (db3 table)
"Add to TABLE one DB3-FIELD for each of the fields found in DB3.

The fields are read with DB3::FIELDS and registered with ADD-FIELD; nothing
is collected, so the LOOP form itself yields NIL."
(loop
:for field :in (db3::fields db3)
;; NOTE(review): the raw and the STRING-coerced field-type lines below are
;; the removed and the added side of the same diff line. MAKE-DB3-FIELD takes
;; three arguments (name type length), so only one of them belongs in the
;; real call.
:do (add-field table (make-db3-field (db3::field-name field)
(db3::field-type field)
(string (db3::field-type field))
(db3::field-length field)))))

(defmethod cast ((field db3-field) &key &allow-other-keys)
  "Build the PostgreSQL column matching the DB3 FIELD.

The column name is the field name run through APPLY-IDENTIFIER-CASE, the
type name is looked up in *DB3-PGSQL-TYPE-MAPPING*, and the transform is the
function registered below for the field type, or NIL when there is none."
  (let* ((db3-type  (db3-field-type field))
         ;; Field types without an entry here (\"M\" for instance) get no
         ;; transformation function at all.
         (transforms (list (cons "C" #'db3-trim-string)
                           (cons "N" #'db3-numeric-to-pgsql-numeric)
                           (cons "L" #'logical-to-boolean)
                           (cons "D" #'db3-date-to-pgsql-date)))
         (transform (cdr (assoc db3-type transforms :test #'string=)))
         (column-name (apply-identifier-case (db3-field-name field)))
         (pgsql-type (cdr (assoc db3-type *db3-pgsql-type-mapping*
                                 :test #'string=))))
    (make-column :name column-name
                 :type-name pgsql-type
                 :transform transform)))

;; Request inlining of the value transformation functions named here; the
;; declamation has to come before their definitions.
(declaim (inline logical-to-boolean
db3-trim-string
db3-date-to-pgsql-date))

(defun logical-to-boolean (value)
  "Convert a DB3 logical VALUE to a PostgreSQL boolean.

Returns NIL (an SQL NULL) when VALUE is one of the markers of an
uninitialized logical field, \"?\" or a single space; any other VALUE is
returned unchanged."
  ;; A blank logical field is as uninitialized as a \"?\" one, and a single
  ;; space is not valid input for a PostgreSQL boolean.
  (unless (member value '("?" " ") :test #'string=)
    value))

(defun db3-trim-string (value)
  "DB3 strings are right padded with spaces: remove that padding.

Returns VALUE without its trailing spaces, or NIL when VALUE is NIL."
  ;; NIL is a string designator for \"NIL\": without this guard a missing
  ;; value would be loaded as the three letter text NIL.
  (when value
    (string-right-trim '(#\Space) value)))

(defun db3-numeric-to-pgsql-numeric (value)
  "DB3 numerics should be good to go, but might contain spaces.

Returns VALUE without its trailing spaces, or NIL when VALUE is NIL or
contains nothing but spaces."
  ;; NIL is a string designator for \"NIL\": without this guard a missing
  ;; value would be sent to PostgreSQL as the text NIL.
  (when value
    (let ((trimmed-string (string-right-trim '(#\Space) value)))
      (unless (string= "" trimmed-string)
        trimmed-string))))

(defun db3-date-to-pgsql-date (value)
  "Turn an eight character DB3 date (YYYYMMDD) into a YYYY-MM-DD string.

Returns NIL when VALUE is NIL, empty, not exactly eight characters long, or
when one of its three components cannot be parsed as an integer."
  (flet ((component (start end)
           ;; :junk-allowed makes PARSE-INTEGER return NIL rather than
           ;; signal an error when the component is not a number.
           (parse-integer value :start start :end end :junk-allowed t)))
    (when (and value (string/= "" value) (= 8 (length value)))
      (let ((yyyy (component 0 4))
            (mm   (component 4 6))
            (dd   (component 6 8)))
        (when (and yyyy mm dd)
          (format nil "~4,'0d-~2,'0d-~2,'0d" yyyy mm dd))))))

5 changes: 4 additions & 1 deletion test/dbf.load
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@
LOAD DBF
FROM data/reg2013.dbf with encoding cp850
INTO postgresql:///pgloader?public.reg2013
WITH truncate, create table, disable triggers;
WITH truncate, create table, disable triggers

CAST column reg2013.region to integer,
column reg2013.tncc to smallint;

0 comments on commit 39fc78e

Please sign in to comment.