Skip to content

Commit

Permalink
Merge pull request metabase#8397 from metabase/store-sync-metrics
Browse files Browse the repository at this point in the history
Add TaskHistory for storage of sync metrics
  • Loading branch information
senior authored Aug 30, 2018
2 parents 82c591b + 921d375 commit 461992e
Show file tree
Hide file tree
Showing 15 changed files with 509 additions and 124 deletions.
61 changes: 61 additions & 0 deletions resources/migrations/000_migrations.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4988,3 +4988,64 @@ databaseChangeLog:
- dropColumn:
tableName: metabase_field
columnName: raw_column_id

# Create the TaskHistory table, intended to provide debugging info on our background/quartz processes
- changeSet:
id: 94
author: senior
comment: 'Added 0.31.0'
changes:
- createTable:
tableName: task_history
remarks: 'Timing and metadata info about background/quartz processes'
columns:
- column:
name: id
type: int
autoIncrement: true
constraints:
primaryKey: true
nullable: false
- column:
name: task
type: VARCHAR(254)
remarks: 'Name of the task'
constraints:
nullable: false
# The sync tasks all have a db_id, but there are others that won't, such as the pulses
# task or task history cleanup. The way around this is to create a join table between
# TASK_HISTORY and METABASE_DATABASE, but that doesn't seem worth it right now.
- column:
name: db_id
type: integer
- column:
name: started_at
type: datetime
constraints:
nullable: false
- column:
name: ended_at
type: datetime
constraints:
nullable: false
- column:
name: duration
type: int
constraints:
nullable: false
- column:
name: task_details
remarks: 'JSON string with additional info on the task'
type: text
- createIndex:
indexName: idx_task_history_end_time
tableName: task_history
columns:
- column:
name: ended_at
- createIndex:
indexName: idx_task_history_db_id
tableName: task_history
columns:
- column:
name: db_id
2 changes: 2 additions & 0 deletions src/metabase/cmd/load_from_h2.clj
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
[session :refer [Session]]
[setting :refer [Setting]]
[table :refer [Table]]
[task-history :refer [TaskHistory]]
[user :refer [User]]
[view-log :refer [ViewLog]]]))

Expand Down Expand Up @@ -93,6 +94,7 @@
CollectionRevision
DashboardFavorite
Dimension
TaskHistory
;; migrate the list of finished DataMigrations as the very last thing (all models to copy over should be listed
;; above this line)
DataMigrations])
Expand Down
31 changes: 31 additions & 0 deletions src/metabase/models/task_history.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
(ns metabase.models.task-history
(:require [metabase.models.interface :as i]
[metabase.util :as u]
[toucan
[db :as db]
[models :as models]]))

(models/defmodel TaskHistory :task_history)

(defn cleanup-task-history!
"Deletes older TaskHistory rows. Will order TaskHistory by `ended_at` and delete everything after
`num-rows-to-keep`. This is intended for a quick cleanup of old rows."
[num-rows-to-keep]
;; Ideally this would be one query, but MySQL does not allow nested queries with a limit. The query below orders the
;; tasks by the time they finished, newest first. Then finds the first row after skipping `num-rows-to-keep`. Using
;; the date that task finished, it deletes everything after that. As we continue to add TaskHistory entries, this
;; ensures we'll have a good amount of history for debugging/troubleshooting, but not grow too large and fill the
;; disk.
(when-let [clean-before-date (db/select-one-field :ended_at TaskHistory {:limit 1
:offset num-rows-to-keep
:order-by [[:ended_at :desc]]})]
(db/simple-delete! TaskHistory :ended_at [:<= clean-before-date])))

(u/strict-extend (class TaskHistory)
models/IModel
(merge models/IModelDefaults
{:types (constantly {:task_details :json})})
i/IObjectPermissions
(merge i/IObjectPermissionsDefaults
{:can-read? i/superuser?
:can-write? i/superuser?}))
81 changes: 57 additions & 24 deletions src/metabase/sync/util.clj
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,20 @@
[driver :as driver]
[events :as events]
[util :as u]]
[metabase.models.table :refer [Table]]
[metabase.models
[table :refer [Table]]
[task-history :refer [TaskHistory]]]
[metabase.query-processor.interface :as qpi]
[metabase.sync.interface :as i]
[metabase.util.date :as du]
[metabase.util
[date :as du]
[schema :as su]]
[puppetlabs.i18n.core :refer [trs]]
[ring.util.codec :as codec]
[schema.core :as s]
[taoensso.nippy :as nippy]
[toucan.db :as db]))
[toucan.db :as db])
(:import org.joda.time.DateTime))

;;; +----------------------------------------------------------------------------------------------------------------+
;;; | SYNC OPERATION "MIDDLEWARE" |
Expand Down Expand Up @@ -281,11 +286,11 @@
;; Convert the hash bytes to a string for storage/comparison with the hash in the database
codec/base64-encode))

(s/defn calculate-duration :- s/Str
(s/defn calculate-duration-str :- s/Str
"Given two datetimes, caculate the time between them, return the result as a string"
[begin-time :- (s/protocol tcoerce/ICoerce)
end-time :- (s/protocol tcoerce/ICoerce)]
(-> (- (tcoerce/to-long end-time) (tcoerce/to-long begin-time))
(-> (du/calculate-duration begin-time end-time)
;; Millis -> Nanos
(* 1000000)
du/format-nanoseconds))
Expand All @@ -297,9 +302,8 @@

(def ^:private TimedSyncMetadata
"Metadata common to both sync steps and an entire sync/analyze operation run"
{:start-time s/Str
:end-time s/Str
:duration s/Str})
{:start-time DateTime
:end-time DateTime})

(def StepRunMetadata
"Map with metadata about the step. Contains both generic information like `start-time` and `end-time` and step
Expand Down Expand Up @@ -350,16 +354,15 @@
#(sync-fn database))
end-time (time/now)]
[step-name (assoc results
:start-time (datetime->str start-time)
:end-time (datetime->str end-time)
:duration (calculate-duration start-time end-time)
:start-time start-time
:end-time end-time
:log-summary-fn log-summary-fn)]))

(s/defn ^:private log-sync-summary
"Log a sync/analyze summary message with info from each step"
[operation :- s/Str
database :- i/DatabaseInstance
{:keys [start-time end-time duration steps log-summary-fn]} :- SyncOperationMetadata]
{:keys [start-time end-time steps log-summary-fn]} :- SyncOperationMetadata]
;; Note this needs to either stay nested in the `debug` macro call or be guarded by an log/enabled?
;; call. Constructing the log below requires some work, no need to incur that cost debug logging isn't enabled
(log/debug
Expand All @@ -371,10 +374,10 @@
"# %s\n"
"# %s\n")
(trs "Completed {0} on {1}" operation (:name database))
(trs "Start: {0}" start-time)
(trs "End: {0}" end-time)
(trs "Duration: {0}" duration))
(apply str (for [[step-name {:keys [start-time end-time duration log-summary-fn] :as step-info}] steps]
(trs "Start: {0}" (datetime->str start-time))
(trs "End: {0}" (datetime->str end-time))
(trs "Duration: {0}" (calculate-duration-str start-time end-time)))
(apply str (for [[step-name {:keys [start-time end-time log-summary-fn] :as step-info}] steps]
(format (str "# ---------------------------------------------------------------\n"
"# %s\n"
"# %s\n"
Expand All @@ -383,23 +386,53 @@
(when log-summary-fn
(format "# %s\n" (log-summary-fn step-info))))
(trs "Completed step ''{0}''" step-name)
(trs "Start: {0}" start-time)
(trs "End: {0}" end-time)
(trs "Duration: {0}" duration))))
(trs "Start: {0}" (datetime->str start-time))
(trs "End: {0}" (datetime->str end-time))
(trs "Duration: {0}" (calculate-duration-str start-time end-time)))))
"#################################################################\n")))

(def ^:private SyncOperationOrStepRunMetadata
(s/conditional
#(contains? % :steps)
SyncOperationMetadata
:else
StepRunMetadata))

(s/defn ^:private create-task-history
[task-name :- su/NonBlankString
database :- i/DatabaseInstance
{:keys [start-time end-time]} :- SyncOperationOrStepRunMetadata]
{:task task-name
:db_id (u/get-id database)
:started_at (du/->Timestamp start-time)
:ended_at (du/->Timestamp end-time)
:duration (du/calculate-duration start-time end-time)})

(s/defn ^:private store-sync-summary!
[operation :- s/Str
database :- i/DatabaseInstance
{:keys [steps] :as sync-md} :- SyncOperationMetadata]
(db/insert-many! TaskHistory
(cons (create-task-history operation database sync-md)
(for [[step-name step-info] steps
:let [task-details (dissoc step-info :start-time :end-time :log-summary-fn)]]
(assoc (create-task-history step-name database step-info)
:task_details (when (seq task-details)
task-details))))))

(s/defn run-sync-operation
"Run `sync-steps` and log a summary message"
[operation :- s/Str
database :- i/DatabaseInstance
sync-steps :- [StepDefinition]]
(let [start-time (time/now)
step-metadata (mapv #(run-step-with-metadata database %) sync-steps)
end-time (time/now)]
(log-sync-summary operation database {:start-time (datetime->str start-time)
:end-time (datetime->str end-time)
:duration (calculate-duration start-time end-time)
:steps step-metadata})))
end-time (time/now)
sync-metadata {:start-time start-time
:end-time end-time
:steps step-metadata}]
(store-sync-summary! operation database sync-metadata)
(log-sync-summary operation database sync-metadata)))

(defn sum-numbers
"Similar to a 2-arg call to `map`, but will add all numbers that result from the invocations of `f`"
Expand Down
58 changes: 58 additions & 0 deletions src/metabase/task/task_history_cleanup.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
(ns metabase.task.task-history-cleanup
(:require [clj-time.core :as time]
[clojure.tools.logging :as log]
[clojurewerkz.quartzite
[jobs :as jobs]
[triggers :as triggers]]
[clojurewerkz.quartzite.schedule.cron :as cron]
[metabase.models.task-history :as thist :refer [TaskHistory]]
[metabase.task :as task]
[metabase.util.date :as du]
[puppetlabs.i18n.core :refer [trs]]
[toucan.db :as db]))

(def ^:private ^:const job-name "task-history-cleanup")
(def ^:private ^:const job-key (format "metabase.task.%s.job" job-name))
(def ^:private ^:const trigger-key (format "metabase.task.%s.trigger" job-name))

(defonce ^:private job (atom nil))
(defonce ^:private trigger (atom nil))

(def ^:private history-rows-to-keep
"Maximum number of TaskHistory rows. This is not a `const` so that we can redef it in tests"
100000)

(defn- task-history-cleanup!
[]
(log/debug "Cleaning up task history")
(let [before-cleanup (time/now)
result (thist/cleanup-task-history! history-rows-to-keep)
after-cleanup (time/now)]
(db/insert! TaskHistory {:task job-name
:started_at (du/->Timestamp before-cleanup)
:ended_at (du/->Timestamp after-cleanup)
:duration (du/calculate-duration before-cleanup after-cleanup)})
(log/debug (trs "Task history cleanup successful, rows were {0}deleted"
(when-not result (str (trs "not")
" "))))))

(jobs/defjob TaskHistoryCleanup
[_]
(task-history-cleanup!))

(defn task-init
"Job initialization"
[]
;; build our job
(reset! job (jobs/build
(jobs/of-type TaskHistoryCleanup)
(jobs/with-identity (jobs/key job-key))))
;; build our trigger
(reset! trigger (triggers/build
(triggers/with-identity (triggers/key trigger-key))
(triggers/start-now)
(triggers/with-schedule
;; run every day at midnight
(cron/cron-schedule "0 0 * * * ? *"))))
;; submit ourselves to the scheduler
(task/schedule-task! @job @trigger))
10 changes: 9 additions & 1 deletion src/metabase/util/date.clj
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
[clojure.math.numeric-tower :as math]
[clojure.tools.logging :as log]
[metabase.util :as u]
[puppetlabs.i18n.core :refer [trs]])
[metabase.util.schema :as su]
[puppetlabs.i18n.core :refer [trs]]
[schema.core :as s])
(:import clojure.lang.Keyword
[java.sql Time Timestamp]
[java.util Calendar Date TimeZone]
Expand Down Expand Up @@ -456,3 +458,9 @@
(some-> (str->date-time-with-formatters ordered-time-parsers date-str tz)
coerce/to-long
Time.)))

(s/defn calculate-duration :- su/NonNegativeInt
"Given two datetimes, caculate the time between them, return the result in millis"
[begin-time :- (s/protocol coerce/ICoerce)
end-time :- (s/protocol coerce/ICoerce)]
(- (coerce/to-long end-time) (coerce/to-long begin-time)))
6 changes: 6 additions & 0 deletions src/metabase/util/schema.clj
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@
(s/constrained s/Int (partial < 0) (tru "Integer greater than zero"))
(tru "value must be an integer greater than zero.")))

(def NonNegativeInt
"Schema representing an integer 0 or greater"
(with-api-error-message
(s/constrained s/Int (partial <= 0) (tru "Integer greater than or equal to zero"))
(tru "value must be an integer zero or greater.")))

(def PositiveNum
"Schema representing a numeric value greater than zero. This allows floating point numbers and integers."
(with-api-error-message
Expand Down
Loading

0 comments on commit 461992e

Please sign in to comment.