Support for R (mlflow#370)

* mlflow_log_artifact() support for wasb using azure cli * support relative artifacts paths across storage services * consolidate remote artifact helpers in single file * expand file path if needed when running mlflow_run() * add rstudio section to readme * basic model serialization using serving function formals as context * update rstudio integration screenshot * mlflow_source() should end run even if not explicitly instructed * fix installation instruction * check port available when choosing port * fix mlflow_ui() when using local service * fix unsused param in mlflow_log_artifact() * add pointer to advanced r style guidelines * update tests with model and mlflow_source() changes * import _get_model_log_dir from mlflow.tracking.utils * rebuild docs * allow unquoted uri in `mlflow_source()` * implement type checking in `mlflow_param()` * rename experiment.R and experiment-rest.R to tracking.R and tracking-rest.R * remove `mlflow_experiment()` * add `mlflow_set_active_experiment()` and add `activate` parameter to `mlflow_create_experiment()` * rebuild docs * export `mlflow_set_active_run()` and have it return `experiment_id` invisibly * fix r cmd check file warnings * add mlflow_ui() test * add openssl to r description * attempt to run r and python mlflow tests in travis fix travis * explicitly list r packages since description file is not top level * use standard r installation script in travis * use bash while calling travis script helpers * switch to r directory for package dependencies to install * fix travis bash calls * fix travis bash script typos * install mlflow for r tests and latest sources * explicit return of url when launching mlflow_ui() * activate test environment for python tests * mlflow_load_model() does not need to be public * source commands to avoid reactivating virtualenv * fix to source command * add bash directives to install-travis * add exec permissions to travis bash scripts * pick recent changes made to .travis.yml * fix artifact 
related r cmd check warnings * fix mlflow_connect() handle r cmd check warning * fix mlflow_predict() related r cmd check warnings * rebuild docs * fix additional r cmd check warnings in python and connection * additional r cmd check fixes for missing doc params * activate virtualenv for python travis tests * additional r cmd check fixes * add forge to description file * support for re-activating conda environment in travis test * merge recent changes to travis scripts * fix mlflow_active_run() signature to be consistent * rename mlflow_restore() to mlflow_restore_snapshot() * add warning when snapshot restore not set to true * update readme * use mlflow_home in tests if set * print working directory in travis r tests * set mlflow_home before running r travis script * fix mlflow_home under travis r script * rename mlflow_predict() to mlflow_rfunc_predict() * rename mlflow_serve() to mlflow_rfunc_serve() * code coverage with codecov * fix travis merge conflict * rename train_save example * add params test case * update readme with params section * validate params dir in params test * minor fix to mlflow_cli() doc * fix r cmd check --as-cran warning * run test script to display all error messages * add mlflow_serve() tests * remove appveyor from r readme * add encoding to rfunc init file * disable code coverage for user interface and subprocesses not tracked * fix rfunc python formatting * fix minor styling issues * add lintr file to r project * add code styling test * update readme with lintr::lint_package() pointer * fix typo in namespace introduced by style cleanup * remove api notes * support to generate rst diocs from rd files * clean up r doc headers * ignore a couple r doc support files * remove reference manual intermediate file * ignore reference manual intermediate file * remove external objects section * add intro for r api docs * build rst docs from r docs using document.r * add docs entry to r api * turn off lintr commenting * import examplecode 
extension from sphinxcontrib project * patch examplecode to handle notranslate class updates * add r code block to quick start * couple more quick start entries for R * only toggle one element at a time * update description file * only one creator allowed in description * remove unused import sys * fix string assertion under python 3 * remove character assertion to avoid branching into python 2.7 and 3 * rebuild docs * add test for `mlflow_create_experiment()` * add `artifact_location` to `mlflow_create_experiment()` * test for mlflow_create_experiment() `active` parameter * tweak message for existing experiment in `mlflow_create_experiment()` and add test * mlflow_disconnect() shouldn't take argument, and removes active connection * mlflow_clear_test_dir() should also clear connection * add test for mlflow_list_experiments() * add test for mlflow_get_experiment() * fix `tags` parameter in mlflow_create_run() * update mlflow_create_run() and add test * add test for tags in mlflow_create_run() * rebuild docs * fix typo and dead comment * running mlflow_install() requires installing R package * print diagnostics for test_make_tarfile test * fix compare logic in test_make_tarfile() diagnostics * fix typo in travis troubleshooting change * use expect_setequal() instead of expect_identical for mlflow_create_experiment() test * ensure same copy using shutil.copytree while creating test tars * attempt to revert travis changes to traoubleshoot build * revert test_make_tarfile() travis investigation changes * attempt to build r package without changing mlflow script * fix before_install for r travis script * remove refactored travis scripts * improve diagnostics for mlflow_ui() test * fix connection wait_for() issue * disable failing test assertion * skip mlflow_ui() test in travis while troubleshooting * rebuild docs * reexport crate() * refactor model saving/serving to assume crate * fix model saving test * attempt to fix model serving test * fix serving test * fix 
DESCRIPTION * missing comma in remotes * add mlflow_get_param() * disconnecting should reset globals * avoid reinstalling packages and support for uninstall * rebuild docs * mlflow_get_param() and mlflow_get_metric() should return data frames * only istall once mlflow while testing * no linting during code coverage * add support for plain sections in documents * install section for r tutorial * split training section per language * add train.R to mirror train.py tutorial * add train.R to tutorial doc * abort saving and loading models if they are not created using crate() * use crate() in r tutorial * fail early while logging metrics to avoid internal server error in rest api * add r notebook into tutorial folder * basic implementation for mlflow_log_artifact() * increase default server startup wait time * log r2 as numeric since matrix is unsupported * allow mlflow_run() to use the current directory * improve logging with mlflow_verbose helpers * complete model training tutorial section * fix mlflow_is_verbose() blocking tests * use correct syntax in train.R example * fix `no_conda` parameter in mlflow_run() * style * use inherits() for testing object class * default mlflow_run() uri to "." 
* don't persist model locally in mlflow_log_model() * fix mlflow_start_run() syntax in examples * run train.R from R but avoid using conda deps in same folder * don't create run in mlflow_get_metric_history() and add test * turn off standardization to match existing tutorial example * add compare models section * move r tutorial files to each own folder to customize mlproject file * packaging the code section for r tutorial * reuse type checks from prediciton when serving * fix * match more closely mlflow predictions response signature * make crated prediction function from tutorial servable * add serving model section to r tutorial * adjust serve r model test to use the new signature * start refactoring mlflow_update_run() * simplify glmnet predictor function in tutorial * refactor run_info data frame conversion with tidy_run_info() * rename `model_dir` to `model_path` to be consistent with Python * rename `mlflow_ensure_run()` to `mlfloe_ensure_run_id()` and refactor * add `run_id` parameter to rfunc predict/serve, also rename params to be consistent with python * fix predict test * mlflow_log_artifact to grab an active connection * consistently use run_uuid vs run_id * overwrite when logging local artifact * attempt to write run_id in MLmodel yaml * test mlflow_log_model() * update parameters for mlflow_log_model() and mlflow_log_artifact() * use explicit entry_point in tutorial docs * switch entire language section in docs when toggling * fix js styling in docs language section script * fix mlflow can save model function test * fix tracking into remote server from recent connection changes * use active experiement in tracking when available * also reset the run when changing remote tracking servers * move also r wine tutorial to examples * upload and save artifacts based on cli to avoid reimplenting uploads in R * support remote tracking servers and artifact upload * fix lintr issue * extensibility support for mlflow_save_model() * support to extend 
mlflow_load_model() * support for extending model flavor predicitons and refactor into flavor.R * improve serve logging and test diagnostics * support loading models even when mlflow not imported * rebuild r and python docs * add support for including custom dependencies in model * fix model flavor structure * fix parameter typo * retrieve user name when using rest api * return run in mlflow_run() * rebuild r and python docs * remove redundant flavor check * add model timestamp in utc * fix expected tags test from new mlflow.runName added in mlflow * fix parameter name in call inside mlflow_connect()
Subramanya3585 · Sep 11, 2018 · 29f5e6e · 29f5e6e
1 parent a9752c6
commit 29f5e6e
Show file tree

Hide file tree

Showing 119 changed files with 11,404 additions and 163 deletions.
diff --git a/.gitignore b/.gitignore
@@ -88,3 +88,7 @@ venv.bak/
 
 # java targets
 target/
+
+# R notebooks
+.Rproj.user
+example/tutorial/R/*.nb.html
diff --git a/.travis.yml b/.travis.yml
@@ -8,6 +8,27 @@ matrix:
   include:
     - python: 2.7
     - python: 3.6
+    - language: r
+      dist: trusty
+      cache: packages
+      before_install:
+        - export NOT_CRAN=true
+        - export MLFLOW_TEST_REQUIREMENTS=false
+        - cd R/mlflow
+        - Rscript -e 'install.packages("devtools")'
+        - Rscript -e 'devtools::install_deps(dependencies = TRUE)'
+        - cd ../..
+      script:
+        - cd R/mlflow
+        - R CMD build .
+        - R CMD check --no-build-vignettes --no-manual --no-tests mlflow*tar.gz
+        - cd tests
+        - export LINTR_COMMENT_BOT=false
+        - Rscript ../.travis.R
+      after_success:
+        - export COVR_RUNNING=true
+        - Rscript -e 'covr::codecov()'
+
 install:
   - sudo mkdir -p /travis-install
   - sudo chown travis /travis-install
@@ -31,7 +52,9 @@ install:
   - python --version
   - pip install --upgrade pip
   - pip install -r dev-requirements.txt -q
-  - pip install -r test-requirements.txt -q
+  - if [[ "$MLFLOW_TEST_REQUIREMENTS" != "false" ]]; then
+      pip install -r test-requirements.txt -q;
+    fi
   - pip install .
   - export MLFLOW_HOME=$(pwd)
   # Remove boto config present in Travis VMs (https://github.com/travis-ci/travis-ci/issues/7940)

diff --git a/R/mlflow/.Rbuildignore b/R/mlflow/.Rbuildignore
@@ -0,0 +1,13 @@
+^.*\.Rproj$
+^\.Rproj\.user$
+mlruns
+^mlflow-model$
+packrat
+^\.travis\.R$
+^README.Rmd$
+^docs$
+^model$
+^r-dependencies.txt$
+document.R
+Reference_Manual_mlflow.md
+.lintr
diff --git a/R/mlflow/.gitignore b/R/mlflow/.gitignore
@@ -0,0 +1,18 @@
+# History files
+.Rhistory
+.Rapp.history
+# Session Data files
+.RData
+# User Data files
+.Ruserdata
+# Example code in package build process
+*-Ex.R
+# RStudio files
+.Rproj.user
+mlruns/
+.DS_Store
+model/
+internal
+packrat.lock
+r-dependencies.txt
+Reference_Manual_mlflow.md
diff --git a/R/mlflow/.lintr b/R/mlflow/.lintr
@@ -0,0 +1 @@
+linters: with_defaults(line_length_linter(120), closed_curly_linter = NULL, open_curly_linter = NULL, absolute_paths_linter = NULL)
diff --git a/R/mlflow/.travis.R b/R/mlflow/.travis.R
@@ -0,0 +1,5 @@
+# Install the freshly built package tarball and then run the test suite.
+#
+# Expected to be run from the package's `tests` directory after `R CMD build .`
+# has written `mlflow_<version>.tar.gz` into the parent directory.
+
+# Match only the source tarball by file name, rather than any path that
+# happens to contain "mlflow_".
+package <- dir("../", pattern = "^mlflow_.*\\.tar\\.gz$", full.names = TRUE)
+
+# Fail fast with a clear message instead of handing an empty file list to
+# install.packages() and then testing whatever happens to be installed.
+if (length(package) == 0) {
+  stop("No mlflow source tarball found in the parent directory; run `R CMD build .` first.",
+       call. = FALSE)
+}
+
+install.packages(package, repos = NULL, type = "source")
+
+source("testthat.R")
diff --git a/R/mlflow/DESCRIPTION b/R/mlflow/DESCRIPTION
@@ -0,0 +1,47 @@
+Package: mlflow
+Type: Package
+Title: Interface to MLflow
+Version: 0.1.0
+Authors@R: c(
+    person("Javier", "Luraschi", email = "[email protected]", role = c("aut", "cre")),
+    person("Kevin", "Kuo", role = c("aut"), email = "[email protected]",
+           comment = c(ORCID = "0000-0001-7803-7901")),
+    person(family = "RStudio", role = c("cph"))
+    )
+Maintainer: The package maintainer <[email protected]>
+Description: R interface to MLflow, an open source platform for the complete machine
+    learning lifecycle; see <https://mlflow.org/>. This package supports installing
+    MLflow, tracking experiments, creating and running projects, and saving and serving
+    models.
+License: Apache License 2.0
+Encoding: UTF-8
+LazyData: true
+Depends:
+  R (>= 3.1.2)
+Imports:
+    aws.s3,
+    forge,
+    fs,
+    git2r,
+    httpuv,
+    httr,
+    jsonlite,
+    openssl,
+    processx,
+    reticulate,
+    rlang,
+    packrat,
+    purrr,
+    swagger,
+    withr,
+    xml2,
+    yaml,
+    carrier (>= 0.0.0.9000)
+RoxygenNote: 6.1.0
+Suggests: 
+  covr,
+  lintr,
+  testthat
+Remotes: 
+  rstudio/forge,
+  r-lib/carrier
diff --git a/R/mlflow/NAMESPACE b/R/mlflow/NAMESPACE
@@ -0,0 +1,77 @@
+# Generated by roxygen2: do not edit by hand
+
+S3method(mlflow_load_flavor,crate)
+S3method(mlflow_predict_flavor,crate)
+S3method(mlflow_save_flavor,crate)
+S3method(mlflow_ui,"NULL")
+S3method(mlflow_ui,character)
+S3method(mlflow_ui,mlflow_connection)
+S3method(with,mlflow_active_run)
+export(crate)
+export(mlflow_active_experiment)
+export(mlflow_active_run)
+export(mlflow_cli)
+export(mlflow_connect)
+export(mlflow_create_experiment)
+export(mlflow_create_run)
+export(mlflow_disconnect)
+export(mlflow_end_run)
+export(mlflow_get_experiment)
+export(mlflow_get_metric)
+export(mlflow_get_metric_history)
+export(mlflow_get_param)
+export(mlflow_get_run)
+export(mlflow_install)
+export(mlflow_list_experiments)
+export(mlflow_load_flavor)
+export(mlflow_log_artifact)
+export(mlflow_log_metric)
+export(mlflow_log_model)
+export(mlflow_log_param)
+export(mlflow_param)
+export(mlflow_predict_flavor)
+export(mlflow_restore_snapshot)
+export(mlflow_rfunc_predict)
+export(mlflow_rfunc_serve)
+export(mlflow_run)
+export(mlflow_save_flavor)
+export(mlflow_save_model)
+export(mlflow_server)
+export(mlflow_set_active_experiment)
+export(mlflow_set_active_run)
+export(mlflow_set_tracking_uri)
+export(mlflow_snapshot)
+export(mlflow_source)
+export(mlflow_start_run)
+export(mlflow_tracking_uri)
+export(mlflow_ui)
+export(mlflow_uninstall)
+export(mlflow_update_run)
+import(forge)
+import(jsonlite)
+import(swagger)
+importFrom(carrier,crate)
+importFrom(httpuv,runServer)
+importFrom(httpuv,startDaemonizedServer)
+importFrom(httpuv,stopServer)
+importFrom(httr,GET)
+importFrom(httr,POST)
+importFrom(httr,add_headers)
+importFrom(httr,content)
+importFrom(httr,timeout)
+importFrom(jsonlite,fromJSON)
+importFrom(openssl,rand_num)
+importFrom(processx,process)
+importFrom(processx,run)
+importFrom(purrr,"%>%")
+importFrom(reticulate,conda_binary)
+importFrom(reticulate,conda_create)
+importFrom(reticulate,conda_install)
+importFrom(reticulate,conda_list)
+importFrom(rlang,"%||%")
+importFrom(utils,browseURL)
+importFrom(utils,read.csv)
+importFrom(utils,write.csv)
+importFrom(withr,with_envvar)
+importFrom(xml2,as_list)
+importFrom(yaml,write_yaml)
diff --git a/R/mlflow/R/artifact.R b/R/mlflow/R/artifact.R
@@ -0,0 +1,59 @@
+#' Log Artifact
+#'
+#' Logs a specific file or directory as an artifact of a run by invoking
+#' \code{mlflow artifacts log-artifact} through \code{mlflow_cli()}.
+#'
+#' @param path The file or directory to log as an artifact. A leading \code{~}
+#'   is expanded to the user's home directory.
+#' @param artifact_path Destination path within the run's artifact URI.
+#' @param run_uuid The run associated with this artifact. Defaults to the
+#'   active run; an error is raised when it is not given and there is no
+#'   active run.
+#'
+#' @return \code{NULL}, invisibly.
+#'
+#' @details
+#'
+#' When logging to Amazon S3, ensure that the user has a proper policy
+#' attached to it, for instance:
+#'
+#' \code{
+#' {
+#' "Version": "2012-10-17",
+#' "Statement": [
+#'   {
+#'     "Sid": "VisualEditor0",
+#'     "Effect": "Allow",
+#'     "Action": [
+#'       "s3:PutObject",
+#'       "s3:GetObject",
+#'       "s3:ListBucket",
+#'       "s3:GetBucketLocation"
+#'       ],
+#'     "Resource": [
+#'       "arn:aws:s3:::mlflow-test/*",
+#'       "arn:aws:s3:::mlflow-test"
+#'       ]
+#'   }
+#'   ]
+#' }
+#' }
+#'
+#' Additionally, at least the \code{AWS_ACCESS_KEY_ID} and \code{AWS_SECRET_ACCESS_KEY}
+#' environment variables must be set to the corresponding key and secrets provided
+#' by Amazon IAM.
+#'
+#' @export
+mlflow_log_artifact <- function(path, artifact_path = NULL, run_uuid = NULL) {
+  # Explicit run first, then the active run, otherwise give up.
+  run_uuid <- run_uuid %||%
+    mlflow_active_run()$run_info$run_uuid %||%
+    stop("`run_uuid` must be specified when there is no active run.")
+
+  # mlflow_cli() hands its arguments to processx rather than to a shell, so a
+  # leading `~` would otherwise reach the MLflow CLI unexpanded.
+  path <- path.expand(path)
+
+  # The flag and its value are only added when a destination was requested;
+  # a NULL here is dropped when mlflow_cli() flattens its arguments.
+  artifact_args <- if (!is.null(artifact_path)) c("--artifact-path", artifact_path)
+
+  mlflow_cli("artifacts",
+             "log-artifact",
+             "--local-file",
+             path,
+             artifact_args,
+             "--run-id",
+             run_uuid)
+
+  invisible(NULL)
+}
diff --git a/R/mlflow/R/cli.R b/R/mlflow/R/cli.R
@@ -0,0 +1,56 @@
+#' MLflow Command
+#'
+#' Executes a generic MLflow command through the command line interface.
+#'
+#' @param ... The parameters to pass to the command line. All values are
+#'   flattened into a single vector of arguments; \code{NULL} values are
+#'   dropped.
+#' @param background Should this command be triggered as a background task?
+#'   Defaults to \code{FALSE}.
+#' @param echo Print the standard output and error to the screen? Defaults to
+#'   \code{TRUE}, does not apply to background tasks.
+#'
+#' @return Invisibly, the result of \code{processx::run()} for foreground
+#'   commands, or a \code{processx::process} object for background tasks.
+#'
+#' @examples
+#' \dontrun{
+#' library(mlflow)
+#' mlflow_install()
+#'
+#' mlflow_cli("server", "--help")
+#' }
+#'
+#' @importFrom processx run
+#' @importFrom processx process
+#' @importFrom withr with_envvar
+#' @export
+mlflow_cli <- function(..., background = FALSE, echo = TRUE) {
+  # Flatten once so both execution modes receive the same argument vector.
+  args <- unlist(list(...))
+
+  verbose <- mlflow_is_verbose()
+
+  # The `mlflow` executable is looked up next to the Python binary.
+  python <- dirname(python_bin())
+  mlflow_bin <- file.path(python, "mlflow")
+
+  env <- list(
+    # Join with the platform's PATH separator (":" on Unix-alikes, ";" on
+    # Windows) instead of assuming ":".
+    PATH = paste(Sys.getenv("PATH"), python, sep = .Platform$path.sep),
+    MLFLOW_CONDA_HOME = python_conda_home(),                      # devel version
+    MLFLOW_MLFLOW_CONDA = file.path(python_conda_bin(), "conda"), # pip version (deprecated)
+    MLFLOW_TRACKING_URI = mlflow_tracking_uri()
+  )
+
+  # The environment variables are only set while the command is launched.
+  result <- with_envvar(env, {
+    if (background) {
+      process$new(mlflow_bin, args = args, echo_cmd = verbose, supervise = TRUE)
+    } else {
+      run(mlflow_bin, args = args, echo = echo, echo_cmd = verbose)
+    }
+  })
+
+  invisible(result)
+}
+
+# Writes the `stdout` element of a CLI response to a fresh temporary ".txt"
+# file and returns the path of that file.
+mlflow_cli_file_output <- function(response) {
+  output_path <- tempfile(fileext = ".txt")
+  writeLines(text = response$stdout, con = output_path)
+  output_path
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		linters: with_defaults(line_length_linter(120), closed_curly_linter = NULL, open_curly_linter = NULL, absolute_paths_linter = NULL)