up

svilupp · Jul 8, 2021 · b51c3df · b51c3df
1 parent 7aa80a2
commit b51c3df
Show file tree

Hide file tree

Showing 4 changed files with 124 additions and 4 deletions.
diff --git a/docs/make.jl b/docs/make.jl
@@ -7,7 +7,8 @@ makedocs(
     sitename="EvoTrees.jl",
     authors = "Jeremie Desgagne-Bouchard and contributors.",
     format=Documenter.HTML(),
-    pages = ["Home" => "index.md"],)
+    pages = ["Home" => "index.md"],
+    modules = [EvoTrees],)
 
 deploydocs(repo="https://github.com/Evovest/EvoTrees.jl", 
     target="build",

diff --git a/docs/src/examples.md b/docs/src/examples.md
@@ -0,0 +1,115 @@
+## Regression
+
+Minimal example to fit a noisy sinus wave.
+
+![](figures/regression_sinus.png)
+
+```julia
+using EvoTrees
+using EvoTrees: sigmoid, logit
+
+# prepare a dataset
+features = rand(10000) .* 20 .- 10
+X = reshape(features, (size(features)[1], 1))
+Y = sin.(features) .* 0.5 .+ 0.5
+Y = logit(Y) + randn(size(Y))
+Y = sigmoid(Y)
+𝑖 = collect(1:size(X, 1))
+
+# train-eval split
+𝑖_sample = sample(𝑖, size(𝑖, 1), replace = false)
+train_size = 0.8
+𝑖_train = 𝑖_sample[1:floor(Int, train_size * size(𝑖, 1))]
+𝑖_eval = 𝑖_sample[floor(Int, train_size * size(𝑖, 1))+1:end]
+
+X_train, X_eval = X[𝑖_train, :], X[𝑖_eval, :]
+Y_train, Y_eval = Y[𝑖_train], Y[𝑖_eval]
+
+params1 = EvoTreeRegressor(
+    loss=:linear, metric=:mse,
+    nrounds=100, nbins = 100,
+    λ = 0.5, γ=0.1, η=0.1,
+    max_depth = 6, min_weight = 1.0,
+    rowsample=0.5, colsample=1.0)
+model = fit_evotree(params1, X_train, Y_train, X_eval = X_eval, Y_eval = Y_eval, print_every_n = 25)
+pred_eval_linear = predict(model, X_eval)
+
+# logistic / cross-entropy
+params1 = EvoTreeRegressor(
+    loss=:logistic, metric = :logloss,
+    nrounds=100, nbins = 100,
+    λ = 0.5, γ=0.1, η=0.1,
+    max_depth = 6, min_weight = 1.0,
+    rowsample=0.5, colsample=1.0)
+model = fit_evotree(params1, X_train, Y_train, X_eval = X_eval, Y_eval = Y_eval, print_every_n = 25)
+pred_eval_logistic = predict(model, X_eval)
+
+# Poisson
+params1 = EvoTreeCount(
+    loss=:poisson, metric = :poisson,
+    nrounds=100, nbins = 100,
+    λ = 0.5, γ=0.1, η=0.1,
+    max_depth = 6, min_weight = 1.0,
+    rowsample=0.5, colsample=1.0)
+model = fit_evotree(params1, X_train, Y_train, X_eval = X_eval, Y_eval = Y_eval, print_every_n = 25)
+@time pred_eval_poisson = predict(model, X_eval)
+
+# L1
+params1 = EvoTreeRegressor(
+    loss=:L1, α=0.5, metric = :mae,
+    nrounds=100, nbins=100,
+    λ = 0.5, γ=0.0, η=0.1,
+    max_depth = 6, min_weight = 1.0,
+    rowsample=0.5, colsample=1.0)
+model = fit_evotree(params1, X_train, Y_train, X_eval = X_eval, Y_eval = Y_eval, print_every_n = 25)
+pred_eval_L1 = predict(model, X_eval)
+```
+
+## Quantile Regression
+
+![](figures/quantiles_sinus.png)
+
+```julia
+# q50
+params1 = EvoTreeRegressor(
+    loss=:quantile, α=0.5, metric = :quantile,
+    nrounds=200, nbins = 100,
+    λ = 0.1, γ=0.0, η=0.05,
+    max_depth = 6, min_weight = 1.0,
+    rowsample=0.5, colsample=1.0)
+model = fit_evotree(params1, X_train, Y_train, X_eval = X_eval, Y_eval = Y_eval, print_every_n = 25)
+pred_train_q50 = predict(model, X_train)
+
+# q20
+params1 = EvoTreeRegressor(
+    loss=:quantile, α=0.2, metric = :quantile,
+    nrounds=200, nbins = 100,
+    λ = 0.1, γ=0.0, η=0.05,
+    max_depth = 6, min_weight = 1.0,
+    rowsample=0.5, colsample=1.0)
+model = fit_evotree(params1, X_train, Y_train, X_eval = X_eval, Y_eval = Y_eval, print_every_n = 25)
+pred_train_q20 = predict(model, X_train)
+
+# q80
+params1 = EvoTreeRegressor(
+    loss=:quantile, α=0.8,
+    nrounds=200, nbins = 100,
+    λ = 0.1, γ=0.0, η=0.05,
+    max_depth = 6, min_weight = 1.0,
+    rowsample=0.5, colsample=1.0)
+model = fit_evotree(params1, X_train, Y_train, X_eval = X_eval, Y_eval = Y_eval, print_every_n = 25)
+pred_train_q80 = predict(model, X_train)
+```
+
+## Gaussian Max Likelihood
+
+![](figures/gaussian_sinus.png)
+
+```julia
+params1 = EvoTreeGaussian(
+    loss=:gaussian, metric=:gaussian,
+    nrounds=100, nbins=100,
+    λ = 0.0, γ=0.0, η=0.1,
+    max_depth = 6, min_weight = 1.0,
+    rowsample=0.5, colsample=1.0, seed=123)
+```
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -1,8 +1,7 @@
-## EvoTrees.jl
-
 Docs under development, see [README](https://github.com/Evovest/EvoTrees.jl/blob/master/README.md) in the meantime.
 
 ```@docs
 fit_evotree
 predict
+importance
 ```
diff --git a/src/importance.jl b/src/importance.jl
@@ -15,10 +15,15 @@ function importance!(gain::AbstractVector, tree::Tree)
     end
 end
 
-# loop importance over all trees and sort results
+"""
+    importance(model::GBTree, vars::AbstractVector)
+
+Sorted normalized feature importance based on loss function gain.
+"""
 function importance(model::GBTree, vars::AbstractVector)
     gain = zeros(length(vars))
 
+    # Loop importance over all trees and sort results.
     for tree in model.trees
         importance!(gain, tree)
     end