forked from AdrianAntico/AutoQuant
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAutoWord2VecModeler.Rd
99 lines (85 loc) · 3.39 KB
/
AutoWord2VecModeler.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AutoWord2VecModeler.R
\name{AutoWord2VecModeler}
\alias{AutoWord2VecModeler}
\title{Automated word2vec data generation via H2O}
\usage{
AutoWord2VecModeler(
data,
BuildType = "Combined",
stringCol = c("Text_Col1", "Text_Col2"),
KeepStringCol = FALSE,
model_path = NULL,
vects = 100,
SaveStopWords = FALSE,
MinWords = 1,
WindowSize = 12,
Epochs = 25,
StopWords = NULL,
SaveModel = "standard",
Threads = max(1, parallel::detectCores() - 2),
MaxMemory = "28G",
SaveOutput = FALSE
)
}
\arguments{
\item{data}{Source data table to merge vects onto}
\item{BuildType}{Choose from "individual" or "combined". Individual will build a model for every text column. Combined will build a single model for all columns.}
\item{stringCol}{A character vector of one or more text column names to convert via word2vec}
\item{KeepStringCol}{Set to TRUE to keep the original string column(s) that are converted via word2vec}
\item{model_path}{A string path to the location where you want the model and metadata stored}
\item{vects}{The number of vectors to retain from the word2vec model}
\item{SaveStopWords}{Set to TRUE to save the stop words used}
\item{MinWords}{For H2O word2vec model}
\item{WindowSize}{For H2O word2vec model}
\item{Epochs}{For H2O word2vec model}
\item{StopWords}{For H2O word2vec model}
\item{SaveModel}{Set to "standard" to save normally; set to "mojo" to save as mojo. NOTE: while a mojo can be saved, scoring it with the AutoH2OScoring function is not currently supported.}
\item{Threads}{Number of available threads you want to dedicate to model building}
\item{MaxMemory}{Amount of memory you want to dedicate to model building}
\item{SaveOutput}{Set to TRUE to save your models to file}
}
\description{
This function automatically builds a word2vec model via H2O and merges the resulting word vectors onto your supplied dataset
}
\examples{
\donttest{
data <- AutoWord2VecModeler(data,
BuildType = "individual",
stringCol = c("Text_Col1","Text_Col2"),
KeepStringCol = FALSE,
model_path = normalizePath("./"),
vects = 100,
SaveStopWords = FALSE,
MinWords = 1,
WindowSize = 1,
Epochs = 25,
StopWords = NULL,
SaveModel = "standard",
Threads = max(1,parallel::detectCores()-2),
MaxMemory = "28G",
SaveOutput = TRUE)
}
}
\seealso{
Other Feature Engineering:
\code{\link{AutoDataPartition}()},
\code{\link{AutoFourierFeatures}()},
\code{\link{AutoLagRollStatsScoring}()},
\code{\link{AutoLagRollStats}()},
\code{\link{AutoTransformationCreate}()},
\code{\link{AutoTransformationScore}()},
\code{\link{ContinuousTimeDataGenerator}()},
\code{\link{CreateCalendarVariables}()},
\code{\link{CreateHolidayVariables}()},
\code{\link{DT_GDL_Feature_Engineering}()},
\code{\link{DummifyDT}()},
\code{\link{H2oAutoencoder}()},
\code{\link{ModelDataPrep}()},
\code{\link{Partial_DT_GDL_Feature_Engineering}()},
\code{\link{TimeSeriesFill}()}
}
\author{
Adrian Antico
}
\concept{Feature Engineering}