Skip to content

Commit

Permalink
Sync to make GitHub current with Distributed R development
Browse files Browse the repository at this point in the history
  • Loading branch information
shreya2k7 committed Jul 17, 2015
1 parent 7be3562 commit 77f0b6c
Show file tree
Hide file tree
Showing 83 changed files with 3,060 additions and 851 deletions.
8 changes: 7 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ all: third_party ${ATOMICIO_LIB} ${WORKER_BIN} ${MASTER_BIN} ${MASTER_RLIB} ${EX
lint:
tools/lint.sh ${PRESTO_WORKER_SRC} ${PRESTO_MASTER_SRC} ${PRESTO_WORKER_HEADERS} ${PRESTO_MASTER_HEADERS} ${PRESTO_COMMON_HEADERS} ${PRESTO_COMMON_SRC} ${PRESTO_EXECUTOR_HEADERS} ${PRESTO_EXECUTOR_SRC}

.PHONY: clean third_party test boost docs manual tutorial faq distclean install
.PHONY: clean third_party test boost docs manual tutorial faq distclean install blkin trace_build

${ATOMICIO_LIB}:
$(MAKE) -C third_party/atomicio
Expand All @@ -41,6 +41,12 @@ install:
$(MAKE)
sudo bin/install_distributedR.sh

# Build the blkin tracing library under third_party (forces an 8-way
# parallel sub-make; the target is declared .PHONY above).
blkin:
	$(MAKE) -C third_party -j8 blkin

# Tracing-enabled build: augment GCC_FLAGS (for this target and its
# prerequisites only) with blkin headers, the PERF_TRACE define, and the
# blkin linker flags, then rebuild everything from clean.
trace_build: GCC_FLAGS += -I ${BLKIN_INCLUDE} -DPERF_TRACE ${BLKIN_LINKER_FLAGS}

# NOTE(review): the order of `clean blkin all` is only honored in a serial
# build; under `make -j` prerequisites may run in parallel/any order —
# confirm trace_build is always invoked without -j.
trace_build: clean blkin all

## === Test targets

Expand Down
2 changes: 1 addition & 1 deletion algorithms/HPdata/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: HPdata
Type: Package
Title: Distributed Data Package
Version: 1.1.0
Version: 1.2.0
Date: 2015-04-17
Author: HP Vertica Analytics Team
Maintainer: HP Vertica Analytics Team <[email protected]>
Expand Down
27 changes: 11 additions & 16 deletions algorithms/HPdata/R/db2darray.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@

## A simple function for reading a darray from a table
# tableName: name of the table
# features: a list containing the name of columns corresponding to attributes of the darray (features of samples)
# dsn: ODBC DSN name
# features: a list containing the name of columns corresponding to attributes of the darray (features of samples)
# except: the list of column names that should be excluded (optional)
# npartitions: number of partitions in the darray (it is an optional argument)
# verticaConnector: when it is TRUE (default), Vertica Connector for Distributed R will be used
# loadPolicy: it determines the policy of the Vertica Connector for Distributed R
db2darray <- function(tableName, dsn, features = list(...), npartitions, verticaConnector=TRUE, loadPolicy="local") {
db2darray <- function(tableName, dsn, features = list(...), except=list(...), npartitions, verticaConnector=TRUE, loadPolicy="local") {

if(!is.character(tableName))
stop("The name of the table should be specified")
Expand All @@ -43,12 +44,14 @@ db2darray <- function(tableName, dsn, features = list(...), npartitions, vertica
db_connect <- odbcConnect(dsn)
loadPolicy <- tolower(loadPolicy)

#Close the connection on exit
on.exit(odbcClose(db_connect))

#Validate table name
table <- ""
schema <- ""
table_info <- unlist(strsplit(tableName, split=".", fixed=TRUE))
if(length(table_info) > 2) {
odbcClose(db_connect)
stop("Invalid table name. Table name should be in format <schema_name>.<table_name>. If the table is in 'public' schema, Schema name can be ignored while specifying table name")
} else if(length(table_info) == 2){
schema <- table_info[1]
Expand All @@ -62,18 +65,17 @@ db2darray <- function(tableName, dsn, features = list(...), npartitions, vertica
feature_columns <- ""
norelation <- FALSE
relation_type <- ""

if(missing(features) || length(features)==0 || features=="") {
table_columns <- sqlQuery(db_connect, paste("select column_name from columns where table_schema ILIKE '", schema ,"' and table_name ILIKE '", table,"'", sep=""))
if(!is.data.frame(table_columns)) {
odbcClose(db_connect)
stop(table_columns)
}

if(nrow(table_columns) == 0) {
## check if its a view
view_columns <- sqlQuery(db_connect, paste("select column_name from view_columns where table_schema ILIKE '", schema ,"' and table_name ILIKE '", table,"'", sep=""))
if(!is.data.frame(view_columns)) {
odbcClose(db_connect)
stop(view_columns)
}

Expand All @@ -87,7 +89,6 @@ db2darray <- function(tableName, dsn, features = list(...), npartitions, vertica
# get type of table - external or regular
table_type <- sqlQuery(db_connect, paste("select table_definition from tables where table_schema ILIKE '", schema, "' and table_name ILIKE '", table, "'", sep=""))
if(!is.data.frame(table_type)) {
odbcClose(db_connect)
stop(table_columns)
}
relation_type <- ifelse((is.null(table_type[[1]][[1]]) || is.na(table_type[[1]][[1]])), "table", "external_table")
Expand All @@ -105,7 +106,6 @@ db2darray <- function(tableName, dsn, features = list(...), npartitions, vertica
# get type of table - external or regular
table_type <- sqlQuery(db_connect, paste("select table_definition from tables where table_schema ILIKE '", schema, "' and table_name ILIKE '", table, "'", sep=""))
if(!is.data.frame(table_type)) {
odbcClose(db_connect)
stop(table_columns)
}
relation_type <- ifelse((is.null(table_type[[1]][[1]]) || is.na(table_type[[1]][[1]])), "table", "external_table")
Expand All @@ -115,10 +115,13 @@ db2darray <- function(tableName, dsn, features = list(...), npartitions, vertica
}

if(norelation) {
odbcClose(db_connect)
stop(paste("Table/View ", tableName, " does not exist", sep=""))
}

# excluding the elements in the except list
if(!missing(except) && length(except)!=0 && except!="")
feature_columns <- feature_columns[sapply(feature_columns, function(x) !(x %in% except))]

# we have columns, construct column string
nFeatures <- length(feature_columns) # number of features
columns <- ""
Expand All @@ -141,7 +144,6 @@ db2darray <- function(tableName, dsn, features = list(...), npartitions, vertica
npartitions <- round(npartitions)
if(npartitions <= 0) {
stop("npartitions should be a positive integer number.")
odbcClose(db_connect)
}
}

Expand All @@ -150,11 +152,9 @@ db2darray <- function(tableName, dsn, features = list(...), npartitions, vertica
oneLine <- sqlQuery(db_connect, qryString)
# check valid response from the database
if (! is.data.frame(oneLine) ) {
odbcClose(db_connect)
stop(oneLine)
}
if (! all(sapply(oneLine, function(x ) is.numeric(x) || is.logical(x))) ) {
odbcClose(db_connect)
stop("Only numeric and logical types are supported for darray")
}

Expand All @@ -163,11 +163,9 @@ db2darray <- function(tableName, dsn, features = list(...), npartitions, vertica
nobs <- sqlQuery(db_connect, qryString)
# check valid response from the database
if (! is.data.frame(nobs) ) {
odbcClose(db_connect)
stop(nobs)
}
if(nobs == 0) {
odbcClose(db_connect)
stop("The table is empty!")
}
X <- FALSE
Expand Down Expand Up @@ -262,7 +260,6 @@ db2darray <- function(tableName, dsn, features = list(...), npartitions, vertica
.vertica.connector(e)
} , finally = {
stopDataLoader()
try({ odbcClose(db_connect)}, silent=TRUE)
})

# end of verticaConnector
Expand All @@ -271,7 +268,6 @@ db2darray <- function(tableName, dsn, features = list(...), npartitions, vertica
qryString <- paste("select count(distinct rowid) from", tableName, "where rowid >=0 and rowid <", nobs)
distinct_nobs <- sqlQuery(db_connect, qryString)
if( nobs != distinct_nobs ) {
odbcClose(db_connect)
stop("There is something wrong with rowid. Check the assumptions about rowid column in the manual.")
}

Expand Down Expand Up @@ -324,7 +320,6 @@ db2darray <- function(tableName, dsn, features = list(...), npartitions, vertica

update(x)
})
odbcClose(db_connect)

} # if-else (verticaConnector)

Expand Down
26 changes: 9 additions & 17 deletions algorithms/HPdata/R/db2darrays.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@
## dsn: ODBC DSN name
## resp: a list containing the name of columns corresponding to responses
## pred: a list containing the name of columns corresponding to predictors (optional)
# except: the list of column names that should be excluded from pred (optional)
## npartitions: number of partitions in darrays (it is an optional argument)
# verticaConnector: when it is TRUE (default), Vertica Connector for Distributed R will be used
# loadPolicy: it determines the data loading policy of the Vertica Connector for Distributed R
db2darrays <- function(tableName, dsn, resp = list(...), pred = list(...), npartitions, verticaConnector=TRUE, loadPolicy="local") {
db2darrays <- function(tableName, dsn, resp = list(...), pred = list(...), except=list(...), npartitions, verticaConnector=TRUE, loadPolicy="local") {

if(!is.character(tableName))
stop("The name of the table should be specified")
Expand All @@ -46,12 +47,14 @@ db2darrays <- function(tableName, dsn, resp = list(...), pred = list(...), npart
db_connect <- odbcConnect(dsn)
loadPolicy <- tolower(loadPolicy)

#Close the connection on exit
on.exit(odbcClose(db_connect))

#get projection_name
table <- ""
schema <- ""
table_info <- unlist(strsplit(tableName, split=".", fixed=TRUE))
if(length(table_info) > 2) {
odbcClose(db_connect)
stop("Invalid table name. Table name should be in format <schema_name>.<table_name>. If the table is in 'public' schema, Schema name can be ignored while specifying table name")
} else if(length(table_info) == 2){
schema <- table_info[1]
Expand All @@ -70,15 +73,13 @@ db2darrays <- function(tableName, dsn, resp = list(...), pred = list(...), npart
if(missing(pred) || length(pred)==0 || pred=="") {
table_columns <- sqlQuery(db_connect, paste("select column_name from columns where table_schema ILIKE '", schema ,"' and table_name ILIKE '", table,"'", sep=""))
if(!is.data.frame(table_columns)) {
odbcClose(db_connect)
stop(table_columns)
}

if(nrow(table_columns) == 0) {
## check if its a view
view_columns <- sqlQuery(db_connect, paste("select column_name from view_columns where table_schema ILIKE '", schema ,"' and table_name ILIKE '", table,"'", sep=""))
if(!is.data.frame(view_columns)) {
odbcClose(db_connect)
stop(view_columns)
}

Expand All @@ -97,7 +98,6 @@ db2darrays <- function(tableName, dsn, resp = list(...), pred = list(...), npart
# get type of table - external or regular
table_type <- sqlQuery(db_connect, paste("select table_definition from tables where table_schema ILIKE '", schema, "' and table_name ILIKE '", table, "'", sep=""))
if(!is.data.frame(table_type)) {
odbcClose(db_connect)
stop(table_columns)
}
relation_type <- ifelse((is.null(table_type[[1]][[1]]) || is.na(table_type[[1]][[1]])), "table", "external_table")
Expand All @@ -119,7 +119,6 @@ db2darrays <- function(tableName, dsn, resp = list(...), pred = list(...), npart
# get type of table - external or regular
table_type <- sqlQuery(db_connect, paste("select table_definition from tables where table_schema ILIKE '", schema, "' and table_name ILIKE '", table, "'", sep=""))
if(!is.data.frame(table_type)) {
odbcClose(db_connect)
stop(table_columns)
}
relation_type <- ifelse((is.null(table_type[[1]][[1]]) || is.na(table_type[[1]][[1]])), "table", "external_table")
Expand All @@ -129,20 +128,21 @@ db2darrays <- function(tableName, dsn, resp = list(...), pred = list(...), npart
}

if(norelation) {
odbcClose(db_connect)
stop(paste("Table/View ", tableName, " does not exist", sep=""))
}

# excluding the elements in the except list
if(!missing(except) && length(except)!=0 && except!="")
pred_columns <- pred_columns[sapply(pred_columns, function(x) !(x %in% except))]

# we have columns, construct column string
nResponses <- length(resp) # number of responses (1 for 'binomial logistic' and 'multiple linear' regression)
nPredictors <- length(pred_columns) # number of predictors

if(nResponses == 0) {
odbcClose(db_connect)
stop("No response columns to fetch from table/view")
}
if(nPredictors == 0) {
odbcClose(db_connect)
stop("No predictor columns to fetch from table/view")
}

Expand All @@ -168,7 +168,6 @@ db2darrays <- function(tableName, dsn, resp = list(...), pred = list(...), npart
} else {
npartitions <- round(npartitions)
if(npartitions <= 0) {
odbcClose(db_connect)
stop("npartitions should be a positive integer number.")
}
}
Expand All @@ -178,11 +177,9 @@ db2darrays <- function(tableName, dsn, resp = list(...), pred = list(...), npart
oneLine <- sqlQuery(db_connect, qryString)
# check valid response from the database
if (! is.data.frame(oneLine) ) {
odbcClose(db_connect)
stop(oneLine)
}
if (! all(sapply(oneLine, function(x ) is.numeric(x) || is.logical(x))) ) {
odbcClose(db_connect)
stop("Only numeric and logical types are supported for darray")
}

Expand All @@ -191,11 +188,9 @@ db2darrays <- function(tableName, dsn, resp = list(...), pred = list(...), npart
nobs <- sqlQuery(db_connect, qryString)
# check valid response from the database
if (! is.data.frame(nobs) ) {
odbcClose(db_connect)
stop(nobs)
}
if(nobs == 0) {
odbcClose(db_connect)
stop("The table is empty!")
}
dResult <- FALSE
Expand Down Expand Up @@ -291,7 +286,6 @@ db2darrays <- function(tableName, dsn, resp = list(...), pred = list(...), npart
.vertica.connector(e)
} , finally = {
stopDataLoader()
try({ odbcClose(db_connect)}, silent=TRUE)
})

# end of verticaConnector
Expand All @@ -300,7 +294,6 @@ db2darrays <- function(tableName, dsn, resp = list(...), pred = list(...), npart
qryString <- paste("select count(distinct rowid) from", tableName, "where rowid >=0 and rowid <", nobs)
distinct_nobs <- sqlQuery(db_connect, qryString)
if( nobs != distinct_nobs ) {
odbcClose(db_connect)
stop("There is something wrong with rowid. Check the assumptions about rowid column in the manual.")
}

Expand Down Expand Up @@ -368,7 +361,6 @@ db2darrays <- function(tableName, dsn, resp = list(...), pred = list(...), npart
})

dResult <- list(Y=Y, X=X)
odbcClose(db_connect)

} # if-else (verticaConnector)

Expand Down
Loading

0 comments on commit 77f0b6c

Please sign in to comment.