Skip to content

Commit

Permalink
updated pc_synonyms cts_convert, and cir_query to use match i…
Browse files Browse the repository at this point in the history
…nstead of `choices` ropensci#263
  • Loading branch information
Aariq committed Jun 23, 2020
1 parent a41f299 commit d0defe7
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 67 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* The `"type"` argument in `ci_query()` and `aw_query()` has been changed to `"from"` for consistency with other functions
* `fn_percept()` and `cts_compinfo()` now have `"query"` and `"from"` arguments for consistency with other functions
* Possible values for `"from"` have been made more consistent across functions
* `pc_synonyms()`, `cts_convert()`, and `cir_query()` have been changed to use the `match` argument instead of `choices` for consistency with other functions

# webchem 1.0.0

Expand Down
51 changes: 26 additions & 25 deletions R/cir.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@
#' @param resolver character; what resolver should be used? If NULL (default)
#' the identifier type is detected and the different resolvers are used in turn.
#' See details for possible resolvers.
#' @param first deprecated, use choices = 1 to return only the first result
#' @param choices if \code{choices = 1}, returns only the first result. To get a
#' number of results to choose from in an interactive menu, provide the number
#' of choices you want or "all" to choose from all synonyms.
#' @param match character; How should multiple hits be handled? \code{"all"}
#' returns all matches, \code{"first"} returns only the first result,
#' \code{"ask"} enters an interactive mode and the user is asked for input,
#' \code{"na"} returns \code{NA} if multiple hits are found.
#' @param choices deprecated. Use the \code{match} argument instead.
#' @param verbose logical; should a verbose output be printed on the console?
#' @param ... currently not used.
#' @return A list of character vectors. If first = TRUE a vector.
#' @return A list of character vectors.
#' @details
#' CIR can resolve the following types of \code{identifier}: Chemical Names,
#' IUPAC names,
Expand Down Expand Up @@ -61,14 +62,14 @@
#' \item \code{'protonable_group_count'} (Number of protonable groups).
#' }
#'
#' CIR first tries to determine the indetifier type submitted and then
#' CIR first tries to determine the identifier type submitted and then
#' uses 'resolvers' to look up the data.
#' If no \code{resolver} is supplied, CIR tries different resolvers in
#' turn till a hit is found.
#' E.g. for names CIR tries first to look up in OPSIN and if this fails
#' the local name index of CIR.
#' However, it can be also specified which resolvers to use
#' (if you know e.g. know your indentifier type)
#' (e.g. if you know your identifier type).
#' Possible \code{resolvers} are:
#' \itemize{
#' \item \code{'name_by_cir'} (Lookup in name index of CIR),
Expand Down Expand Up @@ -100,24 +101,29 @@
#' @examples
#' \donttest{
#' # might fail if API is not available
#' cir_query('Triclosan', 'cas')
#' cir_query("3380-34-5", 'cas', first = TRUE)
#' cir_query("3380-34-5", 'cas', resolver = 'cas_number')
#' cir_query("3380-34-5", 'smiles')
#' cir_query('Triclosan', 'mw')
#' cir_query("Triclosan", "cas")
#' cir_query("3380-34-5", "cas", match = "first")
#' cir_query("3380-34-5", "cas", resolver = "cas_number")
#' cir_query("3380-34-5", "smiles")
#' cir_query("Triclosan", "mw")
#'
#' # multiple inputs
#' comp <- c('Triclosan', 'Aspirin')
#' cir_query(comp, 'cas', first = TRUE)
#' comp <- c("Triclosan", "Aspirin")
#' cir_query(comp, "cas", match = "first")
#'
#'}
#' @export
cir_query <- function(identifier, representation = 'smiles', resolver = NULL,
first = FALSE, choices = NULL, verbose = TRUE, ...){
if (first == TRUE) {
message("`first` is deprecated. Using `choices = 1` instead.")
choices = 1
cir_query <- function(identifier, representation = "smiles",
resolver = NULL,
first = FALSE,
match = c("all", "first", "ask", "na"),
verbose = TRUE,
choices = NULL,
...){
if (!missing("choices")) {
stop("`choices` is deprecated. Use `match` instead.")
}
match <- match.arg(match)
foo <- function(identifier, representation, resolver, first, verbose) {
if (is.na(identifier)) {
return(NA)
Expand All @@ -144,9 +150,7 @@ cir_query <- function(identifier, representation = 'smiles', resolver = NULL,
message('No representation found... Returning NA.')
return(NA)
}
# if (first)
# out <- out[1]
out <- chooser(out, choices)
out <- matcher(out, query = identifier, match = match, verbose = verbose)
# convert to numeric
if (representation %in% c('mw', 'monoisotopic_mass', 'h_bond_donor_count',
'h_bond_acceptor_count', 'h_bond_center_count',
Expand All @@ -162,8 +166,5 @@ cir_query <- function(identifier, representation = 'smiles', resolver = NULL,
out <- lapply(identifier, foo, representation = representation,
resolver = resolver, first = first, verbose = verbose)
out <- setNames(out, identifier)
# if (first)
if(!is.null(choices))
out <- unlist(out)
return(out)
}
27 changes: 18 additions & 9 deletions R/cts.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,11 @@ cts_compinfo <- function(query, from = "inchikey", verbose = TRUE, inchikey){
#' @param from character; type of query ID, e.g. \code{'Chemical Name'} , \code{'InChIKey'},
#' \code{'PubChem CID'}, \code{'ChemSpider'}, \code{'CAS'}.
#' @param to character; type to convert to.
#' @param first deprecated. Use choices = 1 instead.
#' @param choices to return only the first result, use 'choices = 1'. To choose a result from an interactive menu, provide a number of choices to choose from or "all".
#' @param match character; How should multiple hits be handled? \code{"all"}
#' returns all matches, \code{"first"} returns only the first result,
#' \code{"ask"} enters an interactive mode and the user is asked for input,
#' \code{"na"} returns \code{NA} if multiple hits are found.
#' @param choices deprecated. Use the \code{match} argument instead.
#' @param verbose logical; should a verbose output be printed on the console?
#' @param ... currently not used.
#' @return a named list of character vectors, one element per query (\code{NA} for queries that could not be converted).
Expand All @@ -95,15 +98,22 @@ cts_compinfo <- function(query, from = "inchikey", verbose = TRUE, inchikey){
#' comp <- c("triclosan", "hexane")
#' cts_convert(comp, "Chemical Name", "cas")
#' }
cts_convert <- function(query, from, to, first = FALSE, choices = NULL, verbose = TRUE, ...){
if(!missing("first"))
stop('"first" is deprecated. Use "choices = 1" instead.')
cts_convert <- function(query,
from,
to,
match = c("all", "first", "ask", "na"),
verbose = TRUE,
choices = NULL,
...){
if(!missing("choices"))
stop('"choices" is deprecated. Use "match" instead.')
if (length(from) > 1 | length(to) > 1) {
stop('Cannot handle multiple input or output types. Please provide only one argument for `from` and `to`.')
}

from <- match.arg(tolower(from), c(cts_from(), "name"))
to <- match.arg(tolower(to), c(cts_to(), "name"))
match <- match.arg(match)

if (from == "name") {
from <- "chemical name"
Expand All @@ -113,7 +123,7 @@ cts_convert <- function(query, from, to, first = FALSE, choices = NULL, verbose
to <- "chemical name"
}

foo <- function(query, from, to , first, verbose){
foo <- function(query, from, to, first, verbose){
if (is.na(query)) return(NA)
baseurl <- "http://cts.fiehnlab.ucdavis.edu/service/convert"
qurl <- paste0(baseurl, '/', from, '/', to, '/', query)
Expand All @@ -131,13 +141,12 @@ cts_convert <- function(query, from, to, first = FALSE, choices = NULL, verbose
return(NA)
}
out <- out$result[[1]]
out <- chooser(out, choices)
out <- matcher(out, match = match, query = query, verbose = verbose)
return(out)
}
out <- lapply(query, foo, from = from, to = to, first = first, verbose = verbose)
out <- setNames(out, query)
if(!is.null(choices))
out <- unlist(out)

return(out)
}

Expand Down
33 changes: 19 additions & 14 deletions R/pubchem.R
Original file line number Diff line number Diff line change
Expand Up @@ -375,15 +375,16 @@ pc_prop <- function(cid, properties = NULL, verbose = TRUE, ...) {
#' @param query character; search term.
#' @param from character; type of input, can be one of "name" (default), "cid",
#' "sid", "aid", "smiles", "inchi", "inchikey"
#' @param interactive deprecated. Use the \code{choices} argument instead
#' @param choices to get only the first synonym, use \code{choices = 1}, to get
#' a number of synonyms to choose from in an interactive menu, provide the
#' number of choices you want or "all" to choose from all synonyms.
#' @param match character; How should multiple hits be handled? \code{"all"}
#' returns all matches, \code{"first"} returns only the first result,
#' \code{"ask"} enters an interactive mode and the user is asked for input,
#' \code{"na"} returns \code{NA} if multiple hits are found.
#' @param choices deprecated. Use the \code{match} argument instead.
#' @param verbose logical; should a verbose output be printed on the console?
#' @param arg character; optinal arguments like "name_type=word" to match
#' @param arg character; optional arguments like "name_type=word" to match
#' individual words.
#' @param ... optional arguments
#' @return a list of character vectors (one per query). If \code{choices} is used, a single named vector is returned instead.
#' @param ... currently unused
#' @return a named list.
#'
#' @references Wang, Y., J. Xiao, T. O. Suzek, et al. 2009 PubChem: A Public
#' Information System for
Expand Down Expand Up @@ -411,16 +412,20 @@ pc_prop <- function(cid, properties = NULL, verbose = TRUE, ...) {
#' pc_synonyms("Aspirin")
#' pc_synonyms(c("Aspirin", "Triclosan"))
#' pc_synonyms(5564, from = "cid")
#' pc_synonyms(c("Aspirin", "Triclosan"), choices = 10)
#' pc_synonyms(c("Aspirin", "Triclosan"), match = "ask")
#' }
pc_synonyms <- function(query, from = c("name", "cid", "sid", "aid", "smiles", "inchi", "inchikey"), choices = NULL, verbose = TRUE,
arg = NULL, interactive = 0, ...) {
pc_synonyms <- function(query,
from = c("name", "cid", "sid", "aid", "smiles", "inchi", "inchikey"),
match = c("all", "first", "ask", "na"),
verbose = TRUE,
arg = NULL, choices = NULL, ...) {
# from can be cid | name | smiles | inchi | sdf | inchikey | formula
# query <- c("Aspirin")
# from = "name"
from <- match.arg(from)
if (!missing("interactive"))
stop("'interactive' is deprecated. Use 'choices' instead.")
match <- match.arg(match)
if (!missing("choices"))
stop("'choices' is deprecated. Use 'match' instead.")
foo <- function(query, from, verbose, ...) {
if (is.na(query)) return(NA)
prolog <- "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
Expand All @@ -444,10 +449,10 @@ pc_synonyms <- function(query, from = c("name", "cid", "sid", "aid", "smiles", "
warning(cont$Fault$Details, ". Returning NA.")
return(NA)
}
out <- unlist(cont)
out <- unlist(cont)[-1] #first result is always an ID number
names(out) <- NULL

out <- chooser(out, choices)
out <- matcher(out, query = query, match = match, verbose = verbose)

}
out <- lapply(query, foo, from = from, verbose = verbose)
Expand Down
2 changes: 1 addition & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ chooser <- function(x, choices){
#' @param x a vector
#' @param query what the query was, only used if match = "best"
#' @param result what the result of the query was, only used if match = "best"
#' @param match haracter; How should multiple hits be handeled? "all" returns
#' @param match character; How should multiple hits be handled? "all" returns
#' all matched IDs, "first" only the first match, "best" the best matching (by
#' name) ID, "ask" is an interactive mode and the user is asked for input, "na"
#' returns \code{NA} if multiple hits are found.
#' @param verbose print messages?
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-cir.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ test_that("cir_query()", {
expect_equal(cir_query("3380-34-5", 'stdinchikey', resolver = 'cas_number', verbose = FALSE)[[1]],
"InChIKey=XEFQLINVKFYRCS-UHFFFAOYSA-N")
expect_true(length(cir_query('Triclosan', 'cas', verbose = FALSE)[[1]]) > 1)
expect_message(cir_query("acetic acid", "mw", first = TRUE))
expect_length(cir_query('Triclosan', 'cas', choices = 1, verbose = FALSE)[[1]], 1)
expect_message(cir_query("acetic acid", "mw", match = "first"))
expect_length(cir_query('Triclosan', 'cas', match = "first", verbose = FALSE)[[1]], 1)
expect_length(cir_query(c('Triclosan', 'Aspirin'), 'cas', verbose = FALSE), 2)

skip("I have no clue why this one fails on R CMD check. It works when run in the console!")
expect_equivalent(cir_query('acetic acid', 'mw', choices = 1), c(`acetic acid` = 60.0524))
# skip("I have no clue why this one fails on R CMD check. It works when run in the console!")
expect_equivalent(cir_query('acetic acid', 'mw', match = "first"), c(`acetic acid` = 60.0524))

})

Expand Down
13 changes: 1 addition & 12 deletions tests/testthat/test-cts.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,14 @@ test_that("cts_convert()", {
expect_error(cts_convert(comp, c('Chemical Name', 'CAS'), 'CAS'))
expect_error(cts_convert('Triclosan', 'CAS'))
expect_true(is.na(suppressWarnings(cts_convert('xxxx', 'Chemical Name', 'inchikey'))[[1]]))
o1 <- cts_convert(comp, 'Chemical Name', 'inchikey', choices = 1, verbose = FALSE)
o1 <- cts_convert(comp, 'Chemical Name', 'inchikey', match = "first", verbose = FALSE)
expect_length(o1, 2)

expect_equal(o1[[1]], 'XEFQLINVKFYRCS-UHFFFAOYSA-N')
# cts_convert('acetic acid', 'Chemical Name', 'CAS', choices = 1)
expect_equivalent(cts_convert(NA, from = "Chemical Name", to = "inchikey"), NA)
})


# # integration tests
# test_that("cts_compinfo(cir_query())", {
# chk_cts()
# chk_cir()
# inchikey <- cir_query('Triclosan', representation = 'stdinchikey', verbose = FALSE)
# inchikey <- gsub('InChIKey=', '', inchikey)
# expect_equal(round(cts_compinfo(inchikey, verbose = FALSE)[[1]][["molweight"]], 3), 289.542)
# })


test_that("fromto", {
skip_on_cran()
skip_if_not(up, "CTS service down")
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-pubchem.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@ test_that("pc_synonyms", {
skip_on_cran()
skip_if_not(up, "PubChem service is down")
expect_equivalent(pc_synonyms(NA), NA)
expect_equal(pc_synonyms("Triclosan")[[1]][1], "5564")
expect_equal(pc_synonyms("Acetyl Salicylic Acid")[[1]][1], "aspirin")
expect_equal(length(pc_synonyms(c("Triclosan", "Aspirin"))), 2)
expect_equal(pc_synonyms("BPGDAMSIGCZZLK-UHFFFAOYSA-N",
from = "inchikey")[[1]][1], "12345")
from = "inchikey")[[1]][1], "Methylene diacetate")
expect_true(is.na(suppressWarnings(pc_synonyms("xxxx"))[[1]]))
})

Expand Down

0 comments on commit d0defe7

Please sign in to comment.