Skip to content

Commit

Permalink
parse do obo file
Browse files Browse the repository at this point in the history
  • Loading branch information
GuangchuangYu authored Jul 16, 2022
1 parent 838c767 commit b42425a
Showing 1 changed file with 56 additions and 0 deletions.
56 changes: 56 additions & 0 deletions inst/extdata/parse-obo.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@

#' Parse the `[Term]` stanzas of an OBO file
#'
#' Reads the file, slices it into `[Term]` stanzas, passes every stanza to
#' `extract_do_item()`, discards the stanzas for which that function returns
#' `NULL`, and row-binds the remaining results.
#'
#' @param obofile File path or connection accepted by `readLines()`.
#' @return A list with two elements: `doinfo`, a data frame obtained by
#'   row-binding the `do` vector of every retained term, and `rel`, the
#'   row-bound `relationship` data frames of the retained terms.
parse_do <- function(obofile) {
  x <- readLines(obofile)

  # A stanza runs from its "[Term]" header up to the line before the next
  # header; the final stanza runs to the end of the file.
  start <- grep("^\\[Term\\]", x)
  end <- c(start[-1] - 1, length(x))

  res <- lapply(seq_along(start), function(i) {
    extract_do_item(x[start[i]:end[i]])
  })

  # extract_do_item() signals a skipped term by returning NULL.
  j <- vapply(res, is.null, logical(1))
  message(sum(j), "/", length(j), " obsolete terms found.")
  res <- res[!j]

  # Base do.call() instead of a magrittr pipe: the file never loads magrittr.
  # Elements are fetched with exact `[[` names rather than relying on `$`
  # partial matching ("relation" vs. "relationship").
  doinfo <- as.data.frame(
    do.call("rbind", lapply(res, function(term) term[["do"]]))
  )
  rel <- do.call("rbind", lapply(res, function(term) term[["relationship"]]))

  list(doinfo = doinfo, rel = rel)
}


#' Extract the fields of a single `[Term]` stanza
#'
#' @param item Character vector with the lines of one stanza.
#' @return `NULL` when the stanza carries `is_obsolete: true`. Otherwise a
#'   list with `do`, a named character vector (`id`, `name`, `def`), and
#'   `relationship`, a data frame with columns `id` and `parent` holding one
#'   row per `is_a:` line (`parent` is `NA` when the stanza has none).
extract_do_item <- function(item) {
  # A stanza slice can run on into a "[Typedef]" section; drop everything
  # from the first such header onwards.
  typedef_at <- grep("^\\[Typedef\\]", item)
  if (length(typedef_at) > 0) {
    item <- item[-(typedef_at[1]:length(item))]
  }

  # Only an explicit "is_obsolete: true" marks the term as obsolete. The mere
  # presence of the tag is not enough ("is_obsolete: false" is a live term),
  # and any() keeps the condition scalar if the tag occurs more than once.
  obsolete <- get_do_info(item, "^is_obsolete:")
  if (any(grepl("^true", obsolete, ignore.case = TRUE))) {
    return(NULL)
  }

  id <- get_do_info(item, "^id:")
  name <- get_do_info(item, "^name:")

  # Keep the text between the first pair of double quotes of the def line.
  # NOTE(review): a definition containing an escaped quote (\") is cut at
  # that quote -- confirm whether the input files need this handled.
  def <- get_do_info(item, "^def:")
  def <- sub('\\"', "", def)
  def <- sub('\\".*', "", def)

  # "is_a: <parent id> ! <parent name>" -> keep only the parent id.
  isa <- get_do_info(item, "^is_a:")
  isa <- sub("\\s*!.*", "", isa)

  list(
    do = c(id = id, name = name, def = def),
    relationship = data.frame(id = id, parent = isa)
  )
}

#' Pull the value(s) of a tag out of a block of lines
#'
#' @param item Character vector of lines to search.
#' @param pattern Regular expression identifying the tag; it is matched
#'   against each line and then removed from the matching lines.
#' @return `NA` when no line matches; otherwise a character vector with one
#'   element per matching line, with the tag and the whitespace that
#'   follows it removed.
get_do_info <- function(item, pattern) {
  matched <- grepl(pattern, item)
  if (!any(matched)) {
    return(NA)
  }

  # Strip the tag first, then the whitespace left at the front of the value.
  values <- sub(pattern, "", item[matched])
  sub("\\s*", "", values)
}


#x <- parse_do('HumanDO.obo')

0 comments on commit b42425a

Please sign in to comment.