Skip to content

Commit

Permalink
Several improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
rafalab committed Oct 11, 2022
1 parent 0201fc5 commit 06bb5a0
Showing 1 changed file with 177 additions and 82 deletions.
259 changes: 177 additions & 82 deletions chapter_to_slides.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@
#' after predetermined number o lines or characters is reached. If a section has titled exercise the section is
#' saved in a separate file and omitted from slides. The R chunks that do not include plotting functions
#' are preserved. Chunks that plot and do not include either eval=FALSE or echo=FALSE are copied twice
#' so that code shows in one page and the plot in another.
#' so that code shows in one page and the plot in another.
#'
#' Note that the yml header is hard wired. If you want to change, edit the line where `start` is defined.
#'
#' @details The output is should be checked before kniting as the output is rarely perfect. In particular,
#' think of the R output that will be generated by the R chunks as these are not seen and therefore not
#' counted when deciding when to start a new page.
#'
#' @author Rafael A. Irizarry
#'
#' @param input Input Rmd file to be converted.
Expand All @@ -23,32 +27,40 @@
#' @param max.lines Number of lines per slide
#' @param chars.per.line Number of characters that define a line.
#' @param max.section.title.length If the section number is bigger than this it gets cut-off. Defaults to infinity.
#' @param verbose If TRUE show information about line being processed.
#'
#' @return A data frame with counts for each group for each date with population sizes, if demo was provided.
#'
#' @examples
#' \dontrun{
#' rmd_to_slides("inference/models", "../lectures/inference/models")
#' rmd_to_slides("dsbood/inference/models.Rmd", "lectures/inference/models")
#' }
#' @export
#' @import stringr


chapter_to_slides <- function(input,
output = "tmp",
output.exercises = NULL,
suffix = "Rmd",
title = NULL,
author = "Rafael A. Irizarry",
max.lines = 10,
max.lines = 18,
chars.per.line = 60,
max.section.title.length = NULL){
max.section.title.length = NULL,
verbose=FALSE){
## Version: 0.0.1
## License: Artistic-2.0
## Author: Rafael A. Irizarry

library(stringr)

start_section <- function(start.lines = 0, env = parent.frame()){
cat("\n\n", env$the_section, "\n\n", sep = "",
file = env$file_name, append = TRUE)
env$lines <- start.lines
env$page <- env$page + 1
}

## define filename for extracted exercises if not provided
if(is.null(output.exercises)){
output.exercises <- paste0(output, "-exercises")
Expand All @@ -60,7 +72,7 @@ chapter_to_slides <- function(input,

## define title if not provided
if(is.null(title)){
title <- str_replace_all(basename(input), "-", " ") |> str_to_title()
title <- str_replace_all(basename(output), "-", " ") |> str_to_title()
}

if(is.null(max.section.title.length)) max.section.title.length <- -1L
Expand Down Expand Up @@ -90,8 +102,8 @@ chapter_to_slides <- function(input,
if(str_detect(x[j], "\\{")) ends <- ends - 1
}
}
}
x <- x[-out]
}

##line_type will store, for each line of x, what kind of line it is
##options are
Expand All @@ -101,6 +113,9 @@ chapter_to_slides <- function(input,
## exercise_start
## plot rchunk start inside or end
## quotes
## latex start inside or end
## table
## last line
line_type <- rep("prose", length(x))

## find section starts
Expand All @@ -119,13 +134,48 @@ chapter_to_slides <- function(input,
exercise_starts <- str_which(x, "## [Ee]xercise")
line_type[exercise_starts] <- "exercise_start"

## find the latex start and ends
latex_inds <- str_which(x, "\\$\\$")
if(length(latex_inds) %% 2 > 0){
warning("Detected unclosed latex. Check output carefully.")
latex_inds <- c(latex_inds, last(latex_inds))
}
latex_start <-latex_inds[seq(1,length(latex_inds),2)]
latex_end <- latex_inds[seq(2,length(latex_inds),2)]
latex_size <- rep(0, length(x)) ## used to decide if start new section
latex_size[latex_start] <- pmax(1, latex_end - latex_start - 2)

## if start and end in same line make it the end
line_type[latex_start] <- "latex_start"
line_type[latex_end] <- "latex_end"

## find the insider of latex
for(i in seq_along(latex_start)){
st <- latex_start[i]
en <- latex_end[i]
if(st==en){
line_type[st] <- "latex_start_and_end" ## if onle line of latex use this
} else{
ind <- (st+1):(en-1)
if(length(ind)>0){
line_type[ind] <- "latex_inside"
}
}
}

## find start of tables
line_type[str_detect(str_trim(x), "^\\|")] <- "table"

## find the rchunk start and ends
rchunk_start <- str_which(x, "^```\\{r")
rchunk_end <- str_which(x, "^```$")

line_type[rchunk_start] <- "rchunk_start"
line_type[rchunk_end] <- "rchunk_end"

rchunk_size <- rep(0, length(x)) ## used to decide if start new section
rchunk_size[rchunk_start] <- pmax(1, rchunk_end - rchunk_start - 2)

## Check if R chunk is a plot chunk and change if it is
rchunk_inds <- cbind(rchunk_start, rchunk_end)
plot_inds <- which(apply(rchunk_inds, 1, function(ind){
Expand All @@ -138,11 +188,15 @@ chapter_to_slides <- function(input,
line_type[plot_rchunk_start] <- "plot_rchunk_start"
line_type[plot_rchunk_end] <- "plot_rchunk_end"


## find quotes
quote_index <- str_which(x, "^>>.*")
line_type[quote_index] <- "quote"

## find the insider of r chunks
rchunk_size <- rep(0, length(x)) ## used to decide if start new section
rchunk_size[rchunk_start] <- rchunk_end - rchunk_start

for(i in seq_along(rchunk_start)){
ind <- (rchunk_start[i]+1):(rchunk_end[i]-1)
if(length(ind)>0) line_type[ind] <- "rchunk_inside"
Expand All @@ -156,7 +210,7 @@ chapter_to_slides <- function(input,

the_section <- ""
## the start is hard wired
start <- '---\ntitle: "LECTURETITLE"\nauthor: "THEAUTHORNAME"\ndate: "`r lubridate::today()`"\noutput:\n\tioslides_presentation:\n\t\tfig_caption: no\n\t\tfig_height: 5\n\t\tfig_width: 7\n\t\tout_width: "70%"\n\tbeamer_presentation: default\n\tslidy_presentation: default\n---\n\n```{r setup, include=FALSE}\nlibrary(tidyverse)\nlibrary(dslabs)\nlibrary(gridExtra)\nlibrary(ggthemes)\nds_theme_set()\noptions(digits = 3)\nknitr::opts_chunk$set(\n\tcomment = "#>",\n\tcollapse = TRUE,\n\tcache = TRUE,\n\tout.width = "70%",\n\tfig.align = "center",\n\tfig.width = 6,\n\tfig.asp = 0.618, # 1 / phi\n\tfig.show = "hold"\n)\n\nimg_path <- "img"\n```'
start <- '---\ntitle: "LECTURETITLE"\nauthor: "THEAUTHORNAME"\ndate: "`r lubridate::today()`"\noutput:\n ioslides_presentation:\n fig_caption: no\n fig_height: 5\n fig_width: 7\n out_width: "70%"\n beamer_presentation: default\n slidy_presentation: default\n---\n\n```{r setup, include=FALSE}\nlibrary(tidyverse)\nlibrary(dslabs)\nlibrary(gridExtra)\nlibrary(ggthemes)\nds_theme_set()\noptions(digits = 3)\nknitr::opts_chunk$set(\n comment = "#>",\n collapse = TRUE,\n cache = TRUE,\n out.width = "70%",\n fig.align = "center",\n fig.width = 6,\n fig.asp = 0.618, # 1 / phi\n fig.show = "hold"\n)\n\nimg_path <- "img"\n```'
start <- str_replace(start, "LECTURETITLE", title)
start <- str_replace(start, "THEAUTHORNAME", author)

Expand All @@ -167,109 +221,150 @@ chapter_to_slides <- function(input,
if(any(line_type=="exercise_start")) cat("", file = exercise_file_name)
exercise_flag <- FALSE

table_flag <- FALSE
table_start <- TRUE

## make last value a new lines
x[length(x)+1] <- "\n"
line_type[length(x)] <- "last_line"

## initialize values
chars <- 0
lines <- 0
page <- 0
## start going line by line
for(i in seq_along(x)){
if(verbose) cat("Page: ", page, ", Line: ", lines, ", Type: ", line_type[i],
", Section: ", the_section, "\n")
## if line is start of section, start section and initialize counts
## and turn of exercise flag (if previously true, exercise section has ended)
if(line_type[i]=="section"){
the_section <- x[i]
exercise_flag <- FALSE
chars <- 0
lines <- 0
cat("\n", x[i], "\n\n", sep = "", file = file_name, append = TRUE)
start_section()
} else{
## if exercise start, turn on flag and start just printing out exercises to
## new file
if(line_type[i]=="exercise_start" | exercise_flag){
exercise_flag <- TRUE
cat(x[i], "\n", file = exercise_file_name, append = TRUE)
} else{
## if its a quote add to slides
if(line_type[i] == "quote"){
chars <- chars + nchar(x[i])
lines <- lines + ceiling(chars/chars.per.line) + 1
cat(x[i], "\n\n", file = file_name, append = TRUE)
if(line_type[i]=="table" | table_flag){
if(table_start){
cat("\n", x[i], "\n", sep="", file = file_name, append = TRUE)
table_start <- FALSE
lines <- lines + 3
} else{
if(str_detect(str_trim(x[i+1]), "\\|")){ ##check if next line is table
cat(x[i], "\n", file = file_name, append = TRUE)
lines <- lines + 2
} else{ ##if next line not table, it's the end
cat(x[i], "\n\n", file = file_name, append = TRUE)
table_flag <- FALSE
table_start <- TRUE
lines <- lines + 3
}
}
} else{
## R chunks that are not plots are just added to output
if(line_type[i] %in% c("rchunk_end", "rchunk_inside","rchunk_start",
"plot_rchunk_end", "plot_rchunk_inside")){
if(str_detect(line_type[i], "inside")) lines <- lines + 1
cat(x[i], "\n", file = file_name, append = TRUE)
## if its a quote add to slides
if(line_type[i] == "quote"){
lines <- lines + ceiling(nchar(x[i])/chars.per.line) + 1
cat(x[i], "\n\n", file = file_name, append = TRUE)
} else{
## If r chunk includes a plot we will add it twice
## one with eval=FALSE and once with echo=FALSE
## unless the code already specifies it's echo or eval
if(line_type[i] == "plot_rchunk_start"){
## if echo nor eval are defined
## we include the code twice, first with eval=FALSE,
## which is what the while lopp does,
## then after the while loop it adds a sectio header,
##the first line with echo=FALSE, and in the next
## iteration of the i for loop will continue adding the lines
## to see why, look at the previous if statement
if(!str_detect(x[i], "echo|eval")){
y <- str_replace(x[i], "\\}", ", eval=FALSE}")
cat(y, "\n", file = file_name, append = TRUE)
j <- i
while(line_type[j]!="plot_rchunk_end"){
j <- j + 1
lines <- lines + 1
cat(x[j], "\n", file = file_name, append = TRUE)
}
lines <- lines + 1
cat("\n", the_section, "\n", sep = "",
file = file_name, append = TRUE)
y <- str_replace(x[i], "\\}", ", echo=FALSE}")
cat(y, "\n", file = file_name, append = TRUE)
} else{
cat(x[i], "\n", file = file_name, append = TRUE)
## R chunks that are not plots are just added to output
if(line_type[i] %in% c("rchunk_end", "rchunk_inside","rchunk_start",
"plot_rchunk_end", "plot_rchunk_inside",
"latex_inside")){
if(str_detect(line_type[i], "inside")) lines <- lines + 1
if(line_type[i] == "latex_inside") lines <- lines + 1 ## add one more for latex
cat(x[i], "\n", file = file_name, append = TRUE)
if(line_type[i] == "rchunk_start"){
if(lines + rchunk_size[i] > max.lines) start_section()
}
if(line_type[i] == "plot_rchunk_end") start_section()
} else{
## if we entry is a sentnce, we will split by periods
## and put each sentences as a bullet point
## the first three lines are two avoid spliting
## decimals, and abberviated titles.. might need more
## the trick is to covert points to commans, then convert back
## after the split
if(line_type[i] == "prose"){
x[i] <- str_trim(x[i])
x[i] <- str_replace_all(x[i], "(\\d)\\.(\\d)", "\\1,\\2")
x[i] <- str_replace_all(x[i], "(Mr|Ms|Dr)\\.", "\\1,")

y <- str_split(x[i], "\\.\\s+")[[1]]

for(j in seq_along(y)){
## convert back to periods
y[j] <- str_replace_all(y[j], "(\\d),(\\d)", "\\1.\\2")
y[j] <- str_replace_all(y[j], "(Mr|Ms|Dr),", "\\1.")
y[j] <- str_trim(y[j])
## If r chunk includes a plot we will add it twice
## one with eval=FALSE and once with echo=FALSE
## unless the code already specifies it's echo or eval
if(line_type[i] == "plot_rchunk_start"){
## if echo nor eval are defined
## we include the code twice, first with eval=FALSE,
## which is what the while lopp does,
## then after the while loop it adds a sectio header,
##the first line with echo=FALSE, and in the next
## iteration of the i for loop will continue adding the lines
## to see why, look at the previous if statement
if(!str_detect(x[i], "echo|eval")){
y <- str_replace(x[i], "\\}", ", eval=FALSE}")
cat(y, "\n", file = file_name, append = TRUE)
j <- i
while(line_type[j]!="plot_rchunk_end"){
j <- j + 1
cat(x[j], "\n", file = file_name, append = TRUE)
}
start_section()
y <- str_replace(x[i], "\\}", ", echo=FALSE}")
cat(y, "\n", file = file_name, append = TRUE)
} else{
if(lines>2) start_section()
cat(x[i], "\n", file = file_name, append = TRUE)
}
} else{
## if we entry is a sentnce, we will split by periods
## and put each sentences as a bullet point
## the first three lines are two avoid spliting
## decimals, and abberviated titles.. might need more
## the trick is to covert points to commans, then convert back
## after the split
if(line_type[i] == "prose"){
x[i] <- str_trim(x[i])
x[i] <- str_replace_all(x[i], "(\\d)\\.(\\d)", "\\1,\\2")
x[i] <- str_replace_all(x[i], "(Mr|Ms|Dr)\\.", "\\1,")

chars <- chars + nchar(y[j])
lines <- lines + ceiling(chars/chars.per.line) + 1
y <- str_split(x[i], "\\.\\s+")[[1]]

## if we have gone past max lines start a new section
if(lines > max.lines){
cat("\n\n", the_section, "\n\n", sep = "",
for(j in seq_along(y)){
## convert back to periods
y[j] <- str_replace_all(y[j], "(\\d),(\\d)", "\\1.\\2")
y[j] <- str_replace_all(y[j], "(Mr|Ms|Dr),", "\\1.")
y[j] <- str_trim(y[j])

lines <- lines + ceiling(nchar(y[j])/chars.per.line) + 1

## if we have gone past max lines start a new section
if(lines > max.lines){
start_section(ceiling(nchar(y[j])/chars.per.line) + 1)
}
## add a period at end of bullet point unless we already have
## punctuation
if(!str_sub(y[j], nchar(y[j]), nchar(y[j])) %in% c(".","?",":",",")){
y[j] <- y[j] <- str_c(y[j],".")
}
cat("- ", y[j], "\n\n", sep = "",
file = file_name, append = TRUE)
lines <- 0
chars <- 0
}
## add a period at end of bullet point unless we already have
## punctuation
if(!str_sub(y[j], nchar(y[j]), nchar(y[j])) %in% c(".","?",":",",")){
y[j] <- y[j] <- str_c(y[j],".")
}
cat("- ", y[j], "\n\n", sep = "",
file = file_name, append = TRUE)
} else{
if(line_type[i] == "latex_start"){
if(lines + latex_size[i]*2 > max.lines) start_section()
cat("\n", x[i], "\n", sep = "", file = file_name, append = TRUE)
lines <- lines + 1
} else{
if(line_type[i] == "latex_end"){
cat(x[i], "\n\n", sep = "", file = file_name, append = TRUE)
lines <- lines + 1
} else{
if(line_type[i] == "latex_start_and_end"){
cat("\n", x[i], "\n\n", sep = "", file = file_name, append = TRUE)
lines <- lines + 3
}
}
}
}
}
}
}
}
}
}
}
}
}
}

0 comments on commit 06bb5a0

Please sign in to comment.