Several improvements

jsgro · Oct 11, 2022 · 06bb5a0 · 06bb5a0
1 parent 0201fc5
commit 06bb5a0
Showing 1 changed file with 177 additions and 82 deletions.
diff --git a/chapter_to_slides.R b/chapter_to_slides.R
@@ -8,10 +8,14 @@
 #' after predetermined number o lines or characters is reached. If a section has titled exercise the section is 
 #' saved in a separate file and omitted from slides. The R chunks that do not include plotting functions
 #' are preserved. Chunks that plot and do not include either eval=FALSE or echo=FALSE are copied twice 
-#' so that code shows in one page and the plot in another. 
+#' so that code shows in one page and the plot in another.
 #' 
 #' Note that the yml header is hard wired. If you want to change, edit the line where `start` is defined.
 #' 
+#' @details The output is should be checked before kniting as the output is rarely perfect. In particular,
+#' think of the R output that will be generated by the R chunks as these are not seen and therefore not
+#' counted when deciding when to start a new page. 
+#' 
 #' @author Rafael A. Irizarry
 #' 
 #' @param input Input Rmd file to be converted.
@@ -23,32 +27,40 @@
 #' @param max.lines Number of lines per slide
 #' @param chars.per.line Number of characters that define a line.
 #' @param max.section.title.length If the section number is bigger than this it gets cut-off. Defaults to infinity.
+#' @param verbose If TRUE show information about line being processed.
 #' 
 #' @return A data frame with counts for each group for each date with population sizes, if demo was provided.
 #' 
 #' @examples
 #' \dontrun{
-#' rmd_to_slides("inference/models", "../lectures/inference/models")
+#' rmd_to_slides("dsbood/inference/models.Rmd", "lectures/inference/models")
 #' }
 #' @export
 #' @import stringr
 
-
 chapter_to_slides <- function(input, 
                           output = "tmp", 
                           output.exercises = NULL,
                           suffix = "Rmd",
                           title = NULL,
                           author = "Rafael A. Irizarry",
-                          max.lines = 10,
+                          max.lines = 18,
                           chars.per.line = 60,
-                          max.section.title.length = NULL){
+                          max.section.title.length = NULL,
+                          verbose=FALSE){
   ## Version: 0.0.1
   ## License: Artistic-2.0
   ## Author: Rafael A. Irizarry
 
   library(stringr)
 
+  start_section <- function(start.lines = 0, env = parent.frame()){
+    cat("\n\n", env$the_section, "\n\n", sep = "", 
+        file = env$file_name, append = TRUE)
+    env$lines <- start.lines
+    env$page <- env$page + 1
+  }
+
   ## define filename for extracted exercises if not provided
   if(is.null(output.exercises)){
     output.exercises <- paste0(output, "-exercises")
@@ -60,7 +72,7 @@ chapter_to_slides <- function(input,
 
   ## define title if not provided
   if(is.null(title)){
-    title <- str_replace_all(basename(input), "-", " ") |> str_to_title() 
+    title <- str_replace_all(basename(output), "-", " ") |> str_to_title() 
   }
 
   if(is.null(max.section.title.length)) max.section.title.length <- -1L
@@ -90,8 +102,8 @@ chapter_to_slides <- function(input,
         if(str_detect(x[j], "\\{")) ends <- ends - 1
       }
     }    
-  }
   x <- x[-out]
+  }
 
   ##line_type will store, for each line of x, what kind of line it is
   ##options are 
@@ -101,6 +113,9 @@ chapter_to_slides <- function(input,
   ## exercise_start
   ## plot rchunk start inside or end
   ## quotes
+  ## latex start inside or end
+  ## table
+  ## last line
   line_type <- rep("prose", length(x))
 
   ## find section starts
@@ -119,13 +134,48 @@ chapter_to_slides <- function(input,
   exercise_starts <- str_which(x, "## [Ee]xercise")
   line_type[exercise_starts] <- "exercise_start"
 
+  ## find the latex start and ends
+  latex_inds <- str_which(x, "\\$\\$")
+  if(length(latex_inds) %% 2 > 0){
+    warning("Detected unclosed latex. Check output carefully.")
+    latex_inds <- c(latex_inds, last(latex_inds))
+  }
+  latex_start <-latex_inds[seq(1,length(latex_inds),2)]
+  latex_end <- latex_inds[seq(2,length(latex_inds),2)]
+  latex_size <- rep(0, length(x)) ## used to decide if start new section
+  latex_size[latex_start] <- pmax(1, latex_end - latex_start - 2)
+
+  ## if start and end in same line make it the end
+  line_type[latex_start] <- "latex_start"
+  line_type[latex_end] <- "latex_end"
+
+  ## find the insider of latex
+  for(i in seq_along(latex_start)){
+    st <- latex_start[i]
+    en <- latex_end[i]
+    if(st==en){ 
+      line_type[st] <- "latex_start_and_end" ## if onle line of latex use this
+    } else{
+      ind <- (st+1):(en-1)
+      if(length(ind)>0){
+        line_type[ind] <- "latex_inside"
+      }
+    }
+  }
+
+  ## find start of tables
+  line_type[str_detect(str_trim(x), "^\\|")] <- "table"
+
   ## find the rchunk start and ends
   rchunk_start <- str_which(x, "^```\\{r")
   rchunk_end <- str_which(x, "^```$")
 
   line_type[rchunk_start] <- "rchunk_start"
   line_type[rchunk_end] <- "rchunk_end"
 
+  rchunk_size <- rep(0, length(x)) ## used to decide if start new section
+  rchunk_size[rchunk_start] <- pmax(1, rchunk_end - rchunk_start - 2)
+
   ## Check if R chunk is a plot chunk and change if it is
   rchunk_inds <- cbind(rchunk_start, rchunk_end)
   plot_inds <- which(apply(rchunk_inds, 1, function(ind){
@@ -138,11 +188,15 @@ chapter_to_slides <- function(input,
   line_type[plot_rchunk_start] <- "plot_rchunk_start"
   line_type[plot_rchunk_end] <- "plot_rchunk_end"
 
+
   ## find quotes
   quote_index <- str_which(x, "^>>.*")
   line_type[quote_index] <- "quote"
 
   ## find the insider of r chunks
+  rchunk_size <- rep(0, length(x)) ## used to decide if start new section
+  rchunk_size[rchunk_start] <- rchunk_end - rchunk_start
+
   for(i in seq_along(rchunk_start)){
     ind <- (rchunk_start[i]+1):(rchunk_end[i]-1)
     if(length(ind)>0) line_type[ind] <- "rchunk_inside"
@@ -156,7 +210,7 @@ chapter_to_slides <- function(input,
 
   the_section <- ""
   ## the start is hard wired
-  start <- '---\ntitle: "LECTURETITLE"\nauthor: "THEAUTHORNAME"\ndate: "`r lubridate::today()`"\noutput:\n\tioslides_presentation:\n\t\tfig_caption: no\n\t\tfig_height: 5\n\t\tfig_width: 7\n\t\tout_width: "70%"\n\tbeamer_presentation: default\n\tslidy_presentation: default\n---\n\n```{r setup, include=FALSE}\nlibrary(tidyverse)\nlibrary(dslabs)\nlibrary(gridExtra)\nlibrary(ggthemes)\nds_theme_set()\noptions(digits = 3)\nknitr::opts_chunk$set(\n\tcomment = "#>",\n\tcollapse = TRUE,\n\tcache = TRUE,\n\tout.width = "70%",\n\tfig.align = "center",\n\tfig.width = 6,\n\tfig.asp = 0.618,  # 1 / phi\n\tfig.show = "hold"\n)\n\nimg_path <- "img"\n```'
+  start <- '---\ntitle: "LECTURETITLE"\nauthor: "THEAUTHORNAME"\ndate: "`r lubridate::today()`"\noutput:\n  ioslides_presentation:\n    fig_caption: no\n    fig_height: 5\n    fig_width: 7\n    out_width: "70%"\n  beamer_presentation: default\n  slidy_presentation: default\n---\n\n```{r setup, include=FALSE}\nlibrary(tidyverse)\nlibrary(dslabs)\nlibrary(gridExtra)\nlibrary(ggthemes)\nds_theme_set()\noptions(digits = 3)\nknitr::opts_chunk$set(\n  comment = "#>",\n  collapse = TRUE,\n  cache = TRUE,\n  out.width = "70%",\n  fig.align = "center",\n  fig.width = 6,\n  fig.asp = 0.618,  # 1 / phi\n  fig.show = "hold"\n)\n\nimg_path <- "img"\n```'
   start <- str_replace(start, "LECTURETITLE", title)
   start <- str_replace(start, "THEAUTHORNAME", author)
 
@@ -167,109 +221,150 @@ chapter_to_slides <- function(input,
   if(any(line_type=="exercise_start")) cat("", file = exercise_file_name)
   exercise_flag <- FALSE
 
+  table_flag <- FALSE
+  table_start <- TRUE
+
+  ## make last value a new lines
+  x[length(x)+1] <- "\n"
+  line_type[length(x)] <- "last_line"
+
   ## initialize values
   chars <- 0
   lines <- 0
+  page <- 0
   ## start going line by line
   for(i in seq_along(x)){
+    if(verbose) cat("Page: ", page, ", Line: ", lines, ", Type: ", line_type[i], 
+                    ", Section: ", the_section, "\n")
     ## if line is start of section, start section and initialize counts
     ## and turn of exercise flag (if previously true, exercise section has ended)
     if(line_type[i]=="section"){
       the_section <- x[i]
       exercise_flag <- FALSE
-      chars <- 0
-      lines <- 0
-      cat("\n", x[i], "\n\n", sep = "", file = file_name, append = TRUE)
+      start_section()
     } else{
       ## if exercise start, turn on flag and start just printing out exercises to 
       ## new file
       if(line_type[i]=="exercise_start" | exercise_flag){
         exercise_flag <- TRUE
         cat(x[i], "\n", file = exercise_file_name, append = TRUE)
       } else{
-        ## if its a quote add to slides
-        if(line_type[i] == "quote"){
-          chars <- chars + nchar(x[i])
-          lines <- lines + ceiling(chars/chars.per.line) + 1
-          cat(x[i], "\n\n", file = file_name, append = TRUE)
+        if(line_type[i]=="table" | table_flag){
+          if(table_start){ 
+            cat("\n", x[i], "\n", sep="", file = file_name, append = TRUE)
+            table_start <- FALSE
+            lines <- lines + 3
+          } else{
+            if(str_detect(str_trim(x[i+1]), "\\|")){ ##check if next line is table
+              cat(x[i], "\n", file = file_name, append = TRUE)
+              lines <- lines + 2
+            } else{ ##if next line not table, it's the end
+              cat(x[i], "\n\n", file = file_name, append = TRUE)
+              table_flag <- FALSE
+              table_start <- TRUE
+              lines <- lines + 3
+            }
+          }
         } else{
-          ## R chunks that are not plots are just added to output
-          if(line_type[i] %in% c("rchunk_end", "rchunk_inside","rchunk_start",
-                                 "plot_rchunk_end", "plot_rchunk_inside")){
-            if(str_detect(line_type[i], "inside")) lines <- lines + 1
-            cat(x[i], "\n", file = file_name, append = TRUE)
+          ## if its a quote add to slides
+          if(line_type[i] == "quote"){
+            lines <- lines + ceiling(nchar(x[i])/chars.per.line) + 1
+            cat(x[i], "\n\n", file = file_name, append = TRUE)
           } else{
-            ## If r chunk includes a plot we will add it twice
-            ## one with eval=FALSE and once with echo=FALSE
-            ## unless the code already specifies it's echo or eval
-            if(line_type[i] == "plot_rchunk_start"){
-              ## if echo nor eval are defined
-              ## we include the code twice, first with eval=FALSE,
-              ## which is what the while lopp does,
-              ## then after the while loop it adds a sectio header,
-              ##the first line with echo=FALSE, and in the next
-              ## iteration of the i for loop will continue adding the lines
-              ## to see why, look at the previous if statement
-              if(!str_detect(x[i], "echo|eval")){ 
-                y <- str_replace(x[i], "\\}", ", eval=FALSE}")
-                cat(y, "\n", file = file_name, append = TRUE)
-                j <- i
-                while(line_type[j]!="plot_rchunk_end"){
-                  j <- j + 1
-                  lines <- lines + 1
-                  cat(x[j], "\n", file = file_name, append = TRUE)
-                }
-                lines <- lines + 1
-                cat("\n", the_section, "\n", sep = "", 
-                    file = file_name, append = TRUE)
-                y <- str_replace(x[i], "\\}", ", echo=FALSE}")
-                cat(y, "\n", file = file_name, append = TRUE)
-              } else{
-                cat(x[i], "\n", file = file_name, append = TRUE)
+            ## R chunks that are not plots are just added to output
+            if(line_type[i] %in% c("rchunk_end", "rchunk_inside","rchunk_start",
+                                   "plot_rchunk_end", "plot_rchunk_inside", 
+                                   "latex_inside")){
+              if(str_detect(line_type[i], "inside")) lines <- lines + 1
+              if(line_type[i] == "latex_inside") lines <- lines + 1 ## add one more for latex
+              cat(x[i], "\n", file = file_name, append = TRUE)
+              if(line_type[i] == "rchunk_start"){
+                if(lines + rchunk_size[i] > max.lines) start_section()
               }
+              if(line_type[i] == "plot_rchunk_end") start_section()
             } else{
-              ## if we entry is a sentnce, we will split by periods
-              ## and put each sentences as a bullet point
-              ## the first three lines are two avoid spliting
-              ## decimals, and abberviated titles.. might need more
-              ## the trick is to covert points to commans, then convert back
-              ## after the split
-              if(line_type[i] == "prose"){
-                x[i] <- str_trim(x[i])
-                x[i] <- str_replace_all(x[i], "(\\d)\\.(\\d)", "\\1,\\2")
-                x[i] <- str_replace_all(x[i], "(Mr|Ms|Dr)\\.", "\\1,")
-
-                y <- str_split(x[i], "\\.\\s+")[[1]]
-
-                for(j in seq_along(y)){
-                  ## convert back to periods
-                  y[j] <- str_replace_all(y[j], "(\\d),(\\d)", "\\1.\\2")
-                  y[j] <- str_replace_all(y[j], "(Mr|Ms|Dr),", "\\1.")
-                  y[j] <- str_trim(y[j])
+              ## If r chunk includes a plot we will add it twice
+              ## one with eval=FALSE and once with echo=FALSE
+              ## unless the code already specifies it's echo or eval
+              if(line_type[i] == "plot_rchunk_start"){
+                ## if echo nor eval are defined
+                ## we include the code twice, first with eval=FALSE,
+                ## which is what the while lopp does,
+                ## then after the while loop it adds a sectio header,
+                ##the first line with echo=FALSE, and in the next
+                ## iteration of the i for loop will continue adding the lines
+                ## to see why, look at the previous if statement
+                if(!str_detect(x[i], "echo|eval")){ 
+                  y <- str_replace(x[i], "\\}", ", eval=FALSE}")
+                  cat(y, "\n", file = file_name, append = TRUE)
+                  j <- i
+                  while(line_type[j]!="plot_rchunk_end"){
+                    j <- j + 1
+                    cat(x[j], "\n", file = file_name, append = TRUE)
+                  }
+                  start_section()
+                  y <- str_replace(x[i], "\\}", ", echo=FALSE}")
+                  cat(y, "\n", file = file_name, append = TRUE)
+                } else{
+                  if(lines>2) start_section()
+                  cat(x[i], "\n", file = file_name, append = TRUE)
+                }
+              } else{
+                ## if we entry is a sentnce, we will split by periods
+                ## and put each sentences as a bullet point
+                ## the first three lines are two avoid spliting
+                ## decimals, and abberviated titles.. might need more
+                ## the trick is to covert points to commans, then convert back
+                ## after the split
+                if(line_type[i] == "prose"){
+                  x[i] <- str_trim(x[i])
+                  x[i] <- str_replace_all(x[i], "(\\d)\\.(\\d)", "\\1,\\2")
+                  x[i] <- str_replace_all(x[i], "(Mr|Ms|Dr)\\.", "\\1,")
 
-                  chars <- chars + nchar(y[j])
-                  lines <- lines + ceiling(chars/chars.per.line) + 1
+                  y <- str_split(x[i], "\\.\\s+")[[1]]
 
-                  ## if we have gone past max lines start a new section
-                  if(lines > max.lines){
-                    cat("\n\n", the_section, "\n\n", sep = "", 
+                  for(j in seq_along(y)){
+                    ## convert back to periods
+                    y[j] <- str_replace_all(y[j], "(\\d),(\\d)", "\\1.\\2")
+                    y[j] <- str_replace_all(y[j], "(Mr|Ms|Dr),", "\\1.")
+                    y[j] <- str_trim(y[j])
+
+                    lines <- lines + ceiling(nchar(y[j])/chars.per.line) + 1
+
+                    ## if we have gone past max lines start a new section
+                    if(lines > max.lines){
+                      start_section(ceiling(nchar(y[j])/chars.per.line) + 1)
+                    }
+                    ## add a period at end of bullet point unless we already have
+                    ## punctuation
+                    if(!str_sub(y[j], nchar(y[j]), nchar(y[j])) %in% c(".","?",":",",")){
+                      y[j] <- y[j] <- str_c(y[j],".")
+                    } 
+                    cat("- ", y[j], "\n\n", sep = "", 
                         file = file_name, append = TRUE)
-                    lines <- 0 
-                    chars <- 0
                   }
-                  ## add a period at end of bullet point unless we already have
-                  ## punctuation
-                  if(!str_sub(y[j], nchar(y[j]), nchar(y[j])) %in% c(".","?",":",",")){
-                    y[j] <- y[j] <- str_c(y[j],".")
-                  } 
-                  cat("- ", y[j], "\n\n", sep = "", 
-                      file = file_name, append = TRUE)
+                } else{
+                  if(line_type[i] == "latex_start"){
+                    if(lines + latex_size[i]*2 > max.lines) start_section()
+                    cat("\n", x[i], "\n", sep = "", file = file_name, append = TRUE)
+                    lines <- lines + 1 
+                  } else{
+                    if(line_type[i] == "latex_end"){
+                      cat(x[i], "\n\n", sep = "", file = file_name, append = TRUE)
+                      lines <- lines + 1 
+                    } else{
+                      if(line_type[i] == "latex_start_and_end"){
+                        cat("\n", x[i], "\n\n", sep = "", file = file_name, append = TRUE)
+                        lines <- lines + 3
+                      }
+                    }
+                  }
                 }
-              } 
+              }
             }
           }
         }
       }
     }
   }
-}
+}