# HG changeset patch
# User mingchen0919
# Date 1502212481 14400
# Node ID 963905bcb7548d580d1484ccc3d5b85fba312a96
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_mirdeep2 commit 29e8b40899c71ca12fd07b2bb530b0ee65037588-dirty

diff -r 000000000000 -r 963905bcb754 mirdeep2.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2.Rmd Tue Aug 08 13:14:41 2017 -0400
@@ -0,0 +1,146 @@
+---
+title: 'Mirdeep2'
+output:
+  html_document:
+    number_sections: true
+    toc: true
+    theme: cosmo
+    highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+# The upper-case tokens in this template are placeholders; the render script
+# replaces them with the job's command line values before knitting.
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+```
+
+# Job command line
+
+```{r 'build PATH', echo=FALSE}
+# set PATH environment: the miRDeep2 patch and bin directories come first,
+# then the current working directory, then the PATH already set in the shell.
+PATH = system('pwd', intern = TRUE) %>%
+  (function(x) {
+    paste0('/home/galaxy/mirdeep2/bin:', x)
+  }) %>%
+  (function(x) {
+    paste0('/home/galaxy/mirdeep2_patch:', x)
+  }) %>%
+  paste0(':$PATH')
+```
+
+## Build job command line.
+
+```{r 'build command line'}
+# The command is one shell string grown step by step; every step appends its
+# own piece (including the surrounding blanks) to the string built so far.
+# change directory to OUTPUT DIR
+command_line = paste0('cd OUTPUT_DIR &&') %>%
+  # PATH and other environment variables
+  paste0('export PATH=', PATH, ' && ') %>%
+  (function(x) {
+    paste0(x,
+           'export PERL_MB_OPT="--install_base /home/galaxy/perl5" && ',
+           'export PERL_MM_OPT="INSTALL_BASE=/home/galaxy/perl5" && ',
+           'export PERL5LIB=/home/galaxy/mirdeep2/lib/perl5 && ')
+  }) %>%
+  # link collapsed reads and reads mapping files
+  (function(x) {
+    paste0(x, 'ln -s COLLAPSED_READS reads_collapsed.fa && ')
+  }) %>%
+  (function(x) {
+    paste0(x, 'ln -s READS_MAPPING reads_collapsed_vs_genome.arf && ')
+  }) %>%
+  paste0(' miRDeep2.pl ') %>%
+  # collapsed reads
+  (function(x) {
+    paste0(x, ' reads_collapsed.fa ')
+  }) %>%
+  # reference genome
+  (function(x) {
+    paste0(x, ' REFERENCE_GENOME ')
+  }) %>%
+  # reads mapping
+  (function(x) {
+    paste0(x, ' reads_collapsed_vs_genome.arf ')
+  }) %>%
+  # mature miRNA for this species ('None' is passed on in lower case)
+  (function(x) {
+    if('SPECIES_MATURE_MIRNA' == 'None') {
+      paste0(x, tolower('SPECIES_MATURE_MIRNA'), ' ')
+    } else {
+      paste0(x, ' SPECIES_MATURE_MIRNA ')
+    }
+  }) %>%
+  # mature miRNA from related species ('None' is passed on in lower case)
+  (function(x) {
+    if('SPECIES_RELATED_MATURE_MIRNA' == 'None') {
+      paste0(x, tolower('SPECIES_RELATED_MATURE_MIRNA'), ' ')
+    } else {
+      paste0(x, 'SPECIES_RELATED_MATURE_MIRNA ')
+    }
+  }) %>%
+  # precursor sequences ('None' is passed on in lower case)
+  (function(x) {
+    if('PRECURSOR_SEQUENCES' == 'None') {
+      paste0(x, tolower('PRECURSOR_SEQUENCES'), ' ')
+    } else {
+      paste0(x, 'PRECURSOR_SEQUENCES ')
+    }
+  }) %>%
+  # min read stack height: -a is left out when the value is 'TRUE'
+  (function(x) {
+    ifelse('MIN_READ_STACK_HEIGHT' == 'TRUE', x, paste0(x, ' -a MIN_READ_STACK_HEIGHT '))
+  }) %>%
+  # min score cutoff
+  (function(x) {
+    paste0(x, '-b MIN_SCORE_CUTOFF ')
+  }) %>%
+  # disable randfold analysis
+  (function(x) {
+    ifelse(RANDFOLD_ANALYSIS, paste0(x, '-c '), x)
+  }) %>%
+  # max precursors number
+  (function(x) {
+    paste0(x, ' -g MAX_PRECURSOR_NUMBER ')
+  }) %>%
+  # species: -t is left out when the value is 'all'
+  (function(x) {
+    ifelse('SPECIES' == 'all', x, paste0(x, ' -t SPECIES '))
+  }) %>%
+  # switch
+  (function(x) {
+    ifelse(SWITCH, x, paste0(x, ' -P '))
+  }) %>%
+  # write stdout and stderr to report.log
+  (function(x) {
+    paste0(x, ' >report.log 2>&1 ')
+  })
+
+command_line
+
+## run job
+system(command_line)
+```
+
+## Results
+
+```{r echo=TRUE}
+system('cp OUTPUT_DIR/result*.html result.html')
+system('cp OUTPUT_DIR/result*.csv result.csv')
+system('cp OUTPUT_DIR/report.log report.log')
+system('mv OUTPUT_DIR/pdfs_* OUTPUT_DIR/pdfs')
+
+## check if OUTPUT DIR has results that we want.
+system('ls OUTPUT_DIR', intern = TRUE)
+```
+
+### PDF files
+
+```{r echo=TRUE}
+# Only names ending in '.pdf' are listed; the previous pattern '.*pdf' matched
+# any file whose name merely contained "pdf".
+pdf_files = list.files('OUTPUT_DIR/pdfs', pattern = '\\.pdf$')
+pdf_files
+# One list item per PDF, each linking to the file.
+pdf_report_list = lapply(pdf_files, function(pdf_file) {
+  # note that the root directory is OUTPUT_DIR, all the file links should be a relative path to the root directory!
+  tags$li(tags$a(href = paste0('pdfs/', pdf_file), pdf_file))
+})
+tags$ul(pdf_report_list)
+```
+
+
diff -r 000000000000 -r 963905bcb754 mirdeep2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2.xml Tue Aug 08 13:14:41 2017 -0400
@@ -0,0 +1,181 @@
+
+
+        mirdeep2
+        r-getopt
+        r-rmarkdown
+        r-plyr
+        r-dplyr
+        r-htmltools
+
+
+    miRNA identification
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+            @article{friedlander2011mirdeep2,
+            title={miRDeep2 accurately identifies known and hundreds of novel microRNA genes in seven animal clades},
+            author={Friedl{\"a}nder, Marc R and Mackowiak, Sebastian D and Li, Na and Chen, Wei and Rajewsky, Nikolaus},
+            journal={Nucleic acids research},
+            volume={40},
+            number={1},
+            pages={37--52},
+            year={2011},
+            publisher={Oxford University Press}
+            }
+
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+
+
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+
+
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 963905bcb754 mirdeep2_mapper.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2_mapper.Rmd Tue Aug 08 13:14:41 2017 -0400
@@ -0,0 +1,96 @@
+---
+title: 'Mirdeep2 Mapper'
+output:
+  html_document:
+    number_sections: true
+    toc: true
+    theme: cosmo
+    highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+```
+
+# Job command line
+
+## View arguments from command line.
+
+```{r 'view arguments'}
+str(opt)
+```
+
+```{r 'build PATH', echo=FALSE}
+# set PATH environment: miRDeep2 bin directory, then the current working
+# directory, then whatever PATH the shell already has.
+PATH = paste0('/home/galaxy/mirdeep2/bin:', system('pwd', intern = TRUE), ':$PATH')
+```
+
+
+## Build job command line.
+
+```{r 'build command line'}
+# The shell command is grown one piece at a time; each statement appends its
+# own fragment (with its own blanks) to what has been assembled so far.
+command_line = paste0('export PATH=', PATH, ' && ')
+# index reference genome
+command_line = paste0(command_line, ' bowtie-build REFERENCE_GENOME ref_genome &&')
+command_line = paste0(command_line, ' mapper.pl ')
+# reads
+command_line = paste0(command_line, 'FASTQ_READS -c ')
+# reference genome
+command_line = paste0(command_line, '-p ref_genome ')
+# # parse to fasta
+# command_line = ifelse(PARSE_TO_FASTA, paste0(command_line, '-h '), command_line)
+# clean entries
+command_line = ifelse(CLEAN_ENTRIES, paste0(command_line, '-j '), command_line)
+# clip 3 adapter (skipped when no adapter sequence was given)
+command_line = ifelse('CLIP_3_ADAPTER' == '', command_line, paste0(command_line, '-k CLIP_3_ADAPTER '))
+# discard shorter reads
+command_line = paste0(command_line, '-l DISCARD_SHORTER_READS ')
+# collapse reads (always on)
+# command_line = ifelse(COLLAPSE_READS, paste0(command_line, '-m '), command_line)
+command_line = paste0(command_line, '-m ')
+# map with one mismatch
+command_line = ifelse(MAP_WITH_ONE_MISMATCH, paste0(command_line, '-q '), command_line)
+# map up to position
+command_line = paste0(command_line, '-r MAP_UP_TO_POSITION ')
+# overwrite existing files(-n), outputs
+command_line = paste0(command_line, '-s reads_collapsed.fa -t reads_collapsed_vs_genome.arf -v -n ')
+# write stderr to report.log
+command_line = paste0(command_line, ' 2>report.log')
+command_line
+```
+
+## Run command line
+
+```{r}
+system(command_line)
+```
+
+
diff -r 000000000000 -r 963905bcb754 mirdeep2_mapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2_mapper.xml Tue Aug 08 13:14:41 2017 -0400
@@ -0,0 +1,135 @@
+
+
+        mirdeep2
+        r-getopt
+        r-rmarkdown
+        r-plyr
+        r-dplyr
+        r-htmltools
+
+
+    Mapping reads to genome
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+            @article{friedlander2011mirdeep2,
+            title={miRDeep2 accurately identifies known and hundreds of novel microRNA genes in seven animal clades},
+            author={Friedl{\"a}nder, Marc R and Mackowiak, Sebastian D and Li, Na and Chen, Wei and Rajewsky, Nikolaus},
+            journal={Nucleic acids research},
+            volume={40},
+            number={1},
+            pages={37--52},
+            year={2011},
+            publisher={Oxford University Press}
+            }
+
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+
+
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+
+
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 963905bcb754 mirdeep2_mapper_render.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2_mapper_render.R Tue Aug 08 13:14:41 2017 -0400
@@ -0,0 +1,153 @@
+##======= Handle arguments from command line ========
+# set up R error handling so that error messages go to stderr and the
+# script exits with status 1
+options(show.error.messages=FALSE,
+        error=function(){
+          cat(geterrmessage(), file=stderr())
+          quit("no", 1, FALSE)
+        })
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warning
+options(warn = -1)
+
+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
+args = commandArgs(trailingOnly=TRUE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+# Every entry of spec_list becomes one row of the getopt spec matrix:
+# column 1: the long flag name
+# column 2: the short flag alias. A SINGLE character string
+# column 3: argument mask
+#           0: no argument
+#           1: argument required
+#           2: argument is optional
+# column 4: data type to which the flag's argument shall be cast.
+#           possible values: logical, integer, double, complex, character.
+spec_list=list()
+
+##------- 1. input data ---------------------
+spec_list$FASTQ_READS = c('reads', 'r', '1', 'character')
+spec_list$REFERENCE_GENOME = c('reference_genome', 'g', '1', 'character')
+spec_list$ECHO = c('echo', 'e', '1', 'character')
+
+##--------2. output report and report site directory --------------
+spec_list$OUTPUT_HTML = c('mirdeep2_mapper', 'o', '1', 'character')
+spec_list$OUTPUT_DIR = c('mirdeep2_mapper_dir', 'd', '1', 'character')
+spec_list$READS_COLLAPSED = c('reads_collapsed', 'm', '1', 'character')
+spec_list$READ_X_COLLAPSED_VS_GENOME = c('reads_collapsed_vs_genome', 'n', '1', 'character')
+spec_list$REPORT_LOG = c("report_log", 't', '1', 'character')
+
+##---------other parameters---------------
+spec_list$PARSE_TO_FASTA = c('parse_to_fasta', 'b', '1', 'character')
+spec_list$CLEAN_ENTRIES = c('clean_entries', 'c', '1', 'character')
+spec_list$CLIP_3_ADAPTER = c('clip_3_adapter', 'f', '2', 'character')
+spec_list$DISCARD_SHORTER_READS = c('discard_shorter_reads', 'h', '1', 'character')
+spec_list$COLLAPSE_READS_OR_NOT = c('collapse_reads_or_not', 'j', '1', 'character')
+spec_list$MAP_WITH_ONE_MISMATCH = c('map_with_one_mismatch', 'k', '1', 'character')
+spec_list$MAP_UP_TO_POSITION = c('map_up_to_position', 'l', '1', 'character')
+
+##--------3. Rmd templates sitting in the tool directory ----------
+
+## _site.yml and index.Rmd files
+spec_list$SITE_YML = c('site_yml', 's', '1', 'character')
+spec_list$INDEX_Rmd = c('index_rmd', 'i', '1', 'character')
+
+## other Rmd body template files
+spec_list$MIRDEEP2_MAPPER_RMD = c('mirdeep2_mapper_rmd', 'p', '1', 'character')
+
+
+
+##------------------------------------------------------------------
+
+# one row per option, four columns as described above
+spec = t(as.data.frame(spec_list))
+opt = getopt(spec)
+# arguments are accessed by long flag name (the first column in the spec matrix)
+#                        NOT by element name in the spec_list
+# example: opt$help, opt$expression_file
+##====== End of arguments handling ==========
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(plyr)
+# library(stringr)
+library(dplyr)
+# library(highcharter)
+# library(DT)
+# library(reshape2)
+# library(plotly)
+# library(formattable)
+library(htmltools)
+
+
+#----- 1. create the report directory ------------------------
+# dir.create() replaces the former shell call 'mkdir -p <dir>': the path is
+# no longer parsed by a shell, so blanks or shell metacharacters in it are
+# safe. An already existing directory is not an error.
+dir.create(opt$mirdeep2_mapper_dir, recursive = TRUE, showWarnings = FALSE)
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+
+#----- mirdeep2_mapper.Rmd -----------------------
+# Fill in the mirdeep2_mapper.Rmd template: every placeholder below is
+# replaced by the value of the command line option it is mapped to, and the
+# result is written to 'mirdeep2_mapper.Rmd' in the working directory.
+#
+# The placeholders are substituted one after another in the order listed, so
+# a value that itself contains a placeholder listed further down is rewritten
+# by that later substitution.
+placeholders = c(
+  ECHO                  = 'echo',
+  FASTQ_READS           = 'reads',
+  REFERENCE_GENOME      = 'reference_genome',
+  PARSE_TO_FASTA        = 'parse_to_fasta',
+  CLEAN_ENTRIES         = 'clean_entries',
+  CLIP_3_ADAPTER        = 'clip_3_adapter',
+  DISCARD_SHORTER_READS = 'discard_shorter_reads',
+  # declared as 'collapse_reads_or_not' in the spec; the former
+  # `opt$collapse_reads` only reached it through partial matching of `$`
+  COLLAPSE_READS        = 'collapse_reads_or_not',
+  MAP_WITH_ONE_MISMATCH = 'map_with_one_mismatch',
+  MAP_UP_TO_POSITION    = 'map_up_to_position',
+  OUTPUT_DIR            = 'mirdeep2_mapper_dir'
+)
+
+rmd_lines = readLines(opt$mirdeep2_mapper_rmd)
+for (placeholder in names(placeholders)) {
+  option_name = placeholders[[placeholder]]
+  # `[[` matches the option name exactly (no partial matching)
+  value = opt[[option_name]]
+  if (is.null(value)) {
+    # gsub() would fail here anyway ("invalid 'replacement' argument");
+    # name the missing option instead
+    stop('no value supplied for option --', option_name,
+         ' (placeholder ', placeholder, ')', call. = FALSE)
+  }
+  # fixed = TRUE: placeholder and value are plain text, so backslashes in a
+  # value are inserted literally instead of being read as back-references
+  rmd_lines = gsub(placeholder, value, rmd_lines, fixed = TRUE)
+}
+writeLines(rmd_lines, con = 'mirdeep2_mapper.Rmd')
+
+
+#------ 3. render all Rmd files with render() --------
+render('mirdeep2_mapper.Rmd', output_file = opt$mirdeep2_mapper)
+
+
+#-------4. manipulate outputs -----------------------------
+#   a. copy non-site files
+file.copy('reads_collapsed.fa', opt$reads_collapsed, recursive=TRUE)
+file.copy('reads_collapsed_vs_genome.arf', opt$reads_collapsed_vs_genome, recursive=TRUE)
+file.copy('report.log', opt$report_log, recursive=TRUE)
+
+
+
+
+
diff -r 000000000000 -r 963905bcb754 mirdeep2_render.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mirdeep2_render.R Tue Aug 08 13:14:41 2017 -0400
@@ -0,0 +1,164 @@
+##======= Handle arguments from command line ========
+# set up R error handling so that error messages go to stderr and the
+# script exits with status 1
+options(show.error.messages=FALSE,
+        error=function(){
+          cat(geterrmessage(), file=stderr())
+          quit("no", 1, FALSE)
+        })
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warning
+options(warn = -1)
+
+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
+args = commandArgs(trailingOnly=TRUE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+# Every entry of spec_list becomes one row of the getopt spec matrix:
+# column 1: the long flag name
+# column 2: the short flag alias. A SINGLE character string
+# column 3: argument mask
+#           0: no argument
+#           1: argument required
+#           2: argument is optional
+# column 4: data type to which the flag's argument shall be cast.
+#           possible values: logical, integer, double, complex, character.
+spec_list=list()
+
+##------- 1. input data ---------------------
+spec_list$ECHO = c('echo', 'e', '1', 'character')
+spec_list$COLLAPSED_READS = c('collapsed_reads', 'a', '1', 'character')
+spec_list$REFERENCE_GENOME = c('reference_genome', 'b', '1', 'character')
+spec_list$READS_MAPPING = c('reads_mapping', 'c', '1', 'character')
+
+##--------2. output report and report site directory --------------
+spec_list$OUTPUT_HTML = c('mirdeep2_html', 'o', '1', 'character')
+spec_list$OUTPUT_DIR = c('mirdeep2_output_dir', 'd', '1', 'character')
+spec_list$CSV_RESULT = c('csv_result', 'r', '1', 'character')
+spec_list$HTML_RESULT = c('html_result', 't', '1', 'character')
+spec_list$REPORT_LOG = c('report_log', 'u', '1', 'character')
+
+##---------other parameters---------
+spec_list$SPECIES_MATURE_MIRNA = c('species_mature_mirna', 'f', '2', 'character')
+spec_list$SPECIES_RELATED_MATURE_MIRNA = c('related_species_mature_mirna', 'g', '2', 'character')
+spec_list$PRECURSOR_SEQUENCES = c('precursor_sequences', 'h', '2', 'character')
+spec_list$MIN_READ_STACK_HEIGHT = c('min_read_stack_height', 'j', '2', 'character')
+spec_list$MIN_SCORE_CUTOFF = c('min_score_cutoff', 'k', '2', 'character')
+spec_list$RANDFOLD_ANALYSIS = c('randfold_analysis', 'l', '2', 'character')
+spec_list$MAX_PRECURSOR_NUMBER = c('max_precursor_number', 'm', '2', 'character')
+spec_list$SPECIES = c('species', 'n', '2', 'character')
+spec_list$SWITCH = c('switch', 'q', '2', 'character')
+
+##--------3. Rmd templates sitting in the tool directory ----------
+
+## _site.yml and index.Rmd files
+spec_list$SITE_YML = c('site_yml', 's', '1', 'character')
+spec_list$INDEX_Rmd = c('index_rmd', 'i', '1', 'character')
+
+## other Rmd body template files
+spec_list$MIRDEEP2_RMD = c('mirdeep2_rmd', 'p', '1', 'character')
+
+
+
+##------------------------------------------------------------------
+
+# one row per option, four columns as described above
+spec = t(as.data.frame(spec_list))
+opt = getopt(spec)
+# arguments are accessed by long flag name (the first column in the spec matrix)
+#                        NOT by element name in the spec_list
+# example: opt$help, opt$expression_file
+##====== End of arguments handling ==========
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(plyr)
+# library(stringr)
+library(dplyr)
+# library(highcharter)
+# library(DT)
+# library(reshape2)
+# library(plotly)
+# library(formattable)
+library(htmltools)
+
+
+#----- 1. create the report directory ------------------------
+# dir.create() replaces the former shell call 'mkdir -p <dir>': the path is
+# no longer parsed by a shell, so blanks or shell metacharacters in it are
+# safe. An already existing directory is not an error.
+dir.create(opt$mirdeep2_output_dir, recursive = TRUE, showWarnings = FALSE)
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+
+#----- mirdeep2.Rmd -----------------------
+# Fill in the mirdeep2.Rmd template: every placeholder below is replaced by
+# the value of the command line option it is mapped to, and the result is
+# written to 'mirdeep2.Rmd' in the working directory.
+#
+# The placeholders are substituted one after another in the order listed:
+#   * SPECIES has to come after SPECIES_MATURE_MIRNA and
+#     SPECIES_RELATED_MATURE_MIRNA because it is a substring of both.
+#   * a value that itself contains a placeholder listed further down is
+#     rewritten by that later substitution.
+placeholders = c(
+  ECHO                         = 'echo',
+  COLLAPSED_READS              = 'collapsed_reads',
+  REFERENCE_GENOME             = 'reference_genome',
+  READS_MAPPING                = 'reads_mapping',
+  SPECIES_MATURE_MIRNA         = 'species_mature_mirna',
+  SPECIES_RELATED_MATURE_MIRNA = 'related_species_mature_mirna',
+  PRECURSOR_SEQUENCES          = 'precursor_sequences',
+  MIN_READ_STACK_HEIGHT        = 'min_read_stack_height',
+  MIN_SCORE_CUTOFF             = 'min_score_cutoff',
+  RANDFOLD_ANALYSIS            = 'randfold_analysis',
+  MAX_PRECURSOR_NUMBER         = 'max_precursor_number',
+  SPECIES                      = 'species',
+  SWITCH                       = 'switch',
+  OUTPUT_DIR                   = 'mirdeep2_output_dir'
+)
+
+rmd_lines = readLines(opt$mirdeep2_rmd)
+for (placeholder in names(placeholders)) {
+  option_name = placeholders[[placeholder]]
+  # `[[` matches the option name exactly. `$` falls back to partial matching,
+  # so `opt$species` would pick up `species_mature_mirna` whenever --species
+  # itself is absent.
+  value = opt[[option_name]]
+  if (is.null(value)) {
+    # gsub() would fail here anyway ("invalid 'replacement' argument");
+    # name the missing option instead
+    stop('no value supplied for option --', option_name,
+         ' (placeholder ', placeholder, ')', call. = FALSE)
+  }
+  # fixed = TRUE: placeholder and value are plain text, so backslashes in a
+  # value are inserted literally instead of being read as back-references
+  rmd_lines = gsub(placeholder, value, rmd_lines, fixed = TRUE)
+}
+writeLines(rmd_lines, con = 'mirdeep2.Rmd')
+
+
+#------ 3. render all Rmd files with render() --------
+render('mirdeep2.Rmd',output_file = opt$mirdeep2_html)
+
+
+#-------4. manipulate outputs -----------------------------
+#   a. copy non-site files
+file.copy('result.csv', opt$csv_result, recursive=TRUE)
+file.copy('result.html', opt$html_result, recursive=TRUE)
+file.copy('report.log', opt$report_log, recursive=TRUE)
+
+
+
+