Mercurial > repos > mingchen0919 > rmarkdown_mirdeep2
changeset 0:963905bcb754 draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_mirdeep2 commit 29e8b40899c71ca12fd07b2bb530b0ee65037588-dirty
author | mingchen0919 |
---|---|
date | Tue, 08 Aug 2017 13:14:41 -0400 |
parents | |
children | 75e53be98c51 |
files | mirdeep2.Rmd mirdeep2.xml mirdeep2_mapper.Rmd mirdeep2_mapper.xml mirdeep2_mapper_render.R mirdeep2_render.R |
diffstat | 6 files changed, 875 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
---
title: 'Mirdeep2'
output:
    html_document:
      number_sections: true
      toc: true
      theme: cosmo
      highlight: tango
---

<!--
Report template mirdeep2.Rmd. The upper-case tokens inside string literals
and conditions are placeholders: mirdeep2_render.R replaces them with the
values of the current job before this document is knitted.
-->

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
knitr::opts_chunk$set(
  echo = ECHO
)
```

# Job command line

```{r 'build PATH', echo=FALSE}
# Search path for the job: the miRDeep2 patch and bin directories first, then
# the current working directory, then whatever the shell already has in $PATH.
PATH <- paste(
  '/home/galaxy/mirdeep2_patch',
  '/home/galaxy/mirdeep2/bin',
  system('pwd', intern = TRUE),
  '$PATH',
  sep = ':'
)
```

## Build job command line.

```{r 'build command line'}
# An optional dataset that was not selected arrives as the literal 'None'; it
# is handed to miRDeep2.pl lower-cased. Any other value is passed unchanged.
optional_input <- function(value) {
  if (value == 'None') tolower(value) else value
}

# Optional flags; each one is an empty string when the flag is not wanted.

# -a: minimum read stack height. Skipped when the value is 'TRUE'
# (presumably what the option parser reports when no number was entered).
stack_height_flag <- if ('MIN_READ_STACK_HEIGHT' == 'TRUE') {
  ''
} else {
  '-a MIN_READ_STACK_HEIGHT '
}

# -c: disable the randfold analysis.
randfold_flag <- if (RANDFOLD_ANALYSIS) '-c ' else ''

# -t: restrict the search to one species; left out for 'all'.
species_flag <- if ('SPECIES' == 'all') '' else '-t SPECIES '

# -P: the reference miRNAs carry miRBase v18 identifiers. The flag must be
# present exactly when that option is switched on.
mirbase_v18_flag <- if (SWITCH) '-P ' else ''

command_line <- paste0(
  # run inside the job output directory
  'cd OUTPUT_DIR && ',
  # environment for miRDeep2 and its Perl libraries
  'export PATH=', PATH, ' && ',
  'export PERL_MB_OPT="--install_base /home/galaxy/perl5" && ',
  'export PERL_MM_OPT="INSTALL_BASE=/home/galaxy/perl5" && ',
  'export PERL5LIB=/home/galaxy/mirdeep2/lib/perl5 && ',
  # link the inputs under the file names used on the miRDeep2.pl command line
  'ln -s COLLAPSED_READS reads_collapsed.fa && ',
  'ln -s READS_MAPPING reads_collapsed_vs_genome.arf && ',
  # positional arguments: collapsed reads, genome, mappings
  'miRDeep2.pl reads_collapsed.fa REFERENCE_GENOME reads_collapsed_vs_genome.arf ',
  # mature miRNAs of this species, of related species, and precursors
  optional_input('SPECIES_MATURE_MIRNA'), ' ',
  optional_input('SPECIES_RELATED_MATURE_MIRNA'), ' ',
  optional_input('PRECURSOR_SEQUENCES'), ' ',
  stack_height_flag,
  '-b MIN_SCORE_CUTOFF ',
  randfold_flag,
  '-g MAX_PRECURSOR_NUMBER ',
  species_flag,
  mirbase_v18_flag,
  # stdout and stderr both end up in report.log
  '>report.log 2>&1'
)

command_line

## run job
system(command_line)
```

## Results

```{r echo=TRUE}
# Copy the results into the working directory under fixed names and give the
# PDF directory a stable name so the links below can be relative.
system('cp OUTPUT_DIR/result*.html result.html')
system('cp OUTPUT_DIR/result*.csv result.csv')
system('cp OUTPUT_DIR/report.log report.log')
system('mv OUTPUT_DIR/pdfs_* OUTPUT_DIR/pdfs')

## check if the output directory has the results that we want.
system('ls OUTPUT_DIR', intern = TRUE)
```

### PDF files

```{r echo=TRUE}
pdf_files <- list.files('OUTPUT_DIR/pdfs', pattern = '\\.pdf$')
pdf_files

# The report is served from the output directory, so every link has to be
# relative to that directory.
tags$ul(
  lapply(pdf_files, function(pdf_file) {
    tags$li(tags$a(href = paste0('pdfs/', pdf_file), pdf_file))
  })
)
```
<!-- Galaxy tool definition: mirdeep2.xml -->
<tool id="mirdeep2" name="mirdeep2" version="1.0.0">
    <requirements>
        <requirement type="package" version="2.0.0.8">mirdeep2</requirement>
        <requirement type="package" version="1.20.0">r-getopt</requirement>
        <requirement type="package" version="1.2">r-rmarkdown</requirement>
        <requirement type="package" version="1.8.4">r-plyr</requirement>
        <requirement type="package" version="0.5.0">r-dplyr</requirement>
        <requirement type="package" version="0.3.5">r-htmltools</requirement>
    </requirements>
    <description>
        miRNA identification
    </description>
    <stdio>
        <regex match="Execution halted"
               source="both"
               level="fatal"
               description="Execution halted." />
        <regex match="Error in"
               source="both"
               level="fatal"
               description="An undefined error occured, please check your intput carefully and contact your administrator." />
        <regex match="Fatal error"
               source="both"
               level="fatal"
               description="An undefined error occured, please check your intput carefully and contact your administrator." />
    </stdio>
    <command>
        <![CDATA[
        ## Add tools to PATH
        export PATH=/opt/R-3.2.5/bin:\$PATH &&

        Rscript '${__tool_directory__}/mirdeep2_render.R'

            ## 1. input data
            -e $echo
            -a $collapsed_reads
            -b $reference_genome
            -c $reads_mapping

            ## 2. output report and report site directory
            -o $mirdeep2_site
            -d $mirdeep2_site.files_path
            -r $csv_result
            -t $html_result
            -u $report_log

            ## other parameters

            -f $species_mature_mirna
            -g $related_species_mature_mirna
            -h $precursor_sequences
            -j $min_read_stack_height
            -k $min_score_cutoff
            -l $randfold_analysis
            -m $max_precursor_number
            -n $species
            -q $switch

            ## 3. Rmd templates sitting in the tool directory

                ## other Rmd body template files
                -p '${__tool_directory__}/mirdeep2.Rmd'


        ]]>
    </command>
    <inputs>
        <param name="collapsed_reads" type="data" format="fasta" multiple="False" optional="False"
               label="Read input file" help="Reads in fastq or fasta format"/>
        <param name="reference_genome" type="data" format="fasta" multiple="False" optional="False"
               label="Reference genome from history"
               help="Reference genome in fasta format"/>
        <param name="reads_mapping" type="data" format="tabular" multiple="False" optional="False"
               label="reads mappings" help="reads mappings in arf format"/>
        <param name="species_mature_mirna" type="data" format="fasta" multiple="False" optional="True"
               label="miRBase mature miRNA for this species"
               help="mature miRNA in fasta format"/>
        <param name="related_species_mature_mirna" type="data" format="fasta" multiple="False" optional="TRUE"
               label="miRBase mature miRNA from related species"
               help="mature miRNA in fasta format"/>
        <param name="precursor_sequences" type="data" format="fasta" multiple="False" optional="TRUE"
               label="precursor sequences" help="miRBase miRNA precursor sequences in fasta format"/>
        <param name="min_read_stack_height" type="integer" argument="-a" optional="True"
               label="minimum read stack height"
               help="minimum read stack height that triggers analysis. Using this option disables
                     automatic estimation of the optimal value."/>
        <param name="min_score_cutoff" type="integer" value="0" argument="-b" label="minimum score cut-off"
               help="minimum score cut-off for predicted novel miRNAs to be displayed in the overview
                     table. This score cut-off is by default 0."/>
        <param name="randfold_analysis" type="boolean" truevalue="TRUE" falsevalue="FALSE" argument="-c" checked="False"
               label="randfold analysis" help="disable randfold analysis"/>
        <param name="max_precursor_number" type="integer" value="50000" argument="-g" label="maximum number of precursors"
               help="maximum number of precursors to analyze when automatic excision gearing is used.
                     default=50000, if set to -1 all precursors will be analyzed"/>
        <param name="species" type="select" label="Search in species">
            <option value="all">All species</option>
            <option value="tni">tetraodon</option>
            <option value="dps">d.pseudoobscura</option>
            <option value="dya">d.yakuba</option>
            <option value="ame">a.mellifera</option>
            <option value="dmo">d.mojavensis</option>
            <option value="cel">worm</option>
            <option value="aga">a.gambiae</option>
            <option value="cbr">c.briggsae</option>
            <option value="cin">c.intestinalis</option>
            <option value="mmu">mouse</option>
            <option value="xtr">x.tropicalis</option>
            <option value="eca">horse</option>
            <option value="cfa">dog</option>
            <option value="fru">fugu</option>
            <option value="bta">cow</option>
            <option value="der">d.erecta</option>
            <option value="dgr">d.grimshawi</option>
            <option value="gga">chicken</option>
            <option value="spu">s.purpuratus</option>
            <option value="bfl">lancelet</option>
            <option value="ptr">chimp</option>
            <option value="dse">d.sechellia</option>
            <option value="dpe">d.persimilis</option>
            <option value="dvi">d.virilis</option>
            <option value="rno">rat</option>
            <option value="dme">d.melanogaster</option>
            <option value="lca">cat</option>
            <option value="sja">c.japonica</option>
            <option value="dan">d.ananassae</option>
            <option value="hsa">human</option>
            <option value="dsi">d.simulans</option>
        </param>
        <param name="switch" type="boolean" truevalue="TRUE" falsevalue="FALSE"
               label="mature_ref_miRNAs contain miRBase v18 identifiers?"
               help="use this switch if mature_ref_miRNAs contain miRBase v18 identifiers (5p and 3p) instead of previous ids from v17"/>


        <param name="echo" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
    </inputs>
    <outputs>
        <data format="html" name="mirdeep2_site" label="mirdeep2 miRNA identification" />
        <!-- Each result is collected from the file of the same kind that the
             report template leaves in the job working directory. -->
        <data name="csv_result" format="csv" label="miRNA in csv format" from_work_dir="result.csv"/>
        <data name="html_result" format="html" label="miRNA in html format" from_work_dir="result.html"/>
        <data name="report_log" format="txt" label="report log" from_work_dir="report.log"/>
    </outputs>
    <citations>
        <citation type="bibtex">
            @article{friedlander2011mirdeep2,
            title={miRDeep2 accurately identifies known and hundreds of novel microRNA genes in seven animal clades},
            author={Friedl{\"a}nder, Marc R and Mackowiak, Sebastian D and Li, Na and Chen, Wei and Rajewsky, Nikolaus},
            journal={Nucleic acids research},
            volume={40},
            number={1},
            pages={37--52},
            year={2011},
            publisher={Oxford University Press}
            }
        </citation>
        <citation type="bibtex">
            @article{allaire2016rmarkdown,
            title={rmarkdown: Dynamic Documents for R, 2016},
            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
            journal={R package version 0.9},
            volume={6},
            year={2016}
            }
        </citation>
        <citation type="bibtex">
            @book{xie2015dynamic,
            title={Dynamic Documents with R and knitr},
            author={Xie, Yihui},
            volume={29},
            year={2015},
            publisher={CRC Press}
            }
        </citation>
    </citations>
</tool>
---
title: 'Mirdeep2 Mapper'
output:
    html_document:
      number_sections: true
      toc: true
      theme: cosmo
      highlight: tango
---

<!--
Report template mirdeep2_mapper.Rmd. The upper-case tokens inside string
literals and conditions are placeholders: mirdeep2_mapper_render.R replaces
them with the values of the current job before this document is knitted.
-->

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
knitr::opts_chunk$set(
  echo = ECHO
)
```

# Job command line

## View arguments from command line.

```{r 'view arguments'}
str(opt)
```

```{r 'build PATH', echo=FALSE}
# Search path for the job: the miRDeep2 bin directory first, then the current
# working directory, then whatever the shell already has in $PATH.
PATH <- paste(
  '/home/galaxy/mirdeep2/bin',
  system('pwd', intern = TRUE),
  '$PATH',
  sep = ':'
)
```


## Build job command line.

```{r 'build command line'}
# Optional flags; each one is an empty string when the flag is not wanted.

# -j: remove entries containing non-canonical letters.
clean_flag <- if (CLEAN_ENTRIES) '-j ' else ''

# -k: clip the 3' adapter. The adapter is optional: an empty value means none
# was entered. 'TRUE' is treated the same way, because the adapter is declared
# as an optional-argument flag in the render script and such a flag given
# without a value is presumably reported as TRUE by the option parser.
adapter <- 'CLIP_3_ADAPTER'
adapter_flag <- if (adapter %in% c('', 'TRUE')) {
  ''
} else {
  paste0('-k ', adapter, ' ')
}

# -q: map with one mismatch in the seed.
mismatch_flag <- if (MAP_WITH_ONE_MISMATCH) '-q ' else ''

command_line <- paste0(
  'export PATH=', PATH, ' && ',
  # index reference genome
  'bowtie-build REFERENCE_GENOME ref_genome && ',
  # reads (-c) and the genome index built above (-p); the parse-to-fasta
  # flag (-h) is deliberately not passed
  'mapper.pl FASTQ_READS -c -p ref_genome ',
  clean_flag,
  adapter_flag,
  # discard shorter reads
  '-l DISCARD_SHORTER_READS ',
  # reads are always collapsed, regardless of the corresponding tool option
  '-m ',
  mismatch_flag,
  # map up to position
  '-r MAP_UP_TO_POSITION ',
  # outputs; overwrite existing files (-n)
  '-s reads_collapsed.fa -t reads_collapsed_vs_genome.arf -v -n ',
  # stderr goes to report.log
  '2>report.log'
)
command_line
```

## Run command line

```{r}
system(command_line)
```
<!-- Galaxy tool definition: mirdeep2_mapper.xml -->
<tool id="mirdeep2_mapper_site" name="mirdeep2_mapper" version="1.0.0">
    <requirements>
        <requirement type="package" version="2.0.0.8">mirdeep2</requirement>
        <requirement type="package" version="1.20.0">r-getopt</requirement>
        <requirement type="package" version="1.2">r-rmarkdown</requirement>
        <requirement type="package" version="1.8.4">r-plyr</requirement>
        <requirement type="package" version="0.5.0">r-dplyr</requirement>
        <requirement type="package" version="0.3.5">r-htmltools</requirement>
    </requirements>
    <description>
        Mapping reads to genome
    </description>
    <stdio>
        <regex match="Execution halted"
               source="both"
               level="fatal"
               description="Execution halted." />
        <regex match="Error in"
               source="both"
               level="fatal"
               description="An undefined error occured, please check your intput carefully and contact your administrator." />
        <regex match="Fatal error"
               source="both"
               level="fatal"
               description="An undefined error occured, please check your intput carefully and contact your administrator." />
    </stdio>
    <command>
        <![CDATA[
        ## Add tools to PATH
        export PATH=/opt/R-3.2.5/bin:\$PATH &&

        Rscript '${__tool_directory__}/mirdeep2_mapper_render.R'

            ## 1. input data
            -r $reads
            -g $reference_genome
            -e $echo

            ## 2. output report and report site directory
            -o $mirdeep2_mapper_site
            -d $mirdeep2_mapper_site.files_path
            -m $reads_collapsed
            -n $reads_collapsed_vs_genome
            -t $report_log

            ## other parameters
            -b $parse_to_fasta
            -c $clean_entries
            -f $clip_3_adapter
            -h $discard_shorter_reads
            -j $collapse_reads_or_not
            -k $map_with_one_mismatch
            -l $map_up_to_position


            ## 3. Rmd templates sitting in the tool directory

                ## other Rmd body template files
                -p '${__tool_directory__}/mirdeep2_mapper.Rmd'



        ]]>
    </command>
    <inputs>
        <param name="reads" type="data" format="fastq,fasta" multiple="False" optional="False"
               label="Read input file" help="Reads in fastq or fasta format"/>
        <param name="reference_genome" type="data" format="fasta" multiple="false" optional="False" argument="-p"
               label="Reference genome from history" />

        <param name="parse_to_fasta" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" argument="-h"
               optional="True" label="Parse to fasta format" />
        <param name="clean_entries" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" argument="-j"
               optional="True" label="Remove reads with non-canonical letters"
               help="remove all entries that have a sequence that contains letters other than a,c,g,t,u,n,A,C,G,T,U,N" />
        <param name="clip_3_adapter" type="text" argument="-k" value="" optional="true" label="Clip 3' adapter sequence (optional)"
               help="Enter an adapter sequence"/>
        <param name="discard_shorter_reads" type="integer" min="0" value="18" argument="-l"
               label="Discard reads shorter than int nts" />
        <param name="collapse_reads_or_not" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" argument="-m"
               label="Collapse reads" />
        <param name="map_with_one_mismatch" type="boolean" truevalue="TRUE" falsevalue="FALSE" argument="-q"
               label="Map with one mismatch"
               help="map with one mismatch in the seed (mapping takes longer)"/>
        <param name="map_up_to_position" type="integer" min="1" value="5" label="Map up to int nts" argument="-r"
               help="A read is allowed to map up to this number of positions in the genome. Default is 5"/>

        <param name="echo" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
    </inputs>
    <outputs>
        <data name="mirdeep2_mapper_site" format="html" label="mirdeep2_mapper site" />
        <data name="reads_collapsed" format="fasta" label="reads_collapsed" />
        <data name="reads_collapsed_vs_genome" format="tabular" label="reads_collapsed_vs_ref_genome" />
        <data name="report_log" format="txt" label="report log"/>
    </outputs>
    <citations>
        <citation type="bibtex">
            @article{friedlander2011mirdeep2,
            title={miRDeep2 accurately identifies known and hundreds of novel microRNA genes in seven animal clades},
            author={Friedl{\"a}nder, Marc R and Mackowiak, Sebastian D and Li, Na and Chen, Wei and Rajewsky, Nikolaus},
            journal={Nucleic acids research},
            volume={40},
            number={1},
            pages={37--52},
            year={2011},
            publisher={Oxford University Press}
            }
        </citation>
        <citation type="bibtex">
            @article{allaire2016rmarkdown,
            title={rmarkdown: Dynamic Documents for R, 2016},
            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
            journal={R package version 0.9},
            volume={6},
            year={2016}
            }
        </citation>
        <citation type="bibtex">
            @book{xie2015dynamic,
            title={Dynamic Documents with R and knitr},
            author={Xie, Yihui},
            volume={29},
            year={2015},
            publisher={CRC Press}
            }
        </citation>
    </citations>
</tool>
# mirdeep2_mapper_render.R
#
# Command-line driver for the miRDeep2 mapper report: parses the options
# passed by the tool wrapper, fills the placeholders of the Rmd template with
# them, renders the report and copies the mapper outputs to the requested
# destinations.

##======= Handle arguments from command line ========
# Send R error messages to stderr and exit with a non-zero status.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    quit("no", 1, FALSE)
  }
)

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

# suppress warnings
options(warn = -1)

options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)

suppressPackageStartupMessages({
  library(getopt)
  library(tools)
})

# One row per option:
#   column 1: the long flag name
#   column 2: the short flag alias. A SINGLE character string
#   column 3: argument mask
#               0: no argument
#               1: argument required
#               2: argument is optional
#   column 4: data type to which the flag's argument shall be cast.
#             possible values: logical, integer, double, complex, character.
spec <- rbind(
  ##------- 1. input data ---------------------
  FASTQ_READS = c('reads', 'r', '1', 'character'),
  REFERENCE_GENOME = c('reference_genome', 'g', '1', 'character'),
  ECHO = c('echo', 'e', '1', 'character'),

  ##--------2. output report and report site directory --------------
  OUTPUT_HTML = c('mirdeep2_mapper', 'o', '1', 'character'),
  OUTPUT_DIR = c('mirdeep2_mapper_dir', 'd', '1', 'character'),
  READS_COLLAPSED = c('reads_collapsed', 'm', '1', 'character'),
  READS_COLLAPSED_VS_GENOME = c('reads_collapsed_vs_genome', 'n', '1', 'character'),
  REPORT_LOG = c('report_log', 't', '1', 'character'),

  ##---------other parameters---------------
  PARSE_TO_FASTA = c('parse_to_fasta', 'b', '1', 'character'),
  CLEAN_ENTRIES = c('clean_entries', 'c', '1', 'character'),
  CLIP_3_ADAPTER = c('clip_3_adapter', 'f', '2', 'character'),
  DISCARD_SHORTER_READS = c('discard_shorter_reads', 'h', '1', 'character'),
  COLLAPSE_READS_OR_NOT = c('collapse_reads_or_not', 'j', '1', 'character'),
  MAP_WITH_ONE_MISMATCH = c('map_with_one_mismatch', 'k', '1', 'character'),
  MAP_UP_TO_POSITION = c('map_up_to_position', 'l', '1', 'character'),

  ##--------3. Rmd templates sitting in the tool directory ----------
  ## _site.yml and index.Rmd files (accepted on the command line, not used below)
  SITE_YML = c('site_yml', 's', '1', 'character'),
  INDEX_Rmd = c('index_rmd', 'i', '1', 'character'),

  ## other Rmd body template files
  MIRDEEP2_MAPPER_RMD = c('mirdeep2_mapper_rmd', 'p', '1', 'character')
)

# Arguments are accessed by long flag name (the first column of the spec
# matrix), NOT by row name. `[[` is used instead of `$` further down so that a
# missing option can never be silently answered by a partially matching name.
opt <- getopt(spec)
##====== End of arguments handling ==========

#------ Load libraries ---------
# The template is rendered in this session and uses the pipe operator (dplyr)
# and the `tags` builder (htmltools).
library(rmarkdown)
library(plyr)
library(dplyr)
library(htmltools)


# Replace each placeholder (the names of `substitutions`, applied in the given
# order) by its value in every line of the template. Matching is literal, so
# characters that are special in regular expressions or in regex replacements
# (e.g. backslashes in a path) are inserted verbatim.
fill_template <- function(template_lines, substitutions) {
  for (placeholder in names(substitutions)) {
    value <- substitutions[[placeholder]]
    if (is.null(value)) {
      stop("No value supplied for template placeholder ", placeholder,
           call. = FALSE)
    }
    template_lines <- gsub(placeholder, value, template_lines, fixed = TRUE)
  }
  template_lines
}

# Copy one job result to its destination, replacing a destination file that
# already exists. A failed copy is reported on stderr but does not stop the
# script.
copy_output <- function(from, to) {
  copied <- file.copy(from, to, overwrite = TRUE)
  if (!isTRUE(copied)) {
    message("Could not copy ", from, " to ", to)
  }
  invisible(copied)
}


#----- 1. create the report directory ------------------------
dir.create(opt[['mirdeep2_mapper_dir']], recursive = TRUE, showWarnings = FALSE)

#----- 2. generate Rmd files with Rmd templates --------------
# Substitute the placeholder variables with the user input values and place
# the resulting document in the working directory.

#----- mirdeep2_mapper.Rmd -----------------------
substitutions <- list(
  ECHO = opt[['echo']],
  FASTQ_READS = opt[['reads']],
  REFERENCE_GENOME = opt[['reference_genome']],
  PARSE_TO_FASTA = opt[['parse_to_fasta']],
  CLEAN_ENTRIES = opt[['clean_entries']],
  CLIP_3_ADAPTER = opt[['clip_3_adapter']],
  DISCARD_SHORTER_READS = opt[['discard_shorter_reads']],
  COLLAPSE_READS = opt[['collapse_reads_or_not']],
  MAP_WITH_ONE_MISMATCH = opt[['map_with_one_mismatch']],
  MAP_UP_TO_POSITION = opt[['map_up_to_position']],
  OUTPUT_DIR = opt[['mirdeep2_mapper_dir']]
)

template_lines <- readLines(opt[['mirdeep2_mapper_rmd']])
writeLines(fill_template(template_lines, substitutions), 'mirdeep2_mapper.Rmd')


#------ 3. render all Rmd files with render() --------
render('mirdeep2_mapper.Rmd', output_file = opt[['mirdeep2_mapper']])


#-------4. manipulate outputs -----------------------------
# a. copy non-site files
copy_output('reads_collapsed.fa', opt[['reads_collapsed']])
copy_output('reads_collapsed_vs_genome.arf', opt[['reads_collapsed_vs_genome']])
copy_output('report.log', opt[['report_log']])
# mirdeep2_render.R
#
# Command-line driver for the miRDeep2 report: parses the options passed by
# the tool wrapper, fills the placeholders of the Rmd template with them,
# renders the report and copies the miRDeep2 results to the requested
# destinations.

##======= Handle arguments from command line ========
# Send R error messages to stderr and exit with a non-zero status.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    quit("no", 1, FALSE)
  }
)

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

# suppress warnings
options(warn = -1)

options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)

suppressPackageStartupMessages({
  library(getopt)
  library(tools)
})

# One row per option:
#   column 1: the long flag name
#   column 2: the short flag alias. A SINGLE character string
#   column 3: argument mask
#               0: no argument
#               1: argument required
#               2: argument is optional
#   column 4: data type to which the flag's argument shall be cast.
#             possible values: logical, integer, double, complex, character.
spec <- rbind(
  ##------- 1. input data ---------------------
  ECHO = c('echo', 'e', '1', 'character'),
  COLLAPSED_READS = c('collapsed_reads', 'a', '1', 'character'),
  REFERENCE_GENOME = c('reference_genome', 'b', '1', 'character'),
  READS_MAPPING = c('reads_mapping', 'c', '1', 'character'),

  ##--------2. output report and report site directory --------------
  OUTPUT_HTML = c('mirdeep2_html', 'o', '1', 'character'),
  OUTPUT_DIR = c('mirdeep2_output_dir', 'd', '1', 'character'),
  CSV_RESULT = c('csv_result', 'r', '1', 'character'),
  HTML_RESULT = c('html_result', 't', '1', 'character'),
  REPORT_LOG = c('report_log', 'u', '1', 'character'),

  ##---------other parameters---------
  SPECIES_MATURE_MIRNA = c('species_mature_mirna', 'f', '2', 'character'),
  SPECIES_RELATED_MATURE_MIRNA = c('related_species_mature_mirna', 'g', '2', 'character'),
  PRECURSOR_SEQUENCES = c('precursor_sequences', 'h', '2', 'character'),
  MIN_READ_STACK_HEIGHT = c('min_read_stack_height', 'j', '2', 'character'),
  MIN_SCORE_CUTOFF = c('min_score_cutoff', 'k', '2', 'character'),
  RANDFOLD_ANALYSIS = c('randfold_analysis', 'l', '2', 'character'),
  MAX_PRECURSOR_NUMBER = c('max_precursor_number', 'm', '2', 'character'),
  SPECIES = c('species', 'n', '2', 'character'),
  SWITCH = c('switch', 'q', '2', 'character'),

  ##--------3. Rmd templates sitting in the tool directory ----------
  ## _site.yml and index.Rmd files (accepted on the command line, not used below)
  SITE_YML = c('site_yml', 's', '1', 'character'),
  INDEX_Rmd = c('index_rmd', 'i', '1', 'character'),

  ## other Rmd body template files
  MIRDEEP2_RMD = c('mirdeep2_rmd', 'p', '1', 'character')
)

# Arguments are accessed by long flag name (the first column of the spec
# matrix), NOT by row name. `[[` is used instead of `$` further down: with `$`
# a missing `species` value would be silently answered by the partially
# matching `species_mature_mirna`.
opt <- getopt(spec)
##====== End of arguments handling ==========

#------ Load libraries ---------
# The template is rendered in this session and uses the pipe operator (dplyr)
# and the `tags` builder (htmltools).
library(rmarkdown)
library(plyr)
library(dplyr)
library(htmltools)


# Replace each placeholder (the names of `substitutions`, applied in the given
# order) by its value in every line of the template. Matching is literal, so
# characters that are special in regular expressions or in regex replacements
# (e.g. backslashes in a path) are inserted verbatim.
fill_template <- function(template_lines, substitutions) {
  for (placeholder in names(substitutions)) {
    value <- substitutions[[placeholder]]
    if (is.null(value)) {
      stop("No value supplied for template placeholder ", placeholder,
           call. = FALSE)
    }
    template_lines <- gsub(placeholder, value, template_lines, fixed = TRUE)
  }
  template_lines
}

# Copy one job result to its destination, replacing a destination file that
# already exists. A failed copy is reported on stderr but does not stop the
# script.
copy_output <- function(from, to) {
  copied <- file.copy(from, to, overwrite = TRUE)
  if (!isTRUE(copied)) {
    message("Could not copy ", from, " to ", to)
  }
  invisible(copied)
}


#----- 1. create the report directory ------------------------
dir.create(opt[['mirdeep2_output_dir']], recursive = TRUE, showWarnings = FALSE)

#----- 2. generate Rmd files with Rmd templates --------------
# Substitute the placeholder variables with the user input values and place
# the resulting document in the working directory.

#----- mirdeep2.Rmd -----------------------
# The order matters: SPECIES is a substring of the two *_MATURE_MIRNA
# placeholders, so those have to be replaced before it.
substitutions <- list(
  ECHO = opt[['echo']],
  COLLAPSED_READS = opt[['collapsed_reads']],
  REFERENCE_GENOME = opt[['reference_genome']],
  READS_MAPPING = opt[['reads_mapping']],
  SPECIES_MATURE_MIRNA = opt[['species_mature_mirna']],
  SPECIES_RELATED_MATURE_MIRNA = opt[['related_species_mature_mirna']],
  PRECURSOR_SEQUENCES = opt[['precursor_sequences']],
  MIN_READ_STACK_HEIGHT = opt[['min_read_stack_height']],
  MIN_SCORE_CUTOFF = opt[['min_score_cutoff']],
  RANDFOLD_ANALYSIS = opt[['randfold_analysis']],
  MAX_PRECURSOR_NUMBER = opt[['max_precursor_number']],
  SPECIES = opt[['species']],
  SWITCH = opt[['switch']],
  OUTPUT_DIR = opt[['mirdeep2_output_dir']]
)

template_lines <- readLines(opt[['mirdeep2_rmd']])
writeLines(fill_template(template_lines, substitutions), 'mirdeep2.Rmd')


#------ 3. render all Rmd files with render() --------
render('mirdeep2.Rmd', output_file = opt[['mirdeep2_html']])


#-------4. manipulate outputs -----------------------------
# a. copy non-site files
copy_output('result.csv', opt[['csv_result']])
copy_output('result.html', opt[['html_result']])
copy_output('report.log', opt[['report_log']])