changeset 0:1a11c4fd13d0 draft

planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastq_dump commit 65063d5b207a70df38a0bcb6fb57a8f9170d9e9b
author mingchen0919
date Wed, 27 Sep 2017 21:41:29 -0400
parents
children cd86400bbaed
files fastq_dump_pe.Rmd fastq_dump_pe.xml fastq_dump_pe_render.R fastq_dump_se.Rmd fastq_dump_se.xml fastq_dump_se_render.R
diffstat 6 files changed, 395 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_pe.Rmd	Wed Sep 27 21:41:29 2017 -0400
@@ -0,0 +1,54 @@
+---
+title: 'Fastq-dump: download and extract paired end reads into FASTQ/FASTA file'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+```
+
+# Command line arguments
+
+```{r 'command line arguments'}
+str(opt)
+```
+
+# Download and extract reads
+
+```{r 'download and extract reads'}
+# create a directory to store read files
+dir.create('read_files_directory')
+# split the accession list on commas and spaces, dropping empty entries
+sra_accessions = strsplit(gsub(',', ' ', 'SRA_ACCESSION'), ' ')[[1]]
+sra_accessions = sra_accessions[sra_accessions != '']
+# fasta output needs the extra --fasta switch; fastq is the default
+format_flag = if ('FORMAT' == 'fasta') '--fasta ' else ''
+# loop through SRA accessions to download and extract reads.
+for (id in sra_accessions) {
+  # quote the accession so the shell receives it as a single literal argument
+  command = paste0('fastq-dump ', format_flag, '--split-files ',
+                   '-O read_files_directory ', shQuote(id))
+  # fastq-dump command
+  print(command)
+  # command line stdout; results are not auto-printed inside a loop, so print
+  # them explicitly to get them into the report
+  print(system(command = command, intern = TRUE))
+}
+```
+
+
+# Rename files
+
+```{r}
+# full.names = TRUE yields the same './read_files_directory/<file>' paths and
+# an empty vector (rather than the bare directory path) when no file exists
+old_files = list.files('./read_files_directory', full.names = TRUE)
+# only rewrite the mate suffix that sits directly in front of the extension
+new_files = sub('_1(\\.[^./]+)$', '_forward\\1', old_files)
+new_files = sub('_2(\\.[^./]+)$', '_reverse\\1', new_files)
+file.rename(old_files, new_files)
+```
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_pe.xml	Wed Sep 27 21:41:29 2017 -0400
@@ -0,0 +1,50 @@
+<tool id="rmarkdown_fastqc_dump_pe" name="fastqc-dump-pe" version="1.0.0">
+    <requirements>
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.2">r-rmarkdown</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="0.5.0">r-dplyr</requirement>
+        <requirement type="package" version="0.5.4">parallel-fastq-dump</requirement>
+    </requirements>
+    <description>
+        Download and extract paired end reads in fastq or fasta format from NCBI SRA. The output is a list of paired datasets
+        collection.
+    </description>
+    <stdio>
+        <!-- All stderr output is redirected to a file; "XXX" is used so that the pattern matches nothing. -->
+        <regex match="XXX"
+               source="stderr"
+               level="warning"
+               description="Check the warnings_and_errors.txt file for more details."/>
+    </stdio>
+    <!-- Runs the render script. Dataset paths are single-quoted so that the shell passes each one as a single argument. -->
+    <command>
+        <![CDATA[
+            Rscript '${__tool_directory__}/fastq_dump_pe_render.R'
+                -i '$sra_accession'
+                -e $echo
+                -f $format
+
+                -r '$report'
+                -d '$report.files_path'
+                -s '$sink_message'
+
+                -t '${__tool_directory__}/fastq_dump_pe.Rmd'
+        ]]>
+    </command>
+    <inputs>
+        <param type="text" name="sra_accession" label="SRR/DRR/ERR accessions" optional="false"
+               help="A list of SRR/DRR/ERR accessions separated by comma or space. e.g. SRR6077558,ERR343809"/>
+        <param type="boolean" name="format" truevalue="fastq" falsevalue="fasta" checked="true"
+               label="output files in fastq (Yes) or fasta (No)?"/>
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
+               label="Display analysis code in report?"/>
+    </inputs>
+    <outputs>
+        <data format="html" name="report" label="Fastq-dump report" />
+        <collection type="list:paired" name="list_collection" label="Fastq-dump (paired end reads)">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;[^_]+)\.(?P&lt;ext&gt;[^\._]+)?" directory="read_files_directory"/>
+        </collection>
+        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt" />
+    </outputs>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_pe_render.R	Wed Sep 27 21:41:29 2017 -0400
@@ -0,0 +1,96 @@
+##======= Handle arguments from command line ========
+# Set up R error handling: write the error message to stderr, then exit with
+# status 1 without saving the workspace.
+options(show.error.messages=FALSE,
+        error=function(){
+          cat(geterrmessage(), file=stderr())
+          # FALSE is spelled out because the shorthand F is an ordinary,
+          # reassignable variable
+          quit(save="no", status=1, runLast=FALSE)
+        })
+
+# Needed so that Galaxy does not crash with a UTF-8 error under German locale
+# settings.
+loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warnings
+options(warn = -1)
+
+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
+args = commandArgs(trailingOnly=TRUE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+#/////////////////////// SINK WARNINGS AND ERRORS TO A FILE FOR DEBUGGING ///////////
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+
+# Option specification handed to getopt, one entry (row) per flag:
+#   column 1: the long flag name
+#   column 2: the short flag alias, a single character
+#   column 3: argument mask (0 = no argument, 1 = required, 2 = optional)
+#   column 4: data type to which the flag's argument is cast
+#             (logical, integer, double, complex or character)
+spec_list = list(
+  ##------- 1. input data ---------------------
+  SRA_ACCESSION = c('sra_accession', 'i', '1', 'character'),
+  FORMAT = c('format', 'f', '1', 'character'),
+  ECHO = c('echo', 'e', '1', 'character'),
+  ##--------2. output report and outputs --------------
+  REPORT_HTML = c('report_html', 'r', '1', 'character'),
+  OUTPUT_DIR = c('output_dir', 'd', '1', 'character'),
+  SINK_OUTPUT = c('sink_message', 's', '1', 'character'),
+  ##--------3. Rmd templates in the tool directory ----------
+  FASTQ_DUMP_PE_RMD = c('fastq_dump_pe_rmd', 't', '1', 'character')
+)
+
+# stack the entries into a character matrix with one row per flag
+spec = do.call(rbind, spec_list)
+opt = getopt(spec)
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(htmltools)
+library(dplyr)
+
+#----- 1. create the report directory ------------------------
+dir.create(opt$output_dir)
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+#----- 01 fastq_dump_pe.Rmd -----------------------
+# Placeholder tokens used in the template and the values that replace them.
+# They are applied one after another, in this order, to every template line.
+placeholders = list(
+  SRA_ACCESSION = opt$sra_accession,
+  FORMAT = opt$format,
+  ECHO = opt$echo,
+  OUTPUT_DIR = opt$output_dir
+)
+rmd_lines = readLines(opt$fastq_dump_pe_rmd)
+for (token in names(placeholders)) {
+  # fixed = TRUE matches the token literally and inserts the value verbatim,
+  # so backslashes in user input are not read as regex back-references
+  rmd_lines = gsub(token, placeholders[[token]], rmd_lines, fixed = TRUE)
+}
+# write the filled-in template into the working directory
+writeLines(rmd_lines, con = 'fastq_dump_pe.Rmd')
+
+#------ 3. render all Rmd files --------
+render('fastq_dump_pe.Rmd', output_file = opt$report_html)
+
+
+#-------4. manipulate outputs -----------------------------
+
+
+
+
+
+# Stop diverting messages first: removing the output diversion below closes
+# the log file connection (sink() opened it), and the message stream must not
+# be left pointing at a closed connection.
+sink(type = 'message')
+sink()
+#/////////// END OF SINK OUTPUT ///////////////////////////
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_se.Rmd	Wed Sep 27 21:41:29 2017 -0400
@@ -0,0 +1,51 @@
+---
+title: 'Fastq-dump: download and extract single end reads into FASTQ/FASTA file'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+```
+
+# Command line arguments
+
+```{r 'command line arguments'}
+str(opt)
+```
+
+# Download and extract reads
+
+```{r 'download and extract reads'}
+# create a directory to store read files
+dir.create('read_files_directory')
+# split the accession list on commas and spaces, dropping empty entries
+sra_accessions = strsplit(gsub(',', ' ', 'SRA_ACCESSION'), ' ')[[1]]
+sra_accessions = sra_accessions[sra_accessions != '']
+# fasta output needs the extra --fasta switch; fastq is the default
+format_flag = if ('FORMAT' == 'fasta') '--fasta ' else ''
+# build one fastq-dump command per accession; vapply() returns an empty vector
+# when no accession was given, so nothing is run in that case
+commands = vapply(sra_accessions, function(id) {
+  # quote the accession so the shell receives it as a single literal argument
+  paste0('fastq-dump ', format_flag, '-O read_files_directory ', shQuote(id))
+}, character(1), USE.NAMES = FALSE)
+```
+
+* `fastq-dump` command
+```{r}
+print(commands)
+```
+
+* `command line stdout`
+
+```{r}
+# run the command for every accession and print what each one wrote to stdout
+for (command in commands) {
+  print(system(command = command, intern = TRUE))
+}
+```
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_se.xml	Wed Sep 27 21:41:29 2017 -0400
@@ -0,0 +1,48 @@
+<tool id="rmarkdown_fastqc_dump_se" name="fastqc-dump-se" version="1.0.0">
+    <requirements>
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.2">r-rmarkdown</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="0.5.0">r-dplyr</requirement>
+        <requirement type="package" version="0.5.4">parallel-fastq-dump</requirement>
+    </requirements>
+    <description>
+        Download and extract single end reads in fastq or fasta format from NCBI SRA. The output is a list of datasets
+        collection.
+    </description>
+    <stdio>
+        <!-- All stderr output is redirected to a file; "XXX" is used so that the pattern matches nothing. -->
+        <regex match="XXX"
+               source="stderr"
+               level="warning"
+               description="Check the warnings_and_errors.txt file for more details." />
+    </stdio>
+    <!-- Runs the render script. Dataset paths are single-quoted so that the shell passes each one as a single argument. -->
+    <command>
+        <![CDATA[
+            Rscript '${__tool_directory__}/fastq_dump_se_render.R'
+                -i '$sra_accession'
+                -e $echo
+                -f $format
+
+                -r '$report'
+                -d '$report.files_path'
+                -s '$sink_message'
+
+                -t '${__tool_directory__}/fastq_dump_se.Rmd'
+        ]]>
+    </command>
+    <inputs>
+        <param type="text" name="sra_accession" label="SRR/DRR/ERR accessions"
+               help="A list of SRR/DRR/ERR accessions separated by comma or space. e.g. SRR6077558,ERR343809"/>
+        <param type="boolean" name="format" truevalue="fastq" falsevalue="fasta" checked="true" label="output files in fastq (Yes) or fasta (No)?"/>
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+    </inputs>
+    <outputs>
+        <data format="html" name="report" label="Fastq-dump report" />
+        <collection type="list" name="list_collection" label="Fastq-dump (single end reads)">
+            <discover_datasets pattern="__name_and_ext__" directory="read_files_directory" />
+        </collection>
+        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt" />
+    </outputs>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_se_render.R	Wed Sep 27 21:41:29 2017 -0400
@@ -0,0 +1,96 @@
+##======= Handle arguments from command line ========
+# Set up R error handling: write the error message to stderr, then exit with
+# status 1 without saving the workspace.
+options(show.error.messages=FALSE,
+        error=function(){
+          cat(geterrmessage(), file=stderr())
+          # FALSE is spelled out because the shorthand F is an ordinary,
+          # reassignable variable
+          quit(save="no", status=1, runLast=FALSE)
+        })
+
+# Needed so that Galaxy does not crash with a UTF-8 error under German locale
+# settings.
+loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# suppress warnings
+options(warn = -1)
+
+options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
+args = commandArgs(trailingOnly=TRUE)
+
+suppressPackageStartupMessages({
+  library(getopt)
+  library(tools)
+})
+
+#/////////////////////// SINK WARNINGS AND ERRORS TO A FILE FOR DEBUGGING ///////////
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+
+# Option specification handed to getopt, one entry (row) per flag:
+#   column 1: the long flag name
+#   column 2: the short flag alias, a single character
+#   column 3: argument mask (0 = no argument, 1 = required, 2 = optional)
+#   column 4: data type to which the flag's argument is cast
+#             (logical, integer, double, complex or character)
+spec_list = list(
+  ##------- 1. input data ---------------------
+  SRA_ACCESSION = c('sra_accession', 'i', '1', 'character'),
+  FORMAT = c('format', 'f', '1', 'character'),
+  ECHO = c('echo', 'e', '1', 'character'),
+  ##--------2. output report and outputs --------------
+  REPORT_HTML = c('report_html', 'r', '1', 'character'),
+  OUTPUT_DIR = c('output_dir', 'd', '1', 'character'),
+  SINK_OUTPUT = c('sink_message', 's', '1', 'character'),
+  ##--------3. Rmd templates in the tool directory ----------
+  FASTQ_DUMP_SE_RMD = c('fastq_dump_se_rmd', 't', '1', 'character')
+)
+
+# stack the entries into a character matrix with one row per flag
+spec = do.call(rbind, spec_list)
+opt = getopt(spec)
+
+#------ Load libraries ---------
+library(rmarkdown)
+library(htmltools)
+library(dplyr)
+
+#----- 1. create the report directory ------------------------
+dir.create(opt$output_dir)
+
+#----- 2. generate Rmd files with Rmd templates --------------
+#   a. templates without placeholder variables:
+#         copy templates from tool directory to the working directory.
+#   b. templates with placeholder variables:
+#         substitute variables with user input values and place them in the working directory.
+
+#----- 01 fastq_dump_se.Rmd -----------------------
+# Placeholder tokens used in the template and the values that replace them.
+# They are applied one after another, in this order, to every template line.
+placeholders = list(
+  SRA_ACCESSION = opt$sra_accession,
+  FORMAT = opt$format,
+  ECHO = opt$echo,
+  OUTPUT_DIR = opt$output_dir
+)
+rmd_lines = readLines(opt$fastq_dump_se_rmd)
+for (token in names(placeholders)) {
+  # fixed = TRUE matches the token literally and inserts the value verbatim,
+  # so backslashes in user input are not read as regex back-references
+  rmd_lines = gsub(token, placeholders[[token]], rmd_lines, fixed = TRUE)
+}
+# write the filled-in template into the working directory
+writeLines(rmd_lines, con = 'fastq_dump_se.Rmd')
+
+#------ 3. render all Rmd files --------
+render('fastq_dump_se.Rmd', output_file = opt$report_html)
+
+
+#-------4. manipulate outputs -----------------------------
+
+
+
+
+
+# Stop diverting messages first: removing the output diversion below closes
+# the log file connection (sink() opened it), and the message stream must not
+# be left pointing at a closed connection.
+sink(type = 'message')
+sink()
+#/////////// END OF SINK OUTPUT ///////////////////////////
\ No newline at end of file