# HG changeset patch
# User mingchen0919
# Date 1508263638 14400
# Node ID 1cc0ed4567e16b0de9dc764e9fdef499eb062873
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client_main commit d9ab791a7ce12362dc6e28c0a518a3f23dd581fe-dirty
diff -r 000000000000 -r 1cc0ed4567e1 bdss_client.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bdss_client.Rmd Tue Oct 17 14:07:18 2017 -0400
@@ -0,0 +1,52 @@
+---
+title: 'Download with BDSS client'
+output:
+ html_document:
+ number_sections: true
+ toc: true
+ theme: cosmo
+ highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+ echo = ECHO, # placeholder token: the render script substitutes the value of its --echo argument here before knitting
+ error=TRUE
+)
+```
+
+# Command line arguments
+
+```{r 'command line arguments'}
+str(opt)
+```
+
+# BDSS configuration file
+
+First, we create a bdss configuration file `bdss.cfg` in the current directory.
+
+```{r}
+# write the two configuration lines from R itself, so the chunk does not depend on a shell `echo` and redirection
+writeLines(c('[metadata_repository]', 'url=http://bdss.bioinfo.wsu.edu/'), 'bdss.cfg')
+```
+
+# Download data
+
+```{r 'download and extract reads'}
+# create a directory to store the downloaded files
+dir.create('read_files_directory')
+# split the comma/space separated address list and drop empty entries
+urls = strsplit(gsub(',', ' ', 'URLS'), ' ')[[1]]
+urls = urls[urls != '']
+# download each address with bdss; shQuote() keeps shell metacharacters such as '&' or ';' literal.
+for(url in urls) {
+ print(url)
+ bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer --destination read_files_directory -u ', shQuote(url))
+ print(bdss_command)
+ print(system(bdss_command, intern = TRUE))
+}
+# all files that need to be saved should be moved to REPORT_DIR directory
+# print(system('mv read_files_directory REPORT_DIR', intern = TRUE))
+```
+
+
diff -r 000000000000 -r 1cc0ed4567e1 bdss_client.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bdss_client.xml Tue Oct 17 14:07:18 2017 -0400
@@ -0,0 +1,47 @@
+
+
+ pandoc
+ r-base
+ r-getopt
+ r-rmarkdown
+ r-htmltools
+ r-dplyr
+ parallel-fastq-dump
+ r-rcurl
+
+
+ Download data with BDSS client.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 1cc0ed4567e1 bdss_client_render.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bdss_client_render.R Tue Oct 17 14:07:18 2017 -0400
@@ -0,0 +1,82 @@
+library(getopt)
+library(rmarkdown)
+library(htmltools)
+library(dplyr)
+library(RCurl)
+
+
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+log_con = file('warnings_and_errors.txt') # one connection receives both streams
+sink(file = log_con, type = 'output') # divert regular output
+sink(file = log_con, type = 'message') # divert messages, warnings and errors
+##---------below is the code for rendering .Rmd templates-----
+
+ ##=============STEP 1: handle command line arguments==========
+ ##
+ ##============================================================
+ # column 1: the long flag name
+ # column 2: the short flag alias. A SINGLE character string
+ # column 3: argument mask
+ # 0: no argument
+ # 1: argument required
+ # 2: argument is optional
+ # column 4: data type to which the flag's argument shall be cast.
+ # possible values: logical, integer, double, complex, character.
+ #-------------------------------------------------------------
+ #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++
+ # 1. short flag alias should match the flag in the command section in the XML file.
+ # 2. long flag name can be any legal R variable names
+ # 3. two names in args_list can have common string but one name should not be a part of another name.
+ # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems.
+ #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ ##------- 1. input data ---------------------
+ args_list = list(
+  URLS = c('urls', 'i', '1', 'character'),
+  ECHO = c('echo', 'e', '1', 'character'),
+  ##--------2. output report and outputs --------------
+  REPORT_HTML = c('report_html', 'r', '1', 'character'),
+  REPORT_DIR = c('report_dir', 'd', '1', 'character'),
+  SINK_OUTPUT = c('sink_message', 's', '1', 'character'),
+  ##--------3. Rmd templates in the tool directory ----------
+  BDSS_CLIENT_RMD = c('bdss_client_rmd', 't', '1', 'character')
+ )
+ opt = getopt(do.call(rbind, args_list)) # one spec row per flag, four columns each
+
+
+ ##=======STEP 2: create report directory (optional)==========
+ ##
+ ##===========================================================
+ dir.create(opt$report_dir) # path supplied through the --report_dir / -d flag
+
+ ##=STEP 3: replace placeholders in .Rmd with argument values=
+ ##
+ ##===========================================================
+ #++ need to replace placeholders with args values one by one+
+ #----- 01 bdss_client.Rmd -----------------------
+ # Fill in the template placeholders and write the result to the working directory.
+ # fixed = TRUE makes gsub() treat both the placeholder and its value literally; in the
+ # default regex mode backslashes in a value act as escapes/backreferences in the replacement.
+ # NOTE: substitutions run in sequence, so a value containing a later placeholder name
+ # would be substituted again.
+ rmd_lines = readLines(opt$bdss_client_rmd)
+ placeholder_values = list(
+  URLS = opt$urls,
+  ECHO = opt$echo,
+  REPORT_DIR = opt$report_dir
+ )
+ for (placeholder in names(placeholder_values)) {
+  rmd_lines = gsub(placeholder, placeholder_values[[placeholder]], rmd_lines, fixed = TRUE)
+ }
+ writeLines(rmd_lines, con = 'bdss_client.Rmd')
+
+ ##=============STEP 4: render .Rmd templates=================
+ ##
+ ##===========================================================
+ render('bdss_client.Rmd', output_file = opt$report_html)
+
+##--------end of code rendering .Rmd templates----------------
+sink(type = 'message') # release the message diversion first; it shares the connection that the next sink() closes
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file
diff -r 000000000000 -r 1cc0ed4567e1 bdss_client_sra.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bdss_client_sra.Rmd Tue Oct 17 14:07:18 2017 -0400
@@ -0,0 +1,105 @@
+---
+title: 'Download and extract single end fastq/fasta data with BDSS client from SRA accessions'
+output:
+ html_document:
+ number_sections: true
+ toc: true
+ theme: cosmo
+ highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+ echo = ECHO, # placeholder token: the render script substitutes the value of its --echo argument here before knitting
+ error=TRUE
+)
+```
+
+# Command line arguments
+
+```{r 'command line arguments'}
+str(opt)
+```
+
+# BDSS configuration file
+
+First, we create a bdss configuration file `bdss.cfg` in the current directory.
+
+```{r}
+# write the two configuration lines from R itself, so the chunk does not depend on a shell `echo` and redirection
+writeLines(c('[metadata_repository]', 'url=http://bdss.bioinfo.wsu.edu/'), 'bdss.cfg')
+```
+
+# Download and extract reads
+
+```{r 'download and extract reads'}
+# create two directories, one for single end and the other for paired end SRA reads.
+dir.create('se_read_files_directory')
+dir.create('pe_read_files_directory')
+# download and extract reads (single end)
+sra_ids_se = strsplit(gsub(',', ' ', 'SRA_IDS_SE'), ' ')[[1]]
+sra_ids_se = sra_ids_se[sra_ids_se != '']
+# the requested output format is the same for every accession, so choose the flag once.
+se_format_flag = if('FORMAT' == 'fasta') '--fasta ' else ''
+# loop through SRA accessions to download and extract reads.
+for(id in sra_ids_se) {
+ # build URL from SRA id
+ url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
+ substr(id, 1, 3), '/',
+ substr(id, 1, 6), '/', id, '/', id, '.sra')
+ # download sra file with bdss.
+ # shQuote() stops the shell from interpreting characters in the user-supplied accession.
+ bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', shQuote(url))
+ system(bdss_command, intern = TRUE)
+ # convert .sra to .fastq/.fasta
+ sra_file = shQuote(paste0(id, '.sra'))
+ command = paste0('fastq-dump ', se_format_flag, '-O se_read_files_directory ', sra_file)
+ cat('----convert SRA to fastq/fasta------\n')
+ print(system(command, intern = TRUE))
+}
+
+# download and extract reads (paired end)
+sra_ids_pe = strsplit(gsub(',', ' ', 'SRA_IDS_PE'), ' ')[[1]]
+sra_ids_pe = sra_ids_pe[sra_ids_pe != '']
+# the requested output format is the same for every accession, so choose the flag once.
+pe_format_flag = if('FORMAT' == 'fasta') '--fasta ' else ''
+# loop through SRA accessions to download and extract reads.
+for(id in sra_ids_pe) {
+ # build URL from SRA id
+ url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
+ substr(id, 1, 3), '/',
+ substr(id, 1, 6), '/', id, '/', id, '.sra')
+ # download sra file with bdss.
+ # shQuote() stops the shell from interpreting characters in the user-supplied accession.
+ bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', shQuote(url))
+ system(bdss_command, intern = TRUE)
+ # convert .sra to .fastq/.fasta
+ sra_file = shQuote(paste0(id, '.sra'))
+ command = paste0('fastq-dump ', pe_format_flag, '--split-files -O pe_read_files_directory ', sra_file)
+ cat('----convert SRA to fastq/fasta------\n')
+ command_stdout = system(command, intern = TRUE)
+ print(command_stdout)
+ if(!(paste0(id, '_2.FORMAT') %in% list.files('pe_read_files_directory'))) {
+ # this is not a paired end SRA file: remove its forward-read file from R instead of shelling out to rm.
+ cat(paste0(id, ' is not paired end SRA, the corresponding fastq/fasta file will be deleted.\n'))
+ unlink(Sys.glob(file.path('pe_read_files_directory', paste0(id, '_1.*'))))
+ }
+
+}
+
+cat('-----single end files----\n')
+list.files('./se_read_files_directory')
+cat('-----paired end files----\n')
+list.files('./pe_read_files_directory')
+
+cat('-----Renaming files------\n')
+# rename files for paired end reads: only the read-number suffix directly before the file extension is rewritten
+old_files = paste0('./pe_read_files_directory/', list.files('./pe_read_files_directory'))
+print(old_files)
+new_files = sub('_1(\\.[^./]+)$', '_forward\\1', old_files)
+new_files = sub('_2(\\.[^./]+)$', '_reverse\\1', new_files)
+print(new_files)
+file.rename(old_files, new_files)
+```
+
+
diff -r 000000000000 -r 1cc0ed4567e1 bdss_client_sra.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bdss_client_sra.xml Tue Oct 17 14:07:18 2017 -0400
@@ -0,0 +1,64 @@
+
+
+ pandoc
+ r-base
+ r-getopt
+ r-rmarkdown
+ r-htmltools
+ r-dplyr
+ parallel-fastq-dump
+ r-rcurl
+
+
+ Download data with BDSS client and generate list (single end SRA data) and list:paired dataset collection
+ (paired end SRA data).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 1cc0ed4567e1 bdss_client_sra_render.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bdss_client_sra_render.R Tue Oct 17 14:07:18 2017 -0400
@@ -0,0 +1,90 @@
+library(getopt)
+library(rmarkdown)
+library(htmltools)
+library(dplyr)
+library(RCurl)
+
+
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+log_con = file('warnings_and_errors.txt') # one connection receives both streams
+sink(file = log_con, type = 'output') # divert regular output
+sink(file = log_con, type = 'message') # divert messages, warnings and errors
+##---------below is the code for rendering .Rmd templates-----
+
+##=============STEP 1: handle command line arguments==========
+##
+##============================================================
+# column 1: the long flag name
+# column 2: the short flag alias. A SINGLE character string
+# column 3: argument mask
+# 0: no argument
+# 1: argument required
+# 2: argument is optional
+# column 4: data type to which the flag's argument shall be cast.
+# possible values: logical, integer, double, complex, character.
+#-------------------------------------------------------------
+#++++++++++++++++++++ Best practice ++++++++++++++++++++++++++
+# 1. short flag alias should match the flag in the command section in the XML file.
+# 2. long flag name can be any legal R variable names
+# 3. two names in args_list can have common string but one name should not be a part of another name.
+# for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems.
+#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+##------- 1. input data ---------------------
+args_list = list(
+  SRA_IDS_SE = c('sra_ids_se', 'i', '1', 'character'),
+  SRA_IDS_PE = c('sra_ids_pe', 'p', '1', 'character'),
+  FORMAT = c('format', 'f', '1', 'character'),
+  ECHO = c('echo', 'e', '1', 'character'),
+  ##--------2. output report and outputs --------------
+  REPORT_HTML = c('report_html', 'r', '1', 'character'),
+  REPORT_DIR = c('report_dir', 'd', '1', 'character'),
+  SINK_OUTPUT = c('sink_message', 's', '1', 'character'),
+  ##--------3. Rmd templates in the tool directory ----------
+  BDSS_CLIENT_RMD = c('bdss_client_rmd', 't', '1', 'character')
+)
+opt = getopt(do.call(rbind, args_list)) # one spec row per flag, four columns each
+
+
+##=======STEP 2: create report directory (optional)==========
+##
+##===========================================================
+dir.create(opt$report_dir) # path supplied through the --report_dir / -d flag
+
+##=STEP 3: replace placeholders in .Rmd with argument values=
+##
+##===========================================================
+#++ need to replace placeholders with args values one by one+
+#----- 01 bdss_client.Rmd -----------------------
+# Fill in the template placeholders and write the result to the working directory.
+#
+# fixed = TRUE makes gsub() treat both the placeholder and its value literally;
+# in the default regex mode backslashes in a value act as escapes/backreferences
+# in the replacement.
+#
+# NOTE: substitutions run in sequence, so a value containing a later placeholder
+# name would be substituted again.
+rmd_lines = readLines(opt$bdss_client_rmd)
+placeholder_values = list(
+  SRA_IDS_SE = opt$sra_ids_se,
+  SRA_IDS_PE = opt$sra_ids_pe,
+  FORMAT = opt$format,
+  ECHO = opt$echo,
+  REPORT_DIR = opt$report_dir
+)
+for (placeholder in names(placeholder_values)) {
+  rmd_lines = gsub(placeholder, placeholder_values[[placeholder]], rmd_lines, fixed = TRUE)
+}
+writeLines(rmd_lines, con = 'bdss_client.Rmd')
+
+##=============STEP 4: render .Rmd templates=================
+##
+##===========================================================
+render('bdss_client.Rmd', output_file = opt$report_html)
+
+##--------end of code rendering .Rmd templates----------------
+sink(type = 'message') # release the message diversion first; it shares the connection that the next sink() closes
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file