comparison bdss_client_sra.Rmd @ 0:512d008295db draft default tip

planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client_main commit d9ab791a7ce12362dc6e28c0a518a3f23dd581fe-dirty
author mingchen0919
date Tue, 17 Oct 2017 14:09:01 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:512d008295db
1 ---
2 title: 'Download and extract single end and paired end fastq/fasta data with BDSS client from SRA accessions'
3 output:
4 html_document:
5 number_sections: true
6 toc: true
7 theme: cosmo
8 highlight: tango
9 ---
10
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Default chunk options for the whole report.
# error = TRUE lets knitr keep rendering after a chunk fails and shows the
# error message in the output instead of aborting.
# NOTE(review): ECHO is not defined anywhere in this document; it looks like a
# placeholder that is replaced with TRUE/FALSE before rendering -- confirm.
knitr::opts_chunk$set(echo = ECHO, error = TRUE)
16 ```
17
18 # Command line arguments
19
20 ```{r 'command line arguments'}
# Show the structure of the option object in the report.
# NOTE(review): `opt` is not created in this document; presumably the script
# that renders it defines `opt` from the command line -- confirm.
utils::str(opt)
22 ```
23
24 # BDSS configuration file
25
26 First, we create a bdss configuration file `bdss.cfg` in the current directory.
27
28 ```{r}
# Write the two-line BDSS configuration file `bdss.cfg` into the working
# directory: a [metadata_repository] section followed by its url entry.
# writeLines() produces the same file as the former pair of shell `echo`
# redirections, but needs no shell and raises an R error if the file cannot
# be written instead of failing silently.
writeLines(
  c('[metadata_repository]', 'url=http://bdss.bioinfo.wsu.edu/'),
  con = 'bdss.cfg'
)
31 ```
32
33 # Download and extract reads
34
35 ```{r 'download and extract reads'}
# Download every requested SRA run with the BDSS client, convert it to
# fastq/fasta with fastq-dump, list the results and finally rename the
# paired-end files from *_1 / *_2 to *_forward / *_reverse.
#
# NOTE(review): 'SRA_IDS_SE', 'SRA_IDS_PE' and 'FORMAT' are compared or parsed
# as plain string constants, so they look like placeholders that are replaced
# textually before the document is rendered. The quoted literals are kept
# exactly as they were for that reason -- confirm against the tool wrapper.

# One output directory for single-end reads, one for paired-end reads.
se_dir <- 'se_read_files_directory'
pe_dir <- 'pe_read_files_directory'
dir.create(se_dir)
dir.create(pe_dir)

# fastq-dump flag selecting fasta output; empty (fastq) for any other value.
format_flag <- if ('FORMAT' == 'fasta') '--fasta ' else ''

# Split a string of accessions separated by commas and/or whitespace into a
# character vector, dropping the empty pieces left by leading or repeated
# separators. An empty string yields character(0).
parse_sra_ids <- function(ids) {
  pieces <- strsplit(ids, '[,[:space:]]+')[[1]]
  pieces[pieces != '']
}

# Download <id>.sra with bdss, then run fastq-dump on it.
#   id      : a single SRA run accession.
#   out_dir : directory handed to fastq-dump via -O.
#   flags   : extra fastq-dump flags, each followed by a space ('' for none).
# Returns the lines fastq-dump wrote to stdout.
fetch_and_dump <- function(id, out_dir, flags = '') {
  # build URL from SRA id
  url <- paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
                substr(id, 1, 3), '/',
                substr(id, 1, 6), '/', id, '/', id, '.sra')
  # The accession comes from user input, so quote everything derived from it
  # before it reaches the shell.
  system(paste0('/tool_deps/_conda/bin/bdss transfer -u ', shQuote(url)),
         intern = TRUE)
  # convert .sra to .fastq/.fasta
  dump_command <- paste0('fastq-dump ', flags, '-O ', out_dir, ' ',
                         shQuote(paste0(id, '.sra')))
  cat('----convert SRA to fastq/fasta------\n')
  system(dump_command, intern = TRUE)
}

# download and extract reads (single end)
sra_ids_se <- parse_sra_ids('SRA_IDS_SE')
for (id in sra_ids_se) {
  print(fetch_and_dump(id, se_dir, flags = format_flag))
}

# download and extract reads (paired end)
sra_ids_pe <- parse_sra_ids('SRA_IDS_PE')
for (id in sra_ids_pe) {
  print(fetch_and_dump(id, pe_dir,
                       flags = paste0(format_flag, '--split-files ')))
  # A run with no <id>_2 file is not paired end: remove its <id>_1.* output.
  if (!file.exists(file.path(pe_dir, paste0(id, '_2.FORMAT')))) {
    cat(paste0(id, ' is not paired end SRA, the corresponding fastq/fasta file will deleted.'))
    # unlink() on an empty match is a no-op, unlike `rm` on an unmatched glob.
    unlink(Sys.glob(file.path(pe_dir, paste0(id, '_1.*'))))
  }
}

cat('-----single end files----\n')
list.files('./se_read_files_directory')
cat('-----paired end files----\n')
list.files('./pe_read_files_directory')

cat('-----Renaming files------\n')
# rename files for paired end reads
# full.names = TRUE returns character(0) for an empty directory; pasting the
# directory prefix onto an empty listing would instead yield the bare
# directory path and make file.rename() operate on the directory itself.
old_files <- list.files('./pe_read_files_directory', full.names = TRUE)
print(old_files)
# Only the trailing _1.<ext> / _2.<ext> of the file name is rewritten, so an
# accidental "_1" or "_2" elsewhere in the path is left alone.
new_files <- sub('_1(\\.[^/]*)$', '_forward\\1', old_files)
new_files <- sub('_2(\\.[^/]*)$', '_reverse\\1', new_files)
print(new_files)
file.rename(old_files, new_files)
103 ```
104
105