changeset 7:c7620aa7e1f0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sra-tools commit d1347141d384ed404f674d7ce408b6769e763ea1
author iuc
date Wed, 10 May 2017 10:45:41 -0400
parents 30775c836c77
children 1920e0508831
files fastq_dump.xml sam_dump.xml sra_macros.xml sra_pileup.xml test-data/fastq_dump_result.fastq.gz
diffstat 5 files changed, 364 insertions(+), 169 deletions(-) [+]
line wrap: on
line diff
--- a/fastq_dump.xml	Wed Mar 22 05:23:31 2017 -0400
+++ b/fastq_dump.xml	Wed May 10 10:45:41 2017 -0400
@@ -1,5 +1,5 @@
-<tool id="fastq_dump" name="Extract reads" version="@VERSION@.1">
-    <description>in FASTQ/A format from NCBI SRA.</description>
+<tool id="fastq_dump" name="Extract reads in Fastq/a" version="@VERSION@.2">
+    <description>format from NCBI SRA</description>
     <macros>
         <import>sra_macros.xml</import>
     </macros>
@@ -9,14 +9,21 @@
         <![CDATA[
 
     #if $input.input_select=="file_list":
-      for acc in `cat $input.file_list` ;
-      do
+    
+        for acc in `cat $input.file_list` ;
+        do
+    
     #elif $input.input_select=="accession_number":
-      acc="$input.accession" &&
+
+        ## Spaces the user may have typed are already removed by the accession param's sanitizer (see sra_macros.xml)
+        acc="${input.accession}" &&
+    
     #end if
 
     #if $input.input_select=="file_list" or $input.input_select=="accession_number":
-          [ ""\$acc" =~ ^[E|S|D]RR[0-9]{1,}$" ] && (
+    
+        [ ""\$acc" =~ ^[E|S|D]RR[0-9]{1,}$" ] && (
+    
     #end if
 
     ## Need to set the home directory to the current working directory,
@@ -74,38 +81,35 @@
     $adv.clip
     $adv.skip_technical
 
-    #if str( $outputformat ) == "fasta":
-        --fasta
+    #if str( $outputformat ) == "fastqsanger.gz":
+        --gzip
+    #elif str( $outputformat ) == "fastqsanger.bz2":   
+        --bzip2
     #end if
     #if $input.input_select=="file":
         --stdout
         "$input.file" > "$output_file"
-    #elif $input.input_select=="file_list":
-        "\$acc"
-    #else:
-         --stdout
+    
+    #elif $input.input_select=="accession_number":
+        --stdout
         "\$acc" > "$output_accession" )
     #end if
 
     #if $input.input_select=="file_list":
-    ) ; done
-
-    ;
+        ) ; done
 
-
-
-
+        ;
 
-    for i in `ls *.fast* | cut -f 1 -d '_' | uniq` ; do
-      count=`ls \$i* | wc -l` ;
-      data=(\$(ls -d \$i*));
+        for i in `ls *.fast* | cut -f 1 -d '_' | uniq` ; do
+            count=`ls \$i* | wc -l` ;
+            data=(\$(ls -d \$i*));
       
-      if [ "\$count" -eq 2 ]; then
-         mv "\${data[0]}" "\${data[0]}"_forward.$outputformat;  mv "\${data[1]}" "\${data[1]}"_reverse.$outputformat ;
-      elif [ "\$count" -eq 1 ]; then
-         mv "\${data[0]}" "\${data[0]}"__single.$outputformat ;
-      fi;
-    done
+            if [ "\$count" -eq 2 ]; then
+                mv "\${data[0]}" "\${data[0]}"_forward.$outputformat;  mv "\${data[1]}" "\${data[1]}"_reverse.$outputformat ;
+            elif [ "\$count" -eq 1 ]; then
+                 mv "\${data[0]}" "\${data[0]}"__single.$outputformat ;
+            fi;
+        done
 
 
     #end if
@@ -115,129 +119,239 @@
     </command>
     <inputs>
         <expand macro="input_conditional"/>
-        <param name="outputformat" type="select" label="select output format">
-            <option value="fastqsanger">fastq</option>
-            <option value="fasta">fasta</option>
+        <param name="outputformat" type="select" display="radio" label="Select output format" help="Compression will greatly reduce the amount of space occupied by downloaded data. Downstream applications such as short-read mappers will accept compressed data as input. Consider this example: an uncompressed 400 Mb fastq dataset compresses to 100 Mb or 80 Mb by gzip or bzip2, respectively." argument="--gzip --bzip2">
+            <option value="fastqsanger.gz">gzip compressed fastq</option>
+            <option value="fastqsanger">Uncompressed fastq</option>
+            <option value="fastqsanger.bz2">bzip2 compressed fastq</option>
         </param>
         <section name="adv" title="Advanced Options" expanded="False">
-            <param name="minID" type="integer" label="minimum spot ID" optional="true"/>
-            <param name="maxID" type="integer" label="maximum spot ID" optional="true"/>
-            <param name="minlen" type="integer" label="minimum read length" optional="true"/>
-            <param name="split" type="boolean" checked="true" truevalue="--split-spot" falsevalue="">
-                <label>split spot by read pairs</label>
-            </param>
+            <param name="minID" type="integer" label="Minimum spot ID" optional="true" help="Minimum spot id to be dumped." argument="--minSpotId"/>
+            <param name="maxID" type="integer" label="Maximum spot ID" optional="true" help="Maximum spot id to be dumped." argument="--maxSpotId"/>
+            <param name="minlen" type="integer" label="Minimum read length" optional="true" help="Filter by sequence length. Will dump only reads longer or equal to this value." argument="--minReadLen"/>
+            <param name="split" type="boolean" checked="true" truevalue="--split-spot" falsevalue="" label="Split spot by read pairs" help="Split spots into individual reads." argument="--split-spot"/>
             <expand macro="alignments"/>
             <expand macro="region"/>
             <expand macro="matepairDist"/>
-            <param name="readfilter" type="select" value="">
-                <label>filter by value</label>
+            <param name="readfilter" type="select" value="" label="filter by value" argument="--read-filter">
                 <option value="">None</option>
                 <option value="pass">pass</option>
                 <option value="reject">reject</option>
                 <option value="criteria">criteria</option>
                 <option value="redacted">redacted</option>
             </param>
-            <param name="spotgroups" type="text" label="filter by spot-groups" optional="true"/>
-            <param name="clip" type="boolean" truevalue="--clip" falsevalue="">
-                <label>apply left and right clips</label>
-            </param>
-            <param name="skip_technical" type="boolean" truevalue="--skip-technical" falsevalue="" checked="False" label="Dump only biological reads"/>
+            <param name="spotgroups" type="text" label="Filter by spot-groups" optional="true" argument="--spot-groups"/>
+            <param name="clip" type="boolean" truevalue="--clip" falsevalue="" argument="--clip" label="Apply left and right clips" />
+            <param name="skip_technical" type="boolean" truevalue="--skip-technical" falsevalue="" checked="False" label="Dump only biological reads" argument="--skip-technical"/>
         </section>
     </inputs>
     <outputs>
-      <collection name="list_paired" type="list:paired" label="Pair-end Fast(q|a)">
-        <filter>input['input_select'] == "file_list"</filter>
+        <collection name="list_paired" type="list:paired" label="Pair-end data (fastq-dump)">
+            <filter>input['input_select'] == "file_list"</filter>
+
         <!-- Use named regex group to grab pattern
              <identifier_0>_<identifier_1>.fq. Here identifier_0 is the list
              identifier in the nested collection and identifier_1 is either
              forward or reverse (for instance samp1_forward.fq).
         -->
-        <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_\d+.fastq_(?P&lt;identifier_1&gt;[^_]+)\.fastq" ext="fastqsanger" visible="false" />
-        <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_\d+.fasta_(?P&lt;identifier_1&gt;[^_]+)\.fasta" ext="fasta" visible="false" />
-      </collection>
-      <collection name="output_collection" type='list' label="Single-end Fast(q|a)">
-        <filter>input['input_select'] == "file_list"</filter>
-        <discover_datasets pattern="(?P&lt;designation&gt;.+)_\d+.fastq__single\.fastq" directory="." ext='fastqsanger'/>
-        <discover_datasets pattern="(?P&lt;designation&gt;.+)_\d+.fasta__single\.fasta" directory="." ext='fasta'/>
-      </collection>
-      <data format="fastqsanger" name="output_accession" >
-        <filter>input['input_select'] == "accession_number"</filter>
-        <change_format>
-          <when input="outputformat" value="fasta" format="fasta"/>
-        </change_format>
-      </data>
-      <data format="fastqsanger" name="output_file" label="${input.file.name}.${outputformat}">
-        <filter>input['input_select'] == "file"</filter>
-        <change_format>
-          <when input="outputformat" value="fasta" format="fasta"/>
-        </change_format>
-      </data>
+        
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_\d+.fastq_(?P&lt;identifier_1&gt;[^_]+)\.fastqsanger" ext="fastqsanger" />
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_\d+.fastq.gz_(?P&lt;identifier_1&gt;[^_]+)\.fastqsanger.gz" ext="fastqsanger.gz" />
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_\d+.fastq.bz2_(?P&lt;identifier_1&gt;[^_]+)\.fastqsanger.bz2" ext="fastqsanger.bz2" />
+        </collection>
+        <collection name="output_collection" type='list' label="Single-end data (fastq-dump)">
+            <filter>input['input_select'] == "file_list"</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_\d+.fastq__single\.fastqsanger" directory="." ext='fastqsanger'/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_\d+.fastq.gz__single\.fastqsanger.gz" directory="." ext='fastqsanger.gz'/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_\d+.fastq.bz2__single\.fastqsanger.bz2" directory="." ext='fastqsanger.bz2'/>
+        </collection>
+        <data format="fastqsanger" name="output_accession" label="${input.accession} (fastq-dump)">
+            <filter>input['input_select'] == "accession_number"</filter>
+            <change_format>
+                <when input="outputformat" value="fastqsanger.gz" format="fastqsanger.gz"/>
+                <when input="outputformat" value="fastqsanger.bz2" format="fastqsanger.bz2"/>
+            </change_format>
+        </data>
+        <data format="fastqsanger" name="output_file" label="${input.file.name} (fastq-dump)">
+            <filter>input['input_select'] == "file"</filter>
+            <change_format>
+                <when input="outputformat" value="fastqsanger.gz" format="fastqsanger.gz"/>
+                <when input="outputformat" value="fastqsanger.bz2" format="fastqsanger.bz2"/>
+            </change_format>
+        </data>
     </outputs>
     <tests>
-      <test>
-        <param name="input_select" value="accession_number"/>
-        <param name="outputformat" value="fastqsanger"/>
-        <param name="accession" value="SRR044777"/>
-        <param name="skip_technical" value="True"/>
-        <output name="output_accession">
-          <assert_contents>
-            <not_has_text text="rRNA_primer"/>
-            <has_text text="F47USSH02GNP1D" />
-          </assert_contents>
-        </output>
-      </test>
-      <test>
-        <param name="input_select" value="accession_number"/>
-        <param name="outputformat" value="fastqsanger"/>
-        <param name="accession" value="SRR925743"/>
-        <param name="maxID" value="5"/>
-        <output name="output_accession" file="fastq_dump_result.fastq" ftype="fastqsanger"/>
-      </test>
-      <test>
-        <param name="input_select" value="file_list"/>
-        <param name="outputformat" value="fastqsanger"/>
-        <param name="file_list" value="list_pe"/>
-        <param name="maxID" value="5"/>
-        <output_collection name="list_paired" type="list:paired">
-          <element name="DRR015708">
-            <element name="forward" file="DRR015708_forward.fastqsanger">
-            </element>
-            <element name="reverse" file="DRR015708_reverse.fastqsanger">
-            </element>
-          </element>
-        </output_collection>
-      </test>
-      <test>
-        <param name="input_select" value="file_list"/>
-        <param name="outputformat" value="fastqsanger"/>
-        <param name="file_list" value="list_pe2"/>
-        <param name="maxID" value="5"/>
-        <output_collection name="list_paired" type="list:paired">
-          <element name="ERR027433">
-            <element name="forward" file="ERR027433_forward.fastqsanger">
-            </element>
-            <element name="reverse" file="ERR027433_reverse.fastqsanger">
-            </element>
-          </element>
-        </output_collection>
-      </test>      
-      <test>
-        <param name="input_select" value="file_list"/>
-        <param name="outputformat" value="fastqsanger"/>
-        <param name="file_list" value="list_se"/>
-        <param name="maxID" value="5"/>
-        <output_collection name="output_collection" type="list">
-          <element name="SRR1993644" file="SRR1993644.fastqsanger"/>
-        </output_collection>
-      </test>            
+        <test>
+            <param name="input_select" value="accession_number"/>
+            <param name="outputformat" value="fastqsanger"/>
+            <param name="accession" value="SRR044777"/>
+            <param name="skip_technical" value="True"/>
+            <output name="output_accession">
+                <assert_contents>
+                    <not_has_text text="rRNA_primer"/>
+                    <has_text text="F47USSH02GNP1D" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input_select" value="accession_number"/>
+            <param name="outputformat" value="fastqsanger.gz"/>
+            <param name="accession" value="SRR925743"/>
+            <param name="maxID" value="5"/>
+            <output name="output_accession" file="fastq_dump_result.fastq.gz" decompress="True"/>
+        </test>
+        <test>
+            <param name="input_select" value="accession_number"/>
+            <param name="outputformat" value="fastqsanger"/>
+            <param name="accession" value="SRR925743"/>
+            <param name="maxID" value="5"/>
+            <output name="output_accession" file="fastq_dump_result.fastq" ftype="fastqsanger"/>
+        </test>
+        <test>
+            <param name="input_select" value="file_list"/>
+            <param name="outputformat" value="fastqsanger"/>
+            <param name="file_list" value="list_pe"/>
+            <param name="maxID" value="5"/>
+            <output_collection name="list_paired" type="list:paired">
+                <element name="DRR015708">
+                    <element name="forward" file="DRR015708_forward.fastqsanger">
+                    </element>
+                    <element name="reverse" file="DRR015708_reverse.fastqsanger">
+                    </element>
+                </element>
+            </output_collection>
+        </test>
+        <test>
+            <param name="input_select" value="file_list"/>
+            <param name="outputformat" value="fastqsanger"/>
+            <param name="file_list" value="list_pe2"/>
+            <param name="maxID" value="5"/>
+            <output_collection name="list_paired" type="list:paired">
+                <element name="ERR027433">
+                    <element name="forward" file="ERR027433_forward.fastqsanger">
+                    </element>
+                    <element name="reverse" file="ERR027433_reverse.fastqsanger">
+                    </element>
+                </element>
+            </output_collection>
+        </test>      
+        <test>
+            <param name="input_select" value="file_list"/>
+            <param name="outputformat" value="fastqsanger"/>
+            <param name="file_list" value="list_se"/>
+            <param name="maxID" value="5"/>
+            <output_collection name="output_collection" type="list">
+                <element name="SRR1993644" file="SRR1993644.fastqsanger"/>
+            </output_collection>
+        </test>            
     </tests>
-    <help>
-        This tool extracts reads from SRA archives using fastq-dump.
-        The fastq-dump program is developed at NCBI, and is available at
-        http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
+    <help><![CDATA[
+**What it does?**
+
+This tool extracts data (in fastq_ format) from the Sequence Read Archive (SRA) at the National Center for Biotechnology Information (NCBI). It is based on the fastq-dump_ utility of the SRA Toolkit.
+
+**How to use it?**
+
+There are three ways in which you can download data:
+
+ 1. Data for single accession
+ 2. Multiple datasets using a list of accessions
+ 3. Extract data from already uploaded SRA dataset
+
+Below we discuss each in detail.
+
+------
+
+**Uploading data for a single accession**
+
+When you type a single accession number (e.g., `SRR1582967`) into the **Accession** box and click **Execute**, the tool will fetch the data for you. It is important to keep the following in mind:
+
+ - if the data is paired-end (or mate-pair), the tool will generate a single *interleaved* dataset, in which forward and reverse mates alternate (see the example dataset below)
+ - if the data is single-end, a standard single fastq dataset will be produced
+
+-----
+
+**Uploading multiple datasets using a list of accessions**
+
+A more realistic scenario is when you want to upload a number of datasets at once. To do this you need a list of accessions, with only one accession per line (see below for information on how to generate such a file). Once you have this file:
+
+ 1. Upload it into your history using Galaxy's upload tool
+ 2. Once the list of accessions is uploaded choose *List of SRA accessions, one per line* from **select input type** dropdown
+ 3. Choose uploaded file within the **sra accession list** field
+ 4. Click **Execute**
+
+.. class:: warningmark
+
+Fastq datasets produced by this option will be saved in Galaxy's history as a collection_ - a single history element containing multiple datasets. In fact, two collections will be produced: one containing paired-end data and another containing single-end data. Single-end or pair-end collections may be empty if the accessions provided in the list contain only SINGLE or PAIRED data, respectively.
+
+-----
+
+**Extract data from already uploaded SRA dataset**
+
+If an SRA dataset is present in the history, it can be converted into a fastq dataset by setting the **select input type** drop-down to *SRA archive in current history*. Just like in the case of extracting data for a single accession number, the following applies:
+
+ - if the data is paired-end (or mate-pair), the tool will generate a single *interleaved* dataset, in which forward and reverse mates alternate (see the example below)
+ - if the data is single-end, a standard fastq dataset will be produced
+
+@ACCESSION_LIST_HOWTO@
+
+-----
+
+**Paired-end (and mate-pair) data in fastq format**
 
-        NB: Single-end or pair-end collections may be empty if given SRRs LibraryLayout contains only either SINGLE or PAIRED respectively
-        @SRATOOLS_ATTRRIBUTION@
+Paired end datasets can be represented as two individual datasets:
+
+First dataset::
+
+ @1/1
+ AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA
+ +
+ EGGEGGGDFGEEEAEECGDEGGFEEGEFGBEEDDECFEFDD@CDD<ED
+ @2/1
+ AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA
+ +
+ HHHHHHEGFHEEFEEHEEHHGGEGGGGEFGFGGGGHHHHFBEEEEEFG
+
+Second dataset::
+
+ @1/2
+ CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+ +
+ GHHHDFDFGFGEGFBGEGGEGEGGGHGFGHFHFHHHHHHHEF?EFEFF
+ @2/2
+ CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+ +
+ HHHHHHHHHHHHHGHHHHHHGHHHHHHHHHHHFHHHFHHHHHHHHHHH
+
+Or a single *interleaved* dataset::
+
+ @1/1
+ AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA
+ +
+ EGGEGGGDFGEEEAEECGDEGGFEEGEFGBEEDDECFEFDD@CDD<ED
+ @1/2
+ CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+ +
+ GHHHDFDFGFGEGFBGEGGEGEGGGHGFGHFHFHHHHHHHEF?EFEFF
+ @2/1
+ AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA
+ +
+ HHHHHHEGFHEEFEEHEEHHGGEGGGGEFGFGGGGHHHHFBEEEEEFG
+ @2/2
+ CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC
+ +
+ HHHHHHHHHHHHHGHHHHHHGHHHHHHHHHHHFHHHFHHHHHHHHHHH
+
+----
+
+
+.. _fastq: https://en.wikipedia.org/wiki/FASTQ_format
+.. _fastq-dump: https://ncbi.github.io/sra-tools/fastq-dump.html
+.. _collection: https://galaxyproject.org/tutorials/collections/
+.. _link: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies
+
+@SRATOOLS_ATTRRIBUTION@
+
+]]>
     </help>
     <expand macro="citation"/>
   </tool>
--- a/sam_dump.xml	Wed Mar 22 05:23:31 2017 -0400
+++ b/sam_dump.xml	Wed May 10 10:45:41 2017 -0400
@@ -1,5 +1,5 @@
-<tool id="sam_dump" name="Extract reads" version="@VERSION@">
-    <description>in SAM or BAM format from NCBI SRA.</description>
+<tool id="sam_dump" name="Extract reads in BAM" version="@VERSION@.2">
+    <description>format from NCBI SRA</description>
     <macros>
         <import>sra_macros.xml</import>
     </macros>
@@ -11,7 +11,7 @@
       for acc in `cat $input.file_list` ;
       do
     #elif $input.input_select=="accession_number":
-      acc="$input.accession" &&
+      acc="${input.accession}" &&
     #end if
 
     #if $input.input_select=="file_list" or $input.input_select=="accession_number":
@@ -91,7 +91,7 @@
     </command>
     <inputs>
         <expand macro="input_conditional"/>
-        <param name="outputformat" type="select" label="select output format">
+        <param name="outputformat" type="select" display="radio" label="select output format" help="In the vast majority of cases you will want to download data in bam format. It is more compact and is accepted by all downstream tools.">
             <option value="bam">bam</option>
             <option value="sam">sam</option>
         </param>
@@ -113,18 +113,18 @@
         </section>
     </inputs>
     <outputs>
-        <collection name="output_collection" type='list'>
+        <collection name="output_collection" type="list" label="SAM/BAM data (sam-dump)">
           <filter>input['input_select'] == "file_list"</filter>
           <discover_datasets pattern="(?P&lt;designation&gt;.+)\.bam" directory="." ext='bam'/>
           <discover_datasets pattern="(?P&lt;designation&gt;.+)\.sam" directory="." ext='sam'/>
         </collection>
-        <data name="output_accession" format="bam" label="${input.accession}.${outputformat}">
+        <data name="output_accession" format="bam" label="${input.accession} (sam-dump)">
             <filter>input['input_select'] == "accession_number"</filter>
             <change_format>
                 <when input="outputformat" value="sam" format="sam"/>
             </change_format>
         </data>
-        <data name="output_file" format="bam" label="${input.file.name}.${outputformat}">
+        <data name="output_file" format="bam" label="${input.file.name} (sam-dump)">
             <filter>input['input_select'] == "file"</filter>
             <change_format>
                 <when input="outputformat" value="sam" format="sam"/>
@@ -140,11 +140,59 @@
             <output name="output_accession" file="sam_dump_result.sam" compare="contains" ftype="sam"/>
         </test>
     </tests>
-    <help>
-        This tool extracts reads from sra archives using sam-dump.
-        The sam-dump program is developed at NCBI, and is available at
-        http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
-        @SRATOOLS_ATTRRIBUTION@
-    </help>
+    <help><![CDATA[
+**What it does?**
+
+This tool extracts data (in BAM_ format) from the Sequence Read Archive (SRA) at the National Center for Biotechnology Information (NCBI). It is based on the sam-dump_ utility of the SRA Toolkit.
+
+**How to use it?**
+
+There are three ways in which you can download data:
+
+ 1. Data for single accession
+ 2. Multiple datasets using a list of accessions
+ 3. Extract data from already uploaded SRA dataset
+
+Below we discuss each in detail.
+
+------
+
+**Uploading data for a single accession**
+
+When you type a single accession number (e.g., `SRR1582967`) into the **Accession** box and click **Execute**, the tool will fetch the data for you. As a result you will get a single BAM (or SAM) dataset in the history.
+
+-----
+
+**Uploading multiple datasets using a list of accessions**
+
+A more realistic scenario is when you want to upload a number of datasets at once. To do this you need a list of accessions, with only one accession per line (see below for information on how to generate such a file). Once you have this file:
+
+ 1. Upload it into your history using Galaxy's upload tool
+ 2. Once the list of accessions is uploaded choose *List of SRA accessions, one per line* from **select input type** dropdown
+ 3. Choose uploaded file within the **sra accession list** field
+ 4. Click **Execute**
+
+.. class:: warningmark
+
+BAM datasets produced by this option will be saved in Galaxy's history as a collection_ - a single history element containing multiple datasets. 
+
+-----
+
+**Extract data from already uploaded SRA dataset**
+
+If an SRA dataset is present in the history, it can be converted into a BAM dataset by setting the **select input type** drop-down to *SRA archive in current history*. Just like in the case of extracting data for a single accession number, a single BAM dataset will be generated in the history.
+
+@ACCESSION_LIST_HOWTO@
+
+-----
+
+.. _BAM: https://samtools.github.io/hts-specs/SAMv1.pdf
+.. _sam-dump: http://ncbi.github.io/sra-tools/sam-dump.html
+.. _collection: https://galaxyproject.org/tutorials/collections/
+.. _link: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies
+
+
+@SRATOOLS_ATTRRIBUTION@
+    ]]></help>
     <expand macro="citation"/>
 </tool>
--- a/sra_macros.xml	Wed Mar 22 05:23:31 2017 -0400
+++ b/sra_macros.xml	Wed May 10 10:45:41 2017 -0400
@@ -1,19 +1,28 @@
 <macros>
-    <token name="@VERSION@">2.8.0</token>
+    <token name="@VERSION@">2.8.1</token>
     <macro name="requirements">
         <requirements>
-            <requirement type="package" version="2.8.0">sra-tools</requirement>
+            <requirement type="package" version="2.8.1">sra-tools</requirement>
         </requirements>
     </macro>
     <macro name="input_conditional">
         <conditional name="input">
             <param name="input_select" type="select" label="select input type">
                 <option value="accession_number">SRR accession</option>
+                <option value="file_list">List of SRA accessions, one per line</option>
                 <option value="file">SRA archive in current history</option>
-                <option value="file_list">List of SRA accession, one per line</option>
             </param>
             <when value="accession_number">
-                <param name="accession" type="text" label="SRR accession" help="Must start with SRR,DRR or ERR, e.g. SRR925743 , ERR343809"/>
+                <param name="accession" type="text" label="Accession" help="Must start with SRR, DRR or ERR, e.g. SRR925743, ERR343809">
+                    <sanitizer>
+                        <valid initial="string.printable">
+                            <remove value=" "/>
+                        </valid>
+                        <mapping initial="none">
+                            <add source=" " target=""/>
+                        </mapping>
+                    </sanitizer>
+                </param>
             </when>
             <when value="file">
                 <param format="sra" name="file" type="data" label="sra archive"/>
@@ -24,39 +33,45 @@
         </conditional>
     </macro>
     <macro name="alignments">
-        <param name="alignments" type="select" value="both">
-            <label>aligned or unaligned reads</label>
+        <param name="alignments" type="select" value="both" label="Output aligned or unaligned reads" help="Output reads according to their alignment status." argument="--aligned and --unaligned">
             <option value="both">both</option>
             <option value="aligned">aligned only</option>
             <option value="unaligned">unaligned only</option>
         </param>
     </macro>
     <macro name="minMapq">
-        <param name="minMapq" type="integer" min="0" max="42" label="minimum mapping quality" optional="true"/>
+        <param name="minMapq" type="integer" min="0" max="42" label="Minimum mapping quality" optional="true" help="Minimum mapping quality an alignment has to have, to be dumped." argument="--min-mapq"/>
     </macro>
     <macro name="region">
         <param format="text" name="region" type="text" label="aligned region" optional="true"
-               help="Filter by position on genome. Can be either accession.version (ex: NC_000001.10), chromosome name (ex:chr1 or 1) or 1-based coordinates (ex: chr1:1-101)."/>
+               help="Filter by position on genome. Can be either accession.version (ex: NC_000001.10), chromosome name (ex:chr1 or 1) or 1-based coordinates (ex: chr1:1-101)." argument="--aligned-region"/>
     </macro>
     <macro name="matepairDist">
         <param name="matepairDist" type="text" label="mate-pair distance (from-to|unknown)" optional="true"
-               help="Filter by distance between matepairs. Use unknown to find matepairs split between the references. Use from-to (inclusive) to limit matepair distance on the same reference"/>
+               help="Filter by distance between matepairs. Use unknown to find matepairs split between the references. Use from-to (inclusive) to limit matepair distance on the same reference" argument="--matepair-distance"/>
     </macro>
     <macro name="citation">
         <citations>
             <citation type="doi">10.1093/nar/gkq1019</citation>
         </citations>
     </macro>
-    <token name="@SRATOOLS_ATTRRIBUTION@">
-        Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies.
+    <token name="@ACCESSION_LIST_HOWTO@">
+-----
 
-        Galaxy tool wrapper originally written by Matt Shirley (mdshw5 at gmail.com).
+**How to generate accession lists**
 
-        Wrapper modified by Philip Mabon ( philip.mabon at phac-aspc.gc.ca ).
+ 1. Go to **SRA Run Selector** by clicking this link_
+ 2. Find the study you are interested in by typing a search term within the **Search** box. This can be a word (e.g., *mitochondria*) or an accession you have gotten from a paper (e.g., *SRR1582967*). 
+ 3. Once you click on the study of interest you will see the number of datasets in this study within the **Related SRA data** box
+ 4. Click on the Runs number
+ 5. On the page that opens you will see the **Accession List** button
+ 6. Clicking on this button will produce a file that you will need to upload into Galaxy and use as the input to this tool.
+    </token>
 
-        Tool dependencies, clean-up and bug-fixes by Marius van den Beek (m.vandenbeek at gmail.com).
-
-        For support and bug reports contact Matt Shirley or Marius van den Beek or go to https://github.com/galaxyproject/tools-iuc.
-
+    <token name="@SRATOOLS_ATTRRIBUTION@">
+Galaxy tool wrapper originally written by Matt Shirley (mdshw5 at gmail.com).
+Wrapper modified by Philip Mabon ( philip.mabon at phac-aspc.gc.ca ).
+Tool dependencies, clean-up and bug-fixes by Marius van den Beek (m.vandenbeek at gmail.com).
+For support and bug reports contact Matt Shirley or Marius van den Beek or go to https://github.com/galaxyproject/tools-iuc.
     </token>
 </macros>
--- a/sra_pileup.xml	Wed Mar 22 05:23:31 2017 -0400
+++ b/sra_pileup.xml	Wed May 10 10:45:41 2017 -0400
@@ -1,5 +1,5 @@
-<tool id="sra_pileup" name="Generate pileup format" version="@VERSION@">
-    <description>from NCBI sra.</description>
+<tool id="sra_pileup" name="Generate pileup format" version="@VERSION@.2">
+    <description>from NCBI sra</description>
     <macros>
         <import>sra_macros.xml</import>
     </macros>
@@ -7,6 +7,11 @@
     <version_command>sra-pileup --version</version_command>
     <command detect_errors="exit_code">
         <![CDATA[
+
+        #if $input.input_select=="accession_number":
+            acc="${input.accession}" &&
+        #end if
+
         ## Need to set the home directory to the current working directory,
         ## else the tool tries to write to home/.ncbi and fails when used
         ## with a cluster manager.
@@ -16,7 +21,7 @@
         #if ( str( $adv.region ) == "" ):
             ASCP_PATH=`command -v ascp` &&
             ASCP_KEY=`dirname \$ASCP_PATH`/asperaweb_id_dsa.openssh || true &&
-            prefetch -X 200G --ascp-path "\$ASCP_PATH|\$ASCP_KEY" "$input.accession" &&
+            prefetch -X 200G --ascp-path "\$ASCP_PATH|\$ASCP_KEY" "\$acc" &&
             ## Duplicate vdb-config, in case settings changed between prefetch and
             ## sra-pileup command.
             vdb-config -s "/repository/user/main/public/root=\$PWD" &&
@@ -31,7 +36,7 @@
         #if $input.input_select == "file":
             "$input.file" > "$output_file"
         #elif $input.input_select == "accession_number":
-            "$input.accession" > "$output_accession"
+            "\$acc" > "$output_accession"
         #elif $input.input_select == "text":
             `cat "$input.text"` > "$output_text"
         #end if
@@ -48,7 +53,16 @@
                 <param format="sra" name="file" type="data" label="sra archive"/>
             </when>
             <when value="accession_number">
-                <param format="text" name="accession" type="text" label="SRR accession" help="Must start with SRR, e.g. SRR925743"/>
+                <param format="text" name="accession" type="text" label="SRR accession" help="Must start with SRR, e.g. SRR925743">
+                    <!-- The value is interpolated into a shell command, so only letters and digits are
+                         accepted; spaces are silently stripped, anything else becomes the invalid-char marker. -->
+                    <sanitizer>
+                        <valid initial="string.letters,string.digits"/>
+                        <mapping initial="none">
+                            <add source=" " target=""/>
+                        </mapping>
+                    </sanitizer>
+                </param>
             </when>
             <when value="text">
                 <param format="txt" name="text" type="data" label="text file"/>
@@ -60,13 +74,13 @@
         </section>
     </inputs>
     <outputs>
-        <data format="pileup" name="output_accession" label="${input.accession}.pileup">
+        <data format="pileup" name="output_accession" label="${input.accession} (sra-pileup)">
             <filter>input['input_select'] == "accession_number"</filter>
         </data>
-        <data format="pileup" name="output_file" label="${input.file.name}.pileup">
+        <data format="pileup" name="output_file" label="${input.file.name} (sra-pileup)">
             <filter>input['input_select'] == "file"</filter>
         </data>
-        <data format="pileup" name="output_text" label="${input.text.name}.pileup">
+        <data format="pileup" name="output_text" label="${input.text.name} (sra-pileup)">
             <filter>input['input_select'] == "text"</filter>
         </data>
     </outputs>
@@ -79,10 +93,14 @@
         </test>
     </tests>
     <help>
-        This tool produces pileup format from sra archives using sra-pileup.
-        The sra-pileup program is developed at NCBI, and is available at
-        http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
-        @SRATOOLS_ATTRRIBUTION@
+ <![CDATA[
+
+This tool produces pileup format from sra archives using sra-pileup.
+The sra-pileup program is developed at NCBI, and is available at
+http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software.
+@SRATOOLS_ATTRRIBUTION@
+
+]]>
     </help>
     <expand macro="citation"/>
 </tool>
Binary file test-data/fastq_dump_result.fastq.gz has changed