diff fasterq_dump.xml @ 27:9a776b080193 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sra-tools commit cbb1499906c801443d72bdf313d86f0182aca010
author iuc
date Sun, 22 Jan 2023 17:51:50 +0000
parents 83c7d564b128
children 4317d3cb6cba
line wrap: on
line diff
--- a/fasterq_dump.xml	Fri Sep 03 16:17:53 2021 +0000
+++ b/fasterq_dump.xml	Sun Jan 22 17:51:50 2023 +0000
@@ -1,22 +1,21 @@
-<tool id="fasterq_dump" name="Faster Download and Extract Reads in FASTQ" version="@VERSION@+galaxy1" profile="18.01">
+<tool id="fasterq_dump" name="Faster Download and Extract Reads in FASTQ" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description>format from NCBI SRA</description>
-    <expand macro="bio_tools"/>
     <macros>
-        <import>sra_macros.xml</import>
+        <import>macros.xml</import>
     </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="bio_tools"/>
     <expand macro="requirements"/>
-    <version_command>fasterq-dump --version</version_command>
+    <version_command>fasterq-dump --version | tr -d $'\n'</version_command>
     <command detect_errors="exit_code"><![CDATA[
     set -o | grep -q pipefail && set -o pipefail;
     @COPY_CONFIGFILE@
+    @CONFIGURE_RETRY@
     @SET_ACCESSIONS@
-    #if $input.input_select == "file":
-        acc='${input.file.name}' &&
-        ln -s '${input.file}' "\$acc" &&
-    #end if
-    @CONFIGURE_RETRY@
     while [ \$SRA_PREFETCH_ATTEMPT -le \$SRA_PREFETCH_RETRIES ] ; do
         fasterq-dump "\$acc" -e \${GALAXY_SLOTS:-1}
+        --seq-defline '@\$sn/\$ri'
+        --qual-defline '+'
         $adv.split
         #if str( $adv.minlen ) != "":
             --min-read-len "$adv.minlen"
@@ -33,7 +32,7 @@
     mkdir -p output &&
     mkdir -p outputOther &&
     count="\$(ls *.fastq | wc -l)" &&
-    echo "There are \$count fastq" &&
+    echo "There are \$count fastq files" &&
     data=(\$(ls *.fastq)) &&
     if [ "\$count" -eq 1 ]; then
         @COMPRESS@ "\${data[0]}" > output/"\${acc}"__single.fastqsanger.gz &&
@@ -61,13 +60,11 @@
             rm "\$file";
         done;
     fi;
-    #if $input.input_select=="file_list":
-        ) ; done
-
-        ;
-    #elif  $input.input_select=="accession_number":
-    );
+    
+    #if $input.input_select != "sra_file":
+        ); done;
     #end if
+    echo "Done with all accessions."
     ]]>
     </command>
     <expand macro="configfile_hack"/>
@@ -109,10 +106,8 @@
             <param name="accession" value="ERR086330"/>
             <output_collection name="list_paired" type="list:paired" count="1">
                 <element name="ERR086330">
-                    <element name="forward" file="ERR086330_1.fastq.gz" decompress="True">
-                    </element>
-                    <element name="reverse" file="ERR086330_2.fastq.gz" decompress="True">
-                    </element>
+                    <element name="forward" file="ERR086330_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="ERR086330_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
                 </element>
             </output_collection>
         </test>
@@ -127,22 +122,34 @@
             </output_collection>
         </test>
         <test expect_num_outputs="4">
-            <param name="input_select" value="file"/>
-            <param name="file" value="SRR522874.sra"/>
+            <param name="input_select" value="accession_number"/>
+            <param name="accession" value="ERR086330, SRR11953971"/>
+            <output_collection name="list_paired" type="list:paired" count="2">
+                <element name="ERR086330">
+                    <element name="forward" file="ERR086330_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="ERR086330_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                </element>
+                <element name="SRR11953971">
+                    <element name="forward" file="SRR11953971_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="SRR11953971_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="4">
+            <param name="input_select" value="sra_file"/>
+            <param name="sra_file" value="SRR522874.sra"/>
             <param name="split" value="--split-files"/>
             <param name="skip_technical" value="True"/>
             <output_collection name="list_paired" type="list:paired" count="1">
                 <element name="SRR522874.sra">
-                    <element name="forward" file="SRR522874.sra_2.fastq.gz" decompress="True">
-                    </element>
-                    <element name="reverse" file="SRR522874.sra_4.fastq.gz" decompress="True">
-                    </element>
+                    <element name="forward" file="SRR522874.sra_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="SRR522874.sra_4.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
                 </element>
             </output_collection>
         </test>
         <test expect_num_outputs="4">
-            <param name="input_select" value="file"/>
-            <param name="file" value="SRR522874.sra"/>
+            <param name="input_select" value="sra_file"/>
+            <param name="sra_file" value="SRR522874.sra"/>
             <param name="split" value="--split-files"/>
             <param name="skip_technical" value="False"/>
             <output_collection name="output_collection_other" type="list" count="4">
@@ -156,16 +163,16 @@
             <param name="input_select" value="file_list"/>
             <param name="file_list" value="list_sra"/>
             <param name="minlen" value="21"/>
-            <output_collection name="output_collection_other" type="list">
+            <output_collection name="output_collection_other" type="list" count="1">
                 <element name="SRR522874__single" file="SRR522874.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
             </output_collection>
             <output_collection name="list_paired" type="list:paired" count="1">
                 <element name="SRR522874">
-                    <element name="forward" file="SRR522874_1.fastq.gz" decompress="True"/>
-                    <element name="reverse" file="SRR522874_2.fastq.gz" decompress="True"/>
+                    <element name="forward" file="SRR522874_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="SRR522874_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
                 </element>
             </output_collection>
-            <output_collection name="output_collection" type="list">
+            <output_collection name="output_collection" type="list" count="1">
                 <element name="SRR002702" file="SRR002702_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
             </output_collection>
         </test>
@@ -174,8 +181,8 @@
             <param name="file_list" value="sra_manifest.tabular" ftype="sra_manifest.tabular"/>
             <output_collection name="list_paired" type="list:paired" count="1">
                 <element name="SRR11953971">
-                    <element name="forward" file="SRR11953971_1.fastq.gz" decompress="True"/>
-                    <element name="reverse" file="SRR11953971_2.fastq.gz" decompress="True"/>
+                    <element name="forward" file="SRR11953971_1.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
+                    <element name="reverse" file="SRR11953971_2.fastq.gz" ftype="fastqsanger.gz" decompress="True"/>
                 </element>
             </output_collection>
         </test>
@@ -183,51 +190,20 @@
     <help><![CDATA[
 **What it does?**
 
-This tool extracts data (in fastq_ format) from the Short Read Archive (SRA) at the National Center for Biotechnology Information (NCBI). It is based on the fasterq-dump_ utility of the SRA Toolkit.
-
-**How to use it?**
-
-There are three ways in which you can download data:
-
- 1. Data for single accession
- 2. Multiple datasets using a list of accessions
- 3. Extract data from already uploaded SRA dataset
-
-Below we discuss each in detail.
-
-------
-
-**Uploading data for a single accession**
-
-When you type a single accession number (e.g., `SRR1582967`) into **Accession** box and click **Execute** the tool will fetch data for you.
+This tool extracts data (in fastq_ format) from the Sequence Read Archive (SRA) at the National Center for Biotechnology Information (NCBI). It is based on the fasterq-dump_ utility of the SRA Toolkit. The following applies:
 
------
-
-**Uploading multiple datasets using a list of accessions**
-
-A more realistic scenario is when you want to upload a number of datasets at once. To do this you need a list of accession, where there is only one accession per line (see below for information on how to generate such a file). Once you have this file:
+ - if data is paired-end (or mate-pair), the tool will generate a collection of file pairs, in which each element will be a pair of fastq_ files containing forward and reverse mates.
+ - if data is single-end, each element of the collection will be a single fastq_ dataset.
 
- 1. Upload it into your history using Galaxy's upload tool
- 2. Once the list of accessions is uploaded choose *List of SRA accessions, one per line* from **select input type** dropdown
- 3. Choose uploaded file within the **sra accession list** field
- 4. Click **Execute**
 
------
-
-**Extract data from already uploaded SRA dataset**
-
-If a SRA dataset is present in the history, it can be converted into fastq dataset by setting **select input type** drop-down to *SRA archive in current history*. Just like in the case of extracting data for single accession number the following applies:
-
- - if data is paired-ended (or mate-pair) the tool will generate a single *interleaved* dataset, in which forward and reverse mates are alternating (see example below).
- - if data is single ended, a standard fastq dataset will be produced
+@HOW_TO_USE_IT@
 
 -----
 
 **Output**
 
-In every case, fastq datasets produced will be saved in Galaxy's history as a collection_ - a single history element containing multiple datasets.
-In fact, three collections will be produced: one containing paired-end data, another containing single-end data, and a third one which contains reads which could not be classified.
-Some collections may be empty if the accessions provided in the list does not contain one of the type of data.
+In every case, fastq datasets produced will be saved in Galaxy's history as a collection_ - a single history element containing multiple datasets. In fact, regardless of the experimental design, three collections will be produced: one containing paired-end data, another containing single-end data, and a third one which contains reads which could not be classified.
+Some collections may be empty if the accessions provided in the list do not contain one of these types of data.
 
 .. class:: warningmark
 
@@ -236,7 +212,7 @@
 .. class:: warningmark
 
 By default, only biological reads are dumped and in case of PAIRED dataset only the spots which have both reads will be in the paired-end collection. The remaining single reads will be in the other colletion.
-To keep all reads, and maybe do not have the same number of reads in forward and reverse use the --split-files option in Advanced Options, Select how to split the spots.
+To keep all reads, even if that leaves the forward and reverse files with different numbers of reads, use the --split-files option in Advanced Options, Select how to split the spots.
 
 @ACCESSION_LIST_HOWTO@
 
@@ -244,14 +220,12 @@
 
 
 .. _fastq: https://en.wikipedia.org/wiki/FASTQ_format
-.. _fastq-dump: https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=toolkit_doc&f=fastq-dump
 .. _fasterq-dump: https://github.com/ncbi/sra-tools/wiki/HowTo:-fasterq-dump
 .. _collection: https://galaxyproject.org/tutorials/collections/
-.. _link: https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies
+.. _link: https://trace.ncbi.nlm.nih.gov/Traces/index.html?view=run_browser&display=reads
 
 @SRATOOLS_ATTRRIBUTION@
-
 ]]>
     </help>
     <expand macro="citation"/>
-  </tool>
+</tool>