<!--
view fastq_dl.xml @ 0:5e7401777990 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastq_dl commit 8da9481e027494c5fd881564d01d9e2ab55fe305
author iuc
date Sat, 16 Nov 2024 18:43:55 +0000
parents
children
line wrap: on
line source
-->

<tool id="fastq_dl" name="fastq-dl" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
    <!-- One-line summary of what the tool does. -->
    <description>Download FASTQ files from ENA</description>
    <!-- The version tokens used in the tool tag and the "requirements", "creators" and
         "citations" macros are not defined in this file; presumably they are all supplied by
         the macros.xml import below (verify against that file). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="creators"/>
    <!-- Builds the job script: creates the directories the output collections are discovered
         from, loads the requested accessions into a bash array, runs the download loop that is
         injected from a macro token (defined outside this file), and finally sorts the
         downloaded FASTQ files into paired-end/ and single-end/. -->
    <command detect_errors="aggressive"><![CDATA[
    ## Directories scanned by the three output collections.
    mkdir -p single-end paired-end logs &&
    ## Load the accessions into the bash array "accessionsarr"; the loop token that follows
    ## presumably iterates over it (the token body is not visible in this file).
    #if str($input_type.select_input_type) == "accession_ids"
        ## Free-text input: split the accession string on spaces.
        IFS=' ' &&
        read -ra accessionsarr <<< '$input_type.accessions' &&
        @FASTQ_DL_FOR_LOOP@
    #elif str($input_type.select_input_type) == "accessions_list"
        ## Dataset input: one accession per line.
        mapfile -t accessionsarr < '$input_type.accessions_file' &&
        @FASTQ_DL_FOR_LOOP@
    #end if
    #if str($only_download_metadata) == ""
        &&
        ## Move mate files into paired-end/ and rename the mate suffix to forward or reverse.
        ## The sed expressions are anchored to the file suffix so that an underscore-digit
        ## sequence elsewhere in the name is never rewritten.
        find . -maxdepth 1 -name "*_1.fastq.gz" -exec bash -c 'mv "\$0" "paired-end/\$(basename "\$0" | sed "s/_1\.fastq\.gz\$/_forward.fastq.gz/")"' {} \; &&
        find . -maxdepth 1 -name "*_2.fastq.gz" -exec bash -c 'mv "\$0" "paired-end/\$(basename "\$0" | sed "s/_2\.fastq\.gz\$/_reverse.fastq.gz/")"' {} \; &&
        find . -maxdepth 1 -name "*_R1.fastq.gz" -exec bash -c 'mv "\$0" "paired-end/\$(basename "\$0" | sed "s/_R1\.fastq\.gz\$/_forward.fastq.gz/")"' {} \; &&
        find . -maxdepth 1 -name "*_R2.fastq.gz" -exec bash -c 'mv "\$0" "paired-end/\$(basename "\$0" | sed "s/_R2\.fastq\.gz\$/_reverse.fastq.gz/")"' {} \; &&
        ## Whatever is left is single-end data. Having no such file is not an error, so only
        ## this mv is allowed to fail; the braces keep the fallback from also masking a
        ## failure of any earlier step in the chain.
        { mv *.gz single-end > /dev/null 2>&1 || true; }
    #end if
    ]]></command>
    <inputs>
        <!-- Accessions are supplied either as free text or as a dataset with one accession per line. -->
        <conditional name="input_type">
            <param name="select_input_type" type="select" label="Select an input type">
                <option value="accession_ids">ENA accession IDs</option>
                <option value="accessions_list">A list of ENA accession IDs, one per row</option>
            </param>
            <when value="accession_ids">
                <param name="accessions" type="text" label="Accession IDs" help="ENA accessions (Study, Sample, Experiment, Run accession) separated by whitespaces" optional="false">
                    <!-- Reject an empty submission up front instead of running a job with nothing to download. -->
                    <validator type="empty_field" message="Provide at least one ENA accession" />
                </param>
            </when>
            <when value="accessions_list">
                <param name="accessions_file" type="data" format="txt" label="Accession IDs File" help="ENA accessions (Study, Sample, Experiment, Run accession) stored in a file. One accession per line" optional="false" />
            </when>
        </conditional>
        <!-- Each switch renders as its command-line flag when enabled and as an empty string otherwise. -->
        <param name="group_by_experiment" type="boolean" label="Group by Experiment" help="Group Runs by experiment accession" truevalue="--group-by-experiment" falsevalue="" />
        <param name="group_by_sample" type="boolean" label="Group by Sample" help="Group Runs by sample accession" truevalue="--group-by-sample" falsevalue="" />
        <param name="only_download_metadata" type="boolean" label="Only Download Metadata" help="Skip FASTQ download and retrieve metadata only" truevalue="--only-download-metadata" falsevalue="" />
    </inputs>
    <outputs>
        <!-- Run-info tables discovered in logs/; the element identifier is the part of the file
             name in front of "-fastq-run-info.tsv". -->
        <collection name="metadata" type="list" label="Metadata files">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\-fastq-run-info\.tsv" directory="logs" ext="tsv" />
        </collection>
        <!-- Both FASTQ collections are omitted when only metadata is requested. -->
        <collection name="single_end_collection" type="list" label="Single-end data">
            <filter>only_download_metadata == False</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fastq\.gz" directory="single-end" ext="fastq.gz" />
        </collection>
        <!-- Files in paired-end/ are named NAME_forward.fastq.gz and NAME_reverse.fastq.gz by the
             command: identifier_0 is the pair name, identifier_1 the mate. Neither part may
             contain an underscore for the pattern to match. -->
        <collection name="paired_end_collection" type="list:paired" label="Paired-end data">
            <filter>only_download_metadata == False</filter>
            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;[^_]+)\.fastq\.gz" directory="paired-end" ext="fastq.gz" />
        </collection>
    </outputs>
    <tests>
        <!-- #1 Testing single end and paired end using accessions file -->
        <!-- Reads the accessions from the accessions.txt test-data file and checks all three
             output collections. The FASTQ assertions are made with decompress enabled. -->
        <test expect_num_outputs="3">
            <param name="select_input_type" value="accessions_list" />
            <param name="accessions_file" value="accessions.txt" />
            <output_collection name="metadata" type="list" count="4">
                <element name="DRR011117" file="Metadata_files/DRR011117.tsv" />
                <element name="ERR2651925" file="Metadata_files/ERR2651925.tsv" />
                <element name="ERR4319712" file="Metadata_files/ERR4319712.tsv" />
                <element name="SRR9678965" file="Metadata_files/SRR9678965.tsv" />
            </output_collection>
            <output_collection name="single_end_collection" type="list" count="2">
                <element name="DRR011117" decompress="true">
                    <assert_contents>
                        <has_text text="@DRR011117.1 HXVJWSB01AD414/4" />
                        <has_size size="23102" />
                    </assert_contents>
                </element>
                <element name="SRR9678965" decompress="true">
                    <assert_contents>
                        <has_text text="@SRR9678965.1 HQCI9RE01A6I97/2" />
                        <has_size size="2465043" />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="paired_end_collection" type="list:paired" count="2">
                <!-- The outer elements are only the pair containers and carry no assertions of
                     their own, so decompress is set on the forward/reverse datasets only. -->
                <element name="ERR2651925">
                    <element name="forward" decompress="true">
                        <assert_contents>
                            <has_text text="@ERR2651925.1 M01945:48:000000000-B9G5G:1:1102:16788:1675/1" />
                            <has_size size="4977454" />
                        </assert_contents>
                    </element>
                    <element name="reverse" decompress="true">
                        <assert_contents>
                            <has_text text="@ERR2651925.1 M01945:48:000000000-B9G5G:1:1102:16788:1675/2" />
                            <has_size size="6079979" />
                        </assert_contents>
                    </element>
                </element>
                <element name="ERR4319712">
                    <element name="forward" decompress="true">
                        <assert_contents>
                            <has_text text="@ERR4319712.1 M02944:93:000000000-ALWFJ:1:2105:13646:2309/1" />
                            <has_size size="2104680" />
                        </assert_contents>
                    </element>
                    <element name="reverse" decompress="true">
                        <assert_contents>
                            <has_text text="@ERR4319712.1 M02944:93:000000000-ALWFJ:1:2105:13646:2309/2" />
                            <has_size size="2578613" />
                        </assert_contents>
                    </element>
                </element>
            </output_collection>
        </test>
        <!-- #2 Testing single end and paired end using accessions as text input -->
        <!-- Same four accessions as the file-based test, passed as one space-separated string.
             Only output sizes are asserted here.
             NOTE(review): test #1 asserts these same byte counts with decompress enabled, while
             no decompress is requested here; confirm which size has_size is meant to check. -->
        <test expect_num_outputs="3">
            <param name="select_input_type" value="accession_ids" />
            <param name="accessions" value="ERR4319712 DRR011117 ERR2651925 SRR9678965" />
            <output_collection name="metadata" type="list" count="4">
                <element name="DRR011117" file="Metadata_files/DRR011117.tsv" />
                <element name="ERR2651925" file="Metadata_files/ERR2651925.tsv" />
                <element name="ERR4319712" file="Metadata_files/ERR4319712.tsv" />
                <element name="SRR9678965" file="Metadata_files/SRR9678965.tsv" />
            </output_collection>
            <output_collection name="single_end_collection" type="list" count="2">
                <element name="DRR011117">
                    <assert_contents><has_size size="23102" /></assert_contents>
                </element>
                <element name="SRR9678965">
                    <assert_contents><has_size size="2465043" /></assert_contents>
                </element>
            </output_collection>
            <output_collection name="paired_end_collection" type="list:paired" count="2">
                <element name="ERR2651925">
                    <element name="forward">
                        <assert_contents><has_size size="4977454" /></assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents><has_size size="6079979" /></assert_contents>
                    </element>
                </element>
                <element name="ERR4319712">
                    <element name="forward">
                        <assert_contents><has_size size="2104680" /></assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents><has_size size="2578613" /></assert_contents>
                    </element>
                </element>
            </output_collection>
        </test>
        <!-- #3 Testing only download metadata -->
        <!-- With the switch enabled both FASTQ collections are filtered out, so the metadata
             collection is the only expected output. -->
        <test expect_num_outputs="1">
            <param name="select_input_type" value="accessions_list" />
            <param name="accessions_file" value="accessions.txt" />
            <!-- Boolean test values are given as true/false, independent of the flag text the
                 parameter renders on the command line. -->
            <param name="only_download_metadata" value="true" />
            <output_collection name="metadata" type="list" count="4">
                <element name="DRR011117" file="Metadata_files/DRR011117.tsv" />
                <element name="ERR2651925" file="Metadata_files/ERR2651925.tsv" />
                <element name="ERR4319712" file="Metadata_files/ERR4319712.tsv" />
                <element name="SRR9678965" file="Metadata_files/SRR9678965.tsv" />
            </output_collection>
        </test>
        <!-- #4 Testing group by experiment -->
        <!-- Metadata elements are still expected under the run accessions, while the FASTQ
             elements are expected under the experiment accessions. -->
        <test expect_num_outputs="3">
            <param name="select_input_type" value="accessions_list" />
            <param name="accessions_file" value="accessions.txt" />
            <!-- Boolean test values are given as true/false, independent of the flag text the
                 parameter renders on the command line. -->
            <param name="group_by_experiment" value="true" />
            <output_collection name="metadata" type="list" count="4">
                <element name="DRR011117" file="Metadata_files/DRR011117.tsv" />
                <element name="ERR2651925" file="Metadata_files/ERR2651925.tsv" />
                <element name="ERR4319712" file="Metadata_files/ERR4319712.tsv" />
                <element name="SRR9678965" file="Metadata_files/SRR9678965.tsv" />
            </output_collection>
            <output_collection name="single_end_collection" type="list" count="2">
                <element name="DRX010073">
                    <assert_contents>
                        <has_size size="23102" />
                    </assert_contents>
                </element>
                <element name="SRX6439351">
                    <assert_contents>
                        <has_size size="2465043" />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="paired_end_collection" type="list:paired" count="2">
                <element name="ERX2668415">
                    <element name="forward">
                        <assert_contents>
                            <has_size size="4977454" />
                        </assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_size size="6079979" />
                        </assert_contents>
                    </element>
                </element>
                <element name="ERX4268079">
                    <element name="forward">
                        <assert_contents>
                            <has_size size="2104680" />
                        </assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_size size="2578613" />
                        </assert_contents>
                    </element>
                </element>
            </output_collection>
        </test>
        <!-- #5 Testing group by sample -->
        <!-- Metadata elements are still expected under the run accessions, while the FASTQ
             elements are expected under the sample accessions. -->
        <test expect_num_outputs="3">
            <param name="select_input_type" value="accessions_list" />
            <param name="accessions_file" value="accessions.txt" />
            <!-- Boolean test values are given as true/false, independent of the flag text the
                 parameter renders on the command line. -->
            <param name="group_by_sample" value="true" />
            <output_collection name="metadata" type="list" count="4">
                <element name="DRR011117" file="Metadata_files/DRR011117.tsv" />
                <element name="ERR2651925" file="Metadata_files/ERR2651925.tsv" />
                <element name="ERR4319712" file="Metadata_files/ERR4319712.tsv" />
                <element name="SRR9678965" file="Metadata_files/SRR9678965.tsv" />
            </output_collection>
            <output_collection name="single_end_collection" type="list" count="2">
                <element name="SAMD00008419">
                    <assert_contents>
                        <has_size size="23102" />
                    </assert_contents>
                </element>
                <element name="SAMN12272107">
                    <assert_contents>
                        <has_size size="2465043" />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="paired_end_collection" type="list:paired" count="2">
                <element name="SAMEA4724129">
                    <element name="forward">
                        <assert_contents>
                            <has_size size="4977454" />
                        </assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_size size="6079979" />
                        </assert_contents>
                    </element>
                </element>
                <element name="SAMEA7040559">
                    <element name="forward">
                        <assert_contents>
                            <has_size size="2104680" />
                        </assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_size size="2578613" />
                        </assert_contents>
                    </element>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[

This tool downloads FASTQ files from the European Nucleotide Archive (ENA) based on a list of ENA accession IDs.
You can provide either accession IDs in text format or upload a file containing accession IDs (one per line).
The tool also allows you to group downloaded data by experiment or sample and can optionally retrieve only metadata
without downloading the FASTQ files.

Input Types
-----------

You can select from two types of inputs:

1. **ENA Accession IDs (Text Input)**:

   - Provide a list of ENA accession IDs (e.g., Study, Sample, Experiment, or Run accessions) separated by whitespace.

2. **Accession IDs File**:

   - Provide a file containing a list of ENA accession IDs, one per line.

Parameters
----------

- **Group by Experiment**:  
   This option groups the downloaded runs by the experiment accession, which can be useful if you need to process 
   data related to a specific experiment.

- **Group by Sample**:  
   This option groups the downloaded runs by the sample accession.

- **Only Download Metadata**:  
   Select this option if you only want to retrieve metadata without downloading the actual FASTQ files. This is 
   useful if you need information about the runs but do not need the raw sequence data.

Outputs
-------

The tool generates up to three types of outputs (the two FASTQ collections are omitted when **Only Download Metadata** is selected):

1. **Metadata Files**:
   This collection contains metadata files for each accession, in ``.tsv`` format, which provide details about the
   corresponding run.

2. **Single-End Data**:
   Downloaded FASTQ files that contain single-end reads are placed into their own collection,
   in ``.fastq.gz`` format.

3. **Paired-End Data**:
   Downloaded FASTQ files that contain paired-end reads are grouped into pairs (forward and reverse).
   The paired files are placed in a separate collection, also in ``.fastq.gz`` format.

    ]]></help>
    <expand macro="citations"/>
</tool>