Mercurial > repos > iuc > fastq_dl
diff fastq_dl.xml @ 0:5e7401777990 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastq_dl commit 8da9481e027494c5fd881564d01d9e2ab55fe305
author | iuc |
---|---|
date | Sat, 16 Nov 2024 18:43:55 +0000 |
parents | |
children |
line wrap: on
line diff
<tool id="fastq_dl" name="fastq-dl" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
    <!--
        Galaxy wrapper for fastq-dl: downloads FASTQ files and run metadata from ENA
        for a set of accessions supplied either as free text or as a one-per-line file.
        The @TOOL_VERSION@, @VERSION_SUFFIX@ and @FASTQ_DL_FOR_LOOP@ tokens, as well as the
        "requirements", "creators" and "citations" macros, come from macros.xml.
    -->
    <description>Download FASTQ files from ENA</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="creators"/>
    <command detect_errors="aggressive"><![CDATA[
        ## Working directories that the output collections are discovered from.
        mkdir -p single-end paired-end logs &&

        ## Fill the bash array "accessionsarr" from the selected input type, then hand over to
        ## the download loop. NOTE(review): @FASTQ_DL_FOR_LOOP@ is defined in macros.xml (not
        ## visible here); it presumably iterates over accessionsarr and writes the
        ## *-fastq-run-info.tsv files into logs/ - confirm against macros.xml.
        #if str($input_type.select_input_type) == "accession_ids"
            IFS=' ' &&
            read -ra accessionsarr <<< '$accessions' &&
            @FASTQ_DL_FOR_LOOP@
        #elif str($input_type.select_input_type) == "accessions_list"
            mapfile -t accessionsarr < '$accessions_file' &&
            @FASTQ_DL_FOR_LOOP@
        #end if

        ## Sort the downloaded reads only when FASTQ files were actually requested.
        #if str($only_download_metadata) == ""
            &&
            ## Mates (*_1/*_2 or *_R1/*_R2) are moved to paired-end/ and renamed to
            ## <name>_forward / <name>_reverse so that the list:paired discovery pattern
            ## can split the file name on the underscore.
            find . -maxdepth 1 -name "*_1.fastq.gz" -exec bash -c 'mv "\$0" "paired-end/\$(basename "\$0" | sed "s/_1/_forward/")"' {} \; &&
            find . -maxdepth 1 -name "*_2.fastq.gz" -exec bash -c 'mv "\$0" "paired-end/\$(basename "\$0" | sed "s/_2/_reverse/")"' {} \; &&
            find . -maxdepth 1 -name "*_R1.fastq.gz" -exec bash -c 'mv "\$0" "paired-end/\$(basename "\$0" | sed "s/_R1/_forward/")"' {} \; &&
            find . -maxdepth 1 -name "*_R2.fastq.gz" -exec bash -c 'mv "\$0" "paired-end/\$(basename "\$0" | sed "s/_R2/_reverse/")"' {} \; &&
            ## Everything still left is single-end. mv fails when no such file exists, which is
            ## expected; the subshell keeps "|| true" from also masking failures of the
            ## preceding steps in the && chain.
            ( mv *.gz single-end > /dev/null 2>&1 || true )
        #end if
    ]]></command>
    <inputs>
        <!-- Accessions are given either inline (space separated) or as a dataset with one accession per line. -->
        <conditional name="input_type">
            <param name="select_input_type" type="select" label="Select an input type">
                <option value="accession_ids">ENA accession IDs</option>
                <option value="accessions_list">A list of ENA accession IDs, one per row</option>
            </param>
            <when value="accession_ids">
                <param name="accessions" type="text" label="Accession IDs" help="ENA accessions (Study, Sample, Experiment, Run accession) separated by whitespaces" optional="false" />
            </when>
            <when value="accessions_list">
                <param name="accessions_file" type="data" format="txt" label="Accession IDs File" help="ENA accessions (Study, Sample, Experiment, Run accession) stored in a file. One accession per line" optional="false" />
            </when>
        </conditional>
        <!-- Boolean switches; each truevalue is the literal command line flag. -->
        <param name="group_by_experiment" type="boolean" label="Group by Experiment" help="Group Runs by experiment accession" truevalue="--group-by-experiment" falsevalue="" />
        <param name="group_by_sample" type="boolean" label="Group by Sample" help="Group Runs by sample accession" truevalue="--group-by-sample" falsevalue="" />
        <param name="only_download_metadata" type="boolean" label="Only Download Metadata" help="Skip FASTQ download and retrieve metadata only" truevalue="--only-download-metadata" falsevalue="" />
    </inputs>
    <outputs>
        <!-- One element per logs/<designation>-fastq-run-info.tsv file; always produced. -->
        <collection name="metadata" type="list" label="Metadata files">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\-fastq-run-info\.tsv" directory="logs" ext="tsv" />
        </collection>
        <!-- Read collections are omitted when only metadata is requested. -->
        <collection name="single_end_collection" type="list" label="Single-end data">
            <filter>only_download_metadata == False</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fastq\.gz" directory="single-end" ext="fastq.gz" />
        </collection>
        <!-- File names in paired-end/ look like <identifier_0>_<forward|reverse>.fastq.gz. -->
        <collection name="paired_end_collection" type="list:paired" label="Paired-end data">
            <filter>only_download_metadata == False</filter>
            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;[^_]+)\.fastq\.gz" directory="paired-end" ext="fastq.gz" />
        </collection>
    </outputs>
    <tests>
        <!-- #1 Testing single end and paired end using accessions file -->
        <test expect_num_outputs="3">
            <param name="select_input_type" value="accessions_list" />
            <param name="accessions_file" value="accessions.txt" />
            <output_collection name="metadata" type="list" count="4">
                <element name="DRR011117" file="Metadata_files/DRR011117.tsv" />
                <element name="ERR2651925" file="Metadata_files/ERR2651925.tsv" />
                <element name="ERR4319712" file="Metadata_files/ERR4319712.tsv" />
                <element name="SRR9678965" file="Metadata_files/SRR9678965.tsv" />
            </output_collection>
            <output_collection name="single_end_collection" type="list" count="2">
                <element name="DRR011117" decompress="True">
                    <assert_contents>
                        <has_text text="@DRR011117.1 HXVJWSB01AD414/4" />
                        <has_size size="23102" />
                    </assert_contents>
                </element>
                <element name="SRR9678965" decompress="True">
                    <assert_contents>
                        <has_text text="@SRR9678965.1 HQCI9RE01A6I97/2" />
                        <has_size size="2465043" />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="paired_end_collection" type="list:paired" count="2">
                <element name="ERR2651925" decompress="True">
                    <element name="forward" decompress="True">
                        <assert_contents>
                            <has_text text="@ERR2651925.1 M01945:48:000000000-B9G5G:1:1102:16788:1675/1" />
                            <has_size size="4977454" />
                        </assert_contents>
                    </element>
                    <element name="reverse" decompress="True">
                        <assert_contents>
                            <has_text text="@ERR2651925.1 M01945:48:000000000-B9G5G:1:1102:16788:1675/2" />
                            <has_size size="6079979" />
                        </assert_contents>
                    </element>
                </element>
                <element name="ERR4319712" decompress="True">
                    <element name="forward" decompress="True">
                        <assert_contents>
                            <has_text text="@ERR4319712.1 M02944:93:000000000-ALWFJ:1:2105:13646:2309/1" />
                            <has_size size="2104680" />
                        </assert_contents>
                    </element>
                    <element name="reverse" decompress="True">
                        <assert_contents>
                            <has_text text="@ERR4319712.1 M02944:93:000000000-ALWFJ:1:2105:13646:2309/2" />
                            <has_size size="2578613" />
                        </assert_contents>
                    </element>
                </element>
            </output_collection>
        </test>
        <!-- #2 Testing single end and paired end using accessions as text input -->
        <test expect_num_outputs="3">
            <param name="select_input_type" value="accession_ids" />
            <param name="accessions" value="ERR4319712 DRR011117 ERR2651925 SRR9678965" />
            <output_collection name="metadata" type="list" count="4">
                <element name="DRR011117" file="Metadata_files/DRR011117.tsv" />
                <element name="ERR2651925" file="Metadata_files/ERR2651925.tsv" />
                <element name="ERR4319712" file="Metadata_files/ERR4319712.tsv" />
                <element name="SRR9678965" file="Metadata_files/SRR9678965.tsv" />
            </output_collection>
            <output_collection name="single_end_collection" type="list" count="2">
                <element name="DRR011117">
                    <assert_contents>
                        <has_size size="23102" />
                    </assert_contents>
                </element>
                <element name="SRR9678965">
                    <assert_contents>
                        <has_size size="2465043" />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="paired_end_collection" type="list:paired" count="2">
                <element name="ERR2651925">
                    <element name="forward">
                        <assert_contents>
                            <has_size size="4977454" />
                        </assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_size size="6079979" />
                        </assert_contents>
                    </element>
                </element>
                <element name="ERR4319712">
                    <element name="forward">
                        <assert_contents>
                            <has_size size="2104680" />
                        </assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_size size="2578613" />
                        </assert_contents>
                    </element>
                </element>
            </output_collection>
        </test>
        <!-- #3 Testing only download metadata -->
        <test expect_num_outputs="1">
            <param name="select_input_type" value="accessions_list" />
            <param name="accessions_file" value="accessions.txt" />
            <param name="only_download_metadata" value="--only-download-metadata" />
            <output_collection name="metadata" type="list" count="4">
                <element name="DRR011117" file="Metadata_files/DRR011117.tsv" />
                <element name="ERR2651925" file="Metadata_files/ERR2651925.tsv" />
                <element name="ERR4319712" file="Metadata_files/ERR4319712.tsv" />
                <element name="SRR9678965" file="Metadata_files/SRR9678965.tsv" />
            </output_collection>
        </test>
        <!-- #4 Testing group by experiment -->
        <test expect_num_outputs="3">
            <param name="select_input_type" value="accessions_list" />
            <param name="accessions_file" value="accessions.txt" />
            <param name="group_by_experiment" value="--group-by-experiment" />
            <output_collection name="metadata" type="list" count="4">
                <element name="DRR011117" file="Metadata_files/DRR011117.tsv" />
                <element name="ERR2651925" file="Metadata_files/ERR2651925.tsv" />
                <element name="ERR4319712" file="Metadata_files/ERR4319712.tsv" />
                <element name="SRR9678965" file="Metadata_files/SRR9678965.tsv" />
            </output_collection>
            <output_collection name="single_end_collection" type="list" count="2">
                <element name="DRX010073">
                    <assert_contents>
                        <has_size size="23102" />
                    </assert_contents>
                </element>
                <element name="SRX6439351">
                    <assert_contents>
                        <has_size size="2465043" />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="paired_end_collection" type="list:paired" count="2">
                <element name="ERX2668415">
                    <element name="forward">
                        <assert_contents>
                            <has_size size="4977454" />
                        </assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_size size="6079979" />
                        </assert_contents>
                    </element>
                </element>
                <element name="ERX4268079">
                    <element name="forward">
                        <assert_contents>
                            <has_size size="2104680" />
                        </assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_size size="2578613" />
                        </assert_contents>
                    </element>
                </element>
            </output_collection>
        </test>
        <!-- #5 Testing group by sample -->
        <test expect_num_outputs="3">
            <param name="select_input_type" value="accessions_list" />
            <param name="accessions_file" value="accessions.txt" />
            <param name="group_by_sample" value="--group-by-sample" />
            <output_collection name="metadata" type="list" count="4">
                <element name="DRR011117" file="Metadata_files/DRR011117.tsv" />
                <element name="ERR2651925" file="Metadata_files/ERR2651925.tsv" />
                <element name="ERR4319712" file="Metadata_files/ERR4319712.tsv" />
                <element name="SRR9678965" file="Metadata_files/SRR9678965.tsv" />
            </output_collection>
            <output_collection name="single_end_collection" type="list" count="2">
                <element name="SAMD00008419">
                    <assert_contents>
                        <has_size size="23102" />
                    </assert_contents>
                </element>
                <element name="SAMN12272107">
                    <assert_contents>
                        <has_size size="2465043" />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="paired_end_collection" type="list:paired" count="2">
                <element name="SAMEA4724129">
                    <element name="forward">
                        <assert_contents>
                            <has_size size="4977454" />
                        </assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_size size="6079979" />
                        </assert_contents>
                    </element>
                </element>
                <element name="SAMEA7040559">
                    <element name="forward">
                        <assert_contents>
                            <has_size size="2104680" />
                        </assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_size size="2578613" />
                        </assert_contents>
                    </element>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[

This tool downloads FASTQ files from the European Nucleotide Archive (ENA) based on a list of ENA accession IDs.
You can provide either accession IDs in text format or upload a file containing accession IDs (one per line).
The tool also allows you to group downloaded data by experiment or sample and can optionally retrieve only metadata
without downloading the FASTQ files.

Input Types
-----------

You can select from two types of inputs:

1. **ENA Accession IDs (Text Input)**:
   - Provide a list of ENA accession IDs (e.g., Study, Sample, Experiment, or Run accessions) separated by whitespace.

2. **Accession IDs File**:
   - Provide a file containing a list of ENA accession IDs, one per line.

Parameters
----------

- **Group by Experiment**:
  This option groups the downloaded runs by the experiment accession, which can be useful if you need to process
  data related to a specific experiment.

- **Group by Sample**:
  This option groups the downloaded runs by the sample accession.

- **Only Download Metadata**:
  Select this option if you only want to retrieve metadata without downloading the actual FASTQ files. This is
  useful if you need information about the runs but do not need the raw sequence data.

Outputs
-------

The tool generates up to three types of outputs. When **Only Download Metadata** is selected, only the metadata
collection is produced.

1. **Metadata Files**:
   This collection contains metadata files for each accession, in ``.tsv`` format, which provide details about the
   corresponding run.

2. **Single-End Data**:
   If the downloaded FASTQ files contain single-end reads, those files will be placed into a separate collection.
   The files are in ``.fastq.gz`` format.

3. **Paired-End Data**:
   If the downloaded FASTQ files contain paired-end reads, those files will be grouped into pairs (forward and reverse).
   The paired files will also be placed in a separate collection and will be in ``.fastq.gz`` format.

    ]]></help>
    <expand macro="citations"/>
</tool>