changeset 0:27e997d5c273 draft default tip

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/qc/fastq_quality_trimmer commit b8f23cd5bce0118cc9aab61253056b596e42a06f"
author ebi-gxa
date Fri, 11 Mar 2022 03:56:00 +0000
parents
children
files fastq_quality_trimmer.xml get_test_data.sh test-data/fastq_quality_trimmer.fastq test-data/fastq_quality_trimmer.out.fq
diffstat 4 files changed, 207 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_quality_trimmer.xml	Fri Mar 11 03:56:00 2022 +0000
@@ -0,0 +1,117 @@
+<tool id="cshl_fastq_quality_trimmer" name="Trim By Quality" profile="18.01" version="0.0.14+galaxy0">
+    <description>Trims reads at the 3'-end based on quality, discarding reads with trimmed length less than threshold.</description>
+    <!-- External package needed by the command below: fastx_toolkit, pinned to 0.0.14. -->
+    <requirements>
+        <requirement version="0.0.14" type="package">fastx_toolkit</requirement>
+    </requirements>
+    <!-- Invokes fastq_quality_trimmer with the user's settings; every substituted value is single-quoted for the shell. -->
+    <command><![CDATA[
+        fastq_quality_trimmer
+            -i '$input'
+            -t '$cutoff' -l '$minlen' -Q '$phred'
+            -v
+            -o '$output'
+    ]]></command>
+
+    <!--
+        User-facing parameters; each one is substituted into the command line by name.
+        - input:  FASTQ dataset to trim (-i)
+        - cutoff: quality threshold passed as -t
+        - minlen: minimum length passed as -l; negative values are rejected in the form
+        - phred:  quality offset passed as -Q (33 or 64)
+    -->
+    <inputs>
+        <param name="input" label="Library to trim" argument="-i" type="data" format="fastq,fastqsanger" optional="false" />
+        <param name="cutoff" label="Minimum quality score" argument="-t" type="integer" value="20"
+               help="Nucleotides below this quality will be trimmed" />
+        <param name="minlen" label="Minimum sequence length" argument="-l" type="integer" value="1" min="0"
+               help="Sequences shorter than this length will be discarded. Set to zero to keep all sequences" />
+        <param name="phred" label="PHRED Encoding" argument="-Q" type="select" help="PHRED encoding used in the input files">
+            <option value="33" selected="true">33</option>
+            <option value="64">64</option>
+        </param>
+    </inputs>
+
+    <!-- Single result dataset; its datatype is taken from the "input" dataset. -->
+    <outputs>
+        <data name="output"
+              format_source="input"
+              label="${tool.name} on ${on_string}: Trimmed fastq" />
+    </outputs>
+
+    <!-- Regression test: cutoff 30, minimum length 16, PHRED offset 64 on the bundled FASTQ; the result is checked by MD5. -->
+    <tests>
+        <test>
+            <param name="input" ftype="fastq" value="fastq_quality_trimmer.fastq" />
+            <param name="cutoff" value="30" />
+            <param name="minlen" value="16" />
+            <param name="phred" value="64" />
+            <output name="output" md5="5173e32392286a05e1317387cc0d567e" />
+        </test>
+    </tests>
+
+	<help>
+**What it does**
+
+This tool scans the sequence from the end for the first nucleotide to possess the specified minimum quality score. It will then trim (remove nucleotides from) the sequence after this position. After trimming, sequences that are shorter than the minimum length are discarded.
+  
+--------
+
+**Example**
+
+Input FASTQ file (with 20 bases in each sequence)::
+
+    @1
+    TATGGTCAGAAACCATATGC
+    +1
+    40 40 40 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19
+    @2
+    CAGCGAGGCTTTAATGCCAT
+    +2
+    40 40 40 40 40 40 40 40 30 20 19 20 19 19 19 19 19 19 19 19
+    @3
+    CAGCGAGGCTTTAATGCCAT
+    +3
+    40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 19 19 19
+    
+
+Trimming with a cutoff of 20, we get the following FASTQ file::
+
+    @1
+    TATGGTCAGAAA
+    +1
+    40 40 40 40 40 40 40 40 40 40 40 20
+    @2
+    CAGCGAGGCTTT
+    +2
+    40 40 40 40 40 40 40 40 30 20 19 20
+    @3
+    CAGCGAGGC
+    +3
+    40 40 40 40 40 40 40 40 20
+
+Trimming with a cutoff of 20 and a minimum length of 12, we get the following FASTQ file::
+
+    @1
+    TATGGTCAGAAA
+    +1
+    40 40 40 40 40 40 40 40 40 40 40 20
+    @2
+    CAGCGAGGCTTT
+    +2
+    40 40 40 40 40 40 40 40 30 20 19 20
+    
+------
+
+This tool is based on `FASTX-toolkit`__ by Assaf Gordon.
+
+ .. __: http://hannonlab.cshl.edu/fastx_toolkit/
+    
+    </help>
+    <!-- Citation for the wrapped FASTX-toolkit: a BibTeX entry pointing at its GitHub repository and a specific commit. -->
+    <citations>
+        <citation type="bibtex"><![CDATA[
+            @ARTICLE{Gordon2009,
+            author = {Gordon, A.},
+            title = {fastx_toolkit},
+            year = {2009},
+            publisher = {GitHub},
+            journal = {GitHub repository},
+            howpublished = {\url{https://github.com/agordon/fastx_toolkit}},
+            commit = {ea0ca83ba24dce80c20ca589b838a281fe5deb0c}
+}]]></citation>
+    </citations>
+</tool>
+<!-- FASTX-Quality-Trimmer is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_test_data.sh	Fri Mar 11 03:56:00 2022 +0000
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+# Upstream directory holding the FASTX-toolkit Galaxy test data
+BASE_LINK='https://raw.githubusercontent.com/agordon/fastx_toolkit/master/galaxy/test-data'
+
+# Name of the FASTQ test file, used for both the remote path and the local copy
+FQ_FILE='fastq_quality_trimmer.fastq'
+
+# Full download URL for that file
+FQ_LINK="${BASE_LINK}/${FQ_FILE}"
+
+# Download a file unless a non-empty local copy already exists.
+#   $1 - URL to fetch
+#   $2 - local file name to write
+# Returns 1 if the download fails, 0 otherwise.
+function get_data {
+  local link=$1
+  local fname=$2
+
+  # -s rather than -f: an empty file left by an interrupted download must not count as available
+  if [ ! -s "$fname" ]; then
+    echo "$fname not available locally, downloading.."
+    # wget -O creates the target before any data arrives, so remove it on failure;
+    # otherwise later runs would mistake the truncated file for real test data.
+    if ! wget -O "$fname" --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 "$link"; then
+      rm -f "$fname"
+      return 1
+    fi
+  fi
+}
+
+# Get test data
+# Abort if the directory is missing instead of downloading into the current directory.
+pushd test-data || exit 1
+
+# Quoted so the URL and file name are each passed as a single argument
+get_data "$FQ_LINK" "$FQ_FILE"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastq_quality_trimmer.fastq	Fri Mar 11 03:56:00 2022 +0000
@@ -0,0 +1,36 @@
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]`
+@CSHL_3_FC042AGLLWW:1:2:7:33
+CAATGCCTCCAATTGGTTAATCCCCCTATATATACT
++CSHL_3_FC042AGLLWW:1:2:7:33
+Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa
+@CSHL_3_FC042AGLLWW:1:2:7:169
+GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC
++CSHL_3_FC042AGLLWW:1:2:7:169
+a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR
+@CSHL_3_FC042AGLLWW:1:2:7:1436
+AATTATTTATTAAATTTTAATAATATGGGAGACACT
++CSHL_3_FC042AGLLWW:1:2:7:1436
+a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[
+@CSHL_3_FC042AGLLWW:1:2:7:292
+GGAGAAATACACACAATTGGTTAATCCCCCTATATA
++CSHL_3_FC042AGLLWW:1:2:7:292
+babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE
+@CSHL_3_FC042AGLLWW:1:2:7:1819
+AATTCAAACCACCCCAACCCACACACAGAGATACAA
++CSHL_3_FC042AGLLWW:1:2:7:1819
+a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU
+@CSHL_3_FC042AGLLWW:1:2:7:1875
+GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC
++CSHL_3_FC042AGLLWW:1:2:7:1875
+aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU
+@CSHL_3_FC042AGLLWW:1:2:8:624
+ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG
++CSHL_3_FC042AGLLWW:1:2:8:624
+aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa
+@CSHL_3_FC042AGLLWW:1:2:8:250
+TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA
++CSHL_3_FC042AGLLWW:1:2:8:250
+aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastq_quality_trimmer.out.fq	Fri Mar 11 03:56:00 2022 +0000
@@ -0,0 +1,32 @@
+@CSHL_3_FC042AGLLWW:1:2:7:203
+GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT
++CSHL_3_FC042AGLLWW:1:2:7:203
+aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]`
+@CSHL_3_FC042AGLLWW:1:2:7:33
+CAATGCCTCCAATTGGTTAATCCCCCTATATATACT
++CSHL_3_FC042AGLLWW:1:2:7:33
+Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa
+@CSHL_3_FC042AGLLWW:1:2:7:169
+GCAGCAGGCGCGTCAGAGAGCCCCCCCC
++CSHL_3_FC042AGLLWW:1:2:7:169
+a_M^a\Uaaa_M_aaaaaaaaaaaaaaa
+@CSHL_3_FC042AGLLWW:1:2:7:1436
+AATTATTTATTAAATTTTAATAATATGGGAGAC
++CSHL_3_FC042AGLLWW:1:2:7:1436
+a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaa
+@CSHL_3_FC042AGLLWW:1:2:7:292
+GGAGAAATACACACAATTGGTTAATCCCCCT
++CSHL_3_FC042AGLLWW:1:2:7:292
+babaaaaaaaUMaaaaaaaaaaa\XEUUEP_
+@CSHL_3_FC042AGLLWW:1:2:7:1875
+GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCC
++CSHL_3_FC042AGLLWW:1:2:7:1875
+aaaaaaaaaXUXXEXaaaaa`_Zaaaaaaaaa
+@CSHL_3_FC042AGLLWW:1:2:8:624
+ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG
++CSHL_3_FC042AGLLWW:1:2:8:624
+aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa
+@CSHL_3_FC042AGLLWW:1:2:8:250
+TGCCGCGCACACTGATGCAATTGGTTAAT
++CSHL_3_FC042AGLLWW:1:2:8:250
+aaaaaaaa^aaaaaabbb[KXPEU[RXZ^