# HG changeset patch # User ebi-gxa # Date 1646970960 0 # Node ID 27e997d5c273bdb04935bd73dd5fc592b14707d9 "planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/qc/fastq_quality_trimmer commit b8f23cd5bce0118cc9aab61253056b596e42a06f" diff -r 000000000000 -r 27e997d5c273 fastq_quality_trimmer.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_quality_trimmer.xml Fri Mar 11 03:56:00 2022 +0000 @@ -0,0 +1,117 @@ + + Trims reads at the 3'-end based on quality, discarding reads with trimmed length less than threshold. + + fastx_toolkit + + + + + + + + Nucleotides below this quality will be trimmed + + + + Sequences shorter than this length will be discarded. Leave at zero to keep all sequences + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This tool scans the sequence from the end for the first nucleotide to possess the specified minimum quality score. It will then trim (remove nucleotides from) the sequence after this position. After trimming, sequences that are shorter than the minimum length are discarded.
+ +-------- + +**Example** + +Input FASTQ file (with 20 bases in each sequence):: + + @1 + TATGGTCAGAAACCATATGC + +1 + 40 40 40 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 + @2 + CAGCGAGGCTTTAATGCCAT + +2 + 40 40 40 40 40 40 40 40 30 20 19 20 19 19 19 19 19 19 19 19 + @3 + CAGCGAGGCTTTAATGCCAT + +3 + 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 19 19 19 + + +Trimming with a cutoff of 20, we get the following FASTQ file:: + + @1 + TATGGTCAGAAA + +1 + 40 40 40 40 40 40 40 40 40 40 40 20 + @2 + CAGCGAGGCTTT + +2 + 40 40 40 40 40 40 40 40 30 20 19 20 + @3 + CAGCGAGGC + +3 + 40 40 40 40 40 40 40 40 20 + +Trimming with a cutoff of 20 and a minimum length of 12, we get the following FASTQ file:: + + @1 + TATGGTCAGAAA + +1 + 40 40 40 40 40 40 40 40 40 40 40 20 + @2 + CAGCGAGGCTTT + +2 + 40 40 40 40 40 40 40 40 30 20 19 20 + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + + + + + + + diff -r 000000000000 -r 27e997d5c273 get_test_data.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_test_data.sh Fri Mar 11 03:56:00 2022 +0000 @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +BASE_LINK="https://raw.githubusercontent.com/agordon/fastx_toolkit/master/galaxy/test-data" + +FQ_FILE="fastq_quality_trimmer.fastq" + +FQ_LINK=$BASE_LINK"/"$FQ_FILE +# get_data LINK FNAME: download LINK to FNAME with wget unless FNAME already exists; a failed download removes the partial file (so a later run retries) and returns 1 +function get_data { + local link=$1 + local fname=$2 + + if [ ! -f "$fname" ]; then + echo "$fname not available locally, downloading.."
+ wget -O "$fname" --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 "$link" || { rm -f -- "$fname"; return 1; } + fi +} + +# Get test data; stop if test-data/ is missing so nothing is downloaded into the wrong directory +pushd test-data || exit 1 + +get_data "$FQ_LINK" "$FQ_FILE" diff -r 000000000000 -r 27e997d5c273 test-data/fastq_quality_trimmer.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastq_quality_trimmer.fastq Fri Mar 11 03:56:00 2022 +0000 @@ -0,0 +1,36 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` +@CSHL_3_FC042AGLLWW:1:2:7:33 +CAATGCCTCCAATTGGTTAATCCCCCTATATATACT ++CSHL_3_FC042AGLLWW:1:2:7:33 +Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa +@CSHL_3_FC042AGLLWW:1:2:7:169 +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:169 +a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR +@CSHL_3_FC042AGLLWW:1:2:7:1436 +AATTATTTATTAAATTTTAATAATATGGGAGACACT ++CSHL_3_FC042AGLLWW:1:2:7:1436 +a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ +@CSHL_3_FC042AGLLWW:1:2:7:292 +GGAGAAATACACACAATTGGTTAATCCCCCTATATA ++CSHL_3_FC042AGLLWW:1:2:7:292 +babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE +@CSHL_3_FC042AGLLWW:1:2:7:1819 +AATTCAAACCACCCCAACCCACACACAGAGATACAA ++CSHL_3_FC042AGLLWW:1:2:7:1819 +a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU +@CSHL_3_FC042AGLLWW:1:2:7:1875 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:1875 +aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU +@CSHL_3_FC042AGLLWW:1:2:8:624 +ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG ++CSHL_3_FC042AGLLWW:1:2:8:624 +aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa +@CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA ++CSHL_3_FC042AGLLWW:1:2:8:250 +aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE diff -r 000000000000 -r 27e997d5c273 test-data/fastq_quality_trimmer.out.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fastq_quality_trimmer.out.fq Fri Mar 11 03:56:00 2022 +0000 @@ -0,0 +1,32 @@ +@CSHL_3_FC042AGLLWW:1:2:7:203 +GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT ++CSHL_3_FC042AGLLWW:1:2:7:203 +aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]`
+@CSHL_3_FC042AGLLWW:1:2:7:33 +CAATGCCTCCAATTGGTTAATCCCCCTATATATACT ++CSHL_3_FC042AGLLWW:1:2:7:33 +Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa +@CSHL_3_FC042AGLLWW:1:2:7:169 +GCAGCAGGCGCGTCAGAGAGCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:169 +a_M^a\Uaaa_M_aaaaaaaaaaaaaaa +@CSHL_3_FC042AGLLWW:1:2:7:1436 +AATTATTTATTAAATTTTAATAATATGGGAGAC ++CSHL_3_FC042AGLLWW:1:2:7:1436 +a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaa +@CSHL_3_FC042AGLLWW:1:2:7:292 +GGAGAAATACACACAATTGGTTAATCCCCCT ++CSHL_3_FC042AGLLWW:1:2:7:292 +babaaaaaaaUMaaaaaaaaaaa\XEUUEP_ +@CSHL_3_FC042AGLLWW:1:2:7:1875 +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCC ++CSHL_3_FC042AGLLWW:1:2:7:1875 +aaaaaaaaaXUXXEXaaaaa`_Zaaaaaaaaa +@CSHL_3_FC042AGLLWW:1:2:8:624 +ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG ++CSHL_3_FC042AGLLWW:1:2:8:624 +aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa +@CSHL_3_FC042AGLLWW:1:2:8:250 +TGCCGCGCACACTGATGCAATTGGTTAAT ++CSHL_3_FC042AGLLWW:1:2:8:250 +aaaaaaaa^aaaaaabbb[KXPEU[RXZ^