| Next changeset 1:e7c65e398bdd (2013-07-10) |
|
Commit message:
Uploaded |
|
added:
._fasta_clipping_histogram.xml ._fasta_formatter.xml ._fasta_nucleotide_changer.xml ._fastq_masker.xml ._fastq_quality_boxplot.xml ._fastq_quality_converter.xml ._fastq_quality_filter.xml ._fastq_quality_trimmer.xml ._fastq_to_fasta.xml ._fastx_artifacts_filter.xml ._fastx_barcode_splitter.xml ._fastx_barcode_splitter_galaxy_wrapper.sh ._fastx_clipper.xml ._fastx_collapser.xml ._fastx_nucleotides_distribution.xml ._fastx_nucleotides_distribution_line.xml ._fastx_quality_statistics.xml ._fastx_quality_statistics_ng.xml ._fastx_renamer.xml ._fastx_reverse_complement.xml ._fastx_trimmer.xml ._fastx_trimmer_from_end.xml ._fastx_uncollapser.xml ._seqid_uncollapser.xml fasta_clipping_histogram.xml fasta_formatter.xml fasta_nucleotide_changer.xml fastq_masker.xml fastq_quality_boxplot.xml fastq_quality_converter.xml fastq_quality_filter.xml fastq_quality_trimmer.xml fastq_to_fasta.xml fastx_artifacts_filter.xml fastx_barcode_splitter.xml fastx_barcode_splitter_galaxy_wrapper.sh fastx_clipper.xml fastx_collapser.xml fastx_nucleotides_distribution.xml fastx_nucleotides_distribution_line.xml fastx_quality_statistics.xml fastx_quality_statistics_ng.xml fastx_renamer.xml fastx_reverse_complement.xml fastx_trimmer.xml fastx_trimmer_from_end.xml fastx_uncollapser.xml seqid_uncollapser.xml |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fasta_clipping_histogram.xml |
| b |
| Binary file ._fasta_clipping_histogram.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fasta_formatter.xml |
| b |
| Binary file ._fasta_formatter.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fasta_nucleotide_changer.xml |
| b |
| Binary file ._fasta_nucleotide_changer.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastq_masker.xml |
| b |
| Binary file ._fastq_masker.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastq_quality_boxplot.xml |
| b |
| Binary file ._fastq_quality_boxplot.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastq_quality_converter.xml |
| b |
| Binary file ._fastq_quality_converter.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastq_quality_filter.xml |
| b |
| Binary file ._fastq_quality_filter.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastq_quality_trimmer.xml |
| b |
| Binary file ._fastq_quality_trimmer.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastq_to_fasta.xml |
| b |
| Binary file ._fastq_to_fasta.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_artifacts_filter.xml |
| b |
| Binary file ._fastx_artifacts_filter.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_barcode_splitter.xml |
| b |
| Binary file ._fastx_barcode_splitter.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_barcode_splitter_galaxy_wrapper.sh |
| b |
| Binary file ._fastx_barcode_splitter_galaxy_wrapper.sh has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_clipper.xml |
| b |
| Binary file ._fastx_clipper.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_collapser.xml |
| b |
| Binary file ._fastx_collapser.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_nucleotides_distribution.xml |
| b |
| Binary file ._fastx_nucleotides_distribution.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_nucleotides_distribution_line.xml |
| b |
| Binary file ._fastx_nucleotides_distribution_line.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_quality_statistics.xml |
| b |
| Binary file ._fastx_quality_statistics.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_quality_statistics_ng.xml |
| b |
| Binary file ._fastx_quality_statistics_ng.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_renamer.xml |
| b |
| Binary file ._fastx_renamer.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_reverse_complement.xml |
| b |
| Binary file ._fastx_reverse_complement.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_trimmer.xml |
| b |
| Binary file ._fastx_trimmer.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_trimmer_from_end.xml |
| b |
| Binary file ._fastx_trimmer_from_end.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._fastx_uncollapser.xml |
| b |
| Binary file ._fastx_uncollapser.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 ._seqid_uncollapser.xml |
| b |
| Binary file ._seqid_uncollapser.xml has changed |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fasta_clipping_histogram.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fasta_clipping_histogram.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,104 @@ +<tool id="cshl_fasta_clipping_histogram" name="Length Distribution"> + <description>chart</description> + <command>fasta_clipping_histogram.pl $input $outfile</command> + + <inputs> + <param format="fasta" name="input" type="data" label="Library to analyze" /> + </inputs> + + <outputs> + <data format="png" name="outfile" metadata_source="input" + /> + </outputs> +<help> + +**What it does** + +This tool creates a histogram image of sequence lengths distribution in a given fasta dataset file. + +**TIP:** Use this tool after clipping your library (with **FASTX Clipper tool**), to visualize the clipping results. + +----- + +**Output Examples** + +In the following library, most sequences are 24-mers to 27-mers. +This could indicate an abundance of endo-siRNAs (depending of course of what you've tried to sequence in the first place). + +.. image:: ./static/fastx_icons/fasta_clipping_histogram_1.png + + +In the following library, most sequences are 19,22 or 23-mers. +This could indicate an abundance of miRNAs (depending of course of what you've tried to sequence in the first place). + +.. image:: ./static/fastx_icons/fasta_clipping_histogram_2.png + + +----- + + +**Input Formats** + +This tool accepts short-reads FASTA files. 
The reads don't have to be short, but they do have to be on a single line, like so:: + + >sequence1 + AGTAGTAGGTGATGTAGAGAGAGAGAGAGTAG + >sequence2 + GTGTGTGTGGGAAGTTGACACAGTA + >sequence3 + CCTTGAGATTAACGCTAATCAAGTAAAC + + +If the sequences span over multiple lines:: + + >sequence1 + CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAG + TCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAG + aactggtctttacctTTAAGTTG + +Use the **FASTA Width Formatter** tool to re-format the FASTA into single-line sequences:: + + >sequence1 + CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG + + +----- + + + +**Multiplicity counts (a.k.a reads-count)** + +If the sequence identifier (the text after the '>') contains a dash and a number, it is treated as a multiplicity count value (i.e. how many times that individual sequence was repeated in the original FASTA file, before collapsing). + +Example 1 - The following FASTA file *does not* have multiplicity counts:: + + >seq1 + GGATCC + >seq2 + GGTCATGGGTTTAAA + >seq3 + GGGATATATCCCCACACACACACAC + +Each sequence counts as one, to produce the following chart: + +.. image:: ./static/fastx_icons/fasta_clipping_histogram_3.png + + +Example 2 - The following FASTA file has multiplicity counts:: + + >seq1-2 + GGATCC + >seq2-10 + GGTCATGGGTTTAAA + >seq3-3 + GGGATATATCCCCACACACACACAC + +The first sequence counts as 2, the second as 10, the third as 3, to produce the following chart: + +.. image:: ./static/fastx_icons/fasta_clipping_histogram_4.png + +Use the **FASTA Collapser** tool to create FASTA files with multiplicity counts. + +</help> +</tool> +<!-- FASTA-Clipping-Histogram is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fasta_formatter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fasta_formatter.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,90 @@ +<tool id="cshl_fasta_formatter" name="FASTA Width"> + <description>formatter</description> + <!-- + Note: + fasta_formatter also has a tabular output mode (-t), + but Galaxy already contains such a tool, so no need + to offer the user a duplicated tool. + + So this XML tool only changes the width (line-wrapping) of a + FASTA file. + --> + <command> + cat '$input' | + fasta_formatter -w $width -o '$output'</command> + <inputs> + <param format="fasta" name="input" type="data" label="Library to re-format" /> + + <param name="width" type="integer" value="0" label="New width for nucleotides strings" help="Use 0 for single line output." /> + </inputs> + + <tests> + <test> + <!-- Re-format a FASTA file into a single line --> + <param name="input" value="fasta_formatter1.fasta" /> + <param name="width" value="0" /> + <output name="output" file="fasta_formatter1.out" /> + </test> + <test> + <!-- Re-format a FASTA file into multiple lines wrapping at 60 charactes --> + <param name="input" value="fasta_formatter1.fasta" /> + <param name="width" value="60" /> + <output name="output" file="fasta_formatter2.out" /> + </test> + </tests> + + <outputs> + <data format="input" name="output" metadata_source="input" + /> + </outputs> + +<help> +**What it does** + +This tool re-formats a FASTA file, changing the width of the nucleotides lines. + +**TIP:** Outputting a single line (with **width = 0**) can be useful for scripting (with **grep**, **awk**, and **perl**). Every odd line is a sequence identifier, and every even line is a nucleotides line. 
+ +-------- + +**Example** + +Input FASTA file (each nucleotides line is 50 characters long):: + + >Scaffold3648 + AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTC + CCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTG + TTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACA + ATTAAAGTCAATTTTAATGAACATGTAGTAAAAACT + >Scaffold9299 + CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAG + TCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAG + aactggtctttacctTTAAGTTG + + +Output FASTA file (with width=80):: + + >Scaffold3648 + AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCAGGGACCTACCTGTTTTTGTT + ATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACAATTAAAGTCA + ATTTTAATGAACATGTAGTAAAAACT + >Scaffold9299 + CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTAC + GTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG + +Output FASTA file (with width=0 => single line):: + + >Scaffold3648 + AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACAATTAAAGTCAATTTTAATGAACATGTAGTAAAAACT + >Scaffold9299 + CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG + + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ +</help> +</tool> +<!-- FASTA-Formatter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fasta_nucleotide_changer.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fasta_nucleotide_changer.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,74 @@ +<tool id="cshl_fasta_nucleotides_changer" name="RNA/DNA" > + <description>converter</description> + <command> + cat '$input' | + fasta_nucleotide_changer $mode -v -o '$output'</command> + <inputs> + <param format="fasta" name="input" type="data" label="Library to convert" /> + + <param name="mode" type="select" label="Convert"> + <option value="-d">RNA to DNA (U to T)</option> + <option value="-r">DNA to RNA (T to U)</option> + </param> + </inputs> + + <tests> + <test> + <!-- DNA-to-RNA --> + <param name="input" value="fasta_nuc_changer1.fasta" /> + <param name="mode" value="DNA to RNA (T to U)" /> + <output name="output" file="fasta_nuc_changer1.out" /> + </test> + <test> + <!-- RNA-to-DNA --> + <param name="input" value="fasta_nuc_changer2.fasta" /> + <param name="mode" value="RNA to DNA (U to T)" /> + <output name="output" file="fasta_nuc_changer2.out" /> + </test> + </tests> + + + <outputs> + <data format="input" name="output" metadata_source="input" + /> + </outputs> + +<help> +**What it does** + +This tool converts RNA FASTA files to DNA (and vice-versa). + +In **RNA-to-DNA** mode, U's are changed into T's. + +In **DNA-to-RNA** mode, T's are changed into U's. + +-------- + +**Example** + +Input RNA FASTA file ( from Sanger's mirBase ):: + + >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 + UGAGGUAGUAGGUUGUAUAGUU + >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 + UCCCUGAGACCUCAAGUGUGA + >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 + UGGAAUGUAAAGAAGUAUGUA + +Output DNA FASTA file (with RNA-to-DNA mode):: + + >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 + TGAGGTAGTAGGTTGTATAGTT + >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 + TCCCTGAGACCTCAAGTGTGA + >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 + TGGAATGTAAAGAAGTATGTA + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ +</help> +</tool> +<!-- FASTA-Nucleotide-Changer is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastq_masker.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_masker.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,87 @@ +<tool id="cshl_fastq_masker" name="Mask nucleotides"> + <description>(based on quality)</description> + <command> + cat '$input' | + fastq_masker +#if $input.ext == "fastqsanger": + -Q 33 +#elif $input.ext == "fastq": + -Q 64 +#end if + -v -q $cutoff -r '$maskchar' -o '$output'</command> + <inputs> + <param format="fastq,fastqsanger" name="input" type="data" label="Library to clip" /> + + <param name="cutoff" size="4" type="integer" value="20"> + <label>Minimum quality score</label> + <help>Nucleotides below this quality will be masked</help> + </param> + + <param name="maskchar" size="1" type="text" value="N"> + <label>Mask character</label> + <help>Replace low-quality nucleotides with this character. Common values: 'N' or '.'</help> + </param> + </inputs> + + <tests> + <test> + <param name="input" value="fastq_masker.fastq" ftype="fastq" /> + <param name="cutoff" value="29"/> + <param name="maskchar" value="x"/> + <output name="output" file="fastq_masker.out" /> + </test> + </tests> + + <outputs> + <data format="input" name="output" metadata_source="input" + /> + </outputs> + <help> +**What it does** + +This tool masks low-quality nucleotides in a FASTQ file, and replaces them with the specifed mask character (**N** by default). 
+ +-------- + +**Example** + +Input FASTQ file:: + + @1 + TATGGTCAGAAACCATATGC + +1 + 40 40 40 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 + @2 + CAGCGAGGCTTTAATGCCAT + +2 + 40 40 40 40 40 40 40 40 30 20 19 20 19 19 19 19 19 19 19 19 + @3 + CAGCGAGGCTTTAATGCCAT + +3 + 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 19 19 19 + +After masking nucleotides with quality lower than 20 with the character **N**:: + + @1 + TATGGTCAGAAANNNNNNNN + +1 + 40 40 40 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 + @2 + CAGCGAGGCTNTNNNNNNNN + +2 + 40 40 40 40 40 40 40 40 30 20 19 20 19 19 19 19 19 19 19 19 + @3 + CAGCGAGGCNNNNNNNNNNN + +3 + 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 19 19 19 + + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTQ-Masker is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastq_quality_boxplot.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_quality_boxplot.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,54 @@ +<tool id="cshl_fastq_quality_boxplot" name="Draw quality score boxplot"> + <description></description> + + <command>fastq_quality_boxplot_graph.sh -t '$input.name' -i $input -o '$output'</command> + + <inputs> + <param format="txt" name="input" type="data" label="Statistics report file" help="output of 'FASTQ Statistics' tool" /> + </inputs> + + <outputs> + <data format="png" name="output" metadata_source="input" + /> + </outputs> +<help> + +**What it does** + +Creates a boxplot graph for the quality scores in the library. + +.. class:: infomark + +**TIP:** Use the **FASTQ Statistics** tool to generate the report file needed for this tool. + +----- + +**Output Examples** + +* Black horizontal lines are medians +* Rectangular red boxes show the Inter-quartile Range (IQR) (top value is Q3, bottom value is Q1) +* Whiskers show outlier at max. 1.5*IQR + + +An excellent quality library (median quality is 40 for almost all 36 cycles): + +.. image:: ../static/fastx_icons/fastq_quality_boxplot_1.png + + +A relatively good quality library (median quality degrades towards later cycles): + +.. image:: ../static/fastx_icons/fastq_quality_boxplot_2.png + +A low quality library (median drops quickly): + +.. image:: ../static/fastx_icons/fastq_quality_boxplot_3.png + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTQ-Quality-Boxplot is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastq_quality_converter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_quality_converter.xml Wed Jul 10 06:13:48 2013 -0400 |
| [ |
| @@ -0,0 +1,99 @@ +<tool id="cshl_fastq_quality_converter" name="Quality format converter"> + <description>(ASCII-Numeric)</description> + <command> + cat '$input' | + fastq_quality_converter $QUAL_FORMAT -o '$output' -Q $offset</command> + <inputs> + <param format="fastq,fastqsanger" name="input" type="data" label="Library to convert" /> + + <param name="QUAL_FORMAT" type="select" label="Desired output format"> + <option value="-a">ASCII (letters) quality scores</option> + <option value="-n">Numeric quality scores</option> + </param> + + <param name="offset" type="select" label="FASTQ ASCII offset"> + <option value="33">33</option> + <option selected="true" value="64">64</option> + </param> + </inputs> + + <tests> + <test> + <!-- ASCII to NUMERIC --> + <param name="input" value="fastq_qual_conv1.fastq" /> + <param name="QUAL_FORMAT" value="Numeric quality scores" /> + <param name="offset" value="64" /> + <output name="output" file="fastq_qual_conv1.out" /> + </test> + <test> + <!-- ASCII to ASCII (basically, a no-op, but it should still produce a valid output --> + <param name="input" value="fastq_qual_conv1.fastq" /> + <param name="QUAL_FORMAT" value="ASCII (letters) quality scores" /> + <param name="offset" value="64" /> + <output name="output" file="fastq_qual_conv1a.out" /> + </test> + <test> + <!-- NUMERIC to ASCII --> + <param name="input" value="fastq_qual_conv2.fastq" /> + <param name="QUAL_FORMAT" value="ASCII (letters) quality scores" /> + <param name="offset" value="64" /> + <output name="output" file="fastq_qual_conv2.out" /> + </test> + <test> + <!-- NUMERIC to NUMERIC (basically, a no-op, but it should still produce a valid output --> + <param name="input" value="fastq_qual_conv2.fastq" /> + <param name="QUAL_FORMAT" value="Numeric quality scores" /> + <param name="offset" value="64" /> + <output name="output" file="fastq_qual_conv2n.out" /> + </test> + </tests> + + <outputs> + <data format="input" name="output" metadata_source="input" + /> + 
</outputs> +<help> + +**What it does** + +Converts a Solexa FASTQ file to/from numeric or ASCII quality format. + +.. class:: warningmark + +Re-scaling is **not** performed. (e.g. conversion from Phred scale to Solexa scale). + + +----- + +FASTQ with Numeric quality scores:: + + @CSHL__2_FC042AGWWWXX:8:1:120:202 + ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC + +CSHL__2_FC042AGWWWXX:8:1:120:202 + 40 40 40 40 20 40 40 40 40 6 40 40 28 40 40 25 40 20 40 -1 30 40 14 27 40 8 1 3 7 -1 11 10 -1 21 10 8 + @CSHL__2_FC042AGWWWXX:8:1:103:1185 + ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC + +CSHL__2_FC042AGWWWXX:8:1:103:1185 + 40 40 40 40 40 35 33 31 40 40 40 32 30 22 40 -0 9 22 17 14 8 36 15 34 22 12 23 3 10 -0 8 2 4 25 30 2 + + +FASTQ with ASCII quality scores:: + + @CSHL__2_FC042AGWWWXX:8:1:120:202 + ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC + +CSHL__2_FC042AGWWWXX:8:1:120:202 + hhhhThhhhFhh\hhYhTh?^hN[hHACG?KJ?UJH + @CSHL__2_FC042AGWWWXX:8:1:103:1185 + ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC + +CSHL__2_FC042AGWWWXX:8:1:103:1185 + hhhhhca_hhh`^Vh@IVQNHdObVLWCJ@HBDY^B + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTQ-Quality-Converter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastq_quality_filter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_quality_filter.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,86 @@ +<tool id="cshl_fastq_quality_filter" name="Filter by quality"> + <description></description> + + <command> +cat '$input' | +fastq_quality_filter +#if $input.ext == "fastqsanger": + -Q 33 +#elif $input.ext == "fastq": + -Q 64 +#end if + -q $quality -p $percent -v -o '$output' +</command> + <inputs> + <param format="fastq,fastqsanger" name="input" type="data" label="Library to filter" /> + + <param name="quality" size="4" type="integer" value="20"> + <label>Quality cut-off value</label> + </param> + + <param name="percent" size="4" type="integer" value="90"> + <label>Percent of bases in sequence that must have quality equal to / higher than cut-off value</label> + </param> + </inputs> + + <tests> + <test> + <!-- Test1: 100% of bases with quality 33 or higher (pretty steep requirement...) --> + <param name="input" value="fastq_qual_filter1.fastq" /> + <param name="quality" value="33"/> + <param name="percent" value="100"/> + <output name="output" file="fastq_qual_filter1a.out" /> + </test> + <test> + <!-- Test2: 80% of bases with quality 20 or higher --> + <param name="input" value="fastq_qual_filter1.fastq" /> + <param name="quality" value="20"/> + <param name="percent" value="80"/> + <output name="output" file="fastq_qual_filter1b.out" /> + </test> + </tests> + + <outputs> + <data format="input" name="output" metadata_source="input" + /> + </outputs> + + <help> +**What it does** + +This tool filters reads based on quality scores. + +.. class:: infomark + +Using **percent = 100** requires all cycles of all reads to be at least the quality cut-off value. + +.. class:: infomark + +Using **percent = 50** requires the median quality of the cycles (in each read) to be at least the quality cut-off value. + +-------- + +Quality score distribution (of all cycles) is calculated for each read. If it is lower than the quality cut-off value - the read is discarded. 
+ + +**Example**:: + + @CSHL_4_FC042AGOOII:1:2:214:584 + GACAATAAAC + +CSHL_4_FC042AGOOII:1:2:214:584 + 30 30 30 30 30 30 30 30 20 10 + +Using **percent = 50** and **cut-off = 30** - This read will not be discarded (the median quality is equal to / higher than 30). + +Using **percent = 90** and **cut-off = 30** - This read will be discarded (90% of the cycles do not have quality equal to / higher than 30). + +Using **percent = 100** and **cut-off = 20** - This read will be discarded (not all cycles have quality equal to / higher than 20). + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + </help> +</tool> +<!-- FASTQ-Quality-Filter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastq_quality_trimmer.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_quality_trimmer.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,100 @@ +<tool id="cshl_fastq_quality_trimmer" name="Trim By Quality"> + <description></description> + <command> +cat '$input' | +fastq_quality_trimmer +#if $input.ext == "fastqsanger": + -Q 33 +#elif $input.ext == "fastq": + -Q 64 +#end if + -v -t $cutoff -l $minlen -o '$output' +</command> + + <inputs> + <param format="fastq,fastqsanger" name="input" type="data" label="Library to clip" /> + + <param name="cutoff" size="4" type="integer" value="20"> + <label>Minimum quality score</label> + <help>Nucleotides below this quality will be trimmed</help> + </param> + + <param name="minlen" size="4" type="integer" value="1"> + <label>Minimum sequence length</label> + <help>Sequences shorter than this length will be discard. Leave at zero to keep all sequences</help> + </param> + </inputs> + + <tests> + <test> + <param name="input" value="fastq_quality_trimmer.fastq" ftype="fastq" /> + <param name="cutoff" value="30"/> + <param name="minlen" value="16"/> + <output name="output" file="fastq_quality_trimmer.out" /> + </test> + </tests> + + <outputs> + <data format="input" name="output" metadata_source="input" + /> + </outputs> + <help> +**What it does** + +This tool scans the sequence from the end for the first nucleotide to possess the specified minimum quality score. It will then trim (remove nucleotides from) the sequence after this position. After trimming, sequences that are shorter than the minimum length are discarded. 
+ +-------- + +**Example** + +Input FASTQ file (with 20 bases in each sequence):: + + @1 + TATGGTCAGAAACCATATGC + +1 + 40 40 40 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 + @2 + CAGCGAGGCTTTAATGCCAT + +2 + 40 40 40 40 40 40 40 40 30 20 19 20 19 19 19 19 19 19 19 19 + @3 + CAGCGAGGCTTTAATGCCAT + +3 + 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 19 19 19 + + +Trimming with a cutoff of 20, we get the following FASTQ file:: + + @1 + TATGGTCAGAAA + +1 + 40 40 40 40 40 40 40 40 40 40 40 20 + @2 + CAGCGAGGCTTT + +2 + 40 40 40 40 40 40 40 40 30 20 19 20 + @3 + CAGCGAGGC + +3 + 40 40 40 40 40 40 40 40 20 + +Trimming with a cutoff of 20 and a minimum length of 12, we get the following FASTQ file:: + + @1 + TATGGTCAGAAA + +1 + 40 40 40 40 40 40 40 40 40 40 40 20 + @2 + CAGCGAGGCTTT + +2 + 40 40 40 40 40 40 40 40 30 20 19 20 + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTQ-Quality-Trimmer is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastq_to_fasta.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_to_fasta.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,84 @@ +<tool id="cshl_fastq_to_fasta" name="FASTQ to FASTA"> + <description>converter</description> + <command> +cat '$input' | +fastq_to_fasta +#if $input.ext == "fastqsanger": + -Q 33 +#elif $input.ext == "fastq": + -Q 64 +#end if + $SKIPN $RENAMESEQ -o '$output' -v +</command> + <inputs> + <param format="fastq,fastqsanger" name="input" type="data" label="FASTQ Library to convert" /> + + <param name="SKIPN" type="select" label="Discard sequences with unknown (N) bases "> + <option value="">yes</option> + <option value="-n">no</option> + </param> + + <param name="RENAMESEQ" type="select" label="Rename sequence names in output file (reduces file size)"> + <option value="-r">yes</option> + <option value="">no</option> + </param> + + </inputs> + + <tests> + <test> + <!-- FASTQ-To-FASTA, keep N, don't rename --> + <param name="input" value="fastq_to_fasta1.fastq" /> + <param name="SKIPN" value=""/> + <param name="RENAMESEQ" value=""/> + <output name="output" file="fastq_to_fasta1a.out" /> + </test> + <test> + <!-- FASTQ-To-FASTA, discard N, rename --> + <param name="input" value="fastq_to_fasta1.fastq" /> + <param name="SKIPN" value="no"/> + <param name="RENAMESEQ" value="yes"/> + <output name="output" file="fastq_to_fasta1b.out" /> + </test> + </tests> + + <outputs> + <data format="fasta" name="output" metadata_source="input" + /> + </outputs> + +<help> + +**What it does** + +This tool converts data from Solexa format to FASTA format (scroll down for format description). 
+ +-------- + +**Example** + +The following data in Solexa-FASTQ format:: + + @CSHL_4_FC042GAMMII_2_1_517_596 + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +CSHL_4_FC042GAMMII_2_1_517_596 + 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 + +Will be converted to FASTA (with 'rename sequence names' = NO):: + + >CSHL_4_FC042GAMMII_2_1_517_596 + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +Will be converted to FASTA (with 'rename sequence names' = YES):: + + >1 + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ +</help> +</tool> +<!-- FASTQ-to-FASTA is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_artifacts_filter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_artifacts_filter.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,95 @@ +<tool id="cshl_fastx_artifacts_filter" name="Remove sequencing artifacts"> + <description></description> + <command> +cat '$input' | +fastx_artifacts_filter +#if $input.ext == "fastqsanger": + -Q 33 +#elif $input.ext == "fastq": + -Q 64 +#end if + -v -o '$output' +</command> + + <inputs> + <param format="fasta,fastq,fastqsanger" name="input" type="data" label="Library to filter" /> + + </inputs> + + <tests> + <test> + <!-- Filter FASTA file --> + <param name="input" value="fastx_artifacts1.fasta" /> + <output name="output" file="fastx_artifacts1.out" /> + </test> + <test> + <!-- Filter FASTQ file --> + <param name="input" value="fastx_artifacts2.fastq" ftype="fastqsanger" /> + <output name="output" file="fastx_artifacts2.out" /> + </test> + </tests> + + <outputs> + <data format="input" name="output" metadata_source="input" + /> + </outputs> +<help> +**What it does** + +This tool filters sequencing artifacts (reads with all but 3 identical bases). + +-------- + +**The following is an example of sequences which will be filtered out**:: + + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAACACAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC + AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + 
AAAAAAAAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAA + AAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAA + AAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAA + AAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAA + AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAA + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ +</help> +</tool> +<!-- FASTX-Artifacts-filter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_barcode_splitter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_barcode_splitter.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,75 @@ +<tool id="cshl_fastx_barcode_splitter" name="Barcode Splitter"> + <description></description> + <command interpreter="sh">fastx_barcode_splitter_galaxy_wrapper.sh $BARCODE $input "$input.name" "$output.files_path" --mismatches $mismatches --partial $partial $EOL > $output </command> + + <inputs> + <param format="txt" name="BARCODE" type="data" label="Barcodes to use" /> + <param format="fasta,fastq,fastqsanger" name="input" type="data" label="Library to split" /> + + <param name="EOL" type="select" label="Barcodes found at"> + <option value="--bol">Start of sequence (5' end)</option> + <option value="--eol">End of sequence (3' end)</option> + </param> + + <param name="mismatches" type="integer" size="3" value="2" label="Number of allowed mismatches" /> + + <param name="partial" type="integer" size="3" value="0" label="Number of allowed barcodes nucleotide deletions" /> + + </inputs> + + <tests> + <test> + <!-- Split a FASTQ file --> + <param name="BARCODE" value="fastx_barcode_splitter1.txt" /> + <param name="input" value="fastx_barcode_splitter1.fastq" /> + <param name="EOL" value="Start of sequence (5' end)" /> + <param name="mismatches" value="2" /> + <param name="partial" value="0" /> + <output name="output" file="fastx_barcode_splitter1.out" /> + </test> + </tests> + + <outputs> + <data format="html" name="output" + /> + </outputs> +<help> + +**What it does** + +This tool splits a FASTQ or FASTA file into several files, using barcodes as the split criteria. + +-------- + +**Barcode file Format** + +Barcode files are simple text files. +Each line should contain an identifier (descriptive name for the barcode), and the barcode itself (A/C/G/T), separated by a TAB character. +Example:: + + #This line is a comment (starts with a 'number' sign) + BC1 GATCT + BC2 ATCGT + BC3 GTGAT + BC4 TGTCT + +For each barcode, a new FASTQ file will be created (with the barcode's identifier as part of the file name). 
+Sequences matching the barcode will be stored in the appropriate file. + +One additional FASTQ file will be created (the 'unmatched' file), where sequences not matching any barcode will be stored. + +The output of this tool is an HTML file, displaying the split counts and the file locations. + +**Output Example** + +.. image:: ./static/fastx_icons/barcode_splitter_output_example.png + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTX-barcode-splitter is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_barcode_splitter_galaxy_wrapper.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_barcode_splitter_galaxy_wrapper.sh Wed Jul 10 06:13:48 2013 -0400 |
| [ |
| @@ -0,0 +1,85 @@ +#!/bin/sh + +# FASTX-toolkit - FASTA/FASTQ preprocessing tools. +# Copyright (C) 2009 A. Gordon (gordon@cshl.edu) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# +#This is a shell script wrapper for 'fastx_barcode_splitter.pl' +# +# 1. Output files are saved at the dataset's files_path directory. +# +# 2. 'fastx_barcode_splitter.pl' outputs a textual table. +# This script turns it into pretty HTML with working URL +# (so lazy users can just click on the URLs and get their files) + +if [ "$1x" = "x" ]; then + echo "Usage: $0 [BARCODE FILE] [FASTQ FILE] [LIBRARY_NAME] [OUTPUT_PATH]" >&2 + exit 1 +fi + +BARCODE_FILE="$1" +FASTQ_FILE="$2" +LIBNAME="$3" +OUTPUT_PATH="$4" +shift 4 +# The rest of the parameters are passed to the split program + +if [ "${OUTPUT_PATH}x" = "x" ]; then + echo "Usage: $0 [BARCODE FILE] [FASTQ FILE] [LIBRARY_NAME] [OUTPUT_PATH]" >&2 + exit 1 +fi + +#Sanitize library name, make sure we can create a file with this name +LIBNAME=${LIBNAME%.gz} +LIBNAME=${LIBNAME%.txt} +LIBNAME=$(echo "$LIBNAME" | tr -cd '[:alnum:]') + +if [ ! -r "$FASTQ_FILE" ]; then + echo "Error: Input file ($FASTQ_FILE) not found!" >&2 + exit 1 +fi +if [ ! -r "$BARCODE_FILE" ]; then + echo "Error: barcode file ($BARCODE_FILE) not found!" >&2 + exit 1 +fi +mkdir -p "$OUTPUT_PATH" +if [ ! 
-d "$OUTPUT_PATH" ]; then + echo "Error: failed to create output path '$OUTPUT_PATH'" >&2 + exit 1 +fi + +PUBLICURL="" +BASEPATH="$OUTPUT_PATH/" +#PREFIX="$BASEPATH"`date "+%Y-%m-%d_%H%M__"`"${LIBNAME}__" +PREFIX="$BASEPATH""${LIBNAME}__" +SUFFIX=".txt" + +RESULTS=`gzip -cdf "$FASTQ_FILE" | fastx_barcode_splitter.pl --bcfile "$BARCODE_FILE" --prefix "$PREFIX" --suffix "$SUFFIX" "$@"` +if [ $? != 0 ]; then + echo "error" +fi + +# +# Convert the textual tab-separated table into a simple HTML table, +# with the local path replaced with a valid URL +echo "<html><body><table border=1>" +echo "$RESULTS" | sed -r "s|$BASEPATH(.*)|<a href=\"\\1\">\\1</a>|" | sed ' +i<tr><td> +s|\t|</td><td>|g +a<\/td><\/tr> +' +echo "<p>" +echo "</table></body></html>" |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_clipper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_clipper.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,117 @@ +<tool id="cshl_fastx_clipper_ng" name="Clip" version="1.0.1" > + <description>adapter sequences</description> + <command> +cat '$input' | +fastx_clipper +#if $input.ext == "fastqsanger": + -Q 33 +#elif $input.ext == "fastq": + -Q 64 +#end if + -l $minlength -a '$clip_source.clip_sequence' -d $keepdelta -o '$output' -v $KEEP_N $DISCARD_OPTIONS + </command> + + <inputs> + <param format="fasta,fastq,fastqsanger" name="input" type="data" label="Library to clip" /> + + <param name="minlength" size="4" type="integer" value="15"> + <label>Minimum sequence length (after clipping, sequences shorter than this length will be discarded)</label> + </param> + + <conditional name="clip_source"> + <param name="clip_source_list" type="select" label="Source"> + <option value="prebuilt" selected="true">Standard (select from the list below)</option> + <option value="user">Enter custom sequence</option> + </param> + + <when value="user"> + <param name="clip_sequence" size="30" label="Enter custom clipping sequence" type="text" value="AATTGGCC" /> + </when> + + <when value="prebuilt"> + <param name="clip_sequence" type="select" label="Choose Adapter"> + <options from_file="fastx_clipper_sequences.txt"> + <column name="name" index="1"/> + <column name="value" index="0"/> + </options> + </param> + </when> + </conditional> + + <param name="keepdelta" size="2" type="integer" value="0"> + <label>enter non-zero value to keep the adapter sequence and x bases that follow it</label> + <help>use this for hairpin barcoding. keep at 0 unless you know what you're doing.</help> + </param> + + <param name="KEEP_N" type="select" label="Discard sequences with unknown (N) bases"> + <option value="">Yes</option> + <option value="-n">No</option> + </param> + + <param name="DISCARD_OPTIONS" type="select" label="Output options"> + <option value="-c">Output only clipped sequences (i.e. 
sequences which contained the adapter)</option> + <option value="-C">Output only non-clipped sequences (i.e. sequences which did not contain the adapter)</option> + <option value="">Output both clipped and non-clipped sequences</option> + </param> + + </inputs> + <tests> + <test> + <param name="input" value="fastx_clipper1.fastq" /> + <param name="maxmismatches" value="2" /> + <param name="minlength" value="15" /> + <param name="clip_source_list" value="user" /> + <param name="clip_sequence" value="CAATTGGTTAATCCCCCTATATA" /> + <param name="keepdelta" value="0" /> + <param name="KEEP_N" value="No" /> + <param name="DISCARD_OPTIONS" value="Output only clipped sequences (i.e. sequences which contained the adapter)" /> + <output name="output" file="fastx_clipper1a.out" /> + </test> + </tests> + + <outputs> + <data format="input" name="output" metadata_source="input" + /> + </outputs> + +<help> +**What it does** + +This tool clips adapters from the 3'-end of the sequences in a FASTA/FASTQ file. + +-------- + + +**Clipping Illustration:** + +.. image:: ../static/fastx_icons/fastx_clipper_illustration.png + + + + + + + + +**Clipping Example:** + +.. image:: ../static/fastx_icons/fastx_clipper_example.png + + + +**In the above example:** + +* Sequence no. 1 was discarded since it wasn't clipped (i.e. didn't contain the adapter sequence). (**Output** parameter). +* Sequence no. 5 was discarded --- its length (after clipping) was shorter than 15 nt (**Minimum Sequence Length** parameter). + + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> + +<!-- FASTX-Clipper is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_collapser.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_collapser.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,92 @@ +<tool id="cshl_fastx_collapser" name="Collapse"> + <description>sequences</description> + <command> +cat '$input' | +fastx_collapser +#if $input.ext == "fastqsanger": + -Q 33 +#elif $input.ext == "fastq": + -Q 64 +#end if + -v -o '$output' +</command> + + <inputs> + <param format="fastq,fastqsanger,fasta" name="input" type="data" label="Library to collapse" /> + </inputs> + + <tests> + <test> + <param name="input" value="fasta_collapser1.fasta" /> + <output name="output" file="fasta_collapser1.out" /> + </test> + </tests> + + <outputs> + <data format="fasta" name="output" metadata_source="input" + /> + </outputs> + <help> + +**What it does** + +This tool collapses identical sequences in a FASTQ or FASTA file into a single sequence. + +-------- + +**Example** + +Example Input File (Sequence "ATAT" appears multiple times):: + + >CSHL_2_FC0042AGLLOO_1_1_605_414 + TGCG + >CSHL_2_FC0042AGLLOO_1_1_537_759 + ATAT + >CSHL_2_FC0042AGLLOO_1_1_774_520 + TGGC + >CSHL_2_FC0042AGLLOO_1_1_742_502 + ATAT + >CSHL_2_FC0042AGLLOO_1_1_781_514 + TGAG + >CSHL_2_FC0042AGLLOO_1_1_757_487 + TTCA + >CSHL_2_FC0042AGLLOO_1_1_903_769 + ATAT + >CSHL_2_FC0042AGLLOO_1_1_724_499 + ATAT + +Example Output file:: + + >1-1 + TGCG + >2-4 + ATAT + >3-1 + TGGC + >4-1 + TGAG + >5-1 + TTCA + +.. class:: infomark + +Original Sequence Names / Lane descriptions (e.g. "CSHL_2_FC0042AGLLOO_1_1_742_502") are discarded. + +The output sequence name is composed of two numbers: the first is the sequence's number, the second is the multiplicity value. + +The following output:: + + >2-4 + ATAT + +means that the sequence "ATAT" is the second sequence in the file, and it appeared 4 times in the input FASTA file. + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTX-Collapser is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_nucleotides_distribution.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_nucleotides_distribution.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,51 @@ +<tool id="cshl_fastx_nucleotides_distribution" name="Draw nucleotides distribution chart"> + <description></description> + <command>fastx_nucleotide_distribution_graph.sh -t '$input.name' -i $input -o '$output'</command> + + <inputs> + <param format="txt" name="input" type="data" label="Statistics Text File" help="output of 'FASTX Statistics' tool" /> + </inputs> + + <outputs> + <data format="png" name="output" metadata_source="input" + /> + </outputs> +<help> + +**What it does** + +Creates a stacked-histogram graph for the nucleotide distribution in the Solexa library. + +.. class:: infomark + +**TIP:** Use the **FASTQ Statistics** tool to generate the report file needed for this tool. + +----- + +**Output Examples** + +The following chart clearly shows the barcode used at the 5'-end of the library: **GATCT** + +.. image:: ./static/fastx_icons/fastq_nucleotides_distribution_1.png + +In the following chart, one can almost 'read' the most abundant sequence by looking at the dominant values: **TGATA TCGTA TTGAT GACTG AA...** + +.. image:: ./static/fastx_icons/fastq_nucleotides_distribution_2.png + +The following chart shows a growing number of unknown (N) nucleotides towards later cycles (which might indicate a sequencing problem): + +.. image:: ./static/fastx_icons/fastq_nucleotides_distribution_3.png + +But most of the time, the chart will look rather random: + +.. image:: ./static/fastx_icons/fastq_nucleotides_distribution_4.png + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTQ-Nucleotides-Distribution is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_nucleotides_distribution_line.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_nucleotides_distribution_line.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,38 @@ +<tool id="cshl_fastx_nucleotides_distribution_line_plot" name="Draw nucleotides distribution line chart"> + <command> + fastx_nucleotide_distribution_line_graph.sh -i '$input' -o '$output' + </command> + + <inputs> + <param format="txt" name="input" type="data" label="Statistics Text File (output of 'FASTX Statistics' tool)" /> + </inputs> + + <outputs> + <data format="png" name="output" metadata_source="input" + /> + </outputs> +<help> + +**What it does** + +Creates a line and points graph for the nucleotide distribution in the Solexa library. + +.. class:: infomark + +**TIP:** Use the **FASTQ Statistics** tool to generate the report file needed for this tool. + +----- + +**Output Examples** + +.. image:: ../static/fastx_icons/fastq_nucleotides_distribution_line_graph.png + +------ + +This tool was created by Oliver Tam, based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTQ-Nucleotides-Distribution-Line is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_quality_statistics.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_quality_statistics.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,112 @@ +<tool id="cshl_fastx_quality_statistics" name="Compute quality statistics"> + <description></description> + <command> +cat '$input' | +fastx_quality_stats +#if $input.ext == "fastqsanger": + -Q 33 +#elif $input.ext == "fastq": + -Q 64 +#end if + -o '$output' +</command> + + <inputs> + <param format="fasta,fastq,fastqsanger" name="input" type="data" label="Library to analyse" /> + </inputs> + + <tests> + <test> + <param name="input" value="fastq_stats1.fastq" ftype="fastq"/> + <output name="output" file="fastq_stats1.out" /> + </test> + </tests> + + <outputs> + <data format="txt" name="output" metadata_source="input" + /> + </outputs> + +<help> + +**What it does** + +Creates quality statistics report for the given Solexa/FASTQ library. + +.. class:: infomark + +**TIP:** This statistics report can be used as input for **Quality Score** and **Nucleotides Distribution** tools. + +----- + +**The output file will contain the following fields:** + +* column = column number (1 to 36 for a 36-cycles read FASTQ file) +* count = number of bases found in this column. +* min = Lowest quality score value found in this column. +* max = Highest quality score value found in this column. +* sum = Sum of quality score values for this column. +* mean = Mean quality score value for this column. +* Q1 = 1st quartile quality score. +* med = Median quality score. +* Q3 = 3rd quartile quality score. +* IQR = Inter-Quartile range (Q3-Q1). +* lW = 'Left-Whisker' value (for boxplotting). +* rW = 'Right-Whisker' value (for boxplotting). +* A_Count = Count of 'A' nucleotides found in this column. +* C_Count = Count of 'C' nucleotides found in this column. +* G_Count = Count of 'G' nucleotides found in this column. +* T_Count = Count of 'T' nucleotides found in this column. +* N_Count = Count of 'N' nucleotides found in this column. 
+ + + +**Output Example**:: + + column count min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count G_Count T_Count N_Count + 1 6362991 -4 40 250734117 39.41 40 40 40 0 40 40 1396976 1329101 678730 2958184 0 + 2 6362991 -5 40 250531036 39.37 40 40 40 0 40 40 1786786 1055766 1738025 1782414 0 + 3 6362991 -5 40 248722469 39.09 40 40 40 0 40 40 2296384 984875 1443989 1637743 0 + 4 6362991 -5 40 247654797 38.92 40 40 40 0 40 40 1683197 1410855 1722633 1546306 0 + 5 6362991 -4 40 248214827 39.01 40 40 40 0 40 40 2536861 1167423 1248968 1409739 0 + 6 6362991 -5 40 248499903 39.05 40 40 40 0 40 40 1598956 1236081 1568608 1959346 0 + 7 6362991 -4 40 247719760 38.93 40 40 40 0 40 40 1692667 1822140 1496741 1351443 0 + 8 6362991 -5 40 245745205 38.62 40 40 40 0 40 40 2230936 1343260 1529928 1258867 0 + 9 6362991 -5 40 245766735 38.62 40 40 40 0 40 40 1702064 1306257 1336511 2018159 0 + 10 6362991 -5 40 245089706 38.52 40 40 40 0 40 40 1519917 1446370 1450995 1945709 0 + 11 6362991 -5 40 242641359 38.13 40 40 40 0 40 40 1717434 1282975 1387804 1974778 0 + 12 6362991 -5 40 242026113 38.04 40 40 40 0 40 40 1662872 1202041 1519721 1978357 0 + 13 6362991 -5 40 238704245 37.51 40 40 40 0 40 40 1549965 1271411 1973291 1566681 1643 + 14 6362991 -5 40 235622401 37.03 40 40 40 0 40 40 2101301 1141451 1603990 1515774 475 + 15 6362991 -5 40 230766669 36.27 40 40 40 0 40 40 2344003 1058571 1440466 1519865 86 + 16 6362991 -5 40 224466237 35.28 38 40 40 2 35 40 2203515 1026017 1474060 1651582 7817 + 17 6362991 -5 40 219990002 34.57 34 40 40 6 25 40 1522515 1125455 2159183 1555765 73 + 18 6362991 -5 40 214104778 33.65 30 40 40 10 15 40 1479795 2068113 1558400 1249337 7346 + 19 6362991 -5 40 212934712 33.46 30 40 40 10 15 40 1432749 1231352 1769799 1920093 8998 + 20 6362991 -5 40 212787944 33.44 29 40 40 11 13 40 1311657 1411663 2126316 1513282 73 + 21 6362991 -5 40 211369187 33.22 28 40 40 12 10 40 1887985 1846300 1300326 1318380 10000 + 22 6362991 -5 40 213371720 33.53 30 40 40 10 15 40 
542299 3446249 516615 1848190 9638 + 23 6362991 -5 40 221975899 34.89 36 40 40 4 30 40 347679 1233267 926621 3855355 69 + 24 6362991 -5 40 194378421 30.55 21 40 40 19 -5 40 433560 674358 3262764 1992242 67 + 25 6362991 -5 40 199773985 31.40 23 40 40 17 -2 40 944760 325595 1322800 3769641 195 + 26 6362991 -5 40 179404759 28.20 17 34 40 23 -5 40 3457922 156013 1494664 1254293 99 + 27 6362991 -5 40 163386668 25.68 13 28 40 27 -5 40 1392177 281250 3867895 821491 178 + 28 6362991 -5 40 156230534 24.55 12 25 40 28 -5 40 907189 981249 4174945 299437 171 + 29 6362991 -5 40 163236046 25.65 13 28 40 27 -5 40 1097171 3418678 1567013 280008 121 + 30 6362991 -5 40 151309826 23.78 12 23 40 28 -5 40 3514775 2036194 566277 245613 132 + 31 6362991 -5 40 141392520 22.22 10 21 40 30 -5 40 1569000 4571357 124732 97721 181 + 32 6362991 -5 40 143436943 22.54 10 21 40 30 -5 40 1453607 4519441 38176 351107 660 + 33 6362991 -5 40 114269843 17.96 6 14 30 24 -5 40 3311001 2161254 155505 734297 934 + 34 6362991 -5 40 140638447 22.10 10 20 40 30 -5 40 1501615 1637357 18113 3205237 669 + 35 6362991 -5 40 138910532 21.83 10 20 40 30 -5 40 1532519 3495057 23229 1311834 352 + 36 6362991 -5 40 117158566 18.41 7 15 30 23 -5 40 4074444 1402980 63287 822035 245 + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTQ-Statistics is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_quality_statistics_ng.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_quality_statistics_ng.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| b'@@ -0,0 +1,187 @@\n+<tool id="cshl_fastx_quality_statistics_ng" name="Compute quality statistics">\n+\t<description>(improved)</description>\n+\t<command>\n+cat \'$input\' |\n+fastx_quality_stats\n+#if $input.ext == "fastqsanger":\n+ -Q 33\n+#elif $input.ext == "fastq":\n+ -Q 64\n+#end if\n+ -N -o \'$output\'\n+</command>\n+\n+\t<inputs>\n+\t\t<param format="fasta,fastq,fastqsanger" name="input" type="data" label="Library to analyse" />\n+\t</inputs>\n+\n+\t<tests>\n+\t\t<test>\n+\t\t\t<param name="input" value="fastq_stats1.fastq" />\n+\t\t\t<output name="output" file="fastq_stats1.out" />\n+\t\t</test>\n+\t</tests>\n+\n+\t<outputs>\n+\t\t<data format="tabular" name="output" metadata_source="input"\n+\t\t/>\n+\t</outputs>\n+\n+<help>\n+\n+**What it does**\n+\n+Creates quality statistics report for the given Solexa/FASTQ library.\n+\n+.. class:: warningmark\n+\n+The output format is different than the old quality statistics tool. It can\'t be used for the quality-chart and nucleotide distribution tools (without further processing)\n+\n+-----\n+\n+**The output file will contain the following fields:**\n+\n+* cycle = cycle number (1 to 36 for a 36-cycles read solexa file)\n+* max-count = maximum number of bases (in all cycles)\n+\n+For each nucleotide type of each cycle (ALL/A/C/G/T/N), the following columns are generated:\n+\n+* count = number of bases found in this column.\n+* min = Lowest quality score value found in this column.\n+* max = Highest quality score value found in this column.\n+* sum = Sum of quality score values for this column.\n+* mean = Mean quality score value for this column.\n+* Q1\t= 1st quartile quality score.\n+* med\t= Median quality score.\n+* Q3\t= 3rd quartile quality score.\n+* IQR\t= Inter-Quartile range (Q3-Q1).\n+* lW\t= \'Left-Whisker\' value (for boxplotting).\n+* rW\t= \'Right-Whisker\' value (for boxplotting).\n+\n+\n+(see column list at the bottom of this page)\n+\n+-----\n+\n+**Output Example**::\n+\n+ 
cycle\tmax_count\tALL_count\tALL_min\tALL_max\tALL_sum\tALL_mean\tALL_Q1\tALL_med\tALL_Q3\tALL_IQR\tALL_lW\tALL_rW\tA_count\tA_min\tA_max\tA_sum\tA_mean\tA_Q1\tA_med\tA_Q3\tA_IQR\tA_lW\tA_rW\tC_count\tC_min\tC_max\tC_sum\tC_mean\tC_Q1\tC_med\tC_Q3\tC_IQR\tC_lW\tC_rW\tG_count\tG_min\tG_max\tG_sum\tG_mean\tG_Q1\tG_med\tG_Q3\tG_IQR\tG_lW\tG_rW\tT_count\tT_min\tT_max\tT_sum\tT_mean\tT_Q1\tT_med\tT_Q3\tT_IQR\tT_lW\tT_rW\tN_count\tN_min\tN_max\tN_sum\tN_mean\tN_Q1\tN_med\tN_Q3\tN_IQR\tN_lW\tN_rW\n+ 1\t2827201\t2827201\t5\t34\t86622739\t30.64\t33\t33\t33\t0\t33\t33\t31337\t5\t34\t841248\t26.85\t23\t30\t33\t10\t8\t34\t9269\t5\t34\t154582\t16.68\t5\t12\t30\t25\t5\t34\t2095406\t5\t34\t64401991\t30.73\t33\t33\t33\t0\t33\t33\t689133\t5\t34\t21214602\t30.78\t33\t33\t33\t0\t33\t33\t2056\t5\t13\t10316\t5.02\t5\t5\t5\t0\t5\t5\n+ 2\t2827201\t2827201\t5\t34\t81416729\t28.80\t27\t33\t33\t6\t18\t34\t1860337\t5\t34\t56188709\t30.20\t33\t33\t33\t0\t33\t33\t21274\t5\t34\t420221\t19.75\t11\t21\t30\t19\t5\t34\t862406\t5\t34\t22835654\t26.48\t21\t32\t33\t12\t5\t34\t81979\t5\t34\t1964575\t23.96\t17\t26\t33\t16\t5\t34\t1205\t5\t24\t7570\t6.28\t5\t5\t5\t0\t5\t5\n+ 3\t2827201\t2827201\t5\t34\t89142476\t31.53\t33\t33\t34\t1\t32\t34\t18121\t5\t34\t203489\t11.23\t5\t5\t15\t10\t5\t30\t45699\t5\t34\t944362\t20.66\t5\t26\t33\t28\t5\t34\t79472\t5\t34\t859251\t10.81\t5\t5\t12\t7\t5\t22\t2682082\t5\t34\t87126165\t32.48\t33\t33\t34\t1\t32\t34\t1827\t5\t18\t9209\t5.04\t5\t5\t5\t0\t5\t5\n+ 4\t2827201\t2827201\t5\t34\t90033575\t31.85\t33\t34\t34\t1\t32\t34\t172281\t5\t34\t2905831\t16.87\t5\t11\t33\t28\t5\t34\t2597111\t5\t34\t85653490\t32.98\t33\t34\t34\t1\t32\t34\t24461\t5\t34\t643275\t26.30\t23\t33\t33\t10\t8\t34\t32749\t5\t34\t827798\t25.28\t17\t33\t33\t16\t5\t34\t599\t5\t21\t3181\t5.31\t5\t5\t5\t0\t5\t5\n+ 
5\t2827201\t2827201\t5\t34\t89641650\t31.71\t33\t33\t34\t1\t32\t34\t26774\t5\t34\t476388\t17.79\t5\t13\t33\t28\t5\t34\t58691\t5\t34\t891506\t15.19\t5\t5\t32\t27\t5\t34\t54916\t5\t34\t714335\t13.01\t5\t5\t24\t19\t5\t34\t2685062\t5\t34\t87550414\t32.61\t33\t33\t34\t1\t32\t34\t1758\t5\t21\t9007\t5.12\t5\t5\t5\t0\t5\t5\n+ 6\t2827201\t2827201\t5\t34\t84595812\t29.92\t29\t33\t33\t4\t23\t34\t1204450\t5\t34\t36229599\t30.08\t29\t33\t33\t4\t23\t34\t463119\t5\t34\t13924930\t30.07\t30\t33\t33\t3\t26\t34\t712076\t5\t34\t21093763\t29.62\t28\t33\t33\t5\t21\t34\t447508\t5\t34\t13347178\t29.83\t29\t33\t33\t4\t23\t34\t48\t5\t21\t342\t7.12\t5\t5\t7\t2\t5\t10\n+ 7\t2827201\t2827201\t5\t34\t81404399\t28.79\t26\t33\t33\t7\t16\t34\t912751\t5\t34\t26241597\t28.75\t26\t33\t33\t7\t16\t34\t540022\t5\t34\t1584'..b'\t5\t5\t11\t6\t5\t20\n+ 26\t2827201\t2827201\t5\t34\t77523225\t27.42\t24\t31\t33\t9\t11\t34\t783881\t5\t34\t21162231\t27.00\t24\t30\t33\t9\t11\t34\t645075\t5\t34\t18368273\t28.47\t27\t33\t33\t6\t18\t34\t617885\t5\t34\t15635967\t25.31\t21\t27\t33\t12\t5\t34\t779368\t5\t34\t22349766\t28.68\t27\t33\t33\t6\t18\t34\t992\t5\t27\t6988\t7.04\t5\t5\t5\t0\t5\t5\n+ 27\t2827201\t2827201\t5\t34\t76792679\t27.16\t24\t31\t33\t9\t11\t34\t788575\t5\t34\t21113021\t26.77\t23\t30\t33\t10\t8\t34\t638456\t5\t34\t18023093\t28.23\t26\t32\t33\t7\t16\t34\t624665\t5\t34\t15600176\t24.97\t21\t27\t33\t12\t5\t34\t774483\t5\t34\t22049478\t28.47\t27\t32\t33\t6\t18\t34\t1022\t5\t27\t6911\t6.76\t5\t5\t5\t0\t5\t5\n+ 28\t2827201\t2827201\t5\t34\t76446203\t27.04\t24\t30\t33\t9\t11\t34\t783001\t5\t34\t20828394\t26.60\t22\t30\t33\t11\t6\t34\t639424\t5\t34\t17921638\t28.03\t26\t32\t33\t7\t16\t34\t621361\t5\t34\t15437055\t24.84\t21\t27\t33\t12\t5\t34\t782313\t5\t34\t22251729\t28.44\t27\t32\t33\t6\t18\t34\t1102\t5\t26\t7387\t6.70\t5\t5\t5\t0\t5\t5\n+ 
29\t2827201\t2827201\t5\t34\t75869397\t26.84\t24\t30\t33\t9\t11\t34\t777718\t5\t34\t20485923\t26.34\t22\t30\t33\t11\t6\t34\t645283\t5\t34\t18004108\t27.90\t26\t31\t33\t7\t16\t34\t627295\t5\t34\t15440771\t24.61\t21\t27\t33\t12\t5\t34\t775728\t5\t34\t21930783\t28.27\t26\t32\t33\t7\t16\t34\t1177\t5\t27\t7812\t6.64\t5\t5\t5\t0\t5\t5\n+ 30\t2827201\t2827201\t5\t34\t75137420\t26.58\t22\t30\t33\t11\t6\t34\t779313\t5\t34\t20336426\t26.10\t22\t29\t33\t11\t6\t34\t646974\t5\t34\t17887122\t27.65\t24\t31\t33\t9\t11\t34\t626980\t5\t34\t15205903\t24.25\t19\t26\t33\t14\t5\t34\t772774\t5\t34\t21699992\t28.08\t26\t31\t33\t7\t16\t34\t1160\t5\t27\t7977\t6.88\t5\t5\t5\t0\t5\t5\n+ 31\t2827201\t2827201\t5\t34\t74256817\t26.27\t22\t30\t33\t11\t6\t34\t780211\t5\t34\t20171360\t25.85\t21\t29\t33\t12\t5\t34\t645371\t5\t34\t17606830\t27.28\t24\t31\t33\t9\t11\t34\t629456\t5\t34\t14997599\t23.83\t18\t26\t33\t15\t5\t34\t771023\t5\t34\t21473316\t27.85\t26\t31\t33\t7\t16\t34\t1140\t5\t27\t7712\t6.76\t5\t5\t5\t0\t5\t5\n+ 32\t2827201\t2827201\t5\t34\t73624704\t26.04\t22\t29\t33\t11\t6\t34\t776741\t5\t34\t19802248\t25.49\t21\t28\t33\t12\t5\t34\t642994\t5\t34\t17408712\t27.07\t24\t30\t33\t9\t11\t34\t631699\t5\t34\t14925494\t23.63\t18\t26\t32\t14\t5\t34\t774316\t5\t34\t21478972\t27.74\t26\t31\t33\t7\t16\t34\t1451\t5\t27\t9278\t6.39\t5\t5\t5\t0\t5\t5\n+ 33\t2827201\t2827201\t5\t34\t72833249\t25.76\t21\t29\t33\t12\t5\t34\t775426\t5\t34\t19509710\t25.16\t21\t27\t33\t12\t5\t34\t644177\t5\t34\t17265182\t26.80\t24\t30\t33\t9\t11\t34\t627490\t5\t34\t14612407\t23.29\t18\t26\t31\t13\t5\t34\t778476\t5\t34\t21435400\t27.54\t24\t31\t33\t9\t11\t34\t1632\t5\t27\t10550\t6.46\t5\t5\t5\t0\t5\t5\n+ 
34\t2827201\t2827201\t5\t34\t71937995\t25.44\t21\t28\t33\t12\t5\t34\t772803\t5\t34\t19226676\t24.88\t21\t27\t33\t12\t5\t34\t647127\t5\t34\t17098061\t26.42\t22\t30\t33\t11\t6\t34\t628686\t5\t34\t14382900\t22.88\t17\t24\t31\t14\t5\t34\t777289\t5\t34\t21221307\t27.30\t24\t30\t33\t9\t11\t34\t1296\t5\t27\t9051\t6.98\t5\t5\t5\t0\t5\t5\n+ 35\t2827201\t2827201\t5\t34\t70604895\t24.97\t21\t27\t33\t12\t5\t34\t769554\t5\t34\t18722160\t24.33\t19\t27\t32\t13\t5\t34\t643915\t5\t34\t16662802\t25.88\t21\t28\t33\t12\t5\t34\t627642\t5\t34\t14115224\t22.49\t17\t24\t30\t13\t5\t34\t784712\t5\t34\t21095775\t26.88\t24\t30\t33\t9\t11\t34\t1378\t5\t27\t8934\t6.48\t5\t5\t5\t0\t5\t5\n+ 36\t2827201\t2827201\t5\t34\t71705284\t25.36\t21\t28\t33\t12\t5\t34\t775278\t5\t34\t18770248\t24.21\t18\t27\t33\t15\t5\t34\t634906\t5\t34\t16703972\t26.31\t22\t30\t33\t11\t6\t34\t630819\t5\t34\t14421307\t22.86\t17\t24\t31\t14\t5\t34\t784826\t5\t34\t21800547\t27.78\t26\t32\t33\t7\t16\t34\t1372\t5\t27\t9210\t6.71\t5\t5\t5\t0\t5\t5\n+\n+-----\n+\n+All columns::\n+\n+ cycle\n+ max_count\n+ ALL_count\n+ ALL_min\n+ ALL_max\n+ ALL_sum\n+ ALL_mean\n+ ALL_Q1\n+ ALL_med\n+ ALL_Q3\n+ ALL_IQR\n+ ALL_lW\n+ ALL_rW\n+ A_count\n+ A_min\n+ A_max\n+ A_sum\n+ A_mean\n+ A_Q1\n+ A_med\n+ A_Q3\n+ A_IQR\n+ A_lW\n+ A_rW\n+ C_count\n+ C_min\n+ C_max\n+ C_sum\n+ C_mean\n+ C_Q1\n+ C_med\n+ C_Q3\n+ C_IQR\n+ C_lW\n+ C_rW\n+ G_count\n+ G_min\n+ G_max\n+ G_sum\n+ G_mean\n+ G_Q1\n+ G_med\n+ G_Q3\n+ G_IQR\n+ G_lW\n+ G_rW\n+ T_count\n+ T_min\n+ T_max\n+ T_sum\n+ T_mean\n+ T_Q1\n+ T_med\n+ T_Q3\n+ T_IQR\n+ T_lW\n+ T_rW\n+ N_count\n+ N_min\n+ N_max\n+ N_sum\n+ N_mean\n+ N_Q1\n+ N_med\n+ N_Q3\n+ N_IQR\n+ N_lW\n+ N_rW\n+\n+------\n+\n+This tool is based on `FASTX-toolkit`__ by Assaf Gordon.\n+\n+ .. __: http://hannonlab.cshl.edu/fastx_toolkit/\n+\n+</help>\n+</tool>\n+<!-- FASTQ-Statistics is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) -->\n' |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_renamer.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_renamer.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,75 @@ +<tool id="cshl_fastx_renamer" name="Rename sequences" version="0.0.11" > + <description></description> + <command> +cat '$input' | +fastx_renamer +#if $input.ext == "fastqsanger": + -Q 33 +#elif $input.ext == "fastq": + -Q 64 +#end if + -n $TYPE -o '$output' -v +</command> + <inputs> + <param format="fastq,fastqsanger,fasta" name="input" type="data" label="FASTQ/A Library to rename" /> + + <param name="TYPE" type="select" label="Rename sequence identifiers to"> + <option value="SEQ">Nucleotides sequence</option> + <option value="COUNT">Numeric Counter</option> + </param> + </inputs> + <tests> + <test> + <param name="input" value="fastx_renamer1.fastq" ftype="fastq"/> + <param name="TYPE" value="SEQ" /> + <output name="output" file="fastx_renamer1.out" /> + </test> + </tests> + + <outputs> + <data format="input" name="output" metadata_source="input" /> + </outputs> + +<help> + +**What it does** + +This tool renames the sequence identifiers in a FASTQ/A file. + +.. class:: infomark + +Use this tool at the beginning of your workflow, as a way to keep the original sequence (before trimming, clipping, barcode-removal, etc). + +-------- + +**Example** + +The following Solexa-FASTQ file:: + + @CSHL_4_FC042GAMMII_2_1_517_596 + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +CSHL_4_FC042GAMMII_2_1_517_596 + 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 + +Renamed to **nucleotides sequence**:: + + @GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 + +Renamed to **numeric counter**:: + + @1 + GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT + +1 + 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ +</help> +</tool> +<!-- FASTX-renamer is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_reverse_complement.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_reverse_complement.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,68 @@ +<tool id="cshl_fastx_reverse_complement" name="Reverse-Complement"> + <description></description> + <command> +cat '$input' | +fastx_reverse_complement +#if $input.ext == "fastqsanger": + -Q 33 +#elif $input.ext == "fastq": + -Q 64 +#end if + -v -o '$output' +</command> + <inputs> + <param format="fasta,fastq,fastqsanger" name="input" type="data" label="Library to reverse-complement" /> + </inputs> + + <tests> + <test> + <!-- Reverse-complement a FASTA file --> + <param name="input" value="fastx_rev_comp1.fasta" /> + <output name="output" file="fastx_reverse_complement1.out" /> + </test> + <test> + <!-- Reverse-complement a FASTQ file --> + <param name="input" value="fastx_rev_comp2.fastq" /> + <output name="output" file="fastx_reverse_complement2.out" /> + </test> + </tests> + + <outputs> + <data format="input" name="output" metadata_source="input" + /> + </outputs> + +<help> +**What it does** + +This tool reverse-complements each sequence in a library. +If the library is a FASTQ, the quality-scores are also reversed. + +-------- + +**Example** + +Input FASTQ file:: + + @CSHL_1_FC42AGWWWXX:8:1:3:740 + TGTCTGTAGCCTCNTCCTTGTAATTCAAAGNNGGTA + +CSHL_1_FC42AGWWWXX:8:1:3:740 + 33 33 33 34 33 33 33 33 33 33 33 33 27 5 27 33 33 33 33 33 33 27 21 27 33 32 31 29 26 24 5 5 15 17 27 26 + + +Output FASTQ file:: + + @CSHL_1_FC42AGWWWXX:8:1:3:740 + TACCNNCTTTGAATTACAAGGANGAGGCTACAGACA + +CSHL_1_FC42AGWWWXX:8:1:3:740 + 26 27 17 15 5 5 24 26 29 31 32 33 27 21 27 33 33 33 33 33 33 27 5 27 33 33 33 33 33 33 33 33 34 33 33 33 + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTX-reverse-complement is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_trimmer.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_trimmer.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,85 @@ +<tool id="cshl_fastx_trimmer" name="Trim sequences"> + <description>to fixed length</description> + <command> +cat '$input' | +fastx_trimmer +#if $input.ext == "fastqsanger": + -Q 33 +#elif $input.ext == "fastq": + -Q 64 +#end if + -v -f $first -l $last -o '$output' +</command> + <inputs> + <param format="fasta,fastq,fastqsanger" name="input" type="data" label="Library to clip" /> + + <param name="first" size="4" type="integer" value="1"> + <label>First base to keep</label> + </param> + + <param name="last" size="4" type="integer" value="21"> + <label>Last base to keep</label> + </param> + </inputs> + + <tests> + <test> + <!-- Trim a FASTA file - remove first four bases (e.g. a barcode) --> + <param name="input" value="fastx_trimmer1.fasta" /> + <param name="first" value="5"/> + <param name="last" value="36"/> + <output name="output" file="fastx_trimmer1.out" /> + </test> + <test> + <!-- Trim a FASTQ file - remove last 9 bases (e.g. keep only miRNA length sequences) --> + <param name="input" value="fastx_trimmer2.fastq" /> + <param name="first" value="1"/> + <param name="last" value="27"/> + <output name="output" file="fastx_trimmer2.out" /> + </test> + </tests> + + <outputs> + <data format="input" name="output" metadata_source="input" + /> + </outputs> + <help> +**What it does** + +This tool trims (cuts nucleotides from) sequences in a FASTA/Q file. + +-------- + +**Example** + +Input Fasta file (with 36 bases in each sequence):: + + >1-1 + TATGGTCAGAAACCATATGCAGAGCCTGTAGGCACC + >2-1 + CAGCGAGGCTTTAATGCCATTTGGCTGTAGGCACCA + + +Trimming with First=1 and Last=21, we get a FASTA file with 21 bases in each sequence (starting from the first base):: + + >1-1 + TATGGTCAGAAACCATATGCA + >2-1 + CAGCGAGGCTTTAATGCCATT + +Trimming with First=6 and Last=10 will generate a FASTA file with 5 bases (bases 6,7,8,9,10) in each sequence:: + + >1-1 + TCAGA + >2-1 + AGGCT + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTX-Trimmer is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_trimmer_from_end.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_trimmer_from_end.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,81 @@ +<tool id="cshl_fastx_end_trimmer" name="Trim End"> + <description>of sequences</description> + <command> +cat '$input' | +fastx_trimmer +#if $input.ext == "fastqsanger": + -Q 33 +#elif $input.ext == "fastq": + -Q 64 +#end if + -v -t $trimnum -m $minlen -o '$output' +</command> + <inputs> + <param format="fasta,fastq,fastqsanger" name="input" type="data" label="Library to clip" /> + + <param name="trimnum" size="4" type="integer" value="5"> + <label>Number of nucleotides to be trimmed</label> + <help>This will trim from the end of the sequences</help> + </param> + + <param name="minlen" size="4" type="integer" value="10"> + <label>Minimum sequence length</label> + <help>Sequences shorter than this length will be discarded</help> + </param> + </inputs> + + <tests> + <test> + <param name="input" value="fastx_trimmer_from_end1.fasta" /> + <param name="trimnum" value="2"/> + <param name="minlen" value="16"/> + <output name="output" file="fastx_trimmer_from_end1.out" /> + </test> + </tests> + + <outputs> + <data format="input" name="output" metadata_source="input" + /> + </outputs> + <help> +**What it does** + +This tool trims (cuts nucleotides from) sequences in a FASTQ/FASTA file from the 3' end. + +.. class:: infomark + +When trimming a FASTQ file, the quality scores will be trimmed appropriately (to the same length as the corresponding sequence). + +-------- + +**Example** + +Input Fasta file:: + + >1-1 + TATGGTCAGAAACCATATGCAGAGCCTGTAGGCACC + >2-1 + CAGCGAGGCTTTAATGCCATT + + +Trimming 5 nucleotides from the end, and discarding sequences shorter than 10, we get the following FASTA file:: + + >1-1 + TATGGTCAGAAACCATATGCAGAGCCTGTAG + >2-1 + CAGCGAGGCTTTAATG + +Trimming 10 nucleotides from the end, and discarding sequences shorter than 15, we get the following FASTA file:: + + >1-1 + TATGGTCAGAAACCATATGCAGAGCC + +------ + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTX-Trimmer-End is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 fastx_uncollapser.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_uncollapser.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,64 @@ +<tool id="cshl_fastx_uncollapser" name="Uncollapse" version="1.0.0"> + <description>sequences</description> + <command> +cat '$input' | +fastx_uncollapser -v -o '$output' +</command> + <inputs> + <param format="fasta" name="input" type="data" label="Collapsed FASTA file" /> + </inputs> + + <tests> + <test> + <param name="input" value="fasta_uncollapser1.fasta" /> + <output name="output" file="fasta_uncollapser1.out" /> + </test> + </tests> + + <outputs> + <data format="fasta" name="output" metadata_source="input" + /> + </outputs> + <help> + +**What it does** + +This tool uncollapses a previously-collapsed FASTA file. It reads each collapsed sequence and generates multiple sequences based on the collapsed read count. + +-------- + +**Example** + +Example Input - a collapsed FASTA file (Sequence "ATAT" has four collapsed reads):: + + >1-1 + TGCG + >2-4 + ATAT + +Example Output - uncollapsed FASTA file (Sequence "ATAT" now appears as 4 separate sequences):: + + >1 + TGCG + >2 + ATAT + >3 + ATAT + >4 + ATAT + >5 + ATAT + +.. class:: infomark + +The original sequence IDs (with the read counts) are discarded, and each sequence is given a numerical name. + +----- + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTX-Uncollapser is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |
| b |
| diff -r 000000000000 -r 78a7d28f2a15 seqid_uncollapser.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seqid_uncollapser.xml Wed Jul 10 06:13:48 2013 -0400 |
| b |
| @@ -0,0 +1,75 @@ +<tool id="cshl_seqid_uncollapser" name="Uncollapse rows"> + <description>containing collapsed sequence IDs</description> + <command> +cat '$input' | +fastx_uncollapser -c $idcol -v -o '$output' +</command> + <inputs> + <param format="tabular,pslx" name="input" type="data" label="Library to uncollapse" /> + <param name="idcol" label="Column with collapsed sequence-identifier" type="data_column" data_ref="input" accept_default="false" > + <help>This column contains the sequence id from a collapsed FASTA file in the form of "(seq number)-(read count)" (e.g. 15-4). Use 10 if you're analyzing BLAT output</help> + </param> + </inputs> + <tests> + <test> + <param name="input" value="fastx_seqid_uncollapse1.psl" /> + <param name="idcol" value="10" /> + <param name="output" file="fastx_seqid_uncollapse1.out" /> + </test> + </tests> + + <outputs> + <data format="input" name="output" metadata_source="input" + /> + </outputs> + <help> + +**What it does** + +This tool reads a row (in a table) containing a collapsed sequence ID, and duplicates the row according to the read count in that ID. + +.. class:: warningmark + +You must specify the column containing the collapsed sequence ID (e.g. 15-4). + +-------- + +**Example Input File** + +The following input file contains two collapsed sequence identifiers at column 10: *84-2* and *87-5* + +(meaning the first has multiplicity-count of 2 and the second has multiplicity count of 5):: + + + 23 0 0 0 0 0 0 0 + 84-2 ... + 22 0 0 0 0 0 0 0 + 87-5 ... + + +**Output Example** + +After **uncollapsing** (on column 10), the line of the first sequence-identifier is repeated *twice*, and the line of the second sequence-identifier is repeated *five* times:: + + 23 0 0 0 0 0 0 0 + 84-2 ... + 23 0 0 0 0 0 0 0 + 84-2 ... + 22 0 0 0 0 0 0 0 + 87-5 ... + 22 0 0 0 0 0 0 0 + 87-5 ... + 22 0 0 0 0 0 0 0 + 87-5 ... + 22 0 0 0 0 0 0 0 + 87-5 ... + 22 0 0 0 0 0 0 0 + 87-5 ... 
+ + +Uncollapsing a text file allows analysis of collapsed FASTA files to be used with any tool which doesn't 'understand' collapsed multiplicity counts. + +.. class:: infomark + +See the *Collapse* tool in the *FASTA Manipulation* category for more details about collapsing FASTA files. + +----- + +This tool is based on `FASTX-toolkit`__ by Assaf Gordon. + + .. __: http://hannonlab.cshl.edu/fastx_toolkit/ + +</help> +</tool> +<!-- FASTX-Uncollapser is part of the FASTX-toolkit, by A.Gordon (gordon@cshl.edu) --> |