changeset 0:2e150ed1b76e draft

Uploaded
author devteam
date Wed, 29 Apr 2015 12:06:47 -0400
parents
children 0fb894bd8eba
files cd_hit_dup.xml test-data/cd-hit-dup_in.fastqsanger test-data/cd-hit-dup_out.dup_clusters.tabular test-data/cd-hit-dup_out.fastqsanger test-data/cd-hit-dup_out_chimera.chimeric_clusters.tabular test-data/cd-hit-dup_out_chimera.dup_clusters.tabular test-data/cd-hit-dup_out_chimera.fastqsanger tool_dependencies.xml
diffstat 8 files changed, 229 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cd_hit_dup.xml	Wed Apr 29 12:06:47 2015 -0400
@@ -0,0 +1,135 @@
+<tool id="cd_hit_dup" name="cd-hit-dup" version="0.0.1">
+    <requirements>
+        <requirement type="package" version="0.5-2012-03-07-fix-dan-gh-0.0.1">cd-hit-auxtools</requirement>
+    </requirements>
+    <stdio> <!-- Every non-zero exit status, negative or positive, is treated as a job failure. -->
+        <exit_code range=":-1" level="fatal"/>
+        <exit_code range="1:" level="fatal"/>
+    </stdio>
+
+    <command><![CDATA[
+        cd-hit-dup
+        -i "${ fastq_input.fastq_input1 }"
+        #if str( $fastq_input.fastq_input_selector ) == "paired":
+            -i2 "${ fastq_input.fastq_input2 }"
+        #elif str( $fastq_input.filter_chimeras.filter_chimeras_selector ) == "true":
+            -f "true"
+            -s "${ fastq_input.filter_chimeras.min_chimeric_length }"
+            -a "${ fastq_input.filter_chimeras.abundance_cutoff }"
+            -b "${ fastq_input.filter_chimeras.abundance_ratio }"
+            -p "${ fastq_input.filter_chimeras.dissimilarity_control }"
+        #end if
+        -u "${ prefix_length }"
+        -m "${ match_length }"
+        #if str( $mismatches_allowed ) != "":
+            #if float( str( $mismatches_allowed ) ) == int( float( str( $mismatches_allowed ) ) ):
+                -e "${ int( float( str( $mismatches_allowed ) ) ) }"
+            #else:
+                -e "${ mismatches_allowed }"
+            #end if
+        #end if
+        -d "${ description_length }"
+        -o "output"
+    ]]>
+    </command> <!-- Summary of the template above: -i, -u, -m, -d and -o are always passed; paired input adds -i2, otherwise single input with chimera filtering enabled adds -f, -s, -a, -b and -p; -e is passed only when a mismatch limit was entered, and a whole-number limit is written as an integer. Results are written to the fixed name "output". -->
+    <inputs> <!-- Read input (single or paired) followed by the general duplicate-removal options. -->
+        <conditional name="fastq_input">
+            <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="For joined Paired-end reads choose Single.">
+                <option value="paired">Paired</option>
+                <option value="single" selected="true">Single</option>
+            </param>
+            <when value="paired"> <!-- two read datasets; no chimera-filter options are defined for this mode -->
+                <param name="fastq_input1" type="data" format="fastqsanger,fasta" label="Select first set of reads" help="Specify dataset with forward reads"/>
+                <param name="fastq_input2" type="data" format="fastqsanger,fasta" label="Select second set of reads" help="Specify dataset with reverse reads"/>
+            </when>
+            <when value="single"> <!-- one read dataset plus the optional chimera-filter settings -->
+                <param name="fastq_input1" type="data" format="fastqsanger,fasta" label="Select read dataset" help="Specify dataset with single reads"/>
+                <conditional name="filter_chimeras">
+                    <param name="filter_chimeras_selector" type="select" label="Filter out chimeric clusters">
+                        <option value="true">Yes</option>
+                        <option value="false" selected="true">No</option>
+                    </param>
+                    <when value="true">
+                        <param name="min_chimeric_length" type="integer" value="30" min="20" label="Minimum length of common sequence shared between a chimeric read and each of its parents" help="-s"/>
+                        <param name="abundance_cutoff" type="integer" value="1" min="1" label="Abundance cutoff" help="-a; Tool Author recommend default of 2, but this would require the chimera itself to need 2 copies"/>
+                        <param name="abundance_ratio" type="integer" value="1" min="1" label="Abundance ratio between a parent read and a chimeric read" help="-b"/>
+                        <param name="dissimilarity_control" type="integer" value="1" min="1" label="Dissimilarity control for chimeric filtering" help="-p"/>
+                    </when>
+                    <when value="false">
+                        <!-- chimera filtering is off: no additional parameters -->
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+        <param name="prefix_length" type="integer" value="0" min="0" label="Length of prefix to be used in the analysis" help="-u"/>
+        <param name="match_length" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Match length" help="-m; specifies whether the lengths of two reads should be exactly the same to be considered as duplicates. "/>
+        <param name="mismatches_allowed" type="float" optional="true" value="" min="0" label="Maximum number/percent of mismatches allowed" help="-e"/>
+        <param name="description_length" type="integer" value="0" min="0" label="Description length" help="-d; 0 means truncate at the first whitespace character"/>
+    </inputs>
+    <outputs> <!-- Datasets are collected from the working directory, where cd-hit-dup writes under the fixed prefix "output". -->
+        <data format="fastqsanger" format_source="fastq_input1" name="output_reads" label="${tool.name} on ${on_string} (filtered reads)" from_work_dir="output"/>
+        <data format="tabular" name="output_duplicate_clusters" label="${tool.name} on ${on_string} (duplicate clusters)" from_work_dir="output.clstr"/>
+        <data format="tabular" name="output_chimeric_clusters" label="${tool.name} on ${on_string} (chimeric clusters)" from_work_dir="output2.clstr"> <!-- Only created for single-end input with chimera filtering on. The selector is tested first because paired input has no filter_chimeras entry, and a filter that raises does not suppress the dataset. -->
+            <filter>str( fastq_input['fastq_input_selector'] ) == "single" and str( fastq_input['filter_chimeras']['filter_chimeras_selector'] ) == "true"</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test> <!-- Single-end input, all other options left at their defaults: checks the filtered reads and the duplicate-cluster table. -->
+            <param name="fastq_input|fastq_input_selector" value="single" />
+            <param name="fastq_input|fastq_input1" ftype="fastqsanger" value="cd-hit-dup_in.fastqsanger"/>
+            <output name="output_reads" ftype="fastqsanger" file="cd-hit-dup_out.fastqsanger" />
+            <output name="output_duplicate_clusters" ftype="tabular" file="cd-hit-dup_out.dup_clusters.tabular" />
+        </test>
+        <test> <!-- Same single-end input with chimera filtering switched on (no thresholds overridden): additionally checks the chimeric-cluster table. -->
+            <param name="fastq_input|fastq_input_selector" value="single" />
+            <param name="fastq_input|fastq_input1" ftype="fastqsanger" value="cd-hit-dup_in.fastqsanger"/>
+            <param name="fastq_input|filter_chimeras|filter_chimeras_selector" value="true"/>
+            <output name="output_reads" ftype="fastqsanger" file="cd-hit-dup_out_chimera.fastqsanger" />
+            <output name="output_duplicate_clusters" ftype="tabular" file="cd-hit-dup_out_chimera.dup_clusters.tabular" />
+            <output name="output_chimeric_clusters" ftype="tabular" file="cd-hit-dup_out_chimera.chimeric_clusters.tabular" />
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+        **What it does**
+
+        cd-hit-dup is a simple tool for removing duplicates from sequencing reads, with an optional step to detect and remove chimeric reads. A number of options are provided to tune how the duplicates are removed.
+
+
+        **Options**
+
+            +--------+-------------------------------------------------------------------------------------------------------------------+
+            | Option | Description                                                                                                       |
+            +========+===================================================================================================================+
+            |-i      | Input file                                                                                                        |
+            +--------+-------------------------------------------------------------------------------------------------------------------+
+            |-i2     | Second input file                                                                                                 |
+            +--------+-------------------------------------------------------------------------------------------------------------------+
+            |-o      | Output file                                                                                                       |
+            +--------+-------------------------------------------------------------------------------------------------------------------+
+            |-d      | Description length (default 0, truncate at the first whitespace character)                                        |
+            +--------+-------------------------------------------------------------------------------------------------------------------+
+            |-u      | Length of prefix to be used in the analysis (default 0, for full/maximum length)                                  |
+            +--------+-------------------------------------------------------------------------------------------------------------------+
+            |-m      | Match length (true/false, default true)                                                                           |
+            +--------+-------------------------------------------------------------------------------------------------------------------+
+            |-e      | Maximum number/percent of mismatches allowed                                                                      |
+            +--------+-------------------------------------------------------------------------------------------------------------------+
+            |-f      | Filter out chimeric clusters (true/false, default false)                                                          |
+            +--------+-------------------------------------------------------------------------------------------------------------------+
+            |-s      | Minimum length of common sequence shared between a chimeric read and each of its parents (default 30, minimum 20) |
+            +--------+-------------------------------------------------------------------------------------------------------------------+
+            |-a      | Abundance cutoff (default 1 without chimeric filtering, 2 with chimeric filtering)                                |
+            +--------+-------------------------------------------------------------------------------------------------------------------+
+            |-b      | Abundance ratio between a parent read and a chimeric read (default 1)                                             |
+            +--------+-------------------------------------------------------------------------------------------------------------------+
+            |-p      | Dissimilarity control for chimeric filtering (default 1)                                                          |
+            +--------+-------------------------------------------------------------------------------------------------------------------+
+
+
+        ]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/bts565</citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cd-hit-dup_in.fastqsanger	Wed Apr 29 12:06:47 2015 -0400
@@ -0,0 +1,28 @@
+@A
+CCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGGAGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCCTTCGGGGTGAGTGTACCCTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGCCCATCGCTTAACGGTGGGTCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTGCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCGCTGGGCCGTCACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCGGGTAGTA
++
+>AAAAAADADBBGGGGGGGGGGFGHHHHHGFHGHHHHHHGGGGEHFHHHHFHHHHGGEFFEFCGGGGGCGGGGGBCF?FHHHHGGHGGGHFHHHCDGGGHHFGHGGGGGGHHHEGEFHHH?DDDG.CGHBGFHHGGGG.;;.BFGFFGGAA?ABFB9FDAFEF/FFFAFFFFDAA.:FBBAB/AAE9ADFFFFDFBFFBFFFB;AF;FFF@EFFFBFFE>AEFFFFFE@B@@B-E@A?DFA-AACF/CAFFAB9------FFA9BAFBA-FEA9;@@A9-.C;9-A-@A@;AEF;FHHCCC@@<GEHGHHHGF111GF<>--//?G?1F/?</HGDG>GF1G>HGHGB/B//GG2F2GE??>111F/GCGHGEF0BEE>//0//E>///F12HFGGGGHGHHGAGCGGHGGEEEE0GGGHGGGGHHGGGGGBGGGF?ADDAFFFAA>11
+@A2
+CCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGGAGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCCTTCGGGGTGAGTGTACCCTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGCCCATCGCTTAACGGTGGGTCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTGCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCGCTGGGCCGTCACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCGGGTAGTA
++
+>AAAAAADADBBGGGGGGGGGGFGHHHHHGFHGHHHHHHGGGGEHFHHHHFHHHHGGEFFEFCGGGGGCGGGGGBCF?FHHHHGGHGGGHFHHHCDGGGHHFGHGGGGGGHHHEGEFHHH?DDDG.CGHBGFHHGGGG.;;.BFGFFGGAA?ABFB9FDAFEF/FFFAFFFFDAA.:FBBAB/AAE9ADFFFFDFBFFBFFFB;AF;FFF@EFFFBFFE>AEFFFFFE@B@@B-E@A?DFA-AACF/CAFFAB9------FFA9BAFBA-FEA9;@@A9-.C;9-A-@A@;AEF;FHHCCC@@<GEHGHHHGF111GF<>--//?G?1F/?</HGDG>GF1G>HGHGB/B//GG2F2GE??>111F/GCGHGEF0BEE>//0//E>///F12HFGGGGHGHHGAGCGGHGGEEEE0GGGHGGGGHHGGGGGBGGGF?ADDAFFFAA>11
+@B
+CCTACGGGCGGCAGCAGTGGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCGACGCCGCGTGAAGGACGACGTATTTCGGTATGTAAACTTCTATCAGCAAGGAAGATGATGACGGTACTTGACTAAGAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGGCGGCCATGCAAGTCAGAAGTGAAAGCCCGGGGCTCAACCCCGGGACTGCTTTTGAAACTGTAAGGCTAGATTGCCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGGTGAATGACGCTGAGGCTCGGAAGCGTGGGGAGCAAACAGGATTAGATACCCCAGTAGTA
++
+>>AAAA>DBBBBEGGGGGGGGGGGHHHHHHHHHHHHHHGGGGHHHGHHHHHHHHHGGGGGGGGGGGGGGHHHGGGGDHHHHHHGGHGHHHHHHHHHHHHHHHHFHHGHHHHHHHHHHHGGGHGHHHHHHHHHHHHHHFGGGGGGGGGFGGGGGGGGGGGGGBBBBFFFFFFFFFFFFFBBBFFFFFFFFFFFFFFFFFFFFFFBDBFFFFFFFFFFFFBFFFFBFBFFFFFFFFFFFFFFFFGFFFFFGGGFFE@A@CCGCHGHGFFG/GD00HHGGDBCBHDCFFFG><-?C<?GHGHHHGGGGGHHGGGD1CF0FGBC/CCAGGFGGEE/GFGBHGF2F1BHFHHHHGHBGEFFGGGFEHGGEECFD12BGFGGGHGHGDFHHGGGGHGHHGGGGGGHGGFGGGHGGGHHFHHFHFGGGGCFGGGGADA@BFFAA>11
+@B2
+CCTACGGGCGGCAGCAGTGGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCGACGCCGCGTGAAGGACGACGTATTTCGGTATGTAAACTTCTATCAGCAAGGAAGATGATGACGGTACTTGACTAAGAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGGCGGCCATGCAAGTCAGAAGTGAAAGCCCGGGGCTCAACCCCGGGACTGCTTTTGAAACTGTAAGGCTAGATTGCCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGGTGAATGACGCTGAGGCTCGGAAGCGTGGGGAGCAAACAGGATTAGATACCCCAGTAGTA
++
+>>AAAA>DBBBBEGGGGGGGGGGGHHHHHHHHHHHHHHGGGGHHHGHHHHHHHHHGGGGGGGGGGGGGGHHHGGGGDHHHHHHGGHGHHHHHHHHHHHHHHHHFHHGHHHHHHHHHHHGGGHGHHHHHHHHHHHHHHFGGGGGGGGGFGGGGGGGGGGGGGBBBBFFFFFFFFFFFFFBBBFFFFFFFFFFFFFFFFFFFFFFBDBFFFFFFFFFFFFBFFFFBFBFFFFFFFFFFFFFFFFGFFFFFGGGFFE@A@CCGCHGHGFFG/GD00HHGGDBCBHDCFFFG><-?C<?GHGHHHGGGGGHHGGGD1CF0FGBC/CCAGGFGGEE/GFGBHGF2F1BHFHHHHGHBGEFFGGGFEHGGEECFD12BGFGGGHGHGDFHHGGGGHGHHGGGGGGHGGFGGGHGGGHHFHHFHFGGGGCFGGGGADA@BFFAA>11
+@C
+CCTACGGGCGGCTGCAGTGGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGAGTGATGACGGCCTTCGGGTTGTAAAGCTCTGTTAATCGGGACGAAAGGCCTTCTTGCGAATAGTGAGAAGGATTGACGGTACCGGAATAGAAAGCCACGGCTAGCTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGATAGGTCAGTCTGTCTTAAAAGTTCGGGGCTTAACCCCGTGATGGGATGGAAACTGCCAATCTAGAGTATCGGAGAGGAAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAAGAACACCAGTGGCGAAGGCGACTTTCTGGACGAAAACTGACGCTGAGGCGCGAAAGCCAGGGGAGCGAACGGGATTAGATACCCGGGTAGTC
++
+ABBCCDCCCCCCGGGGGGGGGGGGHHHHHHGGGGGHHHHGGGGGHHHHHHGGGGGHHHGGGGGGGGGGHHHHHHHGGGGGHHHGGHGGGHHHHHHHHHHHHHHHGGGGGGGGGGHHHHHHHHHFG-DGGGGGGGGGGGGGGGFGGGGGGGGGGGFFFFFFFFFFFFFFFFFFBFFFFFFFHFFFFFFFFFFFFFFFHFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF/B/FBFFGFBGFFGGGGFFAGGGGGGGFCGCDAGCGBHHHFFHHHHHGGGHFGD0HHHHHHGGGGGHHHHHHHHHEHHHHHFHHGHHHHHGGGGGHHHHHGGGGGHHHHHHHHGHHHHFHGHGFHHHGGGGHHGGGGGFGGHHHHGGGHGHHGHHGGGGGHHHGGGGGGGHHHHHHGGGGGHGGGHHHFFGGGGGGGGGBBBBFFFBBBA3
+@D
+CCTACGGGTGGCTGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGGAGGCCTTCGGGTTGTAAACCTCTTTTGTTAGGGAGCAAGGCACTTTGTGTTGAGTGTACCTTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCCGCGCCGGGTACGGGCGGGCTTGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCGAGTAGTC
++
+DDDDDDDCCCCFGGGGGGGGGGGGHHHHHHHCHHHHHHHGGGGGHGHHHHHHHHHGGGGGGGGGGGGGHGGHGGGHGGGHHHHGGHGGGHGHHHHHHHHHHHHHHHHHGGHHHGHHHHHGHHHFGHHHHHHHHHHHHHGGFGGHHHFHHGGGGGHHHGGGGGGGFFGGGGGGADFFFEFFFFFFFDFBAFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFDFFFFFF@DFFFFFFFFFFFFFFFFFFFFBDA9.;FFFFFFFFFFFDC@??D9?-?GGGGGGGGFGHEFGGHHGGFCFHHGHFFHHHHHGHHHHGCGGGCFHHGGC?GHHHHGHHHHEHHHHEGEEDFHHHHHGHGHHGF1GGGGHHHHHF3FHFG1HGGGGGEFFHHGGGGGHHHHHGGFGFHGGFEGGGGGGHGGGHHHHFGGGGGGGGGBBBBFBFBBBBB
+@chimeric_read
+CCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGGAGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCCTTCGGGGTGAGTGTACCCTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCCATGCAAGTCAGAAGTGAAAGCCCGGGGCTCAACCCCGGGACTGCTTTTGAAACTGTAAGGCTAGATTGCCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGGTGAATGACGCTGAGGCTCGGAAGCGTGGGGAGCAAACAGGATTAGATACCCCAGTAGTA
++
+FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cd-hit-dup_out.dup_clusters.tabular	Wed Apr 29 12:06:47 2015 -0400
@@ -0,0 +1,12 @@
+>Cluster 0
+0	465nt, >C... *
+>Cluster 1
+0	450nt, >D... *
+>Cluster 2
+0	449nt, >A... *
+1	449nt, >A2... at 1:449:1:449/+/100.00%
+>Cluster 3
+0	440nt, >B... *
+1	440nt, >B2... at 1:440:1:440/+/100.00%
+>Cluster 4
+0	440nt, >chimeric_read... *
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cd-hit-dup_out.fastqsanger	Wed Apr 29 12:06:47 2015 -0400
@@ -0,0 +1,20 @@
+@C
+CCTACGGGCGGCTGCAGTGGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGAGTGATGACGGCCTTCGGGTTGTAAAGCTCTGTTAATCGGGACGAAAGGCCTTCTTGCGAATAGTGAGAAGGATTGACGGTACCGGAATAGAAAGCCACGGCTAGCTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGATAGGTCAGTCTGTCTTAAAAGTTCGGGGCTTAACCCCGTGATGGGATGGAAACTGCCAATCTAGAGTATCGGAGAGGAAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAAGAACACCAGTGGCGAAGGCGACTTTCTGGACGAAAACTGACGCTGAGGCGCGAAAGCCAGGGGAGCGAACGGGATTAGATACCCGGGTAGTC
++C
+ABBCCDCCCCCCGGGGGGGGGGGGHHHHHHGGGGGHHHHGGGGGHHHHHHGGGGGHHHGGGGGGGGGGHHHHHHHGGGGGHHHGGHGGGHHHHHHHHHHHHHHHGGGGGGGGGGHHHHHHHHHFG-DGGGGGGGGGGGGGGGFGGGGGGGGGGGFFFFFFFFFFFFFFFFFFBFFFFFFFHFFFFFFFFFFFFFFFHFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF/B/FBFFGFBGFFGGGGFFAGGGGGGGFCGCDAGCGBHHHFFHHHHHGGGHFGD0HHHHHHGGGGGHHHHHHHHHEHHHHHFHHGHHHHHGGGGGHHHHHGGGGGHHHHHHHHGHHHHFHGHGFHHHGGGGHHGGGGGFGGHHHHGGGHGHHGHHGGGGGHHHGGGGGGGHHHHHHGGGGGHGGGHHHFFGGGGGGGGGBBBBFFFBBBA3
+@D
+CCTACGGGTGGCTGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGGAGGCCTTCGGGTTGTAAACCTCTTTTGTTAGGGAGCAAGGCACTTTGTGTTGAGTGTACCTTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCCGCGCCGGGTACGGGCGGGCTTGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCGAGTAGTC
++D
+DDDDDDDCCCCFGGGGGGGGGGGGHHHHHHHCHHHHHHHGGGGGHGHHHHHHHHHGGGGGGGGGGGGGHGGHGGGHGGGHHHHGGHGGGHGHHHHHHHHHHHHHHHHHGGHHHGHHHHHGHHHFGHHHHHHHHHHHHHGGFGGHHHFHHGGGGGHHHGGGGGGGFFGGGGGGADFFFEFFFFFFFDFBAFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFDFFFFFF@DFFFFFFFFFFFFFFFFFFFFBDA9.;FFFFFFFFFFFDC@??D9?-?GGGGGGGGFGHEFGGHHGGFCFHHGHFFHHHHHGHHHHGCGGGCFHHGGC?GHHHHGHHHHEHHHHEGEEDFHHHHHGHGHHGF1GGGGHHHHHF3FHFG1HGGGGGEFFHHGGGGGHHHHHGGFGFHGGFEGGGGGGHGGGHHHHFGGGGGGGGGBBBBFBFBBBBB
+@A
+CCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGGAGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCCTTCGGGGTGAGTGTACCCTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGCCCATCGCTTAACGGTGGGTCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTGCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCGCTGGGCCGTCACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCGGGTAGTA
++A
+>AAAAAADADBBGGGGGGGGGGFGHHHHHGFHGHHHHHHGGGGEHFHHHHFHHHHGGEFFEFCGGGGGCGGGGGBCF?FHHHHGGHGGGHFHHHCDGGGHHFGHGGGGGGHHHEGEFHHH?DDDG.CGHBGFHHGGGG.;;.BFGFFGGAA?ABFB9FDAFEF/FFFAFFFFDAA.:FBBAB/AAE9ADFFFFDFBFFBFFFB;AF;FFF@EFFFBFFE>AEFFFFFE@B@@B-E@A?DFA-AACF/CAFFAB9------FFA9BAFBA-FEA9;@@A9-.C;9-A-@A@;AEF;FHHCCC@@<GEHGHHHGF111GF<>--//?G?1F/?</HGDG>GF1G>HGHGB/B//GG2F2GE??>111F/GCGHGEF0BEE>//0//E>///F12HFGGGGHGHHGAGCGGHGGEEEE0GGGHGGGGHHGGGGGBGGGF?ADDAFFFAA>11
+@B
+CCTACGGGCGGCAGCAGTGGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCGACGCCGCGTGAAGGACGACGTATTTCGGTATGTAAACTTCTATCAGCAAGGAAGATGATGACGGTACTTGACTAAGAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGGCGGCCATGCAAGTCAGAAGTGAAAGCCCGGGGCTCAACCCCGGGACTGCTTTTGAAACTGTAAGGCTAGATTGCCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGGTGAATGACGCTGAGGCTCGGAAGCGTGGGGAGCAAACAGGATTAGATACCCCAGTAGTA
++B
+>>AAAA>DBBBBEGGGGGGGGGGGHHHHHHHHHHHHHHGGGGHHHGHHHHHHHHHGGGGGGGGGGGGGGHHHGGGGDHHHHHHGGHGHHHHHHHHHHHHHHHHFHHGHHHHHHHHHHHGGGHGHHHHHHHHHHHHHHFGGGGGGGGGFGGGGGGGGGGGGGBBBBFFFFFFFFFFFFFBBBFFFFFFFFFFFFFFFFFFFFFFBDBFFFFFFFFFFFFBFFFFBFBFFFFFFFFFFFFFFFFGFFFFFGGGFFE@A@CCGCHGHGFFG/GD00HHGGDBCBHDCFFFG><-?C<?GHGHHHGGGGGHHGGGD1CF0FGBC/CCAGGFGGEE/GFGBHGF2F1BHFHHHHGHBGEFFGGGFEHGGEECFD12BGFGGGHGHGDFHHGGGGHGHHGGGGGGHGGFGGGHGGGHHFHHFHFGGGGCFGGGGADA@BFFAA>11
+@chimeric_read
+CCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGGAGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCCTTCGGGGTGAGTGTACCCTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCCATGCAAGTCAGAAGTGAAAGCCCGGGGCTCAACCCCGGGACTGCTTTTGAAACTGTAAGGCTAGATTGCCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGGTGAATGACGCTGAGGCTCGGAAGCGTGGGGAGCAAACAGGATTAGATACCCCAGTAGTA
++chimeric_read
+FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cd-hit-dup_out_chimera.chimeric_clusters.tabular	Wed Apr 29 12:06:47 2015 -0400
@@ -0,0 +1,2 @@
+>Cluster 0 chimeric_parent1=0,chimeric_parent2=1
+0	440nt, >chimeric_read... *
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cd-hit-dup_out_chimera.dup_clusters.tabular	Wed Apr 29 12:06:47 2015 -0400
@@ -0,0 +1,10 @@
+>Cluster 0
+0	449nt, >A... *
+1	449nt, >A2... at 1:449:1:449/+/100.00%
+>Cluster 1
+0	440nt, >B... *
+1	440nt, >B2... at 1:440:1:440/+/100.00%
+>Cluster 2
+0	465nt, >C... *
+>Cluster 3
+0	450nt, >D... *
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cd-hit-dup_out_chimera.fastqsanger	Wed Apr 29 12:06:47 2015 -0400
@@ -0,0 +1,16 @@
+@A
+CCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGCGGGATGGAGGCCTTCGGGTTGTAAACCGCTTTTGATCGGGAGCAAGCCCTTCGGGGTGAGTGTACCCTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGCCCATCGCTTAACGGTGGGTCTGCGCCGGGTACGGGCGGGCTGGAGTGCGGTAGGGGAGACTGGAATTGCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCGCTGGGCCGTCACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCGGGTAGTA
++A
+>AAAAAADADBBGGGGGGGGGGFGHHHHHGFHGHHHHHHGGGGEHFHHHHFHHHHGGEFFEFCGGGGGCGGGGGBCF?FHHHHGGHGGGHFHHHCDGGGHHFGHGGGGGGHHHEGEFHHH?DDDG.CGHBGFHHGGGG.;;.BFGFFGGAA?ABFB9FDAFEF/FFFAFFFFDAA.:FBBAB/AAE9ADFFFFDFBFFBFFFB;AF;FFF@EFFFBFFE>AEFFFFFE@B@@B-E@A?DFA-AACF/CAFFAB9------FFA9BAFBA-FEA9;@@A9-.C;9-A-@A@;AEF;FHHCCC@@<GEHGHHHGF111GF<>--//?G?1F/?</HGDG>GF1G>HGHGB/B//GG2F2GE??>111F/GCGHGEF0BEE>//0//E>///F12HFGGGGHGHHGAGCGGHGGEEEE0GGGHGGGGHHGGGGGBGGGF?ADDAFFFAA>11
+@B
+CCTACGGGCGGCAGCAGTGGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCGACGCCGCGTGAAGGACGACGTATTTCGGTATGTAAACTTCTATCAGCAAGGAAGATGATGACGGTACTTGACTAAGAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGGCGGCCATGCAAGTCAGAAGTGAAAGCCCGGGGCTCAACCCCGGGACTGCTTTTGAAACTGTAAGGCTAGATTGCCGGAGAGGTAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAACACCAGTGGCGAAGGCGGCTTACTGGACGGTGAATGACGCTGAGGCTCGGAAGCGTGGGGAGCAAACAGGATTAGATACCCCAGTAGTA
++B
+>>AAAA>DBBBBEGGGGGGGGGGGHHHHHHHHHHHHHHGGGGHHHGHHHHHHHHHGGGGGGGGGGGGGGHHHGGGGDHHHHHHGGHGHHHHHHHHHHHHHHHHFHHGHHHHHHHHHHHGGGHGHHHHHHHHHHHHHHFGGGGGGGGGFGGGGGGGGGGGGGBBBBFFFFFFFFFFFFFBBBFFFFFFFFFFFFFFFFFFFFFFBDBFFFFFFFFFFFFBFFFFBFBFFFFFFFFFFFFFFFFGFFFFFGGGFFE@A@CCGCHGHGFFG/GD00HHGGDBCBHDCFFFG><-?C<?GHGHHHGGGGGHHGGGD1CF0FGBC/CCAGGFGGEE/GFGBHGF2F1BHFHHHHGHBGEFFGGGFEHGGEECFD12BGFGGGHGHGDFHHGGGGHGHHGGGGGGHGGFGGGHGGGHHFHHFHFGGGGCFGGGGADA@BFFAA>11
+@C
+CCTACGGGCGGCTGCAGTGGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGAGTGATGACGGCCTTCGGGTTGTAAAGCTCTGTTAATCGGGACGAAAGGCCTTCTTGCGAATAGTGAGAAGGATTGACGGTACCGGAATAGAAAGCCACGGCTAGCTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGATAGGTCAGTCTGTCTTAAAAGTTCGGGGCTTAACCCCGTGATGGGATGGAAACTGCCAATCTAGAGTATCGGAGAGGAAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAAGAACACCAGTGGCGAAGGCGACTTTCTGGACGAAAACTGACGCTGAGGCGCGAAAGCCAGGGGAGCGAACGGGATTAGATACCCGGGTAGTC
++C
+ABBCCDCCCCCCGGGGGGGGGGGGHHHHHHGGGGGHHHHGGGGGHHHHHHGGGGGHHHGGGGGGGGGGHHHHHHHGGGGGHHHGGHGGGHHHHHHHHHHHHHHHGGGGGGGGGGHHHHHHHHHFG-DGGGGGGGGGGGGGGGFGGGGGGGGGGGFFFFFFFFFFFFFFFFFFBFFFFFFFHFFFFFFFFFFFFFFFHFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF/B/FBFFGFBGFFGGGGFFAGGGGGGGFCGCDAGCGBHHHFFHHHHHGGGHFGD0HHHHHHGGGGGHHHHHHHHHEHHHHHFHHGHHHHHGGGGGHHHHHGGGGGHHHHHHHHGHHHHFHGHGFHHHGGGGHHGGGGGFGGHHHHGGGHGHHGHHGGGGGHHHGGGGGGGHHHHHHGGGGGHGGGHHHFFGGGGGGGGGBBBBFFFBBBA3
+@D
+CCTACGGGTGGCTGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGGAGGCCTTCGGGTTGTAAACCTCTTTTGTTAGGGAGCAAGGCACTTTGTGTTGAGTGTACCTTTCGAATAAGCACCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTATCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTCGTCGCGTCCGGTGTGAAAGTCCATCGCTTAACGGTGGATCCGCGCCGGGTACGGGCGGGCTTGAGTGCGGTAGGGGAGACTGGAATTCCCGGTGTAACGGTGGAATGTGTAGATATCGGGAAGAACACCAATGGCGAAGGCAGGTCTCTGGGCCGTTACTGACGCTGAGGAGCGAAAGCGTGGGGAGCGAACAGGATTAGATACCCGAGTAGTC
++D
+DDDDDDDCCCCFGGGGGGGGGGGGHHHHHHHCHHHHHHHGGGGGHGHHHHHHHHHGGGGGGGGGGGGGHGGHGGGHGGGHHHHGGHGGGHGHHHHHHHHHHHHHHHHHGGHHHGHHHHHGHHHFGHHHHHHHHHHHHHGGFGGHHHFHHGGGGGHHHGGGGGGGFFGGGGGGADFFFEFFFFFFFDFBAFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFDFFFFFF@DFFFFFFFFFFFFFFFFFFFFBDA9.;FFFFFFFFFFFDC@??D9?-?GGGGGGGGFGHEFGGHHGGFCFHHGHFFHHHHHGHHHHGCGGGCFHHGGC?GHHHHGHHHHEHHHHEGEEDFHHHHHGHGHHGF1GGGGHHHHHF3FHFG1HGGGGGEFFHHGGGGGHHHHHGGFGFHGGFEGGGGGGHGGGHHHHFGGGGGGGGGBBBBFBFBBBBB
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Apr 29 12:06:47 2015 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="cd-hit-auxtools" version="0.5-2012-03-07-fix-dan-gh-0.0.1"> <!-- same name and version as the requirement declared in cd_hit_dup.xml -->
+        <repository changeset_revision="d0db9d974498" name="package_cd_hit_auxtools_0_5_2012_03_07_fix_dan" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>