Repository 'sixgill'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/sixgill

Changeset 0:cf8eee8343fb (2016-10-13)
Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/sixgill commit 547a3bb05a08bc4eaed224b6864a82434e09289d-dirty
added:
macros.xml
sixgill_build.xml
sixgill_filter.xml
sixgill_makefasta.xml
sixgill_merge.xml
test-data/metagene_nometagene_merged.metapeptides.tsv
test-data/metagene_nometagene_merged.min2reads.metapeptides.tsv
test-data/metagene_output.txt
test-data/small.fq
test-data/testdb_metagene.metapeptides.fasta
test-data/testdb_metagene.metapeptides.tsv
test-data/testdb_nometagene.metapeptides.tsv
b
diff -r 000000000000 -r cf8eee8343fb macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Oct 13 08:38:04 2016 -0400
b
@@ -0,0 +1,20 @@
+<macros>
+    <!-- Single source of truth for the wrapped sixgill release: the tool
+         version attributes and the package requirement both expand it. -->
+    <token name="@VERSION@">0.2.4</token>
+    <!-- Package dependency shared by all sixgill tools; the yield lets a
+         tool append further requirements when it expands this macro. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">sixgill</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- Any exit code of 1 or higher is reported to Galaxy via stdio. -->
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" />
+        </stdio>
+    </xml>
+    <!-- Citation shared by all sixgill tools; the yield lets a tool add
+         more citations. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1021/acs.jproteome.6b00239</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
b
diff -r 000000000000 -r cf8eee8343fb sixgill_build.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sixgill_build.xml Thu Oct 13 08:38:04 2016 -0400
[
@@ -0,0 +1,157 @@
+<tool id="sixgill_build" name="sixgill build" version="@VERSION@.0">
+    <description>a metapeptide database from metagenome fastq files</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <version_command>sixgill_build --version</version_command>
+    <command><![CDATA[
+        ## Results are written to fixed file names in the job working directory;
+        ## the data outputs of this tool pick them up via from_work_dir.
+        sixgill_build
+            --nogzipout
+            --out=metapeptides_db_output.tsv
+            #if 'fa' in str($output_choice):
+                --outfasta=metapeptides_fa_output.fa
+            #end if
+            ## Optional integer filters: an empty field renders as '' and the
+            ## corresponding flag is simply left off the command line.
+            #if str($sec_filter.minlength) != '':
+                --minlength=$sec_filter.minlength
+            #end if
+            #if str($sec_filter.minqualscore) != '':
+                --minqualscore=$sec_filter.minqualscore
+            #end if
+            #if str($sec_filter.minorflength) != '':
+                --minorflength=$sec_filter.minorflength
+            #end if
+            #if str($sec_filter.minlongesttryppeplen) != '':
+                --minlongesttryppeplen=$sec_filter.minlongesttryppeplen
+            #end if
+            #if str($sec_filter.minreadcount) != '':
+                --minreadcount=$sec_filter.minreadcount
+            #end if
+            #if str($sec_filter.maxreads) != '':
+                --maxreads=$sec_filter.maxreads
+            #end if
+            #if $sec_mg.metagenefile:
+                --metagenefile="$sec_mg.metagenefile"
+                ## Compare as a string so that a score of 0 is still passed on;
+                ## a bare truthiness test would treat 0 as "not set".
+                #if str($sec_mg.minmetagenescore) != '':
+                    --minmetagenescore=$sec_mg.minmetagenescore
+                #end if
+            #end if
+            ## Every selected fastq dataset becomes one positional argument.
+            #for $fastqfile in $fastqfiles:
+                "$fastqfile"
+            #end for
+    ]]></command>
+    <inputs>
+        <!-- One or more fastq datasets; each one is handed to sixgill_build
+             as a positional argument. -->
+        <param name="fastqfiles" type="data" format="fastq" multiple="true" optional="false" label="metagenomic fastq files" 
+         help=""/>
+        <!-- Optional thresholds. Each value is forwarded as the flag of the
+             same name, and only when the field is not empty. -->
+        <section name="sec_filter" expanded="false" title="filter">
+            <param name="minlength" type="integer" value="10" min="0" optional="true" label="minlength"
+                help="min AA length of a metapeptide"/>
+            <param name="minqualscore" type="integer" value="30" min="0" optional="true" label="minqualscore"
+                help="min base-call phred score across any NT in a metapeptide"/>
+            <param name="minorflength" type="integer" value="40" min="0" optional="true" label="minorflength"
+                help="min length of ORF-portion"/>
+            <param name="minlongesttryppeplen" type="integer" value="7" min="0" optional="true" label="minlongesttryppeplen"
+                help="minimum length of the longest tryptic peptide"/>
+            <param name="minreadcount" type="integer" value="2" min="1" optional="true" label="minreadcount"
+                help="minimum read count"/>
+            <param name="maxreads" type="integer" value="" optional="true" label="maxreads"
+                help="stop early if we hit this many reads"/>
+        </section>
+        <!-- Optional MetaGene Annotator input. The command template only
+             considers minmetagenescore when a metagenefile is supplied. -->
+        <section name="sec_mg" expanded="false" title="MetaGene Annotator">
+            <param name="metagenefile" type="data" format="txt" optional="true" label="metagenefile"
+                help="MetaGene Annotator output file. Records must be in same linear order as reads in fastqfiles"/>
+            <param name="minmetagenescore" type="integer" value="" min="-1" optional="true" label="minmetagenescore"
+                help="minimum MetaGene score"/>
+        </section>
+        <!-- Selects which outputs are created: the output filters test for
+             'db' and 'fa', and 'fa' also switches on the outfasta flag. -->
+        <param name="output_choice" type="select" multiple="true" optional="false" label="select outputs">
+            <option value="db" selected="true">metapeptide database</option>
+            <option value="fa">metapeptide protein fasta</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- Tabular metapeptide database, collected from the working
+             directory; only created when 'db' is among the selected outputs.
+             The metadata actions set comment_lines to 1 and name the six
+             columns. -->
+        <data name="output_db" format="tabular" label="${tool.name} on ${on_string}: metapeptides.tsv" from_work_dir="metapeptides_db_output.tsv">
+            <filter>'db' in output_choice</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="sequence,length,min_qualscore,partial_orf_length,metagene_score,read_ids" />
+            </actions>
+        </data>
+        <!-- Fasta output, collected from the working directory; only created
+             when 'fa' is among the selected outputs. -->
+        <data name="output_fa" format="fasta" label="${tool.name} on ${on_string}: metapeptides.fa" from_work_dir="metapeptides_fa_output.fa">
+            <filter>'fa' in output_choice</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Raw-read mode: request both outputs and look for a known
+             metapeptide sequence in each of them. -->
+        <test>
+            <param name="fastqfiles" ftype="fastq" value="small.fq"/>
+            <param name="minreadcount" value="1"/>
+            <param name="output_choice" value="db,fa"/>
+            <output name="output_db">
+                <assert_contents>
+                    <has_text text="DLRILLRERLVAGDSDEAAVDFIVDR" />
+                </assert_contents>
+            </output>
+            <output name="output_fa">
+                <assert_contents>
+                    <has_text text="DLRILLRERLVAGDSDEAAVDFIVDR" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- MetaGene mode: the same reads plus a MetaGene Annotator output
+             file; only the database output is requested. -->
+        <test>
+            <param name="fastqfiles" ftype="fastq" value="small.fq"/>
+            <param name="minreadcount" value="1"/>
+            <!-- Uploaded as txt to match the format declared on the
+                 metagenefile input (it is not a fastq file). -->
+            <param name="metagenefile" ftype="txt" value="metagene_output.txt"/>
+            <param name="output_choice" value="db"/>
+            <output name="output_db">
+                <assert_contents>
+                    <has_text text="DLRILLRERLVAGDSDEAAVDFIVDR" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+        usage: sixgill_build [-h] [--minlength MINLENGTH]
+                     [--minqualscore MINQUALSCORE]
+                     [--metagenefile METAGENEFILE]
+                     [--minmetagenescore MINMETAGENESCORE]
+                     [--minorflength MINORFLENGTH]
+                     [--minlongesttryppeplen MINLONGESTTRYPPEPLEN]
+                     [--maxreads MAXREADS] [--minreadcount MINREADCOUNT] --out
+                     OUT [--outfasta OUTFASTA] [--debug]
+                     fastqfiles [fastqfiles ...]
+
+Read in one or more fastq files. For each read, do a 6-frame translation and
+add all metapeptides that pass the specified filtering criteria. If
+--metagenefile is specified, start with the output of MetaGene Annotator
+instead of raw reads.
+
+positional arguments:
+  fastqfiles            input fastq file(s), bgzipped
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --minlength MINLENGTH
+                        min AA length of a metapeptide
+  --minqualscore MINQUALSCORE
+                        min base-call phred score across any NT in a
+                        metapeptide
+  --metagenefile METAGENEFILE
+                        input MetaGene Annotator output file. Records must be
+                        in same linear order as reads in fastqfiles
+  --minmetagenescore MINMETAGENESCORE
+                        minimum MetaGene score
+  --minorflength MINORFLENGTH
+                        min length of ORF-portion
+  --minlongesttryppeplen MINLONGESTTRYPPEPLEN
+                        minimum length of the longest tryptic peptide
+  --maxreads MAXREADS   stop early if we hit this many reads
+  --minreadcount MINREADCOUNT
+                        minimum read count
+  --out OUT             Output metapeptide database file
+  --outfasta OUTFASTA   Output metapeptide fasta database file
+  --debug               Enable debug logging
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r cf8eee8343fb sixgill_filter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sixgill_filter.xml Thu Oct 13 08:38:04 2016 -0400
[
@@ -0,0 +1,120 @@
+<tool id="sixgill_filter" name="sixgill filter" version="@VERSION@.0">
+    <description>a metapeptide database</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <version_command>sixgill_filter --version</version_command>
+    <command><![CDATA[
+        ## The filtered table is written to a fixed file name that the
+        ## output_db dataset collects via from_work_dir.
+        sixgill_filter
+            --nogzipout
+            --out=metapeptides_db_output.tsv
+            ## Each optional threshold is passed only when its field is not empty.
+            #if str($minorflength) != '':
+                --minorflength=$minorflength
+            #end if
+            #if str($minaaseqlength) != '':
+                --minaaseqlength=$minaaseqlength
+            #end if
+            #if str($minqualscore) != '':
+                --minqualscore=$minqualscore
+            #end if
+            #if str($minlongesttryppeplen) != '':
+                --minlongesttryppeplen=$minlongesttryppeplen
+            #end if
+            #if str($minreadcount) != '':
+                --minreadcount=$minreadcount
+            #end if
+            ## The minmetagenescore input was declared but never forwarded;
+            ## the flag=value form keeps a value of -1 from being read as an option.
+            #if str($minmetagenescore) != '':
+                --minmetagenescore=$minmetagenescore
+            #end if
+            #if str($maxmetapeptides) != '':
+                --maxmetapeptides=$maxmetapeptides
+            #end if
+            "$metapeptide_db"
+            ## The fasta output is derived from the freshly filtered table.
+            #if 'fa' in str($output_choice):
+                && sixgill_makefasta --type=aa --out="metapeptides_fa_output.fa" "metapeptides_db_output.tsv"
+            #end if
+    ]]></command>
+    <inputs>
+        <!-- Metapeptide database to filter; passed to sixgill_filter as the
+             positional argument. -->
+        <param name="metapeptide_db" type="data" format="tabular" label="metapeptide database" 
+         help="Can be generated with sixgill_build"/>
+        <!-- Optional integer thresholds; a field left empty means the
+             corresponding filter value is not set in this form. -->
+        <param name="minorflength" type="integer" value="40" min="0" optional="true" label="minorflength"
+                help="min length of ORF-portion"/>
+        <param name="minaaseqlength" type="integer" value="10" min="0" optional="true" label="minaaseqlength"
+                help="min AA sequence length"/>
+        <param name="minqualscore" type="integer" value="30" min="0" optional="true" label="minqualscore"
+                help="min base-call phred score across any NT in a metapeptide"/>
+        <param name="minlongesttryppeplen" type="integer" value="7" min="0" optional="true" label="minlongesttryppeplen"
+                help="minimum length of the longest tryptic peptide"/>
+        <param name="minreadcount" type="integer" value="2" min="1" optional="true" label="minreadcount"
+                help="minimum read count"/>
+        <param name="minmetagenescore" type="integer" value="" min="-1" optional="true" label="minmetagenescore"
+                help="minimum MetaGene score"/>
+        <param name="maxmetapeptides" type="integer" value="" optional="true" label="maxmetapeptides"
+                help="maximum number of metapeptides to write"/>
+        <!-- Selects which outputs are created: the output filters test for
+             'db' and 'fa', and 'fa' also triggers the sixgill_makefasta step. -->
+        <param name="output_choice" type="select" multiple="true" optional="false" label="select outputs">
+            <option value="db" selected="true">metapeptide database</option>
+            <option value="fa">metapeptide protein fasta</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- Filtered metapeptide database, collected from the working
+             directory; only created when 'db' is among the selected outputs.
+             The metadata actions set comment_lines to 1 and name the six
+             columns. -->
+        <data name="output_db" format="tabular" label="${tool.name} on ${on_string}: metapeptides.tsv" from_work_dir="metapeptides_db_output.tsv">
+            <filter>'db' in output_choice</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="sequence,length,min_qualscore,partial_orf_length,metagene_score,read_ids" />
+            </actions>
+        </data>
+        <!-- Fasta file written by the chained sixgill_makefasta call; only
+             created when 'fa' is among the selected outputs. -->
+        <data name="output_fa" format="fasta" label="${tool.name} on ${on_string}: metapeptides.fa" from_work_dir="metapeptides_fa_output.fa">
+            <filter>'fa' in output_choice</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Filters the merged fixture with minreadcount=2 and
+             minaaseqlength=12. In that fixture DLRILLRERLVAGDSDEAAVDFIVDR
+             has length 26 and two read ids, so it must survive, while
+             YHNFEGYRWR has length 10 and a single read id, so it must be
+             removed. -->
+        <test>
+            <param name="metapeptide_db" ftype="tabular" value="metagene_nometagene_merged.metapeptides.tsv"/>
+            <param name="minreadcount" value="2"/>
+            <param name="minaaseqlength" value="12"/>
+            <param name="output_choice" value="db"/>
+            <output name="output_db">
+                <assert_contents>
+                    <has_text text="DLRILLRERLVAGDSDEAAVDFIVDR" />
+                    <not_has_text text="YHNFEGYRWR" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+usage: sixgill_filter [-h] --out OUT [--minorflength MINORFLENGTH]
+                      [--minaaseqlength MINAASEQLENGTH]
+                      [--minreadcount MINREADCOUNT]
+                      [--minqualscore MINQUALSCORE]
+                      [--minlongesttryppeplen MINLONGESTTRYPPEPLEN]
+                      [--minmetagenescore MINMETAGENESCORE]
+                      [--maxmetapeptides MAXMETAPEPTIDES] [--debug]
+                      metapeptidefile
+
+Filter a metapeptide database.
+
+positional arguments:
+  metapeptidefile       input metapeptide database file
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --out OUT             output metapeptide database file
+  --minorflength MINORFLENGTH
+                        minimum ORF length
+  --minaaseqlength MINAASEQLENGTH
+                        minimum AA sequence length
+  --minreadcount MINREADCOUNT
+                        minimum read count
+  --minqualscore MINQUALSCORE
+                        minimum basecall quality
+  --minlongesttryppeplen MINLONGESTTRYPPEPLEN
+                        minimum length of the longest tryptic peptide
+  --minmetagenescore MINMETAGENESCORE
+                        Minimum MetaGene score (-1 for none)
+  --maxmetapeptides MAXMETAPEPTIDES
+                        maximum number of metapeptides to write
+  --debug               Enable debug logging
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r cf8eee8343fb sixgill_makefasta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sixgill_makefasta.xml Thu Oct 13 08:38:04 2016 -0400
[
@@ -0,0 +1,76 @@
+<tool id="sixgill_makefasta" name="sixgill makefasta" version="@VERSION@.0">
+    <description>from a metapeptide database</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <version_command>sixgill_makefasta --version</version_command>
+    <command><![CDATA[
+        ## The fasta file is written to a fixed name that the output_fa
+        ## dataset collects via from_work_dir.
+        sixgill_makefasta
+            --out=metapeptides_fa_output.fa
+            --type="$fa_type.fasta_type"
+            ## Cleavage options exist only in the peptide branch of the
+            ## conditional; compare via str() like the other sixgill tools do.
+            #if str($fa_type.fasta_type) == 'peptide':
+                --missedcleavages=$fa_type.missedcleavages
+                --minpeptidelength=$fa_type.minpeptidelength
+            #end if
+            "$metapeptide_db"
+    ]]></command>
+    <inputs>
+        <!-- Metapeptide database to convert; passed to sixgill_makefasta as
+             the positional argument. -->
+        <param name="metapeptide_db" type="data" format="tabular" label="metapeptide database" 
+         help="Can be generated with sixgill_build"/>
+        <!-- The selected value is passed as the type flag. The two cleavage
+             parameters are only defined, and only put on the command line,
+             when 'peptide' is chosen. -->
+        <conditional name="fa_type">
+            <param name="fasta_type" type="select" label="fasta type">
+                <help> peptide with specified missed cleavages requires holding
+                       all peptides in the database in memory, as each is only written once
+                </help>
+                <option value="aa" selected="true">aa - amino acid</option>
+                <option value="peptide">peptide - with specified missed cleavages</option>
+            </param>
+            <when value="aa"/>
+            <when value="peptide">
+                <param name="missedcleavages" type="integer" value="0" min="0" max="6" label="missedcleavages"/>
+                <param name="minpeptidelength" type="integer" value="7" min="1" label="minpeptidelength"/>
+            </when>
+        </conditional>
+        
+    </inputs>
+    <outputs>
+        <!-- Fasta file collected from the working directory; the label suffix
+             matches the fasta outputs of the other sixgill tools. -->
+        <data name="output_fa" format="fasta" label="${tool.name} on ${on_string}: metapeptides.fa" from_work_dir="metapeptides_fa_output.fa"/>
+    </outputs>
+    <tests>
+        <!-- Converts a fixture database with the default 'aa' type and
+             checks that a known metapeptide sequence appears in the fasta
+             output. The 'peptide' branch is not exercised here. -->
+        <test>
+            <param name="metapeptide_db" ftype="tabular" value="testdb_metagene.metapeptides.tsv"/>
+            <param name="fasta_type" value="aa"/>
+            <output name="output_fa">
+                <assert_contents>
+                    <has_text text="DLRILLRERLVAGDSDEAAVDFIVDR" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+usage: sixgill_makefasta [-h] --out OUT --type {aa,peptide}
+                         [--missedcleavages MISSEDCLEAVAGES]
+                         [--minpeptidelength MINPEPTIDELENGTH] [--debug]
+                         metapeptidedbfile
+
+Build a fasta database from a metapeptide database. Either nucleotide or amino
+acid.
+
+positional arguments:
+  metapeptidedbfile     input metapeptide database file
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --out OUT             output file
+  --type {aa,peptide}   database type
+  --missedcleavages MISSEDCLEAVAGES
+                        missed cleavages (for type peptide only)
+  --minpeptidelength MINPEPTIDELENGTH
+                        minimum peptide length (for type peptide only)
+  --debug               Enable debug logging
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r cf8eee8343fb sixgill_merge.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sixgill_merge.xml Thu Oct 13 08:38:04 2016 -0400
[
@@ -0,0 +1,70 @@
+<tool id="sixgill_merge" name="sixgill merge" version="@VERSION@.0">
+    <description>metapeptide databases</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <version_command>sixgill_merge --version</version_command>
+    <command><![CDATA[
+        ## The merged table is written to a fixed file name that the output_db
+        ## dataset collects via from_work_dir.
+        sixgill_merge
+            --nogzipout
+            --out=metapeptides_db_output.tsv
+            ## Every selected database becomes one positional argument.
+            #for $metapeptide_db in $metapeptide_dbs:
+                "$metapeptide_db"
+            #end for
+            ## The fasta output is derived from the freshly merged table.
+            #if 'fa' in str($output_choice):
+                &&
+                sixgill_makefasta --type=aa --out="metapeptides_fa_output.fa" "metapeptides_db_output.tsv"
+            #end if
+    ]]></command>
+    <inputs>
+        <!-- Metapeptide databases to merge; each one is handed to
+             sixgill_merge as a positional argument. -->
+        <param name="metapeptide_dbs" type="data" format="tabular" multiple="true" optional="false" label="metapeptide databases" 
+         help="Can be generated with sixgill_build"/>
+        <!-- Selects which outputs are created: the output filters test for
+             'db' and 'fa', and 'fa' also triggers the sixgill_makefasta step. -->
+        <param name="output_choice" type="select" multiple="true" optional="false" label="select outputs">
+            <option value="db" selected="true">metapeptide database</option>
+            <option value="fa">metapeptide protein fasta</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- Merged metapeptide database, collected from the working
+             directory; only created when 'db' is among the selected outputs.
+             The metadata actions set comment_lines to 1 and name the six
+             columns. -->
+        <data name="output_db" format="tabular" label="${tool.name} on ${on_string}: metapeptides.tsv" from_work_dir="metapeptides_db_output.tsv">
+            <filter>'db' in output_choice</filter>
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="sequence,length,min_qualscore,partial_orf_length,metagene_score,read_ids" />
+            </actions>
+        </data>
+        <!-- Fasta file written by the chained sixgill_makefasta call; only
+             created when 'fa' is among the selected outputs. -->
+        <data name="output_fa" format="fasta" label="${tool.name} on ${on_string}: metapeptides.fa" from_work_dir="metapeptides_fa_output.fa">
+            <filter>'fa' in output_choice</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Merges two fixture databases with the default output selection
+             and checks that both marker sequences are present in the merged
+             table. -->
+        <test>
+            <param name="metapeptide_dbs" ftype="tabular" 
+                   value="testdb_metagene.metapeptides.tsv,testdb_nometagene.metapeptides.tsv"/>
+            <output name="output_db">
+                <assert_contents>
+                    <has_text text="DLRILLRERLVAGDSDEAAVDFIVDR" />
+                    <has_text text="YHNFEGYRWR" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+usage: sixgill_merge [-h] --out OUT [--debug]
+                     metapeptidedbfiles [metapeptidedbfiles ...]
+
+Merge multiple metapeptide database files into a single metapeptide database.
+Optionally, filter simultaneously.
+
+positional arguments:
+  metapeptidedbfiles  input metapeptide database files
+
+optional arguments:
+  -h, --help          show this help message and exit
+  --out OUT           output file
+  --debug             Enable debug logging
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r cf8eee8343fb test-data/metagene_nometagene_merged.metapeptides.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metagene_nometagene_merged.metapeptides.tsv Thu Oct 13 08:38:04 2016 -0400
b
@@ -0,0 +1,51 @@
+sequence length min_qualscore partial_orf_length metagene_score read_ids
+TTLIERVVDECSLKK 15 36 96 7.73277 C57KNANXX:5:1101:10000:79229/1,C57KNANXX:5:1101:10000:79229/1
+DLRILLRERLVAGDSDEAAVDFIVDR 26 31 96 6.45153 C57KNANXX:5:1101:10001:25583/1,C57KNANXX:5:1101:10001:25583/1
+KNNSSCSFCGKKR 13 34 54 -1.0 C57KNANXX:5:1101:10000:48824/1
+AATPVGATTIFLSEHSSTTLSIR 23 36 96 -1.0 C57KNANXX:5:1101:10000:79229/1
+RITTLKPFSCNNFTVK 16 38 96 -1.0 C57KNANXX:5:1101:10000:23019/1
+FHLNYQKFFFQKHLPPLIIK 20 34 96 -1.0 C57KNANXX:5:1101:10000:91317/1
+HASIHQFGIVGCNIIWAKPK 20 32 96 -1.0 C57KNANXX:5:1101:10000:60887/1
+NLSYQSNTELKSNLNFQLVCEIRILIK 27 34 96 -1.0 C57KNANXX:5:1101:10000:46059/1
+ISPKLSKVLFSKTSATFNNKADNSSSICFR 30 34 96 -1.0 C57KNANXX:5:1101:10000:91317/1
+LLQEKGFNVVILFSGKEHK 19 38 96 4.26594 C57KNANXX:5:1101:10000:23019/1,C57KNANXX:5:1101:10000:23019/1
+SIPQISTGDMLRAAVK 16 36 93 3.85469 C57KNANXX:5:1101:10000:12567/1,C57KNANXX:5:1101:10000:12567/1
+CSKRSPCSHWYYRR 14 35 96 -1.0 C57KNANXX:5:1101:10000:9540/1
+SVQFEPGVTRFR 12 37 96 -1.0 C57KNANXX:5:1101:10000:58282/1
+VLRYDEGIDSLISIGQSCYGK 21 37 69 2.77044 C57KNANXX:5:1101:10000:99203/1
+HWRVICVFCCANVWLLGTVTKRR 23 35 96 -1.0 C57KNANXX:5:1101:10001:25583/1
+QHPFLQTNHCPLVFVSPVLQIILGSMR 27 38 96 -1.0 C57KNANXX:5:1101:10000:76185/1
+LSAKQVKQILMDSGLSVK 18 38 96 5.18638 C57KNANXX:5:1101:10000:76185/1,C57KNANXX:5:1101:10000:76185/1
+SSDLILPKIICKTGETNTNGQWFVCK 26 38 93 -1.0 C57KNANXX:5:1101:10000:76185/1
+DIANIISYGVSTSNSCIYRISSNNNR 26 34 93 -1.0 C57KNANXX:5:1101:10000:3140/1
+LIMFLTNNSSIQEVLFFPQMRPEVVQKK 28 34 96 5.30784 C57KNANXX:5:1101:10000:92875/1,C57KNANXX:5:1101:10000:92875/1
+QIRHLACEPDVVSSQR 16 36 96 -1.0 C57KNANXX:5:1101:10000:29932/1
+YHNFEGYRWR 10 34 93 -1.0 C57KNANXX:5:1101:10000:68440/1
+RTAGWIRHQAHRPSALFAVGKNQR 24 36 93 -1.0 C57KNANXX:5:1101:10000:29932/1
+ACFAGKDIVVPGVANKLTAIITHFFSK 27 33 96 6.29703 C57KNANXX:5:1101:10001:17691/1,C57KNANXX:5:1101:10001:17691/1
+ELFFGKIGRAVQQECRDR 18 38 96 6.34832 C57KNANXX:5:1101:10000:24094/1,C57KNANXX:5:1101:10000:24094/1
+SPEEIRELSKFKKPIFYDGKFFDIK 25 38 96 13.1057 C57KNANXX:5:1101:10000:84210/1,C57KNANXX:5:1101:10000:84210/1
+SSDLDQAAQQFIVPRKGVLELAR 23 35 96 4.65375 C57KNANXX:5:1101:10000:86801/1,C57KNANXX:5:1101:10000:86801/1
+VNGVVITVSPPTITLKVVISKPR 23 34 93 5.75404 C57KNANXX:5:1101:10000:68440/1,C57KNANXX:5:1101:10000:68440/1
+DFTPKRQSYWATFR 14 38 96 6.38445 C57KNANXX:5:1101:10001:17914/1,C57KNANXX:5:1101:10001:17914/1
+LRERIVFWQDRK 12 38 96 -1.0 C57KNANXX:5:1101:10000:24094/1
+KCVIIAVSLLATPGTTMSFPAK 22 33 96 -1.0 C57KNANXX:5:1101:10001:17691/1
+NSLKFSILQIYIIYIYITIK 20 37 96 -1.0 C57KNANXX:5:1101:10000:29175/1
+RASSNTPLRGTINCCAAWSRSEERFSR 27 35 96 -1.0 C57KNANXX:5:1101:10000:86801/1
+FAPYVTRSMLLRAYRRRDRRHVER 24 31 96 -1.0 C57KNANXX:5:1101:10000:73874/1
+AKVNNAVVDFQFFDR 15 36 96 10.6388 C57KNANXX:5:1101:10001:15683/1,C57KNANXX:5:1101:10001:15683/1
+LTGQVPYLPWAKIK 14 36 96 5.61044 C57KNANXX:5:1101:10000:29932/1,C57KNANXX:5:1101:10000:29932/1
+LGLGPDDVTSDNPKLVYGR 19 32 96 2.81286 C57KNANXX:5:1101:10000:60887/1,C57KNANXX:5:1101:10000:60887/1
+SVPAFTAARSISPVEIWGIDRK 22 31 90 -1.0 C57KNANXX:5:1101:10000:12567/1
+NIIKKMDLVLTGEGSFDVQSTYGK 24 34 93 8.76308 C57KNANXX:5:1101:10000:99902/1,C57KNANXX:5:1101:10000:99902/1
+LRILLLSVNLTGRLLETNPKHSK 23 36 93 -1.0 C57KNANXX:5:1101:10001:17914/1
+SQHPAAMTHSGVPKDRRAVIGITEGLIR 28 35 96 -1.0 C57KNANXX:5:1101:10000:9540/1
+KIIQVVHSVEKK 12 34 51 -1.0 C57KNANXX:5:1101:10000:48824/1
+QIDEELSALLLKVADVFEKR 20 36 93 13.3198 C57KNANXX:5:1101:10000:91317/1,C57KNANXX:5:1101:10000:91317/1
+IIELEHQILKVEGSIMELEKTIVDK 25 36 96 19.4442 C57KNANXX:5:1101:10000:73724/1,C57KNANXX:5:1101:10000:73724/1
+SNFVQSKLNYLYLHR 15 37 93 -1.0 C57KNANXX:5:1101:10000:99203/1
+NANKNVIFFMILILYKNNK 19 38 66 4.82132 C57KNANXX:5:1101:10000:61940/1
+VLEIYPTNIIYFLIVK 16 36 60 5.37246 C57KNANXX:5:1101:10000:33905/1
+KFDEITLDVYYKKGKESSK 19 33 96 22.2262 C57KNANXX:5:1101:10001:101410/1,C57KNANXX:5:1101:10001:101410/1
+DVIECRAPQGVCSHYMLVFLRMEPKFCKEV 30 38 93 0.240116 C57KNANXX:5:1101:10000:19758/1
+QETIALENEIELLQAQIEQIGRAVQQECR 29 35 96 3.85699 C57KNANXX:5:1101:10000:58282/1,C57KNANXX:5:1101:10000:58282/1
b
diff -r 000000000000 -r cf8eee8343fb test-data/metagene_nometagene_merged.min2reads.metapeptides.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metagene_nometagene_merged.min2reads.metapeptides.tsv Thu Oct 13 08:38:04 2016 -0400
b
@@ -0,0 +1,21 @@
+sequence length min_qualscore partial_orf_length metagene_score read_ids
+TTLIERVVDECSLKK 15 36 96 7.73277 C57KNANXX:5:1101:10000:79229/1,C57KNANXX:5:1101:10000:79229/1
+DLRILLRERLVAGDSDEAAVDFIVDR 26 31 96 6.45153 C57KNANXX:5:1101:10001:25583/1,C57KNANXX:5:1101:10001:25583/1
+LLQEKGFNVVILFSGKEHK 19 38 96 4.26594 C57KNANXX:5:1101:10000:23019/1,C57KNANXX:5:1101:10000:23019/1
+SIPQISTGDMLRAAVK 16 36 93 3.85469 C57KNANXX:5:1101:10000:12567/1,C57KNANXX:5:1101:10000:12567/1
+LSAKQVKQILMDSGLSVK 18 38 96 5.18638 C57KNANXX:5:1101:10000:76185/1,C57KNANXX:5:1101:10000:76185/1
+LIMFLTNNSSIQEVLFFPQMRPEVVQKK 28 34 96 5.30784 C57KNANXX:5:1101:10000:92875/1,C57KNANXX:5:1101:10000:92875/1
+ACFAGKDIVVPGVANKLTAIITHFFSK 27 33 96 6.29703 C57KNANXX:5:1101:10001:17691/1,C57KNANXX:5:1101:10001:17691/1
+ELFFGKIGRAVQQECRDR 18 38 96 6.34832 C57KNANXX:5:1101:10000:24094/1,C57KNANXX:5:1101:10000:24094/1
+SPEEIRELSKFKKPIFYDGKFFDIK 25 38 96 13.1057 C57KNANXX:5:1101:10000:84210/1,C57KNANXX:5:1101:10000:84210/1
+SSDLDQAAQQFIVPRKGVLELAR 23 35 96 4.65375 C57KNANXX:5:1101:10000:86801/1,C57KNANXX:5:1101:10000:86801/1
+VNGVVITVSPPTITLKVVISKPR 23 34 93 5.75404 C57KNANXX:5:1101:10000:68440/1,C57KNANXX:5:1101:10000:68440/1
+DFTPKRQSYWATFR 14 38 96 6.38445 C57KNANXX:5:1101:10001:17914/1,C57KNANXX:5:1101:10001:17914/1
+AKVNNAVVDFQFFDR 15 36 96 10.6388 C57KNANXX:5:1101:10001:15683/1,C57KNANXX:5:1101:10001:15683/1
+LTGQVPYLPWAKIK 14 36 96 5.61044 C57KNANXX:5:1101:10000:29932/1,C57KNANXX:5:1101:10000:29932/1
+LGLGPDDVTSDNPKLVYGR 19 32 96 2.81286 C57KNANXX:5:1101:10000:60887/1,C57KNANXX:5:1101:10000:60887/1
+NIIKKMDLVLTGEGSFDVQSTYGK 24 34 93 8.76308 C57KNANXX:5:1101:10000:99902/1,C57KNANXX:5:1101:10000:99902/1
+QIDEELSALLLKVADVFEKR 20 36 93 13.3198 C57KNANXX:5:1101:10000:91317/1,C57KNANXX:5:1101:10000:91317/1
+IIELEHQILKVEGSIMELEKTIVDK 25 36 96 19.4442 C57KNANXX:5:1101:10000:73724/1,C57KNANXX:5:1101:10000:73724/1
+KFDEITLDVYYKKGKESSK 19 33 96 22.2262 C57KNANXX:5:1101:10001:101410/1,C57KNANXX:5:1101:10001:101410/1
+QETIALENEIELLQAQIEQIGRAVQQECR 29 35 96 3.85699 C57KNANXX:5:1101:10000:58282/1,C57KNANXX:5:1101:10000:58282/1
b
diff -r 000000000000 -r cf8eee8343fb test-data/metagene_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metagene_output.txt Thu Oct 13 08:38:04 2016 -0400
b
@@ -0,0 +1,168 @@
+# C57KNANXX:5:1101:10000:12567/1
+# gc = 0.553191, rbs = -1
+# self: -
+gene_1 1 94 - 0 00 3.85469 p - - -
+# C57KNANXX:5:1101:10000:19758/1
+# gc = 0.402062, rbs = -1
+# self: -
+gene_1 1 97 - 1 01 0.240116 b - - -
+# C57KNANXX:5:1101:10000:23019/1
+# gc = 0.329897, rbs = -1
+# self: -
+gene_1 1 97 + 1 00 4.26594 b - - -
+# C57KNANXX:5:1101:10000:24094/1
+# gc = 0.474227, rbs = -1
+# self: -
+gene_1 1 97 + 1 00 6.34832 p - - -
+# C57KNANXX:5:1101:10000:25279/1
+# gc = 0.360825, rbs = -1
+# self: -
+gene_1 1 97 + 1 00 16.3044 p - - -
+# C57KNANXX:5:1101:10000:28635/1
+# gc = 0.278351, rbs = -1
+# self: -
+gene_1 1 97 + 2 00 4.69095 a - - -
+# C57KNANXX:5:1101:10000:29175/1
+# gc = 0.154639, rbs = -1
+# self: -
+# C57KNANXX:5:1101:10000:29932/1
+# gc = 0.536082, rbs = -1
+# self: -
+gene_1 1 97 + 0 10 5.61044 p - - -
+# C57KNANXX:5:1101:10000:3140/1
+# gc = 0.443299, rbs = -1
+# self: -
+gene_1 1 97 + 2 00 8.08669 b - - -
+# C57KNANXX:5:1101:10000:32777/1
+# gc = 0.484536, rbs = -1
+# self: -
+gene_1 1 97 - 0 00 6.4402 b - - -
+# C57KNANXX:5:1101:10000:33905/1
+# gc = 0.216495, rbs = -1
+# self: -
+gene_1 34 97 - 1 01 5.37246 b - - -
+# C57KNANXX:5:1101:10000:34777/1
+# gc = 0.195876, rbs = -1
+# self: -
+# C57KNANXX:5:1101:10000:46059/1
+# gc = 0.257732, rbs = -1
+# self: -
+# C57KNANXX:5:1101:10000:47265/1
+# gc = 0.564516, rbs = -1
+# self: -
+# C57KNANXX:5:1101:10000:54860/1
+# gc = 0.257732, rbs = -1
+# self: -
+gene_1 1 97 + 2 00 10.6181 b - - -
+# C57KNANXX:5:1101:10000:58282/1
+# gc = 0.515464, rbs = -1
+# self: -
+gene_1 1 97 + 0 00 3.85699 b - - -
+# C57KNANXX:5:1101:10000:60887/1
+# gc = 0.56701, rbs = -1
+# self: -
+gene_1 1 97 - 1 00 2.81286 p - - -
+# C57KNANXX:5:1101:10000:61940/1
+# gc = 0.134021, rbs = -1
+# self: -
+gene_1 1 70 + 1 01 4.82132 p - - -
+# C57KNANXX:5:1101:10000:68440/1
+# gc = 0.453608, rbs = -1
+# self: -
+gene_1 1 97 + 2 00 5.75404 a - - -
+# C57KNANXX:5:1101:10000:73724/1
+# gc = 0.237113, rbs = -1
+# self: -
+gene_1 1 97 + 1 00 19.4442 b - - -
+# C57KNANXX:5:1101:10000:73874/1
+# gc = 0.556701, rbs = -1
+# self: -
+gene_1 1 97 + 1 00 4.20317 b - - -
+# C57KNANXX:5:1101:10000:76185/1
+# gc = 0.371134, rbs = -1
+# self: -
+gene_1 1 97 - 0 00 5.18638 a - - -
+# C57KNANXX:5:1101:10000:76393/1
+# gc = 0.28866, rbs = -1
+# self: -
+gene_1 1 71 - 0 10 17.9709 a - - -
+# C57KNANXX:5:1101:10000:79229/1
+# gc = 0.412371, rbs = -1
+# self: -
+gene_1 1 97 + 1 00 7.73277 a - - -
+# C57KNANXX:5:1101:10000:84210/1
+# gc = 0.257732, rbs = -1
+# self: -
+gene_1 1 97 - 0 00 13.1057 p - - -
+# C57KNANXX:5:1101:10000:85460/1
+# gc = 0.329897, rbs = -1
+# self: -
+# C57KNANXX:5:1101:10000:86801/1
+# gc = 0.525773, rbs = -1
+# self: -
+gene_1 1 97 - 1 00 4.65375 b - - -
+# C57KNANXX:5:1101:10000:88288/1
+# gc = 0.329897, rbs = -1
+# self: -
+gene_1 1 97 + 0 00 15.2122 p - - -
+# C57KNANXX:5:1101:10000:90246/1
+# gc = 0.278351, rbs = -1
+# self: -
+# C57KNANXX:5:1101:10000:91317/1
+# gc = 0.268041, rbs = -1
+# self: -
+gene_1 1 97 - 2 00 13.3198 b - - -
+# C57KNANXX:5:1101:10000:92875/1
+# gc = 0.309278, rbs = -1
+# self: -
+gene_1 1 97 - 0 10 5.30784 b - - -
+# C57KNANXX:5:1101:10000:9540/1
+# gc = 0.453608, rbs = -1
+# self: -
+# C57KNANXX:5:1101:10000:99203/1
+# gc = 0.381443, rbs = -1
+# self: -
+gene_1 29 97 + 0 10 2.77044 b 20 25 -1.94891
+# C57KNANXX:5:1101:10000:99902/1
+# gc = 0.28866, rbs = -1
+# self: -
+gene_1 1 97 - 2 00 8.76308 a - - -
+# C57KNANXX:5:1101:10001:100058/1
+# gc = 0.22619, rbs = -1
+# self: -
+gene_1 3 84 + 0 10 5.19281 b - - -
+# C57KNANXX:5:1101:10001:101410/1
+# gc = 0.28866, rbs = -1
+# self: -
+gene_1 1 97 + 0 00 22.2262 p - - -
+# C57KNANXX:5:1101:10001:15683/1
+# gc = 0.381443, rbs = -1
+# self: -
+gene_1 1 97 + 1 00 10.6388 b - - -
+# C57KNANXX:5:1101:10001:17691/1
+# gc = 0.381443, rbs = -1
+# self: -
+gene_1 1 97 + 1 00 6.29703 a - - -
+# C57KNANXX:5:1101:10001:17914/1
+# gc = 0.42268, rbs = -1
+# self: -
+gene_1 1 97 + 1 00 6.38445 p - - -
+# C57KNANXX:5:1101:10001:18762/1
+# gc = 0.43299, rbs = -1
+# self: -
+gene_1 1 97 - 2 00 3.80632 b - - -
+# C57KNANXX:5:1101:10001:19988/1
+# gc = 0.350515, rbs = -1
+# self: -
+gene_1 1 97 - 0 00 14.6087 a - - -
+# C57KNANXX:5:1101:10001:20542/1
+# gc = 0.43299, rbs = -1
+# self: -
+# C57KNANXX:5:1101:10001:25583/1
+# gc = 0.546392, rbs = -1
+# self: -
+gene_1 1 97 - 0 00 6.45153 b - - -
+# C57KNANXX:5:1101:10001:26391/1
+# gc = 0.463918, rbs = -1
+# self: -
+gene_1 1 97 - 2 00 9.77924 p - - -
b
diff -r 000000000000 -r cf8eee8343fb test-data/small.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/small.fq Thu Oct 13 08:38:04 2016 -0400
b
b'@@ -0,0 +1,200 @@\n+@C57KNANXX:5:1101:10000:12567/1\n+TCTTGCCTAAGTCAGTGCCCGCCTTTACGGCAGCACGGAGCATATCGCCCGTAGAAATCTGGGGGATAGATCGGAAGAGCGGTTCAGCAGGAAT\n++\n+CCGGGGGGGDGGG@GGGGGGGGGGGGEGGGGGGGGGFGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGFGGG\n+@C57KNANXX:5:1101:10000:19758/1\n+CTAAACTTCTTTACAAAACTTAGGTTCCATTCTTAAAAACACCAGCATATAATGGCTACAGACTCCCTGTGGTGCTCGACATTCGATAACGTCCTTG\n++\n+CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10000:23019/1\n+CAACCACACACTTTTAACGGTGAAATTATTACAAGAAAAGGGTTTTAATGTCGTTATTCTTTTTTCTGGGAAGGAGCATAAAACGACAGAAGAAATT\n++\n+CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10000:24094/1\n+GGCTATTTTTGGTGCAGACTCAGAGAAAGAATTGTTTTTTGGCAAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGTGCAGTGAATCTCGTA\n++\n+CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=;DGGGG\n+@C57KNANXX:5:1101:10000:25279/1\n+AATCTTATTTGGTCCTCCTGGAGCAGGTAAAGGAACACAAGCTCAACACATTGTAAAAGATTATGACTATTTTCAAATATCAACTGGCGATATGTTG\n++\n+CCGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGEGGGGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10000:28635/1\n+AATTAAAGTCATCAGTACCAAAAGATTTTGAATCTACAAGCTATAAAGATTCAAAATCTGATACCCTACATTATTGGTTTAAACCTTTCGTAGAAAA\n++\n+CCFFGGGGGGGGFFGGGGGGGGGGG=FGGGGGGGGGGGCGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGEGGGGGGGGBGGFEGGGGGGGG\n+@C57KNANXX:5:1101:10000:29175/1\n+ACAAATATAAGAATTTAATTGTAATATAAATATAAATAATGTATATTTGTAATATGGAAAATTTTAAAGAATTCTTAGAGGAGTTAGAAGATAATAA\n++\n+CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGDGGGGEGCGGGGGGGG\n+@C57KNANXX:5:1101:10000:29932/1\n+GTGATTGGGGATTACACCGAAGAACCGCTGGCTGGATACGACATCAGGCTCACAGGCCAAGTGCCTTATTTGCCGTGGGCAAAAATCAAAGGCACCC\n++\n+CCGFGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEFGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10000:3140/1\n+CTACACTCACGGTTGTTGTTGGAGGAGATACGGTAGATACAGCTATT
GGAGGTACTTACTCCGTAACTTATAATGTTAGCGATGTCTCTGGCAATGC\n++\n+CCGGCGGGGGFGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGCGGG\n+@C57KNANXX:5:1101:10000:32777/1\n+CAAATACCCCCGCGGGTCCTAGAACCAGTATTTCGAAAGCACGGGTATAGCGTTTATGTTCACCGGCAAAGCGTATTTTTACCTGACGAATCGCTTG\n++\n+CCGGGGGGGGGGGGGGGGGGGGGGGGGGG>FGGFGGGGGGGGGBACDGGGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGB\n+@C57KNANXX:5:1101:10000:33905/1\n+GTTAGAAAGTAAACCAAACATAGGAACAACTTTTTATTTTACAATTAAAAAATAAATAATATTAGTAGGATATATTTCTAATACTCTAGTAACAAGC\n++\n+CCGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGEGGGGFGGGGGGD\n+@C57KNANXX:5:1101:10000:34777/1\n+ATATTAAATCATTAAGATATTCGTCGGAGCTAAAAAAATATAATTGACAAATATTAGTTAAGTAAATTTTCTTGCTCATTTTAAAAAGTTAAACTAA\n++\n+CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10000:46059/1\n+CTTTTTAATTAAGATTCTGATTTCGCAAACAAGCTGAAAATTCAGATTTGATTTTAATTCAGTATTTGACTGATAACTCAGATTTCTATTTAGATTC\n++\n+CCGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGGE>FFF\n+@C57KNANXX:5:1101:10000:47265/1\n+TTATCTAGAGTGCCCCCGGTATGGCCTAAGCCACGGCCAGAAATCATCGGCACATATGCCCC\n++\n+CCGGGGGGGGGGGGGGFG<EGGGEGGGGGGGGGGDGGGGGGGFGGGGGGGGGGEGGGGG>GG\n+@C57KNANXX:5:1101:10000:48824/1\n+AGATACATCATCTCTTTTTTTTCCACAGAATGAACAACTTGAATTATTTTTTTTA\n++\n+CBGCDFGGGGGFGFGGGGGGGGCGEEGGGGGGGGGGGEGGGEGGGFGGGGGGGG@\n+@C57KNANXX:5:1101:10000:54860/1\n+CTATTTTTACTTTTTCGGTTTTTAGTGACGTATCAAAAGATTTAATGCAACTTAGAATGATTAAATCTGCTGAAGAAATAGAAATCATTAAAAATGG\n++\n+CCGGGFGGGGGGGGGGGGBGGGGGGGG>GGGGGGGFGGEGGGGGGGGGGGGGGFEGGGG>GG@GGGGGGGCDGFGGGEGGGFGGGGFGGGGEGGGGG\n+@C57KNANXX:5:1101:10000:58282/1\n+AGGCAAGAGACGATTGCCCTGGAAAACGAAATCGAGTTACTCCAGGCTCAAATTGAACAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGTG\n++\n+CCGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10000:60158/1\n+TATGACAGAGGCAGTCCCAAAACCAAGAACGGTCCTGAAGCTGGTGT\n++\n+BBGGG
GGGGGDGGGGGGGEGGGGGGBGBEFCEFGGGGGGGGGGGC@<\n+@C57KNANXX:5:1101:10000:60887'..b'GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGG\n+@C57KNANXX:5:1101:10000:88288/1\n+ATTATCTCAGGCGATTTGACTATAACAGGTGACGCCACAGAACTTCAAACAACTAATACTGCAATTACAGATAATGTTATTGTATTAAACAAAGAGA\n++\n+BBGGGGGGGGGFGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGG>FGGGGGGGGGGGGGGGGGGGEGGGGG>FBDGGGGGGGGGGGGGGGEG>FG\n+@C57KNANXX:5:1101:10000:90246/1\n+CTACCCTAAATTAAAGAGCGGCTTGTAAATAAAAAATCCCTACTAAAAGCAAGGATTGTATCTTAAAGGGTTATTATTTTATTAAAAACAAACTTCA\n++\n+CCGGGGGGGCEGEGGGGGGGGGGGGGEGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGCGEG>GCFGGG@FGDGGGGGGG\n+@C57KNANXX:5:1101:10000:91317/1\n+AAAATTTCACCTAAATTATCAAAAGTTCTTTTTTCAAAAACATCTGCCACCTTTAATAATAAAGCAGATAATTCTTCATCTATTTGCTTCAGGTCAT\n++\n+CCEGEGGGFGGEGEGGDCGEGGGEEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGFGEGGEFGGGGGGGGGGGGGGFGGG\n+@C57KNANXX:5:1101:10000:92875/1\n+CAATTTTCTTTTGAACTACCTCAGGTCTCATTTGAGGAAAAAACAATACTTCTTGGATGGATGAATTGTTCGTTAAGAACATAATTAATCTATCCAT\n++\n+CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGFGCDFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10000:9540/1\n+AACTGATATTCTGATTAAACCTTCGGTAATACCAATGACTGCACGGCGATCTTTTGGAACACCTGAATGTGTCATTGCTGCTGGGTGCTGAGATCGG\n++\n+CCDGGFGGGGGGGGGGGGGGGGGGGFGGGEDGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10000:99203/1\n+CGTCGATGAGTAGAGTTGTGGGTAAAAAGTGTTACGTTACGATGAAGGTATAGATAGTTTAATTTCGATTGGACAAAGTTGCTACGGTAAATCCAGA\n++\n+CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10000:99902/1\n+ATTTTATAGGGTAATTTCCCATAAGTACTTTGAACGTCAAAACTTCCTTCTCCTGTAAGAACTAGATCCATTTTTTTTATAATGTTTTTTAGGTTTG\n++\n+CBF>FEGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCFGGGGGGGGGEGGGGGGGGEGGGGGGGGGGGGGGGBF>GFG8\n+@C57KNANXX:5:1101:10001:100058/1\n+AAGTGTTAACTAAAAATTTTAATAAAAGAACAGTTAATCAAGTTGTTGATCCAAAAATTTTAAAATTTTGGAGAAAGCAACTGT\n++\n+CCGGGGGGGGGGGG>GGGCGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGEDEFGGCFDGGGEGEFGGGFGGGGGGGGG
GG\n+@C57KNANXX:5:1101:10001:101410/1\n+GGTGAAAAGAAATTTGATGAAATTACTTTAGATGTTTATTATAAAAAAGGGAAAGAATCCTCTAAATTATATGATGATGCACATGAGATCGGAAGAG\n++\n+BBGEDGGCGEGGGGGGGGGGGGGGFGGGGGGGGGGEGGEGGGGGGGGDGBBGGGGGGGGGGGGGGGGGGGGGGGCGC@GGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10001:15683/1\n+ATTAGAACTAGAATTAATTCAGGCGGCCTCAACAGCTGCAAAAGCGAAGGTTAATAACGCCGTTGTAGATTTTCAATTTTTTGATCGACTCGATCAA\n++\n+CCGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10001:17691/1\n+AATTGGTATAAAAGCCTGTTTTGCAGGGAAGGACATTGTAGTTCCCGGTGTTGCCAATAAACTTACAGCAATAATTACGCACTTCTTTTCAAAGACA\n++\n+CCGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFBGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGDGGGGG\n+@C57KNANXX:5:1101:10001:17914/1\n+TAGCCTTGAAATTAAGGATTTTACTCCTAAGCGTCAATCTTACTGGGCGACTTTTAGAGACCAACCCCAAGCATTCGAAATTCTGGTGCTCAACAAG\n++\n+CCEGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10001:18762/1\n+AAAGCCGTTGCAGAAGCTGTAATATAAGCCCCAATACCAAAAAATGCCTGTTGTCCAAAAGAGACCTCCCCAACCACTAGCACCAAATAAGCAGAAA\n++\n+CBFGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGG@BG>GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10001:19988/1\n+TTCTCACCATGTCAAATTGAGCTTCTGGATAGTTTATAACACCAGATGTGTTTAAGGTATGAGCTATTGGAGTGTAGTCTAAATACTCATAAACTTC\n++\n+BBGGGGGGGGGGFGFGGGGGGGCD>FGGGGGGEEGFGGDGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGFGGGGFGGGGGGGGG\n+@C57KNANXX:5:1101:10001:20542/1\n+GCAATAGTCATCTCATCTATCAATACAGAACCAATGTATTGAGAACCATTTTTGTAAGAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGTG\n++\n+CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10001:25583/1\n+CGTAGCGATCAACGATAAAATCCACCGCCGCTTCGTCACTGTCCCCAGCAACCAAACGTTCGCGCAACAGAATACGCAAATCACGCGCCAATGACGC\n++\n+CCGGGGD@GGGGGGBGGBGGGGGGGGGGGGGGGGGGGGFDGGGEGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG\n+@C57KNANXX:5:1101:10001:26391/1\n+AGAACAATACCTTCCGCTCCTAATCCCATAACGTCATCAGCGGTTAATGCATCATT
CCGATGAACAACAACTTCTGTCCCCAGCTCGCCAATGTAGT\n++\n+CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG<GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFFGGGG\n'
b
diff -r 000000000000 -r cf8eee8343fb test-data/testdb_metagene.metapeptides.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/testdb_metagene.metapeptides.fasta Thu Oct 13 08:38:04 2016 -0400
b
@@ -0,0 +1,48 @@
+>TTLIERVVDECSLKK
+TTLIERVVDECSLKK
+>DLRILLRERLVAGDSDEAAVDFIVDR
+DLRILLRERLVAGDSDEAAVDFIVDR
+>VLRYDEGIDSLISIGQSCYGK
+VLRYDEGIDSLISIGQSCYGK
+>QIDEELSALLLKVADVFEKR
+QIDEELSALLLKVADVFEKR
+>LLQEKGFNVVILFSGKEHK
+LLQEKGFNVVILFSGKEHK
+>SIPQISTGDMLRAAVK
+SIPQISTGDMLRAAVK
+>SPEEIRELSKFKKPIFYDGKFFDIK
+SPEEIRELSKFKKPIFYDGKFFDIK
+>LSAKQVKQILMDSGLSVK
+LSAKQVKQILMDSGLSVK
+>LIMFLTNNSSIQEVLFFPQMRPEVVQKK
+LIMFLTNNSSIQEVLFFPQMRPEVVQKK
+>ACFAGKDIVVPGVANKLTAIITHFFSK
+ACFAGKDIVVPGVANKLTAIITHFFSK
+>ELFFGKIGRAVQQECRDR
+ELFFGKIGRAVQQECRDR
+>SSDLDQAAQQFIVPRKGVLELAR
+SSDLDQAAQQFIVPRKGVLELAR
+>VNGVVITVSPPTITLKVVISKPR
+VNGVVITVSPPTITLKVVISKPR
+>DFTPKRQSYWATFR
+DFTPKRQSYWATFR
+>AKVNNAVVDFQFFDR
+AKVNNAVVDFQFFDR
+>LTGQVPYLPWAKIK
+LTGQVPYLPWAKIK
+>LGLGPDDVTSDNPKLVYGR
+LGLGPDDVTSDNPKLVYGR
+>NIIKKMDLVLTGEGSFDVQSTYGK
+NIIKKMDLVLTGEGSFDVQSTYGK
+>IIELEHQILKVEGSIMELEKTIVDK
+IIELEHQILKVEGSIMELEKTIVDK
+>NANKNVIFFMILILYKNNK
+NANKNVIFFMILILYKNNK
+>VLEIYPTNIIYFLIVK
+VLEIYPTNIIYFLIVK
+>KFDEITLDVYYKKGKESSK
+KFDEITLDVYYKKGKESSK
+>DVIECRAPQGVCSHYMLVFLRMEPKFCKEV
+DVIECRAPQGVCSHYMLVFLRMEPKFCKEV
+>QETIALENEIELLQAQIEQIGRAVQQECR
+QETIALENEIELLQAQIEQIGRAVQQECR
b
diff -r 000000000000 -r cf8eee8343fb test-data/testdb_metagene.metapeptides.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/testdb_metagene.metapeptides.tsv Thu Oct 13 08:38:04 2016 -0400
b
@@ -0,0 +1,25 @@
+sequence length min_qualscore partial_orf_length metagene_score read_ids
+TTLIERVVDECSLKK 15 36 96 7.73277 C57KNANXX:5:1101:10000:79229/1
+DLRILLRERLVAGDSDEAAVDFIVDR 26 31 96 6.45153 C57KNANXX:5:1101:10001:25583/1
+VLRYDEGIDSLISIGQSCYGK 21 37 69 2.77044 C57KNANXX:5:1101:10000:99203/1
+QIDEELSALLLKVADVFEKR 20 36 93 13.3198 C57KNANXX:5:1101:10000:91317/1
+LLQEKGFNVVILFSGKEHK 19 38 96 4.26594 C57KNANXX:5:1101:10000:23019/1
+SIPQISTGDMLRAAVK 16 36 93 3.85469 C57KNANXX:5:1101:10000:12567/1
+SPEEIRELSKFKKPIFYDGKFFDIK 25 38 96 13.1057 C57KNANXX:5:1101:10000:84210/1
+LSAKQVKQILMDSGLSVK 18 38 96 5.18638 C57KNANXX:5:1101:10000:76185/1
+LIMFLTNNSSIQEVLFFPQMRPEVVQKK 28 34 96 5.30784 C57KNANXX:5:1101:10000:92875/1
+ACFAGKDIVVPGVANKLTAIITHFFSK 27 33 96 6.29703 C57KNANXX:5:1101:10001:17691/1
+ELFFGKIGRAVQQECRDR 18 38 96 6.34832 C57KNANXX:5:1101:10000:24094/1
+SSDLDQAAQQFIVPRKGVLELAR 23 35 96 4.65375 C57KNANXX:5:1101:10000:86801/1
+VNGVVITVSPPTITLKVVISKPR 23 34 93 5.75404 C57KNANXX:5:1101:10000:68440/1
+DFTPKRQSYWATFR 14 38 96 6.38445 C57KNANXX:5:1101:10001:17914/1
+AKVNNAVVDFQFFDR 15 36 96 10.6388 C57KNANXX:5:1101:10001:15683/1
+LTGQVPYLPWAKIK 14 36 96 5.61044 C57KNANXX:5:1101:10000:29932/1
+LGLGPDDVTSDNPKLVYGR 19 32 96 2.81286 C57KNANXX:5:1101:10000:60887/1
+NIIKKMDLVLTGEGSFDVQSTYGK 24 34 93 8.76308 C57KNANXX:5:1101:10000:99902/1
+IIELEHQILKVEGSIMELEKTIVDK 25 36 96 19.4442 C57KNANXX:5:1101:10000:73724/1
+NANKNVIFFMILILYKNNK 19 38 66 4.82132 C57KNANXX:5:1101:10000:61940/1
+VLEIYPTNIIYFLIVK 16 36 60 5.37246 C57KNANXX:5:1101:10000:33905/1
+KFDEITLDVYYKKGKESSK 19 33 96 22.2262 C57KNANXX:5:1101:10001:101410/1
+DVIECRAPQGVCSHYMLVFLRMEPKFCKEV 30 38 93 0.240116 C57KNANXX:5:1101:10000:19758/1
+QETIALENEIELLQAQIEQIGRAVQQECR 29 35 96 3.85699 C57KNANXX:5:1101:10000:58282/1
b
diff -r 000000000000 -r cf8eee8343fb test-data/testdb_nometagene.metapeptides.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/testdb_nometagene.metapeptides.tsv Thu Oct 13 08:38:04 2016 -0400
b
@@ -0,0 +1,47 @@
+sequence length min_qualscore partial_orf_length metagene_score read_ids
+TTLIERVVDECSLKK 15 36 96 -1.0 C57KNANXX:5:1101:10000:79229/1
+DLRILLRERLVAGDSDEAAVDFIVDR 26 31 96 -1.0 C57KNANXX:5:1101:10001:25583/1
+KNNSSCSFCGKKR 13 34 54 -1.0 C57KNANXX:5:1101:10000:48824/1
+AATPVGATTIFLSEHSSTTLSIR 23 36 96 -1.0 C57KNANXX:5:1101:10000:79229/1
+RITTLKPFSCNNFTVK 16 38 96 -1.0 C57KNANXX:5:1101:10000:23019/1
+FHLNYQKFFFQKHLPPLIIK 20 34 96 -1.0 C57KNANXX:5:1101:10000:91317/1
+HASIHQFGIVGCNIIWAKPK 20 32 96 -1.0 C57KNANXX:5:1101:10000:60887/1
+SNFVQSKLNYLYLHR 15 37 93 -1.0 C57KNANXX:5:1101:10000:99203/1
+NLSYQSNTELKSNLNFQLVCEIRILIK 27 34 96 -1.0 C57KNANXX:5:1101:10000:46059/1
+LLQEKGFNVVILFSGKEHK 19 38 96 -1.0 C57KNANXX:5:1101:10000:23019/1
+SIPQISTGDMLRAAVK 16 36 93 -1.0 C57KNANXX:5:1101:10000:12567/1
+SVQFEPGVTRFR 12 37 96 -1.0 C57KNANXX:5:1101:10000:58282/1
+DFTPKRQSYWATFR 14 38 96 -1.0 C57KNANXX:5:1101:10001:17914/1
+HWRVICVFCCANVWLLGTVTKRR 23 35 96 -1.0 C57KNANXX:5:1101:10001:25583/1
+QHPFLQTNHCPLVFVSPVLQIILGSMR 27 38 96 -1.0 C57KNANXX:5:1101:10000:76185/1
+LSAKQVKQILMDSGLSVK 18 38 96 -1.0 C57KNANXX:5:1101:10000:76185/1
+SSDLILPKIICKTGETNTNGQWFVCK 26 38 93 -1.0 C57KNANXX:5:1101:10000:76185/1
+DIANIISYGVSTSNSCIYRISSNNNR 26 34 93 -1.0 C57KNANXX:5:1101:10000:3140/1
+LIMFLTNNSSIQEVLFFPQMRPEVVQKK 28 34 96 -1.0 C57KNANXX:5:1101:10000:92875/1
+QIRHLACEPDVVSSQR 16 36 96 -1.0 C57KNANXX:5:1101:10000:29932/1
+YHNFEGYRWR 10 34 93 -1.0 C57KNANXX:5:1101:10000:68440/1
+RTAGWIRHQAHRPSALFAVGKNQR 24 36 93 -1.0 C57KNANXX:5:1101:10000:29932/1
+ACFAGKDIVVPGVANKLTAIITHFFSK 27 33 96 -1.0 C57KNANXX:5:1101:10001:17691/1
+ELFFGKIGRAVQQECRDR 18 38 96 -1.0 C57KNANXX:5:1101:10000:24094/1
+SPEEIRELSKFKKPIFYDGKFFDIK 25 38 96 -1.0 C57KNANXX:5:1101:10000:84210/1
+SSDLDQAAQQFIVPRKGVLELAR 23 35 96 -1.0 C57KNANXX:5:1101:10000:86801/1
+VNGVVITVSPPTITLKVVISKPR 23 34 93 -1.0 C57KNANXX:5:1101:10000:68440/1
+ISPKLSKVLFSKTSATFNNKADNSSSICFR 30 34 96 -1.0 C57KNANXX:5:1101:10000:91317/1
+LRERIVFWQDRK 12 38 96 -1.0 C57KNANXX:5:1101:10000:24094/1
+KCVIIAVSLLATPGTTMSFPAK 22 33 96 -1.0 C57KNANXX:5:1101:10001:17691/1
+NSLKFSILQIYIIYIYITIK 20 37 96 -1.0 C57KNANXX:5:1101:10000:29175/1
+RASSNTPLRGTINCCAAWSRSEERFSR 27 35 96 -1.0 C57KNANXX:5:1101:10000:86801/1
+FAPYVTRSMLLRAYRRRDRRHVER 24 31 96 -1.0 C57KNANXX:5:1101:10000:73874/1
+AKVNNAVVDFQFFDR 15 36 96 -1.0 C57KNANXX:5:1101:10001:15683/1
+LTGQVPYLPWAKIK 14 36 96 -1.0 C57KNANXX:5:1101:10000:29932/1
+LGLGPDDVTSDNPKLVYGR 19 32 96 -1.0 C57KNANXX:5:1101:10000:60887/1
+SQHPAAMTHSGVPKDRRAVIGITEGLIR 28 35 96 -1.0 C57KNANXX:5:1101:10000:9540/1
+NIIKKMDLVLTGEGSFDVQSTYGK 24 34 93 -1.0 C57KNANXX:5:1101:10000:99902/1
+LRILLLSVNLTGRLLETNPKHSK 23 36 93 -1.0 C57KNANXX:5:1101:10001:17914/1
+SVPAFTAARSISPVEIWGIDRK 22 31 90 -1.0 C57KNANXX:5:1101:10000:12567/1
+KIIQVVHSVEKK 12 34 51 -1.0 C57KNANXX:5:1101:10000:48824/1
+QIDEELSALLLKVADVFEKR 20 36 93 -1.0 C57KNANXX:5:1101:10000:91317/1
+IIELEHQILKVEGSIMELEKTIVDK 25 36 96 -1.0 C57KNANXX:5:1101:10000:73724/1
+CSKRSPCSHWYYRR 14 35 96 -1.0 C57KNANXX:5:1101:10000:9540/1
+KFDEITLDVYYKKGKESSK 19 33 96 -1.0 C57KNANXX:5:1101:10001:101410/1
+QETIALENEIELLQAQIEQIGRAVQQECR 29 35 96 -1.0 C57KNANXX:5:1101:10000:58282/1