Mercurial > repos > galaxyp > sixgill
changeset 0:cf8eee8343fb draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/sixgill commit 547a3bb05a08bc4eaed224b6864a82434e09289d-dirty
author | galaxyp |
---|---|
date | Thu, 13 Oct 2016 08:38:04 -0400 |
parents | |
children | |
files | macros.xml sixgill_build.xml sixgill_filter.xml sixgill_makefasta.xml sixgill_merge.xml test-data/metagene_nometagene_merged.metapeptides.tsv test-data/metagene_nometagene_merged.min2reads.metapeptides.tsv test-data/metagene_output.txt test-data/small.fq test-data/testdb_metagene.metapeptides.fasta test-data/testdb_metagene.metapeptides.tsv test-data/testdb_nometagene.metapeptides.tsv |
diffstat | 12 files changed, 1003 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Oct 13 08:38:04 2016 -0400 @@ -0,0 +1,20 @@ +<macros> <!-- Shared macros imported by the sixgill tool wrappers in this changeset. --> + <token name="@VERSION@">0.2.4</token> <!-- Single source for the wrapped sixgill version; each tool version is "@VERSION@.0". --> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@VERSION@">sixgill</requirement> <!-- Reuses the token so the package version and the tool version cannot drift apart. --> + <yield/> + </requirements> + </xml> + <xml name="stdio"> + <stdio> + <exit_code range="1:" /> <!-- Any exit code of 1 or above is matched by this rule. --> + </stdio> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1021/acs.jproteome.6b00239</citation> + <yield /> + </citations> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sixgill_build.xml Thu Oct 13 08:38:04 2016 -0400 @@ -0,0 +1,157 @@ +<tool id="sixgill_build" name="sixgill build" version="@VERSION@.0"> <!-- Wrapper for sixgill_build: builds a metapeptide database (and optionally a fasta) from fastq reads. --> + <description>a metapeptide database from metagenome fastq files</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <version_command>sixgill_build --version</version_command> <!-- In the command below every optional integer is emitted only when str() of it is non-empty, so a value of 0 is still passed through. --> + <command><![CDATA[ + sixgill_build + --nogzipout + --out=metapeptides_db_output.tsv + #if 'fa' in str($output_choice): + --outfasta=metapeptides_fa_output.fa + #end if + #if str($sec_filter.minlength): + --minlength=$sec_filter.minlength + #end if + #if str($sec_filter.minqualscore): + --minqualscore=$sec_filter.minqualscore + #end if + #if str($sec_filter.minorflength): + --minorflength=$sec_filter.minorflength + #end if + #if str($sec_filter.minlongesttryppeplen): + --minlongesttryppeplen=$sec_filter.minlongesttryppeplen + #end if + #if str($sec_filter.minreadcount): + --minreadcount=$sec_filter.minreadcount + #end if + #if str($sec_filter.maxreads): + --maxreads=$sec_filter.maxreads + #end if + #if $sec_mg.metagenefile: + --metagenefile="$sec_mg.metagenefile" + #if str($sec_mg.minmetagenescore): + --minmetagenescore=$sec_mg.minmetagenescore + #end if + #end if + #for $fastqfile in $fastqfiles: + "$fastqfile" + #end for + ]]></command> + <inputs> + <param name="fastqfiles" type="data" format="fastq" multiple="true" optional="false" label="metagenomic fastq files" + help=""/> + <section name="sec_filter" expanded="false" title="filter"> + <param name="minlength" type="integer" value="10" min="0" optional="true" label="minlength" + help="min AA length of a metapeptide"/> + <param name="minqualscore" type="integer" value="30" min="0" optional="true" label="minqualscore" + help="min base-call phred score across any NT in a metapeptide"/> + <param name="minorflength" type="integer" value="40" min="0" optional="true" 
label="minorflength" + help="min length of ORF-portion"/> + <param name="minlongesttryppeplen" type="integer" value="7" min="0" optional="true" label="minlongesttryppeplen" + help="minimum length of the longest tryptic peptide"/> + <param name="minreadcount" type="integer" value="2" min="1" optional="true" label="minreadcount" + help="minimum read count"/> + <param name="maxreads" type="integer" value="" optional="true" label="maxreads" + help="stop early if we hit this many reads"/> + </section> + <section name="sec_mg" expanded="false" title="MetaGene Annotator"> <!-- minmetagenescore is only passed when a metagenefile is supplied (see the nested #if in the command). --> + <param name="metagenefile" type="data" format="txt" optional="true" label="metagenefile" + help="MetaGene Annotator output file. Records must be in same linear order as reads in fastqfiles"/> + <param name="minmetagenescore" type="integer" value="" min="-1" optional="true" label="minmetagenescore" + help="minimum MetaGene score"/> + </section> + <param name="output_choice" type="select" multiple="true" optional="false" label="select outputs"> + <option value="db" selected="true">metapeptide database</option> + <option value="fa">metapeptide protein fasta</option> + </param> + </inputs> + <outputs> <!-- Both files are collected from the job working directory; the filters decide which ones become datasets. --> + <data name="output_db" format="tabular" label="${tool.name} on ${on_string}: metapeptides.tsv" from_work_dir="metapeptides_db_output.tsv"> + <filter>'db' in output_choice</filter> + <actions> + <action name="comment_lines" type="metadata" default="1" /> + <action name="column_names" type="metadata" default="sequence,length,min_qualscore,partial_orf_length,metagene_score,read_ids" /> + </actions> + </data> + <data name="output_fa" format="fasta" label="${tool.name} on ${on_string}: metapeptides.fa" from_work_dir="metapeptides_fa_output.fa"> + <filter>'fa' in output_choice</filter> + </data> + </outputs> + <tests> + <test> <!-- Raw-read mode with both outputs requested. --> + <param name="fastqfiles" ftype="fastq" value="small.fq"/> + <param name="minreadcount" value="1"/> + <param name="output_choice" value="db,fa"/> + <output name="output_db"> + 
<assert_contents> + <has_text text="DLRILLRERLVAGDSDEAAVDFIVDR" /> + </assert_contents> + </output> + <output name="output_fa"> + <assert_contents> + <has_text text="DLRILLRERLVAGDSDEAAVDFIVDR" /> + </assert_contents> + </output> + </test> + <test> <!-- MetaGene mode; the annotation file is uploaded as txt to match the metagenefile param format. --> + <param name="fastqfiles" ftype="fastq" value="small.fq"/> + <param name="minreadcount" value="1"/> + <param name="metagenefile" ftype="txt" value="metagene_output.txt"/> + <param name="output_choice" value="db"/> + <output name="output_db"> + <assert_contents> + <has_text text="DLRILLRERLVAGDSDEAAVDFIVDR" /> + </assert_contents> + </output> + </test> + + </tests> + <help><![CDATA[ + usage: sixgill_build [-h] [--minlength MINLENGTH] + [--minqualscore MINQUALSCORE] + [--metagenefile METAGENEFILE] + [--minmetagenescore MINMETAGENESCORE] + [--minorflength MINORFLENGTH] + [--minlongesttryppeplen MINLONGESTTRYPPEPLEN] + [--maxreads MAXREADS] [--minreadcount MINREADCOUNT] --out + OUT [--outfasta OUTFASTA] [--debug] + fastqfiles [fastqfiles ...] + +Read in one or more fastq files. For each read, do a 6-frame translation and +add all metapeptides that pass the specified filtering criteria. If +--metagenefile is specified, start with the output of MetaGene Annotator +instead of raw reads. + +positional arguments: + fastqfiles input fastq file(s), bgzipped + +optional arguments: + -h, --help show this help message and exit + --minlength MINLENGTH + min AA length of a metapeptide + --minqualscore MINQUALSCORE + min base-call phred score across any NT in a + metapeptide + --metagenefile METAGENEFILE + input MetaGene Annotator output file. 
Records must be + in same linear order as reads in fastqfiles + --minmetagenescore MINMETAGENESCORE + minimum MetaGene score + --minorflength MINORFLENGTH + min length of ORF-portion + --minlongesttryppeplen MINLONGESTTRYPPEPLEN + minimum length of the longest tryptic peptide + --maxreads MAXREADS stop early if we hit this many reads + --minreadcount MINREADCOUNT + minimum read count + --out OUT Output metapeptide database file + --outfasta OUTFASTA Output metapeptide fasta database file + --debug Enable debug logging + + ]]></help> + <expand macro="citations" /> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sixgill_filter.xml Thu Oct 13 08:38:04 2016 -0400 @@ -0,0 +1,123 @@ +<tool id="sixgill_filter" name="sixgill filter" version="@VERSION@.0"> <!-- Wrapper for sixgill_filter: filters an existing metapeptide database and can also write a protein fasta. --> + <description>a metapeptide database</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <version_command>sixgill_filter --version</version_command> <!-- In the command below each optional integer is passed only when it is non-empty; minmetagenescore is now forwarded like the other filters. --> + <command><![CDATA[ + sixgill_filter + --nogzipout + --out=metapeptides_db_output.tsv + #if str($minorflength) != '': + --minorflength=$minorflength + #end if + #if str($minaaseqlength) != '': + --minaaseqlength=$minaaseqlength + #end if + #if str($minqualscore) != '': + --minqualscore=$minqualscore + #end if + #if str($minlongesttryppeplen) != '': + --minlongesttryppeplen=$minlongesttryppeplen + #end if + #if str($minreadcount) != '': + --minreadcount=$minreadcount + #end if + #if str($minmetagenescore) != '': + --minmetagenescore=$minmetagenescore + #end if + #if str($maxmetapeptides) != '': + --maxmetapeptides=$maxmetapeptides + #end if + "$metapeptide_db" + #if 'fa' in str($output_choice): + && sixgill_makefasta --type=aa --out="metapeptides_fa_output.fa" "metapeptides_db_output.tsv" + #end if + ]]></command> + <inputs> + <param name="metapeptide_db" type="data" format="tabular" label="metapeptide database" + help="Can be generated with sixgill_build"/> + <param name="minorflength" type="integer" value="40" min="0" optional="true" label="minorflength" + help="min length of ORF-portion"/> + <param name="minaaseqlength" type="integer" value="10" min="0" optional="true" label="minaaseqlength" + help="min AA sequence length"/> + <param name="minqualscore" type="integer" value="30" min="0" optional="true" label="minqualscore" + help="min base-call phred score across any NT in a metapeptide"/> + <param name="minlongesttryppeplen" type="integer" value="7" min="0" optional="true" label="minlongesttryppeplen" + help="minimum length of the longest tryptic peptide"/> + <param name="minreadcount" type="integer" value="2" min="1" optional="true" 
label="minreadcount" + help="minimum read count"/> + <param name="minmetagenescore" type="integer" value="" min="-1" optional="true" label="minmetagenescore" + help="minimum MetaGene score"/> + <param name="maxmetapeptides" type="integer" value="" optional="true" label="maxmetapeptides" + help="maximum number of metapeptides to write"/> + <param name="output_choice" type="select" multiple="true" optional="false" label="select outputs"> + <option value="db" selected="true">metapeptide database</option> + <option value="fa">metapeptide protein fasta</option> + </param> + </inputs> + <outputs> <!-- The fasta is produced by a chained sixgill_makefasta call on the filtered database (see the command). --> + <data name="output_db" format="tabular" label="${tool.name} on ${on_string}: metapeptides.tsv" from_work_dir="metapeptides_db_output.tsv"> + <filter>'db' in output_choice</filter> + <actions> + <action name="comment_lines" type="metadata" default="1" /> + <action name="column_names" type="metadata" default="sequence,length,min_qualscore,partial_orf_length,metagene_score,read_ids" /> + </actions> + </data> + <data name="output_fa" format="fasta" label="${tool.name} on ${on_string}: metapeptides.fa" from_work_dir="metapeptides_fa_output.fa"> + <filter>'fa' in output_choice</filter> + </data> + </outputs> + <tests> + <test> <!-- Expects a two-read, 26-residue peptide to survive and a one-read, 10-residue peptide to be removed. --> + <param name="metapeptide_db" ftype="tabular" value="metagene_nometagene_merged.metapeptides.tsv"/> + <param name="minreadcount" value="2"/> + <param name="minaaseqlength" value="12"/> + <param name="output_choice" value="db"/> + <output name="output_db"> + <assert_contents> + <has_text text="DLRILLRERLVAGDSDEAAVDFIVDR" /> + <not_has_text text="YHNFEGYRWR" /> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +usage: sixgill_filter [-h] --out OUT [--minorflength MINORFLENGTH] + [--minaaseqlength MINAASEQLENGTH] + [--minreadcount MINREADCOUNT] + [--minqualscore MINQUALSCORE] + [--minlongesttryppeplen MINLONGESTTRYPPEPLEN] + [--minmetagenescore MINMETAGENESCORE] + [--maxmetapeptides MAXMETAPEPTIDES] [--debug] + metapeptidefile + +Filter a 
metapeptide database. + +positional arguments: + metapeptidefile input metapeptide database file + +optional arguments: + -h, --help show this help message and exit + --out OUT output metapeptide database file + --minorflength MINORFLENGTH + minimum ORF length + --minaaseqlength MINAASEQLENGTH + minimum AA sequence length + --minreadcount MINREADCOUNT + minimum read count + --minqualscore MINQUALSCORE + minimum basecall quality + --minlongesttryppeplen MINLONGESTTRYPPEPLEN + minimum length of the longest tryptic peptide + --minmetagenescore MINMETAGENESCORE + Minimum MetaGene score (-1 for none) + --maxmetapeptides MAXMETAPEPTIDES + maximum number of metapeptides to write + --debug Enable debug logging + + ]]></help> + <expand macro="citations" /> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sixgill_makefasta.xml Thu Oct 13 08:38:04 2016 -0400 @@ -0,0 +1,76 @@ +<tool id="sixgill_makefasta" name="sixgill makefasta" version="@VERSION@.0"> <!-- Wrapper for sixgill_makefasta: writes a fasta file from a metapeptide database. --> + <description>from a metapeptide database</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <version_command>sixgill_makefasta --version</version_command> <!-- The cleavage and length options in the command are emitted only for the peptide fasta type. --> + <command><![CDATA[ + sixgill_makefasta + --out=metapeptides_fa_output.fa + --type="$fa_type.fasta_type" + #if $fa_type.fasta_type == 'peptide': + --missedcleavages=$fa_type.missedcleavages + --minpeptidelength=$fa_type.minpeptidelength + #end if + "$metapeptide_db" + ]]></command> + <inputs> + <param name="metapeptide_db" type="data" format="tabular" label="metapeptide database" + help="Can be generated with sixgill_build"/> + <conditional name="fa_type"> + <param name="fasta_type" type="select" label="fasta type"> + <help> peptide with specified missed cleavages requires holding + all peptides in the database in memory, as each is only written once + </help> + <option value="aa" selected="true">aa - amino acid</option> + <option value="peptide">peptide - with specified missed cleavages</option> + </param> + <when value="aa"/> + <when value="peptide"> + <param name="missedcleavages" type="integer" value="0" min="0" max="6" label="missedcleavages"/> + <param name="minpeptidelength" type="integer" value="7" min="1" label="minpeptidelength"/> + </when> + </conditional> + + </inputs> + <outputs> <!-- Label uses "metapeptides.fa", matching the fasta outputs of the build, filter and merge tools. --> + <data name="output_fa" format="fasta" label="${tool.name} on ${on_string}: metapeptides.fa" from_work_dir="metapeptides_fa_output.fa"/> + </outputs> + <tests> + <test> <!-- Only the default aa type is exercised here. --> + <param name="metapeptide_db" ftype="tabular" value="testdb_metagene.metapeptides.tsv"/> + <param name="fasta_type" value="aa"/> + <output name="output_fa"> + <assert_contents> + <has_text text="DLRILLRERLVAGDSDEAAVDFIVDR" /> + </assert_contents> + </output> + </test> + </tests> + 
<help><![CDATA[ +usage: sixgill_makefasta [-h] --out OUT --type {aa,peptide} + [--missedcleavages MISSEDCLEAVAGES] + [--minpeptidelength MINPEPTIDELENGTH] [--debug] + metapeptidedbfile + +Build a fasta database from a metapeptide database. Either nucleotide or amino +acid. + +positional arguments: + metapeptidedbfile input metapeptide database file + +optional arguments: + -h, --help show this help message and exit + --out OUT output file + --type {aa,peptide} database type + --missedcleavages MISSEDCLEAVAGES + missed cleavages (for type peptide only) + --minpeptidelength MINPEPTIDELENGTH + minimum peptide length (for type peptide only) + --debug Enable debug logging + + ]]></help> + <expand macro="citations" /> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sixgill_merge.xml Thu Oct 13 08:38:04 2016 -0400 @@ -0,0 +1,70 @@ +<tool id="sixgill_merge" name="sixgill merge" version="@VERSION@.0"> <!-- Wrapper for sixgill_merge: combines several metapeptide databases into one and can also write a protein fasta. --> + <description>metapeptide databases</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <version_command>sixgill_merge --version</version_command> <!-- The command appends every selected database as a quoted positional argument. --> + <command><![CDATA[ + sixgill_merge + --nogzipout + --out=metapeptides_db_output.tsv + #for $metapeptide_db in $metapeptide_dbs: + "$metapeptide_db" + #end for + #if 'fa' in str($output_choice): + && + sixgill_makefasta --type=aa --out="metapeptides_fa_output.fa" "metapeptides_db_output.tsv" + #end if + ]]></command> + <inputs> + <param name="metapeptide_dbs" type="data" format="tabular" multiple="true" optional="false" label="metapeptide databases" + help="Can be generated with sixgill_build"/> + <param name="output_choice" type="select" multiple="true" optional="false" label="select outputs"> + <option value="db" selected="true">metapeptide database</option> + <option value="fa">metapeptide protein fasta</option> + </param> + </inputs> + <outputs> <!-- The fasta comes from a chained sixgill_makefasta call on the merged database (see the command). --> + <data name="output_db" format="tabular" label="${tool.name} on ${on_string}: metapeptides.tsv" from_work_dir="metapeptides_db_output.tsv"> + <filter>'db' in output_choice</filter> + <actions> + <action name="comment_lines" type="metadata" default="1" /> + <action name="column_names" type="metadata" default="sequence,length,min_qualscore,partial_orf_length,metagene_score,read_ids" /> + </actions> + </data> + <data name="output_fa" format="fasta" label="${tool.name} on ${on_string}: metapeptides.fa" from_work_dir="metapeptides_fa_output.fa"> + <filter>'fa' in output_choice</filter> + </data> + </outputs> + <tests> + <test> <!-- Merges two test databases and expects both asserted peptides in the merged output. --> + <param name="metapeptide_dbs" ftype="tabular" + value="testdb_metagene.metapeptides.tsv,testdb_nometagene.metapeptides.tsv"/> + <output name="output_db"> + <assert_contents> + 
<has_text text="DLRILLRERLVAGDSDEAAVDFIVDR" /> + <has_text text="YHNFEGYRWR" /> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +usage: sixgill_merge [-h] --out OUT [--debug] + metapeptidedbfiles [metapeptidedbfiles ...] + +Merge multiple metapeptide database files into a single metapeptide database. +Optionally, filter simultaneously. + +positional arguments: + metapeptidedbfiles input metapeptide database files + +optional arguments: + -h, --help show this help message and exit + --out OUT output file + --debug Enable debug logging + + ]]></help> + <expand macro="citations" /> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/metagene_nometagene_merged.metapeptides.tsv Thu Oct 13 08:38:04 2016 -0400 @@ -0,0 +1,51 @@ +sequence length min_qualscore partial_orf_length metagene_score read_ids +TTLIERVVDECSLKK 15 36 96 7.73277 C57KNANXX:5:1101:10000:79229/1,C57KNANXX:5:1101:10000:79229/1 +DLRILLRERLVAGDSDEAAVDFIVDR 26 31 96 6.45153 C57KNANXX:5:1101:10001:25583/1,C57KNANXX:5:1101:10001:25583/1 +KNNSSCSFCGKKR 13 34 54 -1.0 C57KNANXX:5:1101:10000:48824/1 +AATPVGATTIFLSEHSSTTLSIR 23 36 96 -1.0 C57KNANXX:5:1101:10000:79229/1 +RITTLKPFSCNNFTVK 16 38 96 -1.0 C57KNANXX:5:1101:10000:23019/1 +FHLNYQKFFFQKHLPPLIIK 20 34 96 -1.0 C57KNANXX:5:1101:10000:91317/1 +HASIHQFGIVGCNIIWAKPK 20 32 96 -1.0 C57KNANXX:5:1101:10000:60887/1 +NLSYQSNTELKSNLNFQLVCEIRILIK 27 34 96 -1.0 C57KNANXX:5:1101:10000:46059/1 +ISPKLSKVLFSKTSATFNNKADNSSSICFR 30 34 96 -1.0 C57KNANXX:5:1101:10000:91317/1 +LLQEKGFNVVILFSGKEHK 19 38 96 4.26594 C57KNANXX:5:1101:10000:23019/1,C57KNANXX:5:1101:10000:23019/1 +SIPQISTGDMLRAAVK 16 36 93 3.85469 C57KNANXX:5:1101:10000:12567/1,C57KNANXX:5:1101:10000:12567/1 +CSKRSPCSHWYYRR 14 35 96 -1.0 C57KNANXX:5:1101:10000:9540/1 +SVQFEPGVTRFR 12 37 96 -1.0 C57KNANXX:5:1101:10000:58282/1 +VLRYDEGIDSLISIGQSCYGK 21 37 69 2.77044 C57KNANXX:5:1101:10000:99203/1 +HWRVICVFCCANVWLLGTVTKRR 23 35 96 -1.0 C57KNANXX:5:1101:10001:25583/1 +QHPFLQTNHCPLVFVSPVLQIILGSMR 27 38 96 -1.0 C57KNANXX:5:1101:10000:76185/1 +LSAKQVKQILMDSGLSVK 18 38 96 5.18638 C57KNANXX:5:1101:10000:76185/1,C57KNANXX:5:1101:10000:76185/1 +SSDLILPKIICKTGETNTNGQWFVCK 26 38 93 -1.0 C57KNANXX:5:1101:10000:76185/1 +DIANIISYGVSTSNSCIYRISSNNNR 26 34 93 -1.0 C57KNANXX:5:1101:10000:3140/1 +LIMFLTNNSSIQEVLFFPQMRPEVVQKK 28 34 96 5.30784 C57KNANXX:5:1101:10000:92875/1,C57KNANXX:5:1101:10000:92875/1 +QIRHLACEPDVVSSQR 16 36 96 -1.0 C57KNANXX:5:1101:10000:29932/1 +YHNFEGYRWR 10 34 93 -1.0 C57KNANXX:5:1101:10000:68440/1 +RTAGWIRHQAHRPSALFAVGKNQR 24 36 93 -1.0 C57KNANXX:5:1101:10000:29932/1 
+ACFAGKDIVVPGVANKLTAIITHFFSK 27 33 96 6.29703 C57KNANXX:5:1101:10001:17691/1,C57KNANXX:5:1101:10001:17691/1 +ELFFGKIGRAVQQECRDR 18 38 96 6.34832 C57KNANXX:5:1101:10000:24094/1,C57KNANXX:5:1101:10000:24094/1 +SPEEIRELSKFKKPIFYDGKFFDIK 25 38 96 13.1057 C57KNANXX:5:1101:10000:84210/1,C57KNANXX:5:1101:10000:84210/1 +SSDLDQAAQQFIVPRKGVLELAR 23 35 96 4.65375 C57KNANXX:5:1101:10000:86801/1,C57KNANXX:5:1101:10000:86801/1 +VNGVVITVSPPTITLKVVISKPR 23 34 93 5.75404 C57KNANXX:5:1101:10000:68440/1,C57KNANXX:5:1101:10000:68440/1 +DFTPKRQSYWATFR 14 38 96 6.38445 C57KNANXX:5:1101:10001:17914/1,C57KNANXX:5:1101:10001:17914/1 +LRERIVFWQDRK 12 38 96 -1.0 C57KNANXX:5:1101:10000:24094/1 +KCVIIAVSLLATPGTTMSFPAK 22 33 96 -1.0 C57KNANXX:5:1101:10001:17691/1 +NSLKFSILQIYIIYIYITIK 20 37 96 -1.0 C57KNANXX:5:1101:10000:29175/1 +RASSNTPLRGTINCCAAWSRSEERFSR 27 35 96 -1.0 C57KNANXX:5:1101:10000:86801/1 +FAPYVTRSMLLRAYRRRDRRHVER 24 31 96 -1.0 C57KNANXX:5:1101:10000:73874/1 +AKVNNAVVDFQFFDR 15 36 96 10.6388 C57KNANXX:5:1101:10001:15683/1,C57KNANXX:5:1101:10001:15683/1 +LTGQVPYLPWAKIK 14 36 96 5.61044 C57KNANXX:5:1101:10000:29932/1,C57KNANXX:5:1101:10000:29932/1 +LGLGPDDVTSDNPKLVYGR 19 32 96 2.81286 C57KNANXX:5:1101:10000:60887/1,C57KNANXX:5:1101:10000:60887/1 +SVPAFTAARSISPVEIWGIDRK 22 31 90 -1.0 C57KNANXX:5:1101:10000:12567/1 +NIIKKMDLVLTGEGSFDVQSTYGK 24 34 93 8.76308 C57KNANXX:5:1101:10000:99902/1,C57KNANXX:5:1101:10000:99902/1 +LRILLLSVNLTGRLLETNPKHSK 23 36 93 -1.0 C57KNANXX:5:1101:10001:17914/1 +SQHPAAMTHSGVPKDRRAVIGITEGLIR 28 35 96 -1.0 C57KNANXX:5:1101:10000:9540/1 +KIIQVVHSVEKK 12 34 51 -1.0 C57KNANXX:5:1101:10000:48824/1 +QIDEELSALLLKVADVFEKR 20 36 93 13.3198 C57KNANXX:5:1101:10000:91317/1,C57KNANXX:5:1101:10000:91317/1 +IIELEHQILKVEGSIMELEKTIVDK 25 36 96 19.4442 C57KNANXX:5:1101:10000:73724/1,C57KNANXX:5:1101:10000:73724/1 +SNFVQSKLNYLYLHR 15 37 93 -1.0 C57KNANXX:5:1101:10000:99203/1 +NANKNVIFFMILILYKNNK 19 38 66 4.82132 C57KNANXX:5:1101:10000:61940/1 +VLEIYPTNIIYFLIVK 16 36 60 5.37246 
C57KNANXX:5:1101:10000:33905/1 +KFDEITLDVYYKKGKESSK 19 33 96 22.2262 C57KNANXX:5:1101:10001:101410/1,C57KNANXX:5:1101:10001:101410/1 +DVIECRAPQGVCSHYMLVFLRMEPKFCKEV 30 38 93 0.240116 C57KNANXX:5:1101:10000:19758/1 +QETIALENEIELLQAQIEQIGRAVQQECR 29 35 96 3.85699 C57KNANXX:5:1101:10000:58282/1,C57KNANXX:5:1101:10000:58282/1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/metagene_nometagene_merged.min2reads.metapeptides.tsv Thu Oct 13 08:38:04 2016 -0400 @@ -0,0 +1,21 @@ +sequence length min_qualscore partial_orf_length metagene_score read_ids +TTLIERVVDECSLKK 15 36 96 7.73277 C57KNANXX:5:1101:10000:79229/1,C57KNANXX:5:1101:10000:79229/1 +DLRILLRERLVAGDSDEAAVDFIVDR 26 31 96 6.45153 C57KNANXX:5:1101:10001:25583/1,C57KNANXX:5:1101:10001:25583/1 +LLQEKGFNVVILFSGKEHK 19 38 96 4.26594 C57KNANXX:5:1101:10000:23019/1,C57KNANXX:5:1101:10000:23019/1 +SIPQISTGDMLRAAVK 16 36 93 3.85469 C57KNANXX:5:1101:10000:12567/1,C57KNANXX:5:1101:10000:12567/1 +LSAKQVKQILMDSGLSVK 18 38 96 5.18638 C57KNANXX:5:1101:10000:76185/1,C57KNANXX:5:1101:10000:76185/1 +LIMFLTNNSSIQEVLFFPQMRPEVVQKK 28 34 96 5.30784 C57KNANXX:5:1101:10000:92875/1,C57KNANXX:5:1101:10000:92875/1 +ACFAGKDIVVPGVANKLTAIITHFFSK 27 33 96 6.29703 C57KNANXX:5:1101:10001:17691/1,C57KNANXX:5:1101:10001:17691/1 +ELFFGKIGRAVQQECRDR 18 38 96 6.34832 C57KNANXX:5:1101:10000:24094/1,C57KNANXX:5:1101:10000:24094/1 +SPEEIRELSKFKKPIFYDGKFFDIK 25 38 96 13.1057 C57KNANXX:5:1101:10000:84210/1,C57KNANXX:5:1101:10000:84210/1 +SSDLDQAAQQFIVPRKGVLELAR 23 35 96 4.65375 C57KNANXX:5:1101:10000:86801/1,C57KNANXX:5:1101:10000:86801/1 +VNGVVITVSPPTITLKVVISKPR 23 34 93 5.75404 C57KNANXX:5:1101:10000:68440/1,C57KNANXX:5:1101:10000:68440/1 +DFTPKRQSYWATFR 14 38 96 6.38445 C57KNANXX:5:1101:10001:17914/1,C57KNANXX:5:1101:10001:17914/1 +AKVNNAVVDFQFFDR 15 36 96 10.6388 C57KNANXX:5:1101:10001:15683/1,C57KNANXX:5:1101:10001:15683/1 +LTGQVPYLPWAKIK 14 36 96 5.61044 C57KNANXX:5:1101:10000:29932/1,C57KNANXX:5:1101:10000:29932/1 +LGLGPDDVTSDNPKLVYGR 19 32 96 2.81286 C57KNANXX:5:1101:10000:60887/1,C57KNANXX:5:1101:10000:60887/1 +NIIKKMDLVLTGEGSFDVQSTYGK 24 34 93 8.76308 C57KNANXX:5:1101:10000:99902/1,C57KNANXX:5:1101:10000:99902/1 +QIDEELSALLLKVADVFEKR 20 36 93 13.3198 C57KNANXX:5:1101:10000:91317/1,C57KNANXX:5:1101:10000:91317/1 +IIELEHQILKVEGSIMELEKTIVDK 25 36 96 
19.4442 C57KNANXX:5:1101:10000:73724/1,C57KNANXX:5:1101:10000:73724/1 +KFDEITLDVYYKKGKESSK 19 33 96 22.2262 C57KNANXX:5:1101:10001:101410/1,C57KNANXX:5:1101:10001:101410/1 +QETIALENEIELLQAQIEQIGRAVQQECR 29 35 96 3.85699 C57KNANXX:5:1101:10000:58282/1,C57KNANXX:5:1101:10000:58282/1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/metagene_output.txt Thu Oct 13 08:38:04 2016 -0400 @@ -0,0 +1,168 @@ +# C57KNANXX:5:1101:10000:12567/1 +# gc = 0.553191, rbs = -1 +# self: - +gene_1 1 94 - 0 00 3.85469 p - - - +# C57KNANXX:5:1101:10000:19758/1 +# gc = 0.402062, rbs = -1 +# self: - +gene_1 1 97 - 1 01 0.240116 b - - - +# C57KNANXX:5:1101:10000:23019/1 +# gc = 0.329897, rbs = -1 +# self: - +gene_1 1 97 + 1 00 4.26594 b - - - +# C57KNANXX:5:1101:10000:24094/1 +# gc = 0.474227, rbs = -1 +# self: - +gene_1 1 97 + 1 00 6.34832 p - - - +# C57KNANXX:5:1101:10000:25279/1 +# gc = 0.360825, rbs = -1 +# self: - +gene_1 1 97 + 1 00 16.3044 p - - - +# C57KNANXX:5:1101:10000:28635/1 +# gc = 0.278351, rbs = -1 +# self: - +gene_1 1 97 + 2 00 4.69095 a - - - +# C57KNANXX:5:1101:10000:29175/1 +# gc = 0.154639, rbs = -1 +# self: - +# C57KNANXX:5:1101:10000:29932/1 +# gc = 0.536082, rbs = -1 +# self: - +gene_1 1 97 + 0 10 5.61044 p - - - +# C57KNANXX:5:1101:10000:3140/1 +# gc = 0.443299, rbs = -1 +# self: - +gene_1 1 97 + 2 00 8.08669 b - - - +# C57KNANXX:5:1101:10000:32777/1 +# gc = 0.484536, rbs = -1 +# self: - +gene_1 1 97 - 0 00 6.4402 b - - - +# C57KNANXX:5:1101:10000:33905/1 +# gc = 0.216495, rbs = -1 +# self: - +gene_1 34 97 - 1 01 5.37246 b - - - +# C57KNANXX:5:1101:10000:34777/1 +# gc = 0.195876, rbs = -1 +# self: - +# C57KNANXX:5:1101:10000:46059/1 +# gc = 0.257732, rbs = -1 +# self: - +# C57KNANXX:5:1101:10000:47265/1 +# gc = 0.564516, rbs = -1 +# self: - +# C57KNANXX:5:1101:10000:54860/1 +# gc = 0.257732, rbs = -1 +# self: - +gene_1 1 97 + 2 00 10.6181 b - - - +# C57KNANXX:5:1101:10000:58282/1 +# gc = 0.515464, rbs = -1 +# self: - +gene_1 1 97 + 0 00 3.85699 b - - - +# C57KNANXX:5:1101:10000:60887/1 +# gc = 0.56701, rbs = -1 +# self: - +gene_1 1 97 - 1 00 2.81286 p - - - +# C57KNANXX:5:1101:10000:61940/1 +# gc = 0.134021, rbs = -1 +# self: - +gene_1 1 70 + 1 01 4.82132 p - - - +# C57KNANXX:5:1101:10000:68440/1 +# gc = 0.453608, rbs = -1 +# self: 
- +gene_1 1 97 + 2 00 5.75404 a - - - +# C57KNANXX:5:1101:10000:73724/1 +# gc = 0.237113, rbs = -1 +# self: - +gene_1 1 97 + 1 00 19.4442 b - - - +# C57KNANXX:5:1101:10000:73874/1 +# gc = 0.556701, rbs = -1 +# self: - +gene_1 1 97 + 1 00 4.20317 b - - - +# C57KNANXX:5:1101:10000:76185/1 +# gc = 0.371134, rbs = -1 +# self: - +gene_1 1 97 - 0 00 5.18638 a - - - +# C57KNANXX:5:1101:10000:76393/1 +# gc = 0.28866, rbs = -1 +# self: - +gene_1 1 71 - 0 10 17.9709 a - - - +# C57KNANXX:5:1101:10000:79229/1 +# gc = 0.412371, rbs = -1 +# self: - +gene_1 1 97 + 1 00 7.73277 a - - - +# C57KNANXX:5:1101:10000:84210/1 +# gc = 0.257732, rbs = -1 +# self: - +gene_1 1 97 - 0 00 13.1057 p - - - +# C57KNANXX:5:1101:10000:85460/1 +# gc = 0.329897, rbs = -1 +# self: - +# C57KNANXX:5:1101:10000:86801/1 +# gc = 0.525773, rbs = -1 +# self: - +gene_1 1 97 - 1 00 4.65375 b - - - +# C57KNANXX:5:1101:10000:88288/1 +# gc = 0.329897, rbs = -1 +# self: - +gene_1 1 97 + 0 00 15.2122 p - - - +# C57KNANXX:5:1101:10000:90246/1 +# gc = 0.278351, rbs = -1 +# self: - +# C57KNANXX:5:1101:10000:91317/1 +# gc = 0.268041, rbs = -1 +# self: - +gene_1 1 97 - 2 00 13.3198 b - - - +# C57KNANXX:5:1101:10000:92875/1 +# gc = 0.309278, rbs = -1 +# self: - +gene_1 1 97 - 0 10 5.30784 b - - - +# C57KNANXX:5:1101:10000:9540/1 +# gc = 0.453608, rbs = -1 +# self: - +# C57KNANXX:5:1101:10000:99203/1 +# gc = 0.381443, rbs = -1 +# self: - +gene_1 29 97 + 0 10 2.77044 b 20 25 -1.94891 +# C57KNANXX:5:1101:10000:99902/1 +# gc = 0.28866, rbs = -1 +# self: - +gene_1 1 97 - 2 00 8.76308 a - - - +# C57KNANXX:5:1101:10001:100058/1 +# gc = 0.22619, rbs = -1 +# self: - +gene_1 3 84 + 0 10 5.19281 b - - - +# C57KNANXX:5:1101:10001:101410/1 +# gc = 0.28866, rbs = -1 +# self: - +gene_1 1 97 + 0 00 22.2262 p - - - +# C57KNANXX:5:1101:10001:15683/1 +# gc = 0.381443, rbs = -1 +# self: - +gene_1 1 97 + 1 00 10.6388 b - - - +# C57KNANXX:5:1101:10001:17691/1 +# gc = 0.381443, rbs = -1 +# self: - +gene_1 1 97 + 1 00 6.29703 a - - - +# 
C57KNANXX:5:1101:10001:17914/1 +# gc = 0.42268, rbs = -1 +# self: - +gene_1 1 97 + 1 00 6.38445 p - - - +# C57KNANXX:5:1101:10001:18762/1 +# gc = 0.43299, rbs = -1 +# self: - +gene_1 1 97 - 2 00 3.80632 b - - - +# C57KNANXX:5:1101:10001:19988/1 +# gc = 0.350515, rbs = -1 +# self: - +gene_1 1 97 - 0 00 14.6087 a - - - +# C57KNANXX:5:1101:10001:20542/1 +# gc = 0.43299, rbs = -1 +# self: - +# C57KNANXX:5:1101:10001:25583/1 +# gc = 0.546392, rbs = -1 +# self: - +gene_1 1 97 - 0 00 6.45153 b - - - +# C57KNANXX:5:1101:10001:26391/1 +# gc = 0.463918, rbs = -1 +# self: - +gene_1 1 97 - 2 00 9.77924 p - - -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/small.fq Thu Oct 13 08:38:04 2016 -0400 @@ -0,0 +1,200 @@ +@C57KNANXX:5:1101:10000:12567/1 +TCTTGCCTAAGTCAGTGCCCGCCTTTACGGCAGCACGGAGCATATCGCCCGTAGAAATCTGGGGGATAGATCGGAAGAGCGGTTCAGCAGGAAT ++ +CCGGGGGGGDGGG@GGGGGGGGGGGGEGGGGGGGGGFGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGFGGG +@C57KNANXX:5:1101:10000:19758/1 +CTAAACTTCTTTACAAAACTTAGGTTCCATTCTTAAAAACACCAGCATATAATGGCTACAGACTCCCTGTGGTGCTCGACATTCGATAACGTCCTTG ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:23019/1 +CAACCACACACTTTTAACGGTGAAATTATTACAAGAAAAGGGTTTTAATGTCGTTATTCTTTTTTCTGGGAAGGAGCATAAAACGACAGAAGAAATT ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:24094/1 +GGCTATTTTTGGTGCAGACTCAGAGAAAGAATTGTTTTTTGGCAAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGTGCAGTGAATCTCGTA ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG=;DGGGG +@C57KNANXX:5:1101:10000:25279/1 +AATCTTATTTGGTCCTCCTGGAGCAGGTAAAGGAACACAAGCTCAACACATTGTAAAAGATTATGACTATTTTCAAATATCAACTGGCGATATGTTG ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGEGGGGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:28635/1 +AATTAAAGTCATCAGTACCAAAAGATTTTGAATCTACAAGCTATAAAGATTCAAAATCTGATACCCTACATTATTGGTTTAAACCTTTCGTAGAAAA ++ +CCFFGGGGGGGGFFGGGGGGGGGGG=FGGGGGGGGGGGCGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGEGGGGGGGGBGGFEGGGGGGGG +@C57KNANXX:5:1101:10000:29175/1 +ACAAATATAAGAATTTAATTGTAATATAAATATAAATAATGTATATTTGTAATATGGAAAATTTTAAAGAATTCTTAGAGGAGTTAGAAGATAATAA ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGDGGGGEGCGGGGGGGG +@C57KNANXX:5:1101:10000:29932/1 +GTGATTGGGGATTACACCGAAGAACCGCTGGCTGGATACGACATCAGGCTCACAGGCCAAGTGCCTTATTTGCCGTGGGCAAAAATCAAAGGCACCC ++ +CCGFGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEFGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG 
+@C57KNANXX:5:1101:10000:3140/1 +CTACACTCACGGTTGTTGTTGGAGGAGATACGGTAGATACAGCTATTGGAGGTACTTACTCCGTAACTTATAATGTTAGCGATGTCTCTGGCAATGC ++ +CCGGCGGGGGFGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGCGGG +@C57KNANXX:5:1101:10000:32777/1 +CAAATACCCCCGCGGGTCCTAGAACCAGTATTTCGAAAGCACGGGTATAGCGTTTATGTTCACCGGCAAAGCGTATTTTTACCTGACGAATCGCTTG ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGG>FGGFGGGGGGGGGBACDGGGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGB +@C57KNANXX:5:1101:10000:33905/1 +GTTAGAAAGTAAACCAAACATAGGAACAACTTTTTATTTTACAATTAAAAAATAAATAATATTAGTAGGATATATTTCTAATACTCTAGTAACAAGC ++ +CCGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGEGGGGFGGGGGGD +@C57KNANXX:5:1101:10000:34777/1 +ATATTAAATCATTAAGATATTCGTCGGAGCTAAAAAAATATAATTGACAAATATTAGTTAAGTAAATTTTCTTGCTCATTTTAAAAAGTTAAACTAA ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:46059/1 +CTTTTTAATTAAGATTCTGATTTCGCAAACAAGCTGAAAATTCAGATTTGATTTTAATTCAGTATTTGACTGATAACTCAGATTTCTATTTAGATTC ++ +CCGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGGE>FFF +@C57KNANXX:5:1101:10000:47265/1 +TTATCTAGAGTGCCCCCGGTATGGCCTAAGCCACGGCCAGAAATCATCGGCACATATGCCCC ++ +CCGGGGGGGGGGGGGGFG<EGGGEGGGGGGGGGGDGGGGGGGFGGGGGGGGGGEGGGGG>GG +@C57KNANXX:5:1101:10000:48824/1 +AGATACATCATCTCTTTTTTTTCCACAGAATGAACAACTTGAATTATTTTTTTTA ++ +CBGCDFGGGGGFGFGGGGGGGGCGEEGGGGGGGGGGGEGGGEGGGFGGGGGGGG@ +@C57KNANXX:5:1101:10000:54860/1 +CTATTTTTACTTTTTCGGTTTTTAGTGACGTATCAAAAGATTTAATGCAACTTAGAATGATTAAATCTGCTGAAGAAATAGAAATCATTAAAAATGG ++ +CCGGGFGGGGGGGGGGGGBGGGGGGGG>GGGGGGGFGGEGGGGGGGGGGGGGGFEGGGG>GG@GGGGGGGCDGFGGGEGGGFGGGGFGGGGEGGGGG +@C57KNANXX:5:1101:10000:58282/1 +AGGCAAGAGACGATTGCCCTGGAAAACGAAATCGAGTTACTCCAGGCTCAAATTGAACAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGTG ++ +CCGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:60158/1 
+TATGACAGAGGCAGTCCCAAAACCAAGAACGGTCCTGAAGCTGGTGT ++ +BBGGGGGGGGDGGGGGGGEGGGGGGBGBEFCEFGGGGGGGGGGGC@< +@C57KNANXX:5:1101:10000:60887/1 +GCCGTTTTGGCCCCACCCCGTCATGCGTCCATACACCAGTTTGGGATTGTCGGATGTAACATCATCTGGGCCAAGCCCAAGTCGCTCCATAACGCCT ++ +BCGGGGGGGGGGGGGGGGGGGGGGGGGGGGBGCGGGGGGGGGGDGGGGGGGGGAGGGCGGGGGGGGGFGGGGGGFGGGGGGGGFGGFG=GGGGGGDG +@C57KNANXX:5:1101:10000:61940/1 +AGAAATCAAAAACGCAAATAAAAATGTTATTTTTTTCATGATATTAATTTTGTATAAAAATAATAAATAAATCGTTTAAAACAATATTATTACAATA ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGFGGGGGGGG +@C57KNANXX:5:1101:10000:65336/1 +TACTTTTACAATTGTAAGACATTAACCATGAGTTCATCATTTGAAAAACATC ++ +BBG@F>FEFF>GGGGGGGG>CGGGGGGG>FGGGGGEGGFGGGEGGGGGGGGC +@C57KNANXX:5:1101:10000:66846/1 +GCAGTCCCAAGCCCCTTAAAACCTTTTGATGCAGATTCAGTTCCTTTTTCAACGTCAA ++ +BBGGGGGEGGGGGGGGGGGGGGGGGGCDGGGFGG>FGGGGGGGGGGGGGGE@FGCDG> +@C57KNANXX:5:1101:10000:68440/1 +GCAGCGACTATTATCCACTGCGCGTTAATGGTGTTGTAATAACTGTAAGTCCGCCAACGATAACCCTCAAAGTTGTGATATCGAAACCCAGGTTGCA ++ +CCGGGGGGGGGFGEGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:73724/1 +AAATAAAATTATTGAGTTAGAGCATCAGATTTTAAAAGTTGAAGGTTCTATTATGGAACTTGAAAAGACTATTGTCGATAAGATTGATATATTAATT ++ +CCEGGGGGGGGGGGGGGGGGFGEEGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:73874/1 +GCCGCGCGATTTGCGCCGTATGTTACCAGGTCAATGCTACTACGTGCCTATCGTCGACGAGACAGGCGGCATGTTGAACGATCCTGTCTGCCTAAAG ++ +CCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGBGGGG@GGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGB +@C57KNANXX:5:1101:10000:76185/1 +AAACAACATCCATTTTTACAGACAAACCACTGTCCATTAGTATTTGTTTCACCTGTTTTGCAGATAATTTTGGGTAGTATGAGATCGGAAGAGCGGT ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:76393/1 +TTTGCTAAAGCATCTACACCACGTTTTAATCCGTCTCTAGCGTCTATATCAAATTTTATATCTTTTGCCATTGTTTTTAAATTTTGCTTTTAACTAT ++ 
+CCGGGEGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGEGGGGGGGGGGGGGGGFG +@C57KNANXX:5:1101:10000:79229/1 +AACTGGTAAAACTACCCTTATTGAGAGGGTTGTGGATGAATGTTCACTTAAAAAAATGGTGGTTGCCCCCACTGGTGTTGCTGCTTTAAATATCGGT ++ +CCGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGFGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:81980/1 +GTTTTTCCCAATAATTCTTTTGCAGCAACGCTTAACTCTTCAATGG ++ +BBFGGGFGGGGGGGGGGGGGGGGFGGEGGGGGGGGGGGGGG>ECFG +@C57KNANXX:5:1101:10000:82990/1 +CTCTCTAATGGTAAAAGTTTATTAGCAGCTGGTATCAAAAAGGTATCTGG ++ +FGEFGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGFGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:84210/1 +CAATTAATGTATTCTTTATGTCAAAGAACTTACCATCATAAAAAATAGGCTTTTTAAATTTTGAAAGTTCTCTTATTTCTTCAGGACTTCTTTGTGT ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDG +@C57KNANXX:5:1101:10000:85460/1 +CAGAAGCTGCAGCTGAATAATCTAAACTTATTTCAAGTATAGATTGGTACAAGAGTATAAAATTTAGAAAAGCGTTACACGTGTGTAGCGCTTTTTT ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:86801/1 +CAGGCGCGCAAGCTCTAATACACCCTTACGTGGAACGATAAATTGTTGTGCAGCTTGGTCTAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACC ++ +CCGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGG +@C57KNANXX:5:1101:10000:88288/1 +ATTATCTCAGGCGATTTGACTATAACAGGTGACGCCACAGAACTTCAAACAACTAATACTGCAATTACAGATAATGTTATTGTATTAAACAAAGAGA ++ +BBGGGGGGGGGFGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGG>FGGGGGGGGGGGGGGGGGGGEGGGGG>FBDGGGGGGGGGGGGGGGEG>FG +@C57KNANXX:5:1101:10000:90246/1 +CTACCCTAAATTAAAGAGCGGCTTGTAAATAAAAAATCCCTACTAAAAGCAAGGATTGTATCTTAAAGGGTTATTATTTTATTAAAAACAAACTTCA ++ +CCGGGGGGGCEGEGGGGGGGGGGGGGEGGCEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGCGEG>GCFGGG@FGDGGGGGGG +@C57KNANXX:5:1101:10000:91317/1 +AAAATTTCACCTAAATTATCAAAAGTTCTTTTTTCAAAAACATCTGCCACCTTTAATAATAAAGCAGATAATTCTTCATCTATTTGCTTCAGGTCAT ++ 
+CCEGEGGGFGGEGEGGDCGEGGGEEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGFGEGGEFGGGGGGGGGGGGGGFGGG +@C57KNANXX:5:1101:10000:92875/1 +CAATTTTCTTTTGAACTACCTCAGGTCTCATTTGAGGAAAAAACAATACTTCTTGGATGGATGAATTGTTCGTTAAGAACATAATTAATCTATCCAT ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGFGCDFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:9540/1 +AACTGATATTCTGATTAAACCTTCGGTAATACCAATGACTGCACGGCGATCTTTTGGAACACCTGAATGTGTCATTGCTGCTGGGTGCTGAGATCGG ++ +CCDGGFGGGGGGGGGGGGGGGGGGGFGGGEDGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:99203/1 +CGTCGATGAGTAGAGTTGTGGGTAAAAAGTGTTACGTTACGATGAAGGTATAGATAGTTTAATTTCGATTGGACAAAGTTGCTACGGTAAATCCAGA ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10000:99902/1 +ATTTTATAGGGTAATTTCCCATAAGTACTTTGAACGTCAAAACTTCCTTCTCCTGTAAGAACTAGATCCATTTTTTTTATAATGTTTTTTAGGTTTG ++ +CBF>FEGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCFGGGGGGGGGEGGGGGGGGEGGGGGGGGGGGGGGGBF>GFG8 +@C57KNANXX:5:1101:10001:100058/1 +AAGTGTTAACTAAAAATTTTAATAAAAGAACAGTTAATCAAGTTGTTGATCCAAAAATTTTAAAATTTTGGAGAAAGCAACTGT ++ +CCGGGGGGGGGGGG>GGGCGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGEDEFGGCFDGGGEGEFGGGFGGGGGGGGGGG +@C57KNANXX:5:1101:10001:101410/1 +GGTGAAAAGAAATTTGATGAAATTACTTTAGATGTTTATTATAAAAAAGGGAAAGAATCCTCTAAATTATATGATGATGCACATGAGATCGGAAGAG ++ +BBGEDGGCGEGGGGGGGGGGGGGGFGGGGGGGGGGEGGEGGGGGGGGDGBBGGGGGGGGGGGGGGGGGGGGGGGCGC@GGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10001:15683/1 +ATTAGAACTAGAATTAATTCAGGCGGCCTCAACAGCTGCAAAAGCGAAGGTTAATAACGCCGTTGTAGATTTTCAATTTTTTGATCGACTCGATCAA ++ +CCGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGFFGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10001:17691/1 +AATTGGTATAAAAGCCTGTTTTGCAGGGAAGGACATTGTAGTTCCCGGTGTTGCCAATAAACTTACAGCAATAATTACGCACTTCTTTTCAAAGACA ++ +CCGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGFBGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGDGGGGG +@C57KNANXX:5:1101:10001:17914/1 
+TAGCCTTGAAATTAAGGATTTTACTCCTAAGCGTCAATCTTACTGGGCGACTTTTAGAGACCAACCCCAAGCATTCGAAATTCTGGTGCTCAACAAG ++ +CCEGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10001:18762/1 +AAAGCCGTTGCAGAAGCTGTAATATAAGCCCCAATACCAAAAAATGCCTGTTGTCCAAAAGAGACCTCCCCAACCACTAGCACCAAATAAGCAGAAA ++ +CBFGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGG@BG>GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10001:19988/1 +TTCTCACCATGTCAAATTGAGCTTCTGGATAGTTTATAACACCAGATGTGTTTAAGGTATGAGCTATTGGAGTGTAGTCTAAATACTCATAAACTTC ++ +BBGGGGGGGGGGFGFGGGGGGGCD>FGGGGGGEEGFGGDGGGGGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGGGGGGGGFGGGGFGGGGGGGGG +@C57KNANXX:5:1101:10001:20542/1 +GCAATAGTCATCTCATCTATCAATACAGAACCAATGTATTGAGAACCATTTTTGTAAGAGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGTG ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10001:25583/1 +CGTAGCGATCAACGATAAAATCCACCGCCGCTTCGTCACTGTCCCCAGCAACCAAACGTTCGCGCAACAGAATACGCAAATCACGCGCCAATGACGC ++ +CCGGGGD@GGGGGGBGGBGGGGGGGGGGGGGGGGGGGGFDGGGEGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@C57KNANXX:5:1101:10001:26391/1 +AGAACAATACCTTCCGCTCCTAATCCCATAACGTCATCAGCGGTTAATGCATCATTCCGATGAACAACAACTTCTGTCCCCAGCTCGCCAATGTAGT ++ +CCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG<GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFFGGGG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/testdb_metagene.metapeptides.fasta Thu Oct 13 08:38:04 2016 -0400 @@ -0,0 +1,48 @@ +>TTLIERVVDECSLKK +TTLIERVVDECSLKK +>DLRILLRERLVAGDSDEAAVDFIVDR +DLRILLRERLVAGDSDEAAVDFIVDR +>VLRYDEGIDSLISIGQSCYGK +VLRYDEGIDSLISIGQSCYGK +>QIDEELSALLLKVADVFEKR +QIDEELSALLLKVADVFEKR +>LLQEKGFNVVILFSGKEHK +LLQEKGFNVVILFSGKEHK +>SIPQISTGDMLRAAVK +SIPQISTGDMLRAAVK +>SPEEIRELSKFKKPIFYDGKFFDIK +SPEEIRELSKFKKPIFYDGKFFDIK +>LSAKQVKQILMDSGLSVK +LSAKQVKQILMDSGLSVK +>LIMFLTNNSSIQEVLFFPQMRPEVVQKK +LIMFLTNNSSIQEVLFFPQMRPEVVQKK +>ACFAGKDIVVPGVANKLTAIITHFFSK +ACFAGKDIVVPGVANKLTAIITHFFSK +>ELFFGKIGRAVQQECRDR +ELFFGKIGRAVQQECRDR +>SSDLDQAAQQFIVPRKGVLELAR +SSDLDQAAQQFIVPRKGVLELAR +>VNGVVITVSPPTITLKVVISKPR +VNGVVITVSPPTITLKVVISKPR +>DFTPKRQSYWATFR +DFTPKRQSYWATFR +>AKVNNAVVDFQFFDR +AKVNNAVVDFQFFDR +>LTGQVPYLPWAKIK +LTGQVPYLPWAKIK +>LGLGPDDVTSDNPKLVYGR +LGLGPDDVTSDNPKLVYGR +>NIIKKMDLVLTGEGSFDVQSTYGK +NIIKKMDLVLTGEGSFDVQSTYGK +>IIELEHQILKVEGSIMELEKTIVDK +IIELEHQILKVEGSIMELEKTIVDK +>NANKNVIFFMILILYKNNK +NANKNVIFFMILILYKNNK +>VLEIYPTNIIYFLIVK +VLEIYPTNIIYFLIVK +>KFDEITLDVYYKKGKESSK +KFDEITLDVYYKKGKESSK +>DVIECRAPQGVCSHYMLVFLRMEPKFCKEV +DVIECRAPQGVCSHYMLVFLRMEPKFCKEV +>QETIALENEIELLQAQIEQIGRAVQQECR +QETIALENEIELLQAQIEQIGRAVQQECR
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/testdb_metagene.metapeptides.tsv Thu Oct 13 08:38:04 2016 -0400 @@ -0,0 +1,25 @@ +sequence length min_qualscore partial_orf_length metagene_score read_ids +TTLIERVVDECSLKK 15 36 96 7.73277 C57KNANXX:5:1101:10000:79229/1 +DLRILLRERLVAGDSDEAAVDFIVDR 26 31 96 6.45153 C57KNANXX:5:1101:10001:25583/1 +VLRYDEGIDSLISIGQSCYGK 21 37 69 2.77044 C57KNANXX:5:1101:10000:99203/1 +QIDEELSALLLKVADVFEKR 20 36 93 13.3198 C57KNANXX:5:1101:10000:91317/1 +LLQEKGFNVVILFSGKEHK 19 38 96 4.26594 C57KNANXX:5:1101:10000:23019/1 +SIPQISTGDMLRAAVK 16 36 93 3.85469 C57KNANXX:5:1101:10000:12567/1 +SPEEIRELSKFKKPIFYDGKFFDIK 25 38 96 13.1057 C57KNANXX:5:1101:10000:84210/1 +LSAKQVKQILMDSGLSVK 18 38 96 5.18638 C57KNANXX:5:1101:10000:76185/1 +LIMFLTNNSSIQEVLFFPQMRPEVVQKK 28 34 96 5.30784 C57KNANXX:5:1101:10000:92875/1 +ACFAGKDIVVPGVANKLTAIITHFFSK 27 33 96 6.29703 C57KNANXX:5:1101:10001:17691/1 +ELFFGKIGRAVQQECRDR 18 38 96 6.34832 C57KNANXX:5:1101:10000:24094/1 +SSDLDQAAQQFIVPRKGVLELAR 23 35 96 4.65375 C57KNANXX:5:1101:10000:86801/1 +VNGVVITVSPPTITLKVVISKPR 23 34 93 5.75404 C57KNANXX:5:1101:10000:68440/1 +DFTPKRQSYWATFR 14 38 96 6.38445 C57KNANXX:5:1101:10001:17914/1 +AKVNNAVVDFQFFDR 15 36 96 10.6388 C57KNANXX:5:1101:10001:15683/1 +LTGQVPYLPWAKIK 14 36 96 5.61044 C57KNANXX:5:1101:10000:29932/1 +LGLGPDDVTSDNPKLVYGR 19 32 96 2.81286 C57KNANXX:5:1101:10000:60887/1 +NIIKKMDLVLTGEGSFDVQSTYGK 24 34 93 8.76308 C57KNANXX:5:1101:10000:99902/1 +IIELEHQILKVEGSIMELEKTIVDK 25 36 96 19.4442 C57KNANXX:5:1101:10000:73724/1 +NANKNVIFFMILILYKNNK 19 38 66 4.82132 C57KNANXX:5:1101:10000:61940/1 +VLEIYPTNIIYFLIVK 16 36 60 5.37246 C57KNANXX:5:1101:10000:33905/1 +KFDEITLDVYYKKGKESSK 19 33 96 22.2262 C57KNANXX:5:1101:10001:101410/1 +DVIECRAPQGVCSHYMLVFLRMEPKFCKEV 30 38 93 0.240116 C57KNANXX:5:1101:10000:19758/1 +QETIALENEIELLQAQIEQIGRAVQQECR 29 35 96 3.85699 C57KNANXX:5:1101:10000:58282/1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/testdb_nometagene.metapeptides.tsv Thu Oct 13 08:38:04 2016 -0400 @@ -0,0 +1,47 @@ +sequence length min_qualscore partial_orf_length metagene_score read_ids +TTLIERVVDECSLKK 15 36 96 -1.0 C57KNANXX:5:1101:10000:79229/1 +DLRILLRERLVAGDSDEAAVDFIVDR 26 31 96 -1.0 C57KNANXX:5:1101:10001:25583/1 +KNNSSCSFCGKKR 13 34 54 -1.0 C57KNANXX:5:1101:10000:48824/1 +AATPVGATTIFLSEHSSTTLSIR 23 36 96 -1.0 C57KNANXX:5:1101:10000:79229/1 +RITTLKPFSCNNFTVK 16 38 96 -1.0 C57KNANXX:5:1101:10000:23019/1 +FHLNYQKFFFQKHLPPLIIK 20 34 96 -1.0 C57KNANXX:5:1101:10000:91317/1 +HASIHQFGIVGCNIIWAKPK 20 32 96 -1.0 C57KNANXX:5:1101:10000:60887/1 +SNFVQSKLNYLYLHR 15 37 93 -1.0 C57KNANXX:5:1101:10000:99203/1 +NLSYQSNTELKSNLNFQLVCEIRILIK 27 34 96 -1.0 C57KNANXX:5:1101:10000:46059/1 +LLQEKGFNVVILFSGKEHK 19 38 96 -1.0 C57KNANXX:5:1101:10000:23019/1 +SIPQISTGDMLRAAVK 16 36 93 -1.0 C57KNANXX:5:1101:10000:12567/1 +SVQFEPGVTRFR 12 37 96 -1.0 C57KNANXX:5:1101:10000:58282/1 +DFTPKRQSYWATFR 14 38 96 -1.0 C57KNANXX:5:1101:10001:17914/1 +HWRVICVFCCANVWLLGTVTKRR 23 35 96 -1.0 C57KNANXX:5:1101:10001:25583/1 +QHPFLQTNHCPLVFVSPVLQIILGSMR 27 38 96 -1.0 C57KNANXX:5:1101:10000:76185/1 +LSAKQVKQILMDSGLSVK 18 38 96 -1.0 C57KNANXX:5:1101:10000:76185/1 +SSDLILPKIICKTGETNTNGQWFVCK 26 38 93 -1.0 C57KNANXX:5:1101:10000:76185/1 +DIANIISYGVSTSNSCIYRISSNNNR 26 34 93 -1.0 C57KNANXX:5:1101:10000:3140/1 +LIMFLTNNSSIQEVLFFPQMRPEVVQKK 28 34 96 -1.0 C57KNANXX:5:1101:10000:92875/1 +QIRHLACEPDVVSSQR 16 36 96 -1.0 C57KNANXX:5:1101:10000:29932/1 +YHNFEGYRWR 10 34 93 -1.0 C57KNANXX:5:1101:10000:68440/1 +RTAGWIRHQAHRPSALFAVGKNQR 24 36 93 -1.0 C57KNANXX:5:1101:10000:29932/1 +ACFAGKDIVVPGVANKLTAIITHFFSK 27 33 96 -1.0 C57KNANXX:5:1101:10001:17691/1 +ELFFGKIGRAVQQECRDR 18 38 96 -1.0 C57KNANXX:5:1101:10000:24094/1 +SPEEIRELSKFKKPIFYDGKFFDIK 25 38 96 -1.0 C57KNANXX:5:1101:10000:84210/1 +SSDLDQAAQQFIVPRKGVLELAR 23 35 96 -1.0 C57KNANXX:5:1101:10000:86801/1 +VNGVVITVSPPTITLKVVISKPR 23 34 93 
-1.0 C57KNANXX:5:1101:10000:68440/1 +ISPKLSKVLFSKTSATFNNKADNSSSICFR 30 34 96 -1.0 C57KNANXX:5:1101:10000:91317/1 +LRERIVFWQDRK 12 38 96 -1.0 C57KNANXX:5:1101:10000:24094/1 +KCVIIAVSLLATPGTTMSFPAK 22 33 96 -1.0 C57KNANXX:5:1101:10001:17691/1 +NSLKFSILQIYIIYIYITIK 20 37 96 -1.0 C57KNANXX:5:1101:10000:29175/1 +RASSNTPLRGTINCCAAWSRSEERFSR 27 35 96 -1.0 C57KNANXX:5:1101:10000:86801/1 +FAPYVTRSMLLRAYRRRDRRHVER 24 31 96 -1.0 C57KNANXX:5:1101:10000:73874/1 +AKVNNAVVDFQFFDR 15 36 96 -1.0 C57KNANXX:5:1101:10001:15683/1 +LTGQVPYLPWAKIK 14 36 96 -1.0 C57KNANXX:5:1101:10000:29932/1 +LGLGPDDVTSDNPKLVYGR 19 32 96 -1.0 C57KNANXX:5:1101:10000:60887/1 +SQHPAAMTHSGVPKDRRAVIGITEGLIR 28 35 96 -1.0 C57KNANXX:5:1101:10000:9540/1 +NIIKKMDLVLTGEGSFDVQSTYGK 24 34 93 -1.0 C57KNANXX:5:1101:10000:99902/1 +LRILLLSVNLTGRLLETNPKHSK 23 36 93 -1.0 C57KNANXX:5:1101:10001:17914/1 +SVPAFTAARSISPVEIWGIDRK 22 31 90 -1.0 C57KNANXX:5:1101:10000:12567/1 +KIIQVVHSVEKK 12 34 51 -1.0 C57KNANXX:5:1101:10000:48824/1 +QIDEELSALLLKVADVFEKR 20 36 93 -1.0 C57KNANXX:5:1101:10000:91317/1 +IIELEHQILKVEGSIMELEKTIVDK 25 36 96 -1.0 C57KNANXX:5:1101:10000:73724/1 +CSKRSPCSHWYYRR 14 35 96 -1.0 C57KNANXX:5:1101:10000:9540/1 +KFDEITLDVYYKKGKESSK 19 33 96 -1.0 C57KNANXX:5:1101:10001:101410/1 +QETIALENEIELLQAQIEQIGRAVQQECR 29 35 96 -1.0 C57KNANXX:5:1101:10000:58282/1