changeset 6:bad73d1d7345 draft

planemo upload commit d7ff2885794dff868dcd8bc1443aabbff10cb51c
author caleb-easterly
date Thu, 13 Jul 2017 16:05:51 -0400
parents d4bd627618e5
children 56ed8dee5eaf
files FastaHeader-1.0-SNAPSHOT.jar fastaFilteringTest_IN.txt fastaFilteringTest_OUT1.txt fastaFilteringTest_OUT2.txt test-data/geneticFiltering.in test-data/geneticFilteringBad.out test-data/geneticFilteringGood.out test-data/length5Filtering.in test-data/length5FilteringBad.out test-data/length5FilteringGood.out validate_fasta_database-1.0.jar validate_fasta_database.xml validate_fasta_headers.xml
diffstat 13 files changed, 147 insertions(+), 249 deletions(-) [+]
line wrap: on
line diff
Binary file FastaHeader-1.0-SNAPSHOT.jar has changed
--- a/fastaFilteringTest_IN.txt	Wed Jun 28 16:05:07 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,84 +0,0 @@
->MCHU - Calmodulin - Human, rabbit, bovine, rat, and chicken
-ADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTID
-FPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREA
-DIDGDGQVNYEEFVQMMTAK*
->gi||||5524211gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5523211gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5524201gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5524212gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5523511gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5524299gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5524871gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5524741gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->sp|Q01912|1A1C_VIGRR 1-aminocyclopropane-1-carboxylate synthase (Fragment) OS=Vigna radiata var. radiata GN=ACS5 PE=2 SV=1
-QMGLAENQLTSDLVEDWILNNPEASICTPEGINDFRAIANFQDYHGLAEFRNAVAKFMAR
-TRGNRITFDPDRIVMSGGATGAHEVTAFCLADPGEAFLVPIPYYPGFDRDLRWRTGVKLV
-PVMCDSSNNFVLTKEALEDAYEKAREDNIRVKGLLITNPSNPLGTIMDRKTLRTVVSFIN
-EKRIHLVCDEIYAATVFSQPGFISIAEILEDETDIECDRNLVHIVYSLSKDMGFPGFRVG
-IIYSYNDAVVNCARKMSSFGLVSTQTQYLLASMLNDDEFVERFLAESAKRLAQRFRVFTG
-GLAKVGIKCLQSNAGLFVWMDLRQLLKKPTFDSETELWKVIIHEVKINVSPGYSFHCTEP
-GWFRVCFA
->sp|B9K206|1A1D_AGRVS 1-aminocyclopropane-1-carboxylate deaminase OS=Agrobacterium vitis (strain S4 / ATCC BAA-846) GN=acdS PE=3 SV=1
-MLDAFDRYPLTFGPTPIEKLERLTDHLGGKVQLYAKREDCNSGLAFGGNKLRKLEYIIPD
-AIASGADTLVSIGGVQSNHTRMVAAVAAKIGFKCRLVQEAWVPHEDAVYDRVGNIMLSRI
-MGADVRLVDDGFDIGIRRSWEEAIEEVKAAGGKPYAIPAGASVHKYGGLGYVGFAEEVRA
-QEAALGFAFDYIVVCTVTGSSHAGMAVGFAKDGRADHVIGIDASFTPDQTRAQVLEIAQR
-TADLVKLGREMRPEDIVLVEDYAYPVYGVPSEETKDAIRLVGRLEGMITDPVYEGKSMQG
-MIDLVKKGYFPEGSKVLYAHLGGAPALNGYGYAFRNG
->sp|A3ME84|1A1D_BURM7 1-aminocyclopropane-1-carboxylate deaminase OS=Burkholderia mallei (strain NCTC 10247) GN=acdS PE=3 SV=1
-MNLQKFSRYPLTFGPTPIQPLKRLSAHLGGKVELYAKRDDCNSGLAFGGNKTRKLEYLIP
-DALAQGCDTLVSIGGIQSNQTRQVAAVAAHLGMKCVLVQENWVNYHDAVYDRVGNIQMSR
-MMGADVRLVPDGFDIGFRKSWEDALADVRARGGKPYAIPAGCSDHPLGGLGFVGFAEEVR
-AQEAELGFQFDYVVVCSVTGSTQAGMVVGFAADGRADRVIGVDASAKPAQTREQILRIAK
-HTADRVELGRDITSADVVLDERFGGPEYGLPNEGTLEAIRLCAKLEGVLTDPVYEGKSMH
-GMIEKVRLGEFPAGSKVLYAHLGGVPALNAYSFLFRDG
->sp|Q62CE3|1A1D_BURMA 1-aminocyclopropane-1-carboxylate deaminase OS=Burkholderia mallei (strain ATCC 23344) GN=acdS PE=3 SV=1
-MNLQKFSRYPLTFGPTPIQPLKRLSAHLGGKVELYAKRDDCNSGLAFGGNKTRKLEYLIP
-DALAQGCDTLVSIGGIQSNQTRQVAAVAAHLGMKCVLVQENWVNYHDAVYDRVGNIQMSR
-MMGADVRLVPDGFDIGFRKSWEDALADVRARGGKPYAIPAGCSDHPLGGLGFVGFAEEVR
-AQEAELGFQFDYVVVCSVTGSTQAGMVVGFAADGRADRVIGVDASAKPAQTREQILRIAK
-HTADRVELGRDITSADVVLDERFGGPEYGLPNEGTLEAIRLCAKLEGVLTDPVYEGKSMH
-GMIEKVRLGEFPAGSKVLYAHLGGVPALNAYSFLFRDG
->BAB62851.1 bcr/abl e8a2 fusion protein, partial [Homo sapiens] from GenBank
-LLYKPVDRVTRSTLVLHDLLKHTPASHPDHPLLQDALRISQNFLSSINEEITPRRQSMTVKKGEGEDRMK
-ASSTRKRLLLMEEALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITK
\ No newline at end of file
--- a/fastaFilteringTest_OUT1.txt	Wed Jun 28 16:05:07 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
->MCHU - Calmodulin - Human, rabbit, bovine, rat, and chicken
-ADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTID
-FPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREA
-DIDGDGQVNYEEFVQMMTAK*
->sp|Q01912|1A1C_VIGRR 1-aminocyclopropane-1-carboxylate synthase (Fragment) OS=Vigna radiata var. radiata GN=ACS5 PE=2 SV=1
-QMGLAENQLTSDLVEDWILNNPEASICTPEGINDFRAIANFQDYHGLAEFRNAVAKFMAR
-TRGNRITFDPDRIVMSGGATGAHEVTAFCLADPGEAFLVPIPYYPGFDRDLRWRTGVKLV
-PVMCDSSNNFVLTKEALEDAYEKAREDNIRVKGLLITNPSNPLGTIMDRKTLRTVVSFIN
-EKRIHLVCDEIYAATVFSQPGFISIAEILEDETDIECDRNLVHIVYSLSKDMGFPGFRVG
-IIYSYNDAVVNCARKMSSFGLVSTQTQYLLASMLNDDEFVERFLAESAKRLAQRFRVFTG
-GLAKVGIKCLQSNAGLFVWMDLRQLLKKPTFDSETELWKVIIHEVKINVSPGYSFHCTEP
-GWFRVCFA
->sp|B9K206|1A1D_AGRVS 1-aminocyclopropane-1-carboxylate deaminase OS=Agrobacterium vitis (strain S4 / ATCC BAA-846) GN=acdS PE=3 SV=1
-MLDAFDRYPLTFGPTPIEKLERLTDHLGGKVQLYAKREDCNSGLAFGGNKLRKLEYIIPD
-AIASGADTLVSIGGVQSNHTRMVAAVAAKIGFKCRLVQEAWVPHEDAVYDRVGNIMLSRI
-MGADVRLVDDGFDIGIRRSWEEAIEEVKAAGGKPYAIPAGASVHKYGGLGYVGFAEEVRA
-QEAALGFAFDYIVVCTVTGSSHAGMAVGFAKDGRADHVIGIDASFTPDQTRAQVLEIAQR
-TADLVKLGREMRPEDIVLVEDYAYPVYGVPSEETKDAIRLVGRLEGMITDPVYEGKSMQG
-MIDLVKKGYFPEGSKVLYAHLGGAPALNGYGYAFRNG
->sp|A3ME84|1A1D_BURM7 1-aminocyclopropane-1-carboxylate deaminase OS=Burkholderia mallei (strain NCTC 10247) GN=acdS PE=3 SV=1
-MNLQKFSRYPLTFGPTPIQPLKRLSAHLGGKVELYAKRDDCNSGLAFGGNKTRKLEYLIP
-DALAQGCDTLVSIGGIQSNQTRQVAAVAAHLGMKCVLVQENWVNYHDAVYDRVGNIQMSR
-MMGADVRLVPDGFDIGFRKSWEDALADVRARGGKPYAIPAGCSDHPLGGLGFVGFAEEVR
-AQEAELGFQFDYVVVCSVTGSTQAGMVVGFAADGRADRVIGVDASAKPAQTREQILRIAK
-HTADRVELGRDITSADVVLDERFGGPEYGLPNEGTLEAIRLCAKLEGVLTDPVYEGKSMH
-GMIEKVRLGEFPAGSKVLYAHLGGVPALNAYSFLFRDG
->sp|Q62CE3|1A1D_BURMA 1-aminocyclopropane-1-carboxylate deaminase OS=Burkholderia mallei (strain ATCC 23344) GN=acdS PE=3 SV=1
-MNLQKFSRYPLTFGPTPIQPLKRLSAHLGGKVELYAKRDDCNSGLAFGGNKTRKLEYLIP
-DALAQGCDTLVSIGGIQSNQTRQVAAVAAHLGMKCVLVQENWVNYHDAVYDRVGNIQMSR
-MMGADVRLVPDGFDIGFRKSWEDALADVRARGGKPYAIPAGCSDHPLGGLGFVGFAEEVR
-AQEAELGFQFDYVVVCSVTGSTQAGMVVGFAADGRADRVIGVDASAKPAQTREQILRIAK
-HTADRVELGRDITSADVVLDERFGGPEYGLPNEGTLEAIRLCAKLEGVLTDPVYEGKSMH
-GMIEKVRLGEFPAGSKVLYAHLGGVPALNAYSFLFRDG
->BAB62851.1 bcr/abl e8a2 fusion protein, partial [Homo sapiens] from GenBank
-LLYKPVDRVTRSTLVLHDLLKHTPASHPDHPLLQDALRISQNFLSSINEEITPRRQSMTVKKGEGEDRMK
-ASSTRKRLLLMEEALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITK
--- a/fastaFilteringTest_OUT2.txt	Wed Jun 28 16:05:07 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
->gi||||5524211gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5523211gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5524201gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5524212gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5523511gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5524299gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5524871gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
->gi||||5524741gbAAD44166.1 cytochrome b [Elephas maximus maximus]
-LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV
-EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG
-LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL
-GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX
-IENY
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/geneticFiltering.in	Thu Jul 13 16:05:51 2017 -0400
@@ -0,0 +1,8 @@
+>generic|001
+ACTGACTG
+>generic|002
+ACUGACUG
+>generic|003
+MKMMMMM
+>generic|004
+MKMMMMMX
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/geneticFilteringBad.out	Thu Jul 13 16:05:51 2017 -0400
@@ -0,0 +1,6 @@
+>generic|001
+ACTGACTG
+>generic|002
+ACUGACUG
+>generic|004
+MKMMMMMX
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/geneticFilteringGood.out	Thu Jul 13 16:05:51 2017 -0400
@@ -0,0 +1,2 @@
+>generic|003
+MKMMMMM
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/length5Filtering.in	Thu Jul 13 16:05:51 2017 -0400
@@ -0,0 +1,6 @@
+>generic|001
+MMMMMMMMMM
+>generic|002
+MMMMM
+>generic|003
+MMMM
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/length5FilteringBad.out	Thu Jul 13 16:05:51 2017 -0400
@@ -0,0 +1,2 @@
+>generic|003
+MMMM
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/length5FilteringGood.out	Thu Jul 13 16:05:51 2017 -0400
@@ -0,0 +1,4 @@
+>generic|001
+MMMMMMMMMM
+>generic|002
+MMMMM
\ No newline at end of file
Binary file validate_fasta_database-1.0.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/validate_fasta_database.xml	Thu Jul 13 16:05:51 2017 -0400
@@ -0,0 +1,119 @@
+<tool id="validate_fasta_database" name="Validate FASTA Database" version="0.1.3">
+    <requirements>
+    </requirements> <!-- NOTE(review): no packages are declared although the command invokes `java`; presumably a JRE must already be on PATH - confirm -->
+    <stdio> <!-- Maps exit code 1 to a fatal error. NOTE(review): the command tag also sets detect_errors="exit_code"; confirm which of the two Galaxy honors -->
+        <exit_code range="1" level="fatal" description="Invalid FASTA headers detected, was asked to fail"/>
+    </stdio>
+    <command detect_errors="exit_code"><![CDATA[
+        java -jar '$__tool_directory__/validate_fasta_database-1.0.jar'
+        '$inFasta'
+        '$goodFastaOut'
+        '$badFastaOut'
+        '$crashIfInvalid'
+        '$checkIsProtein'
+        '$customLetters'
+        '$checkHasAccession'
+        '$minimumLength'
+    ]]></command> <!-- Arguments are positional: input FASTA, passed-output path, failed-output path, then the five option values in the order listed. The jar path is quoted so install directories containing spaces still work. -->
+    <inputs> <!-- Boolean defaults are declared with `checked`, the attribute Galaxy reads for boolean params. -->
+        <param type="data" name="inFasta" format="fasta" label="Select input FASTA dataset"/>
+        <param type="boolean" name="crashIfInvalid"
+               label="Fail job if invalid FASTA headers detected?"
+               checked="false"/>
+        <param type="boolean" name="checkIsProtein"
+               label="Ensure that sequence is a valid amino acid sequence?"
+               help="Checks that sequence only contains the 20 standard amino
+                acids (and optional non-standard AAs), and checks that it is not DNA or RNA"
+               checked="true"/>
+        <param type="text" name="customLetters" value=""
+               label="Optional: add one letter codes for any non-standard amino acids that you are using. "
+               help="Anything that is not an upper case letter [A-Z] will be ignored."/>
+        <param type="boolean" name="checkHasAccession"
+               label="Only pass sequences with accession numbers?"
+               checked="false"/>
+        <param type="integer" name="minimumLength"
+               label="Minimum length for sequences to pass"
+               value="0"/>
+
+
+        <!--<conditional name="checkLength">-->
+            <!--<param type="boolean" name="checkLength" label="Filter out sequences below a minimum sequenceLength?">-->
+                <!--<option value="true"></option>-->
+                <!--<option value="false"></option>-->
+            <!--</param>-->
+            <!--<when value="true">-->
+                <!--<param name="minimumLength" type="integer" value="0" label="Minimum sequenceLength that AA sequence must have"/>-->
+            <!--</when>-->
+            <!--<when value="false">-->
+            <!--</when>-->
+        <!--</conditional>-->
+
+    </inputs>
+    <outputs> <!-- Two FASTA datasets: sequences that passed validation and sequences that failed it. -->
+        <data format="fasta" name="goodFastaOut" label="Validate FASTA: Passed Sequences"/>
+        <data format="fasta" name="badFastaOut" label="Validate FASTA: Failed Sequences"/>
+    </outputs>
+    <tests>
+        <!-- General filtering with every option left at its default. -->
+        <test>
+            <param name="inFasta" value="fastaFilteringTest_IN.fasta"/>
+            <output name="goodFastaOut" file="fastaFilteringTest_OUT1.fasta" />
+            <output name="badFastaOut" file="fastaFilteringTest_OUT2.fasta" />
+        </test>
+
+        <!-- Protein check: the DNA (ACTGACTG), RNA (ACUGACUG) and X-containing (MKMMMMMX) records are expected in the failed output; only MKMMMMM is expected to pass. -->
+        <test>
+            <param name="inFasta" value="geneticFiltering.in"/>
+            <param name="checkIsProtein" value="true"/>
+            <output name="goodFastaOut" file="geneticFilteringGood.out"/>
+            <output name="badFastaOut" file="geneticFilteringBad.out"/>
+        </test>
+
+        <test> <!-- Minimum length 5: the 10- and 5-residue records are expected to pass (threshold is inclusive); the 4-residue record is expected to fail. -->
+            <param name="inFasta" value="length5Filtering.in"/>
+            <param name="minimumLength" value="5"/>
+            <output name="goodFastaOut" file="length5FilteringGood.out"/>
+            <output name="badFastaOut" file="length5FilteringBad.out"/>
+        </test>
+    </tests>
+    <help>
+
+<![CDATA[
+**Notes**
+
+Takes a FASTA database and validates the headers using the Compomics (developers of SearchGUI and PeptideShaker) schema. 
+Custom FASTA databases may be in an invalid format, which causes SearchGUI to crash.
+        
+**Output**
+
+The main output of this tool, "Validate FASTA: Passed Sequences", is a FASTA database that can be run through SearchGUI without error.
+The failed sequences may be examined for typos and other errors. 
+
+In addition, the tool will print the databases assigned by the Compomics utility (e.g., UniProt), for a quick check of the validity of the custom FASTA database. 
+
+Sequences that may cause the tool to report an exception are those that are not valid examples of the following formats: 
+    * UniProt
+    * SwissProt (starts with ">sw|" or ">SW|")
+    * NCBI (starts with ">gi|" or ">GI|")
+    * Halobacterium from Max Planck (starts with "OE")
+    * H Influenza, from Novartis (starts with ">hflu_")
+    * C Trachomatis (starts with ">C.tr\_" or "C\_trachomatis\_")
+    * M Tuberculosis (starts with ">M. tub")
+    * Saccharomyces Genome Database (contains "SGDID")
+    * Genome translation (ex. ">dm345\_3L-sense [2343534-234353938]")
+    * Genome Annotation Framework for Flexible Analysis (GAFFA) (starts with ">GAFFA")
+    * UPS (contains "\_HUMAN\_UPS")
+           
+Many sequences are reported as Generic, which may or may not allow for extraction of the accession number. 
+]]>
+    </help>
+    <citations>
+        <citation type="bibtex">
+            @misc{fastaValidationTool,
+            author = {The GalaxyP Team},
+            date = {22 June 2017},
+            title = {FASTA Database Validation Tool}
+            }
+        </citation>
+    </citations>
+</tool>
--- a/validate_fasta_headers.xml	Wed Jun 28 16:05:07 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-<tool id="validate_fasta_database" name="Validate FASTA Headers" version="0.1.2">
-    <requirements>
-    </requirements>
-    <stdio>
-        <exit_code range="1" level="fatal" description="Invalid FASTA headers detected, was asked to fail"/>
-    </stdio>
-    <command detect_errors="exit_code"><![CDATA[
-        java -jar $__tool_directory__/FastaHeader-1.0-SNAPSHOT.jar '$FASTA' '$goodFasta' '$badFasta' '$crashIfInvalid'
-        '$checkIsProtein' $checkLength.checkLength
-        #if $checkLength.checkLength
-             $checkLength.minimumLength
-        #end if
-    ]]></command>
-    <inputs>
-        <param type="data" name="FASTA" format="fasta" label="Select input FASTA dataset"/>
-        <param type="boolean" name="crashIfInvalid" label="Fail job if invalid FASTA headers detected?"/>
-        <param type="boolean" name="checkIsProtein" label="Ensure that sequence is not DNA or RNA?"/>
-        <conditional name="checkLength">
-            <param type="boolean" name="checkLength" label="Filter out sequences below a minimum sequenceLength?">
-                <option value="true"></option>
-                <option value="false"></option>
-            </param>
-            <when value="true">
-                <param name="minimumLength" type="integer" value="0" label="Minimum sequenceLength that AA sequence must have"/>
-            </when>
-            <when value="false">
-            </when>
-        </conditional>
-
-    </inputs>
-    <outputs>
-        <data name="goodFasta" format="fasta" label="Validate FASTA: Passed Sequences"/>
-        <data name="badFasta" format="fasta" label="Validate FASTA: Failed Sequences"/>
-    </outputs>
-    <tests>
-        <test>
-            <param name="FASTA" value="fastaFilteringTest_IN.fasta"/>
-            <output name="goodFasta" file="fastaFilteringTest_OUT1.fasta" />
-            <output name="badFasta" file="fastaFilteringTest_OUT2.fasta" />
-        </test>
-    </tests>
-    <help>
-<![CDATA[
-**Notes**
-
-Takes a FASTA database and validates the headers using the Compomics (developers of SearchGUI and PeptideShaker) schema. 
-Custom FASTA databases may be in an invalid format, which causes SearchGUI to crash.
-        
-**Output**
-
-The main output of this tool, "Validate FASTA: Passed Sequences", is a FASTA database that can be run through SearchGUI without error.
-The failed sequences may be examined for typos and other errors. 
-
-In addition, the tool will print the databases assigned by the Compomics utility (i.e., UniProt), for a quick check of the validity of the custom FASTA database. 
-
-Sequences that may cause the tool to report an exception are those that are not valid examples of the following formats: 
-    * UniProt,
-    * SwissProt (starts with ">sw|" or ">SW|")
-    * NCBI (starts with ">gi|" or ">GI|")
-    * Halobacterium from Max Planck (starts with "OE")
-    * H Influenza, from Novartis (starts with ">hflu_")
-    * C Trachomatis (starts with ">C.tr\_" or "C\_trachomatis\_")
-    * M Tuberculosis (starts with ">M. tub")
-    * Saccharomyces Genome Database (contains "SGDID")
-    * Genome translation (ex. ">dm345\_3L-sense [2343534-234353938]")
-    * Genome Annotation Framework for Flexible Analysis (GAFFA) (starts with ">GAFFA")
-    * UPS (contains "\_HUMAN\_UPS")
-           
-Many sequences are reported as Generic, which may or may not allow for extraction of the accession number. 
-]]>
-    </help>
-    <citations>
-        <citation type="bibtex">
-            @misc{fastaValidation,
-            author = {The GalaxyP Team},
-            date = {22 June 2017},
-            title = {FASTA Database Validation Tool}
-            }
-        </citation>
-    </citations>
-</tool>