Repository 'taxonomy_filter_refseq'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/taxonomy_filter_refseq

Changeset 1:d9662c76b6d5 (2020-04-29)
Previous changeset 0:28e95c6a944d (2019-01-16)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc commit e68b434ae52d29f038bac26874ceb51d4911b4fb"
modified:
taxonomy_filter_refseq.xml
test-data/output1.fasta
test-data/sample_refseq.fasta
added:
test-data/output2.fasta
test-data/output3.fasta
b
diff -r 28e95c6a944d -r d9662c76b6d5 taxonomy_filter_refseq.xml
--- a/taxonomy_filter_refseq.xml Wed Jan 16 08:30:47 2019 -0500
+++ b/taxonomy_filter_refseq.xml Wed Apr 29 16:50:22 2020 -0400
[
@@ -1,7 +1,7 @@
 <tool id="taxonomy_filter_refseq" name="Filter RefSeq by taxonomy" version="@TOOL_VERSION@+galaxy0">
     <description>Only retain sequences that are descendants of a given taxonomic node.</description>
     <macros>
-        <token name="@TOOL_VERSION@">0.1.4</token>
+        <token name="@TOOL_VERSION@">0.3.0</token>
     </macros>
     <edam_topics>
         <edam_topic>topic_0091</edam_topic>
@@ -26,7 +26,7 @@
             mkdir taxonomy && ln -s '$taxonomy.nodes' taxonomy/nodes.dmp && ln -s '$taxonomy.names' taxonomy/names.dmp &&
             #set $taxonomy_dir = 'taxonomy'
         #end if
-     taxonomy_filter_refseq '$refseq_input' '$taxonomy_dir' '$ancestor_name' >'$refseq_output_fasta'
+     taxonomy_filter_refseq $no_curated $no_predicted '$refseq_input' '$taxonomy_dir' '$ancestor_name' >'$refseq_output_fasta'
     ]]></command>
     <inputs>
         <conditional name="taxonomy">
@@ -67,6 +67,14 @@
             </when>
         </conditional>
         <param name="ancestor_name" type="text" label="Ancestor taxon (scientific) name" />
+        <param argument="--no_curated" type="boolean" truevalue="--no_curated" falsevalue="" checked="false"
+            label="Exclude curated sequences from the dataset"
+            help="Curated sequences are identified by their accession number prefixes and excluded if this option is checked"
+        />
+        <param argument="--no_predicted" type="boolean" truevalue="--no_predicted" falsevalue="" checked="false"
+            label="Exclude predicted sequences from the dataset"
+            help="Computationally predicted (but not curated) sequences are identified by their accession number prefix and excluded if this option is checked"
+        />
     </inputs>
     <outputs>
         <data name="refseq_output_fasta" format="fasta" label="${tool.name} on ${on_string}" />
@@ -90,8 +98,32 @@
             <param name="names" ftype="txt" value="sample_tree_names.dmp" />
             <param name="history_refseq" ftype="fasta" value="sample_refseq.fasta" />
             <param name="ancestor_name" value="unclassified bacterial viruses" />
+            <param name="no_predicted" value="false" />
+            <param name="no_curated" value="false" />
             <output name="refseq_output_fasta" value="output1.fasta" />
         </test>
+        <test>
+            <param name="taxonomy_source" value="history" />
+            <param name="refseq_source" value="history" />
+            <param name="nodes" ftype="txt" value="sample_tree_nodes.dmp" />
+            <param name="names" ftype="txt" value="sample_tree_names.dmp" />
+            <param name="history_refseq" ftype="fasta" value="sample_refseq.fasta" />
+            <param name="ancestor_name" value="unclassified bacterial viruses" />
+            <param name="no_predicted" value="true" />
+            <param name="no_curated" value="false" />
+            <output name="refseq_output_fasta" value="output2.fasta" />
+        </test>        
+        <test>
+            <param name="taxonomy_source" value="history" />
+            <param name="refseq_source" value="history" />
+            <param name="nodes" ftype="txt" value="sample_tree_nodes.dmp" />
+            <param name="names" ftype="txt" value="sample_tree_names.dmp" />
+            <param name="history_refseq" ftype="fasta" value="sample_refseq.fasta" />
+            <param name="ancestor_name" value="unclassified bacterial viruses" />
+            <param name="no_predicted" value="false" />
+            <param name="no_curated" value="true" />
+            <output name="refseq_output_fasta" value="output3.fasta" />
+        </test>        
     </tests>
     <help><![CDATA[
         This tool allows NCBI RefSeq sequences to be filtered so that only those whose species name are 
b
diff -r 28e95c6a944d -r d9662c76b6d5 test-data/output1.fasta
--- a/test-data/output1.fasta Wed Jan 16 08:30:47 2019 -0500
+++ b/test-data/output1.fasta Wed Apr 29 16:50:22 2020 -0400
[
@@ -17,7 +17,7 @@
 >YP_044794.1 hypothetical protein [Mycoplasma phage phiMFV1]
 MDIFKKMEIAEKDKIEKLLLNIDKFIWDCNSKGWTKSKNKKVISLYKKTLEEIKKNEKYDWLYVPIKSQIYKFADHDLKS
 DIFLWFSHTKILCGYTYLKDKWNF
->YP_044795.1 MarRP [Mycoplasma phage phiMFV1]
+>NP_044795.1 MarRP [Mycoplasma phage phiMFV1]
 MKFLKEPPVFTEEQFKELVHYTDLEDNPTFEAYKSNAFFKIYSGAKGVSKSFSRMIETVYRLVNEKNFNSFWCRNQYNHI
 KGTLKPLLEKVLSFLAAKGLDYRPYISIYNTEAYWDYDDGGKGRAIFFGNWKNVQSFQGVTLSQKDFAWGELVIDEPIEK
 ELTEELEEIYKIQASNLEILIANTVLRSKNIEGFKTNVTFLYNIFTIDHFLIKDFHNPILPLYSGTDFLKKINLNLAKEL
b
diff -r 28e95c6a944d -r d9662c76b6d5 test-data/output2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output2.fasta Wed Apr 29 16:50:22 2020 -0400
[
@@ -0,0 +1,8 @@
+>NP_044795.1 MarRP [Mycoplasma phage phiMFV1]
+MKFLKEPPVFTEEQFKELVHYTDLEDNPTFEAYKSNAFFKIYSGAKGVSKSFSRMIETVYRLVNEKNFNSFWCRNQYNHI
+KGTLKPLLEKVLSFLAAKGLDYRPYISIYNTEAYWDYDDGGKGRAIFFGNWKNVQSFQGVTLSQKDFAWGELVIDEPIEK
+ELTEELEEIYKIQASNLEILIANTVLRSKNIEGFKTNVTFLYNIFTIDHFLIKDFHNPILPLYSGTDFLKKINLNLAKEL
+IENTYLQKEDLAFKEGMGIIVTMFSKYFVPKNELSDLQLKQFEALKTKDYKLWMITVAGFAFEEPPERYDYFMKSVLLNK
+KGNWDRKKCKIINIKQLETKLENEEICGLFYGFDYGLTDNCALVVVLLLSGARIIVLDIFEDIKKLLPKNKRRENKAIYN
+KVALIVKKWNEYFDKYNFKFYISWLLGNFDSFLYGDNVHSLQWMTEVFINNGINTKLLPASRFKGKKGFGIIDRQVWQKN
+IFENGLVELLPKAKSLLTLLCQQVIDKEDPKNPNQRNERINKKIYDVINAFEMANTLQNCEYRNYLFNKELKEKNEQNNY
b
diff -r 28e95c6a944d -r d9662c76b6d5 test-data/output3.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output3.fasta Wed Apr 29 16:50:22 2020 -0400
[
@@ -0,0 +1,19 @@
+>YP_009173773.1 putative ATP-dependent DNA helicase [Mycoplasma phage phiMFV1]
+MNKMPWESEEVEYKKSTSELKEAIISLTAMLNKKGTGTVYFGVKDDRTVVGLEIGNKTMRDISQAISNNVKPEIMPRISI
+EFIDNQNVIKVEAEGEQKPYSAYDKYYIRIGEEDKKLNRDKLVDFVQKSVSKINICEIENKDQDLTFNSFLLYANIKNLT
+VNKDTFEKNFDLKTKNNKYNLMSYLLADNNDISIKVNTFKDNDKSVLLKRNEYGNKCLFYAIESVLNYVGSINDTYVDLS
+TATRSEQKLFNFECFREAWLNACIHNRWDEKYPPQVNIFKDYIQIESNGGIPRNMTKEKFFKGVSRPVNEKLQKIFMQLD
+LVEQTGHGVPLIVKKYGEEAFEIDENTIWVKIPFNRKGFKRINEKENKNNLNDNQIKVIKTIEKESNITINQLSSTLSLS
+EGYIKKIINQLKNKNLIERNGLRKNGNWTIISYNL
+>YP_044791.1 RepB [Mycoplasma phage phiMFV1]
+MLLTTKQLQEFYKIEMENIAKNKSLLTNFNIIDNYANGLKPKQLYFLAARPGVGKTSFALNIIFNALENLKEDECIVLFS
+LEMDSIEIYSKLLALANSCSLKEVETNSKKIDFEDLNKKNLFIFDRYRNNFNNEKLSEVTPDIIYKSLNSIGLKIKAVFI
+DYFQLLDNQKYSSEREKLSQCSKQLKELSKIFNCNFFVLSQLSREYEKKTNNEPSFSDLKGTGSIEQDADLIMFLYNSSN
+NFNVGTLIPNKKTILNLSIAKNRNGELVKTNIDFIPHLAKFIES
+>YP_044792.1 hypothetical protein [Mycoplasma phage phiMFV1]
+MLNNKCDFCHKRFQPKKERYFWGNLVVCSEECLEQVSCYASEIDLNCEGENNE
+>YP_044793.1 hypothetical protein [Mycoplasma phage phiMFV1]
+MNKEKLIKFLIDLTEKENTKLKSDVKILKSKLKKIKKEYEEQEKAFDDLLKKL
+>YP_044794.1 hypothetical protein [Mycoplasma phage phiMFV1]
+MDIFKKMEIAEKDKIEKLLLNIDKFIWDCNSKGWTKSKNKKVISLYKKTLEEIKKNEKYDWLYVPIKSQIYKFADHDLKS
+DIFLWFSHTKILCGYTYLKDKWNF
b
diff -r 28e95c6a944d -r d9662c76b6d5 test-data/sample_refseq.fasta
--- a/test-data/sample_refseq.fasta Wed Jan 16 08:30:47 2019 -0500
+++ b/test-data/sample_refseq.fasta Wed Apr 29 16:50:22 2020 -0400
[
@@ -44,7 +44,7 @@
 >YP_044794.1 hypothetical protein [Mycoplasma phage phiMFV1]
 MDIFKKMEIAEKDKIEKLLLNIDKFIWDCNSKGWTKSKNKKVISLYKKTLEEIKKNEKYDWLYVPIKSQIYKFADHDLKS
 DIFLWFSHTKILCGYTYLKDKWNF
->YP_044795.1 MarRP [Mycoplasma phage phiMFV1]
+>NP_044795.1 MarRP [Mycoplasma phage phiMFV1]
 MKFLKEPPVFTEEQFKELVHYTDLEDNPTFEAYKSNAFFKIYSGAKGVSKSFSRMIETVYRLVNEKNFNSFWCRNQYNHI
 KGTLKPLLEKVLSFLAAKGLDYRPYISIYNTEAYWDYDDGGKGRAIFFGNWKNVQSFQGVTLSQKDFAWGELVIDEPIEK
 ELTEELEEIYKIQASNLEILIANTVLRSKNIEGFKTNVTFLYNIFTIDHFLIKDFHNPILPLYSGTDFLKKINLNLAKEL