Mercurial > repos > iuc > taxonomy_filter_refseq
changeset 1:d9662c76b6d5 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc commit e68b434ae52d29f038bac26874ceb51d4911b4fb"
author | iuc |
---|---|
date | Wed, 29 Apr 2020 16:50:22 -0400 |
parents | 28e95c6a944d |
children | |
files | taxonomy_filter_refseq.xml test-data/output1.fasta test-data/output2.fasta test-data/output3.fasta test-data/sample_refseq.fasta |
diffstat | 5 files changed, 63 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/taxonomy_filter_refseq.xml Wed Jan 16 08:30:47 2019 -0500 +++ b/taxonomy_filter_refseq.xml Wed Apr 29 16:50:22 2020 -0400 @@ -1,7 +1,7 @@ <tool id="taxonomy_filter_refseq" name="Filter RefSeq by taxonomy" version="@TOOL_VERSION@+galaxy0"> <description>Only retain sequences that are descendants of a given taxonomic node.</description> <macros> - <token name="@TOOL_VERSION@">0.1.4</token> + <token name="@TOOL_VERSION@">0.3.0</token> </macros> <edam_topics> <edam_topic>topic_0091</edam_topic> @@ -26,7 +26,7 @@ mkdir taxonomy && ln -s '$taxonomy.nodes' taxonomy/nodes.dmp && ln -s '$taxonomy.names' taxonomy/names.dmp && #set $taxonomy_dir = 'taxonomy' #end if - taxonomy_filter_refseq '$refseq_input' '$taxonomy_dir' '$ancestor_name' >'$refseq_output_fasta' + taxonomy_filter_refseq $no_curated $no_predicted '$refseq_input' '$taxonomy_dir' '$ancestor_name' >'$refseq_output_fasta' ]]></command> <inputs> <conditional name="taxonomy"> @@ -67,6 +67,14 @@ </when> </conditional> <param name="ancestor_name" type="text" label="Ancestor taxon (scientific) name" /> + <param argument="--no_curated" type="boolean" truevalue="--no_curated" falsevalue="" checked="false" + label="Exclude curated sequences from dataset" + help="Curated sequences are identified by their accession number prefixes and excluded if this option is checked" + /> + <param argument="--no_predicted" type="boolean" truevalue="--no_predicted" falsevalue="" checked="false" + label="Exclude predicted sequences from the dataset" + help="Computationally predicted (but not curated) sequences are identified by their accession number prefix and excluded if this option is checked" + /> </inputs> <outputs> <data name="refseq_output_fasta" format="fasta" label="${tool.name} on ${on_string}" /> @@ -90,8 +98,32 @@ <param name="names" ftype="txt" value="sample_tree_names.dmp" /> <param name="history_refseq" ftype="fasta" value="sample_refseq.fasta" /> <param name="ancestor_name" value="unclassified bacterial viruses" /> + 
<param name="no_predicted" value="false" /> + <param name="no_curated" value="false" /> <output name="refseq_output_fasta" value="output1.fasta" /> </test> + <test> + <param name="taxonomy_source" value="history" /> + <param name="refseq_source" value="history" /> + <param name="nodes" ftype="txt" value="sample_tree_nodes.dmp" /> + <param name="names" ftype="txt" value="sample_tree_names.dmp" /> + <param name="history_refseq" ftype="fasta" value="sample_refseq.fasta" /> + <param name="ancestor_name" value="unclassified bacterial viruses" /> + <param name="no_predicted" value="true" /> + <param name="no_curated" value="false" /> + <output name="refseq_output_fasta" value="output2.fasta" /> + </test> + <test> + <param name="taxonomy_source" value="history" /> + <param name="refseq_source" value="history" /> + <param name="nodes" ftype="txt" value="sample_tree_nodes.dmp" /> + <param name="names" ftype="txt" value="sample_tree_names.dmp" /> + <param name="history_refseq" ftype="fasta" value="sample_refseq.fasta" /> + <param name="ancestor_name" value="unclassified bacterial viruses" /> + <param name="no_predicted" value="false" /> + <param name="no_curated" value="true" /> + <output name="refseq_output_fasta" value="output3.fasta" /> + </test> </tests> <help><![CDATA[ This tool allows NCBI RefSeq sequences to be filtered so that only those whose species name are
--- a/test-data/output1.fasta Wed Jan 16 08:30:47 2019 -0500 +++ b/test-data/output1.fasta Wed Apr 29 16:50:22 2020 -0400 @@ -17,7 +17,7 @@ >YP_044794.1 hypothetical protein [Mycoplasma phage phiMFV1] MDIFKKMEIAEKDKIEKLLLNIDKFIWDCNSKGWTKSKNKKVISLYKKTLEEIKKNEKYDWLYVPIKSQIYKFADHDLKS DIFLWFSHTKILCGYTYLKDKWNF ->YP_044795.1 MarRP [Mycoplasma phage phiMFV1] +>NP_044795.1 MarRP [Mycoplasma phage phiMFV1] MKFLKEPPVFTEEQFKELVHYTDLEDNPTFEAYKSNAFFKIYSGAKGVSKSFSRMIETVYRLVNEKNFNSFWCRNQYNHI KGTLKPLLEKVLSFLAAKGLDYRPYISIYNTEAYWDYDDGGKGRAIFFGNWKNVQSFQGVTLSQKDFAWGELVIDEPIEK ELTEELEEIYKIQASNLEILIANTVLRSKNIEGFKTNVTFLYNIFTIDHFLIKDFHNPILPLYSGTDFLKKINLNLAKEL
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output2.fasta Wed Apr 29 16:50:22 2020 -0400 @@ -0,0 +1,8 @@ +>NP_044795.1 MarRP [Mycoplasma phage phiMFV1] +MKFLKEPPVFTEEQFKELVHYTDLEDNPTFEAYKSNAFFKIYSGAKGVSKSFSRMIETVYRLVNEKNFNSFWCRNQYNHI +KGTLKPLLEKVLSFLAAKGLDYRPYISIYNTEAYWDYDDGGKGRAIFFGNWKNVQSFQGVTLSQKDFAWGELVIDEPIEK +ELTEELEEIYKIQASNLEILIANTVLRSKNIEGFKTNVTFLYNIFTIDHFLIKDFHNPILPLYSGTDFLKKINLNLAKEL +IENTYLQKEDLAFKEGMGIIVTMFSKYFVPKNELSDLQLKQFEALKTKDYKLWMITVAGFAFEEPPERYDYFMKSVLLNK +KGNWDRKKCKIINIKQLETKLENEEICGLFYGFDYGLTDNCALVVVLLLSGARIIVLDIFEDIKKLLPKNKRRENKAIYN +KVALIVKKWNEYFDKYNFKFYISWLLGNFDSFLYGDNVHSLQWMTEVFINNGINTKLLPASRFKGKKGFGIIDRQVWQKN +IFENGLVELLPKAKSLLTLLCQQVIDKEDPKNPNQRNERINKKIYDVINAFEMANTLQNCEYRNYLFNKELKEKNEQNNY
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output3.fasta Wed Apr 29 16:50:22 2020 -0400 @@ -0,0 +1,19 @@ +>YP_009173773.1 putative ATP-dependent DNA helicase [Mycoplasma phage phiMFV1] +MNKMPWESEEVEYKKSTSELKEAIISLTAMLNKKGTGTVYFGVKDDRTVVGLEIGNKTMRDISQAISNNVKPEIMPRISI +EFIDNQNVIKVEAEGEQKPYSAYDKYYIRIGEEDKKLNRDKLVDFVQKSVSKINICEIENKDQDLTFNSFLLYANIKNLT +VNKDTFEKNFDLKTKNNKYNLMSYLLADNNDISIKVNTFKDNDKSVLLKRNEYGNKCLFYAIESVLNYVGSINDTYVDLS +TATRSEQKLFNFECFREAWLNACIHNRWDEKYPPQVNIFKDYIQIESNGGIPRNMTKEKFFKGVSRPVNEKLQKIFMQLD +LVEQTGHGVPLIVKKYGEEAFEIDENTIWVKIPFNRKGFKRINEKENKNNLNDNQIKVIKTIEKESNITINQLSSTLSLS +EGYIKKIINQLKNKNLIERNGLRKNGNWTIISYNL +>YP_044791.1 RepB [Mycoplasma phage phiMFV1] +MLLTTKQLQEFYKIEMENIAKNKSLLTNFNIIDNYANGLKPKQLYFLAARPGVGKTSFALNIIFNALENLKEDECIVLFS +LEMDSIEIYSKLLALANSCSLKEVETNSKKIDFEDLNKKNLFIFDRYRNNFNNEKLSEVTPDIIYKSLNSIGLKIKAVFI +DYFQLLDNQKYSSEREKLSQCSKQLKELSKIFNCNFFVLSQLSREYEKKTNNEPSFSDLKGTGSIEQDADLIMFLYNSSN +NFNVGTLIPNKKTILNLSIAKNRNGELVKTNIDFIPHLAKFIES +>YP_044792.1 hypothetical protein [Mycoplasma phage phiMFV1] +MLNNKCDFCHKRFQPKKERYFWGNLVVCSEECLEQVSCYASEIDLNCEGENNE +>YP_044793.1 hypothetical protein [Mycoplasma phage phiMFV1] +MNKEKLIKFLIDLTEKENTKLKSDVKILKSKLKKIKKEYEEQEKAFDDLLKKL +>YP_044794.1 hypothetical protein [Mycoplasma phage phiMFV1] +MDIFKKMEIAEKDKIEKLLLNIDKFIWDCNSKGWTKSKNKKVISLYKKTLEEIKKNEKYDWLYVPIKSQIYKFADHDLKS +DIFLWFSHTKILCGYTYLKDKWNF
--- a/test-data/sample_refseq.fasta Wed Jan 16 08:30:47 2019 -0500 +++ b/test-data/sample_refseq.fasta Wed Apr 29 16:50:22 2020 -0400 @@ -44,7 +44,7 @@ >YP_044794.1 hypothetical protein [Mycoplasma phage phiMFV1] MDIFKKMEIAEKDKIEKLLLNIDKFIWDCNSKGWTKSKNKKVISLYKKTLEEIKKNEKYDWLYVPIKSQIYKFADHDLKS DIFLWFSHTKILCGYTYLKDKWNF ->YP_044795.1 MarRP [Mycoplasma phage phiMFV1] +>NP_044795.1 MarRP [Mycoplasma phage phiMFV1] MKFLKEPPVFTEEQFKELVHYTDLEDNPTFEAYKSNAFFKIYSGAKGVSKSFSRMIETVYRLVNEKNFNSFWCRNQYNHI KGTLKPLLEKVLSFLAAKGLDYRPYISIYNTEAYWDYDDGGKGRAIFFGNWKNVQSFQGVTLSQKDFAWGELVIDEPIEK ELTEELEEIYKIQASNLEILIANTVLRSKNIEGFKTNVTFLYNIFTIDHFLIKDFHNPILPLYSGTDFLKKINLNLAKEL