diff glimmer_gbk_to_orf.xml @ 0:6351903666da draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
author bgruening
date Tue, 28 Nov 2017 10:07:28 -0500
parents
children 04861c9bbf45
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer_gbk_to_orf.xml	Tue Nov 28 10:07:28 2017 -0500
@@ -0,0 +1,217 @@
+<tool id="glimmer_gbk_to_orf" name="Extract ORF" version="@WRAPPER_VERSION@">
+    <description>from a GenBank file</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command><![CDATA[
+        python '$__tool_directory__/glimmer_gbk_to_orf.py'
+            -g '$infile'
+            -a '$aa_output'
+            -n '$nc_output'
+            ## TODO: pass the translation table; it can be read directly from the GenBank file
+    ]]></command>
+    <inputs>
+        <param name="infile" type='data' format="genbank" label="GenBank file"/>
+    </inputs>
+    <outputs>
+        <data name="aa_output" format="fasta" />
+        <data name="nc_output" format="fasta" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="test.gbk" />
+            <output name="aa_output" file="orf_aa.fa" />
+            <output name="nc_output" file="orf_nc.fa" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+Reads a GenBank file and exports FASTA-formatted amino acid and nucleotide (CDS) sequence files.
+
+-----
+
+**Example**
+	* input::
+
+		GenBank file
+
+		LOCUS       BA000030             9025608 bp    DNA     linear   BCT 21-DEC-2007
+		DEFINITION  Streptomyces avermitilis MA-4680 DNA, complete genome.
+		ACCESSION   BA000030 AP005021-AP005050
+		VERSION     BA000030.3  GI:148878541
+		DBLINK      Project: 189
+		KEYWORDS    .
+		SOURCE      Streptomyces avermitilis MA-4680
+		  ORGANISM  Streptomyces avermitilis MA-4680
+			    Bacteria; Actinobacteria; Actinobacteridae; Actinomycetales;
+			    Streptomycineae; Streptomycetaceae; Streptomyces.
+		REFERENCE   1
+		  AUTHORS   Omura,S., Ikeda,H., Ishikawa,J., Hanamoto,A., Takahashi,C.,
+			    Shinose,M., Takahashi,Y., Horikawa,H., Nakazawa,H., Osonoe,T.,
+			    Kikuchi,H., Shiba,T., Sakaki,Y. and Hattori,M.
+		  TITLE     Genome sequence of an industrial microorganism Streptomyces
+			    avermitilis: deducing the ability of producing secondary
+			    metabolites
+		  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 98 (21), 12215-12220 (2001)
+		   PUBMED   11572948
+		REFERENCE   2
+		  AUTHORS   Ikeda,H., Ishikawa,J., Hanamoto,A., Shinose,M., Kikuchi,H.,
+			    Shiba,T., Sakaki,Y., Hattori,M. and Omura,S.
+		  TITLE     Complete genome sequence and comparative analysis of the industrial
+			    microorganism Streptomyces avermitilis
+		  JOURNAL   Nat. Biotechnol. 21 (5), 526-531 (2003)
+		   PUBMED   12692562
+		REFERENCE   3  (bases 1 to 9025608)
+		  AUTHORS   Omura,S., Ikeda,H., Ishikawa,J., Hanamoto,A., Takahashi,C.,
+			    Shinose,M., Takahashi,Y., Horikawa,H., Nakazawa,H., Osonoe,T.,
+			    Kushida,N., Shiba,T., Sakaki,Y. and Hattori,M.
+		  TITLE     Direct Submission
+		  JOURNAL   Submitted (29-MAR-2002) Contact:S Omura Kitasato University,
+			    Kitasato Institute for Life Sciences; 1-15-1 Kitasato, Sagamihara,
+			    Kanagawa 228-8555, Japan URL
+			    :http://avermitilis.ls.kitasato-u.ac.jp/
+		COMMENT     On Jun 15, 2007 this sequence version replaced gi:57546753.
+			    This work was done in collaboration with Haruo Ikeda(*1), Jun
+			    Ishikawa(*2), Akiharu Hanamoto(*3), Chigusa Takahashi(*3), Mayumi
+			    Shinose(*3), Hiroshi Horikawa(*4), Hidekazu Nakazawa(*4), Tomomi
+			    Osonoe(*4), Norihiro Kushida(*4), Hisashi Kikuchi(*4), Tadayoshi
+			    Shiba(*5), Yoshiyuki Sakaki(*6,*7), Masahira Hattori(*1,*7)
+			    and Satoshi Omura(*1,*3).
+			    Final finishing process and all annotation were done by H. Ikeda
+			    and J. Ishikawa.
+			    *1 Kitasato Institute for Life Sciences, Kitasato University *2
+			    National Institute of Infectious Diseases
+			    *3 The Kitasato Institute
+			    *4 National Institute of Technology and Evaluation *5 School of
+			    Science, Kitasato University
+			    *6 Institute of Medical Science, University of Tokyo *7 RIKEN,
+			    Genomic Sciences Center
+			    All the annotated genes identified are available from following
+			    urls.
+			    http://avermitilis.ls.kitasato-u.ac.jp.
+		FEATURES             Location/Qualifiers
+		     source          1..9025608
+				     /organism="Streptomyces avermitilis MA-4680"
+				     /mol_type="genomic DNA"
+				     /strain="MA-4680"
+				     /db_xref="taxon:227882"
+				     /note="This strain is also named as strain: ATCC 31267,
+				     NCIMB 12804 or NRRL 8165."
+		     gene            complement(1380..1811)
+				     /locus_tag="SAV_1"
+		     CDS             complement(1380..1811)
+				     /locus_tag="SAV_1"
+				     /codon_start=1
+				     /transl_table=11
+				     /product="hypothetical protein"
+				     /protein_id="BAC67710.1"
+				     /db_xref="GI:29603637"
+				     /translation="MTAEWYVLVEEDTRETKRADGVELRLHRWKLAATQHIAGDQEQA
+				     AAAAEDAALNYMPGVLARHARPGDEPARHAFLTQDGAWLVLLRQRHRECHIRVTTARL
+				     MHTQEEKEAPPKSFKEKLRSALDGPQPPEPAGRPWKPGSET"
+
+
+* output::
+
+	- aminoAcidOutput
+
+	>SAV_1
+	MTAEWYVLVEEDTRETKRADGVELRLHRWKLAATQHIAGDQEQAAAAAEDAALNYMPGVL
+	ARHARPGDEPARHAFLTQDGAWLVLLRQRHRECHIRVTTARLMHTQEEKEAPPKSFKEKL
+	RSALDGPQPPEPAGRPWKPGSET
+	>SAV_2
+	VPPQGARGTIVSATGSGKTSMAAASTLNCFPEGRILVTVPTLDLLAQTAQAWRAVGHHSP
+	MIAVCSLENDPVLNERT
+	>SAV_3
+	MDWNFPDDDIFFCGGCGDDDTPDPRVPRQDKALCVRCDRVERQVRRYRITVPRRNAIMRF
+	QRDVCALCQEGPPTDHCPDAVSFWHIDHDHRCCPPGGSCGRCVRGLLCLPCNATRLPAYE
+	RLPNVLRDSPRFNTYLNSPPARHPEARPTARDHAGPRDASSYLIDAFFTAADHPEGNALS
+	S
+	>SAV_4
+	VALTPGGTRVTQWQDRQAIGDMHERRVAAALRARGWTVQPCGQGTYPPAVREALRRTRSA
+	LRHFPDLIAARGADLITIDAKDRMPSTDTDRYAVSADTVTAGLFFTAAHAPTPLYYVFGD
+	LKVLTPAEVVHYTAHALRHRSGAFHLVRTEQAHCFDDVFGSAGAAAAA
+	>SAV_5
+	MMLLMAAYVDPRFRPTLWPGTPVPTPELMPLRGARADGEWIVWTPQVRSRSHTVPVPEDF
+	YLREFMEVDPEDLDAVAALMGAYGHLGGSINTGSWDVDVYERLKELTEREHPRAPFALHG
+	ELATLFMREAQAAITTWLALRREGGLDALIEPEVSEEELAQWQASNADLEEAWPRDLDHL
+	RELSLEIRISNLVSELNAALKPFSIGIGGLGDRYPTILAVAFLQLYNHLAEDATIRECAN
+	ETCRRHFVRQRGRAAYGQNRTSGIKYCTRECARAQAQREHRRRRKQQTTTLQQPPAPGPQ
+	SHDTSEPTAEGR
+	>SAV_6
+	MISLREHQVEANARIRAWAGFPTRSPVPAQGLRGTVVSATGSGKTITAAWAARECFRGGR
+	ILVMVPTLDLLVQTAQAWRRVGHNGPMVAACSLEKDEVLEQLGVRTTTNPIQLALWAGHG
+	PVVVFATYASLVDREDPEDVTGRAKVRGPLEAALAGGQRLYGQTMDGFDLAVVDEAHSTT
+	GDLGRPWAAIHDNSRIPADFRLYLTATPRILASPRPQKGADGRELEIATMASDPDGPYGE
+	WLFELGLSEAVERGILAGFEIDVLEIRDPSPALGESEEAQRGRRLALLQTALLEHAAARN
+	LRTVMTFHQRVEEAAAFAQTMPQTAARLYEAEVSAEALVDAGALPESSIGAEFYELEAGR
+	HVPPDRVWAAWLCGDHLVAERREVLRQFADGLDAGNKRVHRAFLASVRVLGEGVDIVGER
+	GVEAICFADTRGSQVEIVQNIGRALRPNPDGTNKTARIIVPVFLQPGENPTDMVASASFA
+	PLVTVLQGLRSHSERLVEQLASRALTSGQRHVHVKRDEDGRIIGTTTEGEGGQHESEGAV
+	ESALLHFSTPRDATTIAAFLRTRVYRPESLVWLEGYQALLRWRKKNHITGLYAVPYDTET
+	EAGVTKAFPLGRWVHQQRRTYRAGELDPHRTTLLDEAGMVWEPGDEAWENKLAALRSFHR
+	AHGHLAPRRDAVWGDADSELVPVGEHMANLRRKDGLGKNPQRAATRATQLAAIDPDWNCP
+	WPLDWQRHYRVLADLATDEPHSRLPDIQPGVQFEGDDLGKWLQRQRRSWAELSEEQQQRL
+	TALGVTPAEPPTPTPSAKGGGKAAAFQRGLAALAQWIQREGAHKVVPRGHVEAVVIDGQE
+	HQHKLGVWISNTKTRRDKLTHDQRTALAALGVEWA
+	....
+
+	- orfs
+
+	>SAV_1
+	ATGACCGCCGAGTGGTACGTCCTCGTCGAAGAGGACACACGAGAGACCAAGCGCGCCGAC
+	GGCGTTGAACTCAGATTGCACCGCTGGAAACTGGCGGCCACTCAGCACATCGCAGGAGAT
+	CAGGAACAGGCCGCCGCCGCGGCCGAGGATGCGGCCCTGAACTACATGCCGGGAGTGCTC
+	GCTCGGCATGCCCGACCGGGAGACGAACCGGCCCGGCATGCTTTCCTCACCCAGGACGGG
+	GCCTGGCTGGTGCTCCTCAGGCAGCGGCACCGCGAGTGTCACATACGGGTGACCACTGCC
+	CGGCTCATGCATACACAGGAAGAGAAGGAGGCCCCGCCGAAAAGCTTCAAGGAGAAACTC
+	CGCAGCGCCCTGGATGGTCCTCAGCCGCCCGAACCGGCTGGTAGGCCATGGAAGCCGGGC
+	AGCGAAACCTGA
+	>SAV_2
+	GTGCCCCCTCAGGGAGCCCGTGGCACGATCGTGTCAGCTACCGGGTCCGGCAAAACGAGC
+	ATGGCCGCCGCGAGCACGCTGAACTGCTTCCCCGAAGGCCGGATCCTCGTGACCGTGCCG
+	ACCCTGGACCTGCTCGCACAGACCGCCCAGGCGTGGCGGGCAGTCGGCCACCACTCCCCC
+	ATGATCGCGGTGTGCTCGCTGGAGAACGACCCAGTGCTGAACGAGCGGACCTGA
+	>SAV_3
+	ATGGACTGGAACTTCCCCGACGACGACATCTTCTTCTGCGGCGGGTGCGGCGACGACGAC
+	ACCCCCGACCCGCGGGTCCCGCGTCAGGACAAGGCCCTGTGCGTCCGCTGCGACAGAGTC
+	GAACGGCAGGTCCGCCGATACCGGATCACCGTGCCGCGGAGGAACGCGATCATGCGCTTC
+	CAGCGCGACGTCTGCGCCCTGTGCCAGGAAGGCCCGCCGACCGACCACTGCCCCGATGCC
+	GTCAGCTTCTGGCACATCGACCACGACCACCGCTGCTGCCCTCCCGGCGGCTCATGCGGG
+	CGGTGCGTCCGCGGCCTCCTGTGCCTGCCCTGCAACGCCACCCGCCTGCCCGCCTACGAA
+	CGCCTCCCCAACGTCCTCCGCGACAGCCCTCGCTTCAACACCTACCTCAACAGCCCACCC
+	GCCCGGCACCCCGAAGCCCGCCCCACCGCCAGGGACCATGCAGGCCCCCGCGACGCATCC
+	AGCTACCTCATCGACGCCTTTTTCACCGCCGCGGACCATCCCGAGGGGAACGCCCTCAGC
+	TCCTGA
+	>SAV_4
+	GTGGCACTTACCCCAGGGGGAACCCGAGTGACGCAGTGGCAGGACCGCCAGGCGATAGGC
+	GACATGCACGAACGTCGGGTGGCGGCCGCGCTGCGCGCCCGCGGCTGGACCGTCCAGCCC
+	TGCGGACAGGGCACCTACCCGCCCGCCGTACGGGAAGCCCTGCGCCGGACCCGCTCCGCC
+	CTGCGGCACTTCCCCGACCTCATCGCCGCCCGCGGCGCCGACCTGATCACCATCGACGCC
+	AAGGACCGCATGCCCAGCACCGACACCGACCGCTACGCCGTCAGCGCCGACACCGTGACC
+	GCCGGCCTCTTTTTCACCGCGGCCCACGCTCCGACTCCGCTGTACTACGTCTTCGGCGAC
+	CTGAAGGTCCTCACGCCGGCGGAGGTGGTCCACTACACCGCTCACGCCTTGCGCCACCGC
+	AGCGGTGCCTTCCACCTCGTACGCACGGAGCAAGCACACTGCTTCGACGACGTCTTCGGA
+	TCGGCTGGCGCAGCAGCTGCGGCATGA
+	>SAV_5
+	ATGATGCTCCTCATGGCGGCATACGTTGACCCACGCTTTCGTCCTACGCTATGGCCTGGA
+	ACGCCCGTGCCGACACCGGAGTTGATGCCTCTTCGCGGAGCGCGGGCCGACGGTGAATGG
+	ATCGTCTGGACCCCGCAGGTCCGCTCCCGCTCGCACACGGTCCCCGTGCCGGAGGACTTC
+	TACCTGCGCGAGTTCATGGAGGTCGACCCTGAGGACCTCGACGCCGTGGCCGCCCTGATG
+	GGCGCCTACGGACACCTCGGCGGGAGCATCAACACCGGAAGCTGGGACGTCGACGTCTAC
+	GAGCGCCTCAAGGAGCTCACGGAGCGCGAACACCCCCGCGCGCCGTTCGCCCTGCACGGC
+	GAACTGGCCACGCTGTTCATGAGGGAGGCGCAGGCGGCCATCACCACCTGGCTGGCCCTG
+	CGCCGCGAGGGCGGGCTCGACGCGCTCATCGAGCCCGAGGTGTCCGAGGAAGAACTGGCG
+	CAGTGGCAAGCGAGCAACGCTGATCTTGAGGAAGCGTGGCCGCGGGACCTGGACCACCTG
+	CGCGAACTCTCCCTGGAGATCAGGATCAGCAACCTCGTGAGCGAACTGAACGCCGCGCTG
+	AAGCCGTTCAGCATCGGCATCGGCGGCCTGGGCGACCGCTACCCCACCATCCTCGCTGTG
+	GCGTTCCTCCAGCTCTACAACCACCTCGCCGAGGACGCCACGATCCGCGAGTGCGCGAAC
+	GAGACCTGCCGCCGCCACTTCGTACGCCAGCGCGGCCGCGCCGCATACGGGCAGAACCGC
+	ACCAGCGGCATCAAGTACTGCACCCGCGAATGCGCCCGCGCCCAGGCCCAGCGCGAACAC
+	CGCCGGCGCCGCAAACAGCAGACCACGACCCTCCAGCAGCCGCCGGCGCCTGGTCCTCAG
+	TCTCACGACACCTCAGAGCCGACTGCCGAAGGGCGCTGA
+	.......
+
+]]></help>
+    <expand macro="citation" />
+</tool>