view glimmer_gbk_to_orf.xml @ 3:44dc09edd97b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit a80e3e4aa3a40970af507bf9119cf7f1c2ffb336
author iuc
date Mon, 16 Dec 2024 14:42:47 +0000
parents 04861c9bbf45
children
line wrap: on
line source

<tool id="glimmer_gbk_to_orf" name="Extract ORF" version="@WRAPPER_VERSION@" profile="@PROFILE_VERSION@">
    <description>from a GenBank file</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <!--
        Runs the bundled glimmer_gbk_to_orf.py script from the tool directory.
        The input dataset is passed with -g; the two output dataset paths are
        passed with -a (aa_output) and -n (nc_output).
    -->
    <command><![CDATA[
        python '$__tool_directory__/glimmer_gbk_to_orf.py'
            -g '$infile'
            -a '$aa_output'
            -n '$nc_output'
            ## TODO: support the translation table; it can be extracted from the GenBank file directly
    ]]></command>
    <inputs>
        <!-- Single GenBank dataset; handed to the script through the -g option of the command. -->
        <param name="infile" type="data" format="genbank" label="GenBank file"/>
    </inputs>
    <outputs>
        <!--
            Two FASTA outputs written by the script (-a and -n respectively).
            Labels are templated: both must use ${tool.name}; without the
            leading "$" the literal text "{tool.name}" appears in the history.
        -->
        <data name="aa_output" format="fasta" label="${tool.name} on ${on_string}: Amino acids"/>
        <data name="nc_output" format="fasta" label="${tool.name} on ${on_string}: Nucleotides"/>
    </outputs>
    <tests>
        <!-- Single regression test: runs the tool on test.gbk and compares both
             outputs against the expected FASTA files orf_aa.fa and orf_nc.fa. -->
        <test>
            <param name="infile" value="test.gbk" />
            <output name="aa_output" file="orf_aa.fa" />
            <output name="nc_output" file="orf_nc.fa" />
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Reads a GenBank file and exports its annotated coding sequences (CDS) as two FASTA files: one with the amino acid translations and one with the nucleotide sequences.

-----

**Example**

* input::

		GenBank file

		LOCUS       BA000030             9025608 bp    DNA     linear   BCT 21-DEC-2007
		DEFINITION  Streptomyces avermitilis MA-4680 DNA, complete genome.
		ACCESSION   BA000030 AP005021-AP005050
		VERSION     BA000030.3  GI:148878541
		DBLINK      Project: 189
		KEYWORDS    .
		SOURCE      Streptomyces avermitilis MA-4680
		  ORGANISM  Streptomyces avermitilis MA-4680
			    Bacteria; Actinobacteria; Actinobacteridae; Actinomycetales;
			    Streptomycineae; Streptomycetaceae; Streptomyces.
		REFERENCE   1
		  AUTHORS   Omura,S., Ikeda,H., Ishikawa,J., Hanamoto,A., Takahashi,C.,
			    Shinose,M., Takahashi,Y., Horikawa,H., Nakazawa,H., Osonoe,T.,
			    Kikuchi,H., Shiba,T., Sakaki,Y. and Hattori,M.
		  TITLE     Genome sequence of an industrial microorganism Streptomyces
			    avermitilis: deducing the ability of producing secondary
			    metabolites
		  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 98 (21), 12215-12220 (2001)
		   PUBMED   11572948
		REFERENCE   2
		  AUTHORS   Ikeda,H., Ishikawa,J., Hanamoto,A., Shinose,M., Kikuchi,H.,
			    Shiba,T., Sakaki,Y., Hattori,M. and Omura,S.
		  TITLE     Complete genome sequence and comparative analysis of the industrial
			    microorganism Streptomyces avermitilis
		  JOURNAL   Nat. Biotechnol. 21 (5), 526-531 (2003)
		   PUBMED   12692562
		REFERENCE   3  (bases 1 to 9025608)
		  AUTHORS   Omura,S., Ikeda,H., Ishikawa,J., Hanamoto,A., Takahashi,C.,
			    Shinose,M., Takahashi,Y., Horikawa,H., Nakazawa,H., Osonoe,T.,
			    Kushida,N., Shiba,T., Sakaki,Y. and Hattori,M.
		  TITLE     Direct Submission
		  JOURNAL   Submitted (29-MAR-2002) Contact:S Omura Kitasato University,
			    Kitasato Institute for Life Sciences; 1-15-1 Kitasato, Sagamihara,
			    Kanagawa 228-8555, Japan URL
			    :http://avermitilis.ls.kitasato-u.ac.jp/
		COMMENT     On Jun 15, 2007 this sequence version replaced gi:57546753.
			    This work was done in collaboration with Haruo Ikeda(*1), Jun
			    Ishikawa(*2), Akiharu Hanamoto(*3), Chigusa Takahashi(*3), Mayumi
			    Shinose(*3), Hiroshi Horikawa(*4), Hidekazu Nakazawa(*4), Tomomi
			    Osonoe(*4), Norihiro Kushida(*4), Hisashi Kikuchi(*4), Tadayoshi
			    Shiba(*5), Yoshiyuki Sakaki(*6,*7), Masahira Hattori(*1,*7)
			    and Satoshi Omura(*1,*3).
			    Final finishing process and all annotation were done by H. Ikeda
			    and J. Ishikawa.
			    *1 Kitasato Institute for Life Sciences, Kitasato University *2
			    National Institute of Infectious Diseases
			    *3 The Kitasato Institute
			    *4 National Institute of Technology and Evaluation *5 School of
			    Science, Kitasato University
			    *6 Institute of Medical Science, University of Tokyo *7 RIKEN,
			    Genomic Sciences Center
			    All the annotated genes identified are available from following
			    urls.
			    http://avermitilis.ls.kitasato-u.ac.jp.
		FEATURES             Location/Qualifiers
		     source          1..9025608
				     /organism="Streptomyces avermitilis MA-4680"
				     /mol_type="genomic DNA"
				     /strain="MA-4680"
				     /db_xref="taxon:227882"
				     /note="This strain is also named as strain: ATCC 31267,
				     NCIMB 12804 or NRRL 8165."
		     gene            complement(1380..1811)
				     /locus_tag="SAV_1"
		     CDS             complement(1380..1811)
				     /locus_tag="SAV_1"
				     /codon_start=1
				     /transl_table=11
				     /product="hypothetical protein"
				     /protein_id="BAC67710.1"
				     /db_xref="GI:29603637"
				     /translation="MTAEWYVLVEEDTRETKRADGVELRLHRWKLAATQHIAGDQEQA
				     AAAAEDAALNYMPGVLARHARPGDEPARHAFLTQDGAWLVLLRQRHRECHIRVTTARL
				     MHTQEEKEAPPKSFKEKLRSALDGPQPPEPAGRPWKPGSET"


* output::

	- Amino acid output
	>SAV_1
	MTAEWYVLVEEDTRETKRADGVELRLHRWKLAATQHIAGDQEQAAAAAEDAALNYMPGVL
	ARHARPGDEPARHAFLTQDGAWLVLLRQRHRECHIRVTTARLMHTQEEKEAPPKSFKEKL
	RSALDGPQPPEPAGRPWKPGSET
	>SAV_2
	VPPQGARGTIVSATGSGKTSMAAASTLNCFPEGRILVTVPTLDLLAQTAQAWRAVGHHSP
	MIAVCSLENDPVLNERT
	>SAV_3
	MDWNFPDDDIFFCGGCGDDDTPDPRVPRQDKALCVRCDRVERQVRRYRITVPRRNAIMRF
	QRDVCALCQEGPPTDHCPDAVSFWHIDHDHRCCPPGGSCGRCVRGLLCLPCNATRLPAYE
	RLPNVLRDSPRFNTYLNSPPARHPEARPTARDHAGPRDASSYLIDAFFTAADHPEGNALS
	S
	>SAV_4
	VALTPGGTRVTQWQDRQAIGDMHERRVAAALRARGWTVQPCGQGTYPPAVREALRRTRSA
	LRHFPDLIAARGADLITIDAKDRMPSTDTDRYAVSADTVTAGLFFTAAHAPTPLYYVFGD
	LKVLTPAEVVHYTAHALRHRSGAFHLVRTEQAHCFDDVFGSAGAAAAA
	>SAV_5
	MMLLMAAYVDPRFRPTLWPGTPVPTPELMPLRGARADGEWIVWTPQVRSRSHTVPVPEDF
	YLREFMEVDPEDLDAVAALMGAYGHLGGSINTGSWDVDVYERLKELTEREHPRAPFALHG
	ELATLFMREAQAAITTWLALRREGGLDALIEPEVSEEELAQWQASNADLEEAWPRDLDHL
	RELSLEIRISNLVSELNAALKPFSIGIGGLGDRYPTILAVAFLQLYNHLAEDATIRECAN
	ETCRRHFVRQRGRAAYGQNRTSGIKYCTRECARAQAQREHRRRRKQQTTTLQQPPAPGPQ
	SHDTSEPTAEGR
	>SAV_6
	MISLREHQVEANARIRAWAGFPTRSPVPAQGLRGTVVSATGSGKTITAAWAARECFRGGR
	ILVMVPTLDLLVQTAQAWRRVGHNGPMVAACSLEKDEVLEQLGVRTTTNPIQLALWAGHG
	PVVVFATYASLVDREDPEDVTGRAKVRGPLEAALAGGQRLYGQTMDGFDLAVVDEAHSTT
	GDLGRPWAAIHDNSRIPADFRLYLTATPRILASPRPQKGADGRELEIATMASDPDGPYGE
	WLFELGLSEAVERGILAGFEIDVLEIRDPSPALGESEEAQRGRRLALLQTALLEHAAARN
	LRTVMTFHQRVEEAAAFAQTMPQTAARLYEAEVSAEALVDAGALPESSIGAEFYELEAGR
	HVPPDRVWAAWLCGDHLVAERREVLRQFADGLDAGNKRVHRAFLASVRVLGEGVDIVGER
	GVEAICFADTRGSQVEIVQNIGRALRPNPDGTNKTARIIVPVFLQPGENPTDMVASASFA
	PLVTVLQGLRSHSERLVEQLASRALTSGQRHVHVKRDEDGRIIGTTTEGEGGQHESEGAV
	ESALLHFSTPRDATTIAAFLRTRVYRPESLVWLEGYQALLRWRKKNHITGLYAVPYDTET
	EAGVTKAFPLGRWVHQQRRTYRAGELDPHRTTLLDEAGMVWEPGDEAWENKLAALRSFHR
	AHGHLAPRRDAVWGDADSELVPVGEHMANLRRKDGLGKNPQRAATRATQLAAIDPDWNCP
	WPLDWQRHYRVLADLATDEPHSRLPDIQPGVQFEGDDLGKWLQRQRRSWAELSEEQQQRL
	TALGVTPAEPPTPTPSAKGGGKAAAFQRGLAALAQWIQREGAHKVVPRGHVEAVVIDGQE
	HQHKLGVWISNTKTRRDKLTHDQRTALAALGVEWA
	....

	- Nucleotide output (ORFs)

	>SAV_1
	ATGACCGCCGAGTGGTACGTCCTCGTCGAAGAGGACACACGAGAGACCAAGCGCGCCGAC
	GGCGTTGAACTCAGATTGCACCGCTGGAAACTGGCGGCCACTCAGCACATCGCAGGAGAT
	CAGGAACAGGCCGCCGCCGCGGCCGAGGATGCGGCCCTGAACTACATGCCGGGAGTGCTC
	GCTCGGCATGCCCGACCGGGAGACGAACCGGCCCGGCATGCTTTCCTCACCCAGGACGGG
	GCCTGGCTGGTGCTCCTCAGGCAGCGGCACCGCGAGTGTCACATACGGGTGACCACTGCC
	CGGCTCATGCATACACAGGAAGAGAAGGAGGCCCCGCCGAAAAGCTTCAAGGAGAAACTC
	CGCAGCGCCCTGGATGGTCCTCAGCCGCCCGAACCGGCTGGTAGGCCATGGAAGCCGGGC
	AGCGAAACCTGA
	>SAV_2
	GTGCCCCCTCAGGGAGCCCGTGGCACGATCGTGTCAGCTACCGGGTCCGGCAAAACGAGC
	ATGGCCGCCGCGAGCACGCTGAACTGCTTCCCCGAAGGCCGGATCCTCGTGACCGTGCCG
	ACCCTGGACCTGCTCGCACAGACCGCCCAGGCGTGGCGGGCAGTCGGCCACCACTCCCCC
	ATGATCGCGGTGTGCTCGCTGGAGAACGACCCAGTGCTGAACGAGCGGACCTGA
	>SAV_3
	ATGGACTGGAACTTCCCCGACGACGACATCTTCTTCTGCGGCGGGTGCGGCGACGACGAC
	ACCCCCGACCCGCGGGTCCCGCGTCAGGACAAGGCCCTGTGCGTCCGCTGCGACAGAGTC
	GAACGGCAGGTCCGCCGATACCGGATCACCGTGCCGCGGAGGAACGCGATCATGCGCTTC
	CAGCGCGACGTCTGCGCCCTGTGCCAGGAAGGCCCGCCGACCGACCACTGCCCCGATGCC
	GTCAGCTTCTGGCACATCGACCACGACCACCGCTGCTGCCCTCCCGGCGGCTCATGCGGG
	CGGTGCGTCCGCGGCCTCCTGTGCCTGCCCTGCAACGCCACCCGCCTGCCCGCCTACGAA
	CGCCTCCCCAACGTCCTCCGCGACAGCCCTCGCTTCAACACCTACCTCAACAGCCCACCC
	GCCCGGCACCCCGAAGCCCGCCCCACCGCCAGGGACCATGCAGGCCCCCGCGACGCATCC
	AGCTACCTCATCGACGCCTTTTTCACCGCCGCGGACCATCCCGAGGGGAACGCCCTCAGC
	TCCTGA
	>SAV_4
	GTGGCACTTACCCCAGGGGGAACCCGAGTGACGCAGTGGCAGGACCGCCAGGCGATAGGC
	GACATGCACGAACGTCGGGTGGCGGCCGCGCTGCGCGCCCGCGGCTGGACCGTCCAGCCC
	TGCGGACAGGGCACCTACCCGCCCGCCGTACGGGAAGCCCTGCGCCGGACCCGCTCCGCC
	CTGCGGCACTTCCCCGACCTCATCGCCGCCCGCGGCGCCGACCTGATCACCATCGACGCC
	AAGGACCGCATGCCCAGCACCGACACCGACCGCTACGCCGTCAGCGCCGACACCGTGACC
	GCCGGCCTCTTTTTCACCGCGGCCCACGCTCCGACTCCGCTGTACTACGTCTTCGGCGAC
	CTGAAGGTCCTCACGCCGGCGGAGGTGGTCCACTACACCGCTCACGCCTTGCGCCACCGC
	AGCGGTGCCTTCCACCTCGTACGCACGGAGCAAGCACACTGCTTCGACGACGTCTTCGGA
	TCGGCTGGCGCAGCAGCTGCGGCATGA
	>SAV_5
	ATGATGCTCCTCATGGCGGCATACGTTGACCCACGCTTTCGTCCTACGCTATGGCCTGGA
	ACGCCCGTGCCGACACCGGAGTTGATGCCTCTTCGCGGAGCGCGGGCCGACGGTGAATGG
	ATCGTCTGGACCCCGCAGGTCCGCTCCCGCTCGCACACGGTCCCCGTGCCGGAGGACTTC
	TACCTGCGCGAGTTCATGGAGGTCGACCCTGAGGACCTCGACGCCGTGGCCGCCCTGATG
	GGCGCCTACGGACACCTCGGCGGGAGCATCAACACCGGAAGCTGGGACGTCGACGTCTAC
	GAGCGCCTCAAGGAGCTCACGGAGCGCGAACACCCCCGCGCGCCGTTCGCCCTGCACGGC
	GAACTGGCCACGCTGTTCATGAGGGAGGCGCAGGCGGCCATCACCACCTGGCTGGCCCTG
	CGCCGCGAGGGCGGGCTCGACGCGCTCATCGAGCCCGAGGTGTCCGAGGAAGAACTGGCG
	CAGTGGCAAGCGAGCAACGCTGATCTTGAGGAAGCGTGGCCGCGGGACCTGGACCACCTG
	CGCGAACTCTCCCTGGAGATCAGGATCAGCAACCTCGTGAGCGAACTGAACGCCGCGCTG
	AAGCCGTTCAGCATCGGCATCGGCGGCCTGGGCGACCGCTACCCCACCATCCTCGCTGTG
	GCGTTCCTCCAGCTCTACAACCACCTCGCCGAGGACGCCACGATCCGCGAGTGCGCGAAC
	GAGACCTGCCGCCGCCACTTCGTACGCCAGCGCGGCCGCGCCGCATACGGGCAGAACCGC
	ACCAGCGGCATCAAGTACTGCACCCGCGAATGCGCCCGCGCCCAGGCCCAGCGCGAACAC
	CGCCGGCGCCGCAAACAGCAGACCACGACCCTCCAGCAGCCGCCGGCGCCTGGTCCTCAG
	TCTCACGACACCTCAGAGCCGACTGCCGAAGGGCGCTGA
	.......

]]></help>
    <expand macro="citation" />
</tool>