view optdoe.xml @ 0:cae2975d264f draft

planemo upload for repository ['https://github.com/brsynth/icfree-ml', 'https://github.com/pablocarb/doebase'] commit 6c01728900e4ebd1a833a7e7d697f61561d86dc6-dirty
author tduigou
date Tue, 28 Mar 2023 12:32:52 +0000
parents
children e56802e12fea
line wrap: on
line source

<tool id="doe_synbio_optdoe" name="Optimal Design of Experiment" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="@LICENCE@">
    <description>Combine selected genetic parts and enzyme variants for the desired pathways</description>
    <!-- @TOOL_VERSION@ is defined locally below. @VERSION_SUFFIX@, @LICENCE@ and the "stdio" and
         "creator" macros are not defined in this file; presumably they come from macros.xml (confirm). -->
    <macros>
        <import>macros.xml</import>
        <token name="@TOOL_VERSION@">v2.0.2</token>
    </macros>
    <!-- The doebase package version reuses @TOOL_VERSION@, the same token used in the tool version attribute. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">doebase</requirement>
    </requirements>
    <expand macro="stdio"/>
    <!-- Runs the doebase module with its doeGetSBOL function.
         The SBOL and reference-parts options are only emitted when the matching optional input
         evaluates as true (presumably: when a dataset was selected; confirm).
         The two boolean parameters expand to their complete flag text, as declared by their
         truevalue/falsevalue attributes in the inputs.
         The genes CSV and the output dataset are passed last, as the two positional arguments. -->
    <command detect_errors="exit_code"><![CDATA[
        python -m doebase
            --func doeGetSBOL
            #if $adv.genes_sbol_file
                --sbol_file '$adv.genes_sbol_file'
            #end if
            #if $adv.ref_parts_file
                --ref_parts_file '$adv.ref_parts_file'
            #end if
            --libsize '$adv.libsize'
            $adv.get_sequences
            $adv.backtranslate
            --codon_table '$adv.codon_table'
            '$genes_file'
            '$constructs'
    ]]></command>
    <inputs>
        <!-- The only input outside the advanced section; the command passes it as the first positional argument. -->
        <param name="genes_file" type="data" format="csv" label="Genes" help="CSV file with header: Name,Type,Part,Step. Provided by the tool Selenzyme"/>
        <section name="adv" title="Advanced Options" expanded="false">
            <!-- Optional SBOL input; the command template forwards it as the sbol_file option only when set. -->
            <param name="genes_sbol_file" type="data" format="xml" optional="true" label="Optimised Genes" help="SBOL file with genes associated with different RBS. Provided by the tool PartsGenie"/>
            <!-- Optional reference parts. No value attribute: a data parameter takes its value from the
                 selected dataset, so the former value="None" carried no meaning. -->
            <param argument="--ref_parts_file" type="data" format="csv" optional="true" label="Ref Parts (default: E. coli compliant)" help="Provide fixed parts"/>
            <param argument="--libsize" type="integer" value="32" min="32" label="Maximal library size" help="Maximal number of constructs in the output"/>
            <!-- Both booleans carry the complete flag text, so the command template inserts them verbatim. -->
            <param argument="--get_sequences" type="boolean" truevalue="--get_sequences true" falsevalue="--get_sequences false" checked="true" label="Get sequences" help="Grab the sequence for each part of the SBOL output"/>
            <param argument="--backtranslate" type="boolean" truevalue="--backtranslate true" falsevalue="--backtranslate false" checked="true" label="Back translate" help="Translate the protein sequence given by its Uniprot ID to a DNA sequence"/>
            <!-- Codon usage table forwarded to the command as the codon_table option.
                 Each option value is a ".cut" table name; the label is the organism it stands for.
                 Escherichia coli K12 and Homo sapiens are listed first, the remaining entries follow
                 roughly in order of their value.
                 No option is marked as selected; presumably Galaxy falls back to the first option
                 (Eecoli.cut), which is what the help text documents as the default (confirm). -->
            <param argument="--codon_table" type="select" label="Codon Table">
                <option value="Eecoli.cut">Escherichia coli K12</option>
                <option value="Ehuman.cut">Homo sapiens</option>
                <option value="Eacica.cut">Acinetobacter calcoaceticus</option>
                <option value="Eagrtu.cut">Agrobacterium tumefaciens str C58</option>
                <option value="Eanasp.cut">Anabaena sp</option>
                <option value="Earath.cut">Arabidopsis thaliana</option>
                <option value="Eazovi.cut">Azotobacter vinelandii</option>
                <option value="Ebacme.cut">Bacillus megaterium</option>
                <option value="Ebacst.cut">Geobacillus stearothermophilus</option>
                <option value="Ebacsu.cut">Bacillus subtilis</option>
                <option value="Ebacsu_high.cut">Bacillus subtilis (high)</option>
                <option value="Ebommo.cut">Bombyx mori (silkmoth)</option>
                <option value="Ebovin.cut">Bos taurus</option>
                <option value="Ebpphx.cut">Bacteriophage phi-X174</option>
                <option value="Ebraja.cut">Bradyrhizobium japonicum USDA 110</option>
                <option value="Ebrana.cut">Brassica napus (rape)</option>
                <option value="Ebrare.cut">Brachydanio rerio (zebrafish)</option>
                <option value="Ecaeel.cut">Caenorhabditis elegans (nematode)</option>
                <option value="Ecanal.cut">Candida albicans</option>
                <option value="Ecanfa.cut">Canis familiaris (dog)</option>
                <option value="Ecaucr.cut">Caulobacter crescentus CB15</option>
                <option value="Echick.cut">Gallus gallus (chicken)</option>
                <option value="Echlre.cut">Chlamydomonas reinhardtii</option>
                <option value="Echltr.cut">Chlamydia trachomatis D UW-3 CX</option>
                <option value="Ecloab.cut">Clostridium acetobutylicum ATCC 824</option>
                <option value="Ecrigr.cut">Cricetulus greseus (hamster)</option>
                <option value="Ecyapa.cut">Cyanophora paradoxa</option>
                <option value="Edicdi.cut">Dictyostelium discoideum</option>
                <option value="Edicdi_high.cut">Dictyostelium discoideum (high)</option>
                <option value="Edrome.cut">Drosophila melanogaster</option>
                <option value="Edrome_high.cut">Drosophila melanogaster (high)</option>
                <option value="Eecoli_high.cut">Escherichia coli K12 (high)</option>
                <option value="Eemeni.cut">Emericella nidulans</option>
                <option value="Eemeni_mit.cut">Emericella nidulans mitochondrion</option>
                <option value="Eemeni_high.cut">Emericella nidulans highly expressed</option>
                <option value="Eerwct.cut">Erwinia carotovora subsp atroseptica SCRI1043</option>
                <option value="Ehaein.cut">Haemophilus influenzae Rd KW20</option>
                <option value="Ehalma.cut">Haloarcula marismortui ATCC 43049 (Halobacterium marismortui)</option>
                <option value="Ehalsa.cut">Halobacterium salinarum NRC-1 (Halobacterium halobium)</option>
                <option value="Ehorvu.cut">Hordeum vulgare subsp vulgare (Barley)</option>
                <option value="Eklepn.cut">Klebsiella pneumoniae</option>
                <option value="Eklula.cut">Kluyveromyces lactis NRRL Y-1140</option>
                <option value="Elacdl.cut">Lactococcus lactis subsp lactis Il1403 L. delbrueckii subsp lactis</option>
                <option value="Elyces.cut">Lycopersicon esculentum (tomato)</option>
                <option value="Emacfa.cut">Macaca fascicularis</option>
                <option value="Emaize.cut">Zea mays</option>
                <option value="Emaize_chl.cut">Zea mays chloroplast</option>
                <option value="Emammal_high.cut">Mammalian species (high)</option>
                <option value="Emanse.cut">Manduca sexta</option>
                <option value="Emarpo_chl.cut">Marchantia polymorpha chloroplast (liverwort)</option>
                <option value="Emedsa.cut">Medicago sativa</option>
                <option value="Emetth.cut">Methanothermobacter thermautotrophicus str Delta H</option>
                <option value="Emouse.cut">Mus musculus</option>
                <option value="Emyctu.cut">Mycobacterium tuberculosis H37Rv</option>
                <option value="Eneigo.cut">Neisseria gonorrhoeae</option>
                <option value="Eneucr.cut">Neurospora crassa</option>
                <option value="Eoncmy.cut">Oncorhynchus mykiss (rainbow trout)</option>
                <option value="Eorysa.cut">Oryza sativa (rice)</option>
                <option value="Eorysa_chl.cut">Oryza sativa chloroplast (rice)</option>
                <option value="Epea.cut">Pisum sativum (pea)</option>
                <option value="Epethy.cut">Petunia x hybrida</option>
                <option value="Ephavu.cut">Phaseolus vulgaris</option>
                <option value="Epig.cut">Sus scrofa (pig)</option>
                <option value="Eplafa.cut">Plasmodium falciparum 3D7</option>
                <option value="Eprovu.cut">Proteus vulgaris</option>
                <option value="Epseae.cut">Pseudomonas aeruginosa PAO1</option>
                <option value="Epsepu.cut">Pseudomonas putida KT2440</option>
                <option value="Epsesm.cut">Pseudomonas syringae pv tomato str DC3000</option>
                <option value="Erabit.cut">Oryctolagus cuniculus (rabbit)</option>
                <option value="Erat.cut">Rattus norvegicus</option>
                <option value="Erhile.cut">Rhizobium leguminosarum</option>
                <option value="Erhime.cut">Sinorhizobium meliloti 1021</option>
                <option value="Erhoca.cut">Rhodobacter capsulatus</option>
                <option value="Erhosh.cut">Rhodobacter sphaeroides</option>
                <option value="Esalsa.cut">Salmo salar (Atlantic salmon)</option>
                <option value="Esalty.cut">Salmonella typhimurium LT2</option>
                <option value="Eschma.cut">Schistosoma mansoni</option>
                <option value="Eschpo.cut">Schizosaccharomyces pombe</option>
                <option value="Eschpo_cai.cut">Schizosaccharomyces pombe (CAI equivalent)</option>
                <option value="Eschpo_high.cut">Schizosaccharomyces pombe (high)</option>
                <option value="Eserma.cut">Serratia marcescens</option>
                <option value="Esheep.cut">Ovis aries (sheep)</option>
                <option value="Esoltu.cut">Solanum tuberosum (potato)</option>
                <option value="Esoybn.cut">Glycine max (soybean)</option>
                <option value="Espiol.cut">Spinacia oleracea (spinach)</option>
                <option value="Estaau.cut">Staphylococcus aureus</option>
                <option value="Estrco.cut">Streptomyces coelicolor A3 2</option>
                <option value="Estrmu.cut">Streptococcus mutans UA159</option>
                <option value="Estrpn.cut">Streptococcus pneumoniae R6</option>
                <option value="Estrpu.cut">Strongylocentrotus purpuratus (sea urchin)</option>
                <option value="Esv40.cut">Simian Virus 40</option>
                <option value="Esynco.cut">Synechococcus sp WH 8102</option>
                <option value="Esyncy.cut">Synechocystis sp PCC 6803</option>
                <option value="Etetth.cut">Tetrahymena thermophila</option>
                <option value="Etheth.cut">Thermus thermophilus HB8</option>
                <option value="Etobac.cut">Nicotiana tabacum (tobacco)</option>
                <option value="Etobac_chl.cut">Nicotiana tabacum chloroplast (tobacco)</option>
                <option value="Etrybr.cut">Trypanosoma brucei</option>
                <option value="Etrycr.cut">Trypanosoma cruzi</option>
                <option value="Evibch.cut">Vibrio cholerae O1 biovar eltor str N16961</option>
                <option value="Ewheat.cut">Triticum aestivum (wheat)</option>
                <option value="Exenla.cut">Xenopus laevis</option>
                <option value="Eyeast.cut">Saccharomyces cerevisiae</option>
                <option value="Eyeast_cai.cut">Saccharomyces cerevisiae original CAI set</option>
                <option value="Eyeast_high.cut">Saccharomyces cerevisiae (high)</option>
                <option value="Eyeast_mit.cut">Saccharomyces cerevisiae mitochondrion</option>
                <option value="Eyeren.cut">Yersinia enterocolitica</option>
            </param>
        </section>
    </inputs>
    <!-- Single output dataset; the command template passes its path as the last positional argument. -->
    <outputs>
        <data name="constructs"
              format="xml"
              label="Constructs"/>
    </outputs>
    <tests>
        <test>
        <!-- test 1: run with default parameters; the output must be valid XML and contain the expected RDF root namespace declarations (contents are not compared to a reference file) -->
            <param name="genes_file" value="genes_lycopene.csv" />
            <output name="constructs" >
                <assert_contents>
                    <is_valid_xml />
                    <has_text text='rdf:RDF xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:xsd="http://www.w3.org/2001/XMLSchema#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:sbol="http://sbols.org/v2#" xmlns:xsd1="http://www.w3.org/2001/XMLSchema#dateTime/" xmlns:om="http://www.ontology-of-units-of-measure.org/resource/om-2/" xmlns:synbiohub="http://synbiohub.org#" xmlns:sbh="http://wiki.synbiohub.org/wiki/Terms/synbiohub#" xmlns:sybio="http://www.sybio.ncl.ac.uk#" xmlns:ncbi="http://www.ncbi.nlm.nih.gov#" xmlns:igem="http://wiki.synbiohub.org/wiki/Terms/igem#" xmlns:genbank="http://www.ncbi.nlm.nih.gov/genbank#" xmlns:gbconv="http://sbols.org/genBankConversion#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:obo="http://purl.obolibrary.org/obo/"'/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
OptDoE
===========

An optimal design of experiments is performed by using OptBioDes based on logistic regression analysis with an assumed linear model for the response. The design is evaluated by its D-efficiency, defined as:

.. math:: D_{eff} = \frac{1}{n}\left| X^{T}X \right|^{\frac{1}{p}}

|

Where n is the number of experimental runs or library size, p is the number of independent variables, and X is the model matrix, i.e., a row for each experimental run and a column for each term in the model. The library size n can be selected and should be above a minimal threshold depending on the combinatorial complexity of the library.
The experimental design can be evaluated through the provided diagnostics for D-efficiency, power analysis, and relative prediction variance, allowing the optimal selection of library size.
The resulting design is provided as an SBOL collection containing the definition of each DNA component and the combinatorial library of constructs.

.. image:: https://raw.githubusercontent.com/brsynth/synbiocad-galaxy-wrappers/master/optdoe/img/image3.png
	:width: 80 %
	:align: center


.. image:: https://raw.githubusercontent.com/brsynth/synbiocad-galaxy-wrappers/master/optdoe/img/image46.png
	:width: 80 %
	:align: center

|

Above are examples of two constructs generated through optimal combinatorial design and represented using SBOL Visual in SynBioHub.
Note: if the advanced parameter "Ref Parts" is left empty, the following parts are used:


    +---------+------------+--------------------------------------------------+
    | Name    | Type       | SynBioHub                                        |
    +=========+============+==================================================+
    | PlacUV5 | Promoter   | https://synbiohub.org/public/igem/BBa_K1847014/1 |
    +---------+------------+--------------------------------------------------+
    | Ptrc    | Promoter   | https://synbiohub.org/public/igem/BBa_J56012/1   |
    +---------+------------+--------------------------------------------------+
    | BBR1    | Origin     | https://synbiohub.org/public/igem/BBa_I50041/1   |
    +---------+------------+--------------------------------------------------+
    | p15A    | Origin     | https://synbiohub.org/public/igem/BBa_I50032/1   |
    +---------+------------+--------------------------------------------------+
    | ColE1   | Origin     | https://synbiohub.org/public/igem/BBa_J64101/1   |
    +---------+------------+--------------------------------------------------+
    | res1    | Resistance | https://synbiohub.org/public/igem/BBa_I13800/1   |
    +---------+------------+--------------------------------------------------+
    | Ter     | Terminator | https://synbiohub.org/public/igem/BBa_B1006/1    |
    +---------+------------+--------------------------------------------------+

The CSV source of this table can be used as a template for a custom file: https://raw.githubusercontent.com/pablocarb/doebase/master/doebase/data/ref_parts.csv

Input
-----

Required:

* **Genes**\ : Output of Selenzyme. The CSV file contains four columns: Name,Type,Part,Step. The column "Name" contains the Uniprot ID found by Selenzyme. The column "Type" represents the type of the "Part", a *gene*. The column "Part" has the same information as the column "Name". The column "Step" contains the order of the position in the pathway of the gene.

Advanced options:

* **Optimised Genes**\ : SBOL file containing optimized versions of the genes associated with different RBS. Output of the PartsGenie tool.
* **Ref Parts**\ : CSV with the genetic parts. Default (if left empty) is the above table. To generate a new compatible CSV file, please use the "OptDoE Parts Reference Generator" tool
* **Maximal library size**\ : (integer, default: 32) Maximal number of constructs in the SBOL output.
* **Get sequences**\ : (boolean, default: True) Grab the sequence for each part of the SBOL output.
* **Back translate**\ : (boolean, default: True) Translate the protein sequence given by its Uniprot ID to a DNA sequence.
* **Codon Table**\ : (text, default: Eecoli.cut) Codon Usage Table. See `codon_table <https://www.ebi.ac.uk/seqdb/confluence/display/JDSAT/EMBOSS+backtranseq+Help+and+Documentation#EMBOSSbacktranseqHelpandDocumentation-WebServices>`_ for more codons.

Output
------

* **Constructs**\ : Output SBOL file. It contains some combinations of constructs according to the algorithm described above.
    ]]></help>
    <expand macro="creator"/>
    <citations>
        <!-- BibTeX needs a comma between the citation key and the first field;
             without it the entry is malformed. -->
        <citation type="bibtex">
        @unpublished{pablocarb,
            author = {Pablo Carbonell},
            title = {{doebase}},
            url = {https://github.com/pablocarb/doebase/},
        }
        </citation>
    </citations>
</tool>