diff optdoe.xml @ 0:cae2975d264f draft

planemo upload for repository ['https://github.com/brsynth/icfree-ml', 'https://github.com/pablocarb/doebase'] commit 6c01728900e4ebd1a833a7e7d697f61561d86dc6-dirty
author tduigou
date Tue, 28 Mar 2023 12:32:52 +0000
parents
children e56802e12fea
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/optdoe.xml	Tue Mar 28 12:32:52 2023 +0000
@@ -0,0 +1,241 @@
+<tool id="doe_synbio_optdoe" name="Optimal Design of Experiment" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="@LICENCE@">
+    <description>Combine selected genetic parts and enzyme variants for the desired pathways</description>
+    <macros>
+        <import>macros.xml</import>
+        <token name="@TOOL_VERSION@">v2.0.2</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">doebase</requirement>
+    </requirements>
+    <expand macro="stdio"/>
+    <command detect_errors="exit_code"><![CDATA[
+        python -m doebase
+            --func doeGetSBOL
+            #if $adv.genes_sbol_file
+                --sbol_file '$adv.genes_sbol_file'
+            #end if
+            #if $adv.ref_parts_file
+                --ref_parts_file '$adv.ref_parts_file'
+            #end if
+            --libsize '$adv.libsize'
+            $adv.get_sequences
+            $adv.backtranslate
+            --codon_table '$adv.codon_table'
+            '$genes_file'
+            '$constructs'
+    ]]></command>
+    <inputs>
+        <param name="genes_file" type="data" format="csv" label="Genes" help="CSV file with header: Name,Type,Part,Step. Provided by the tool Selenzyme"/>
+        <section name="adv" title="Advanced Options" expanded="false">
+            <param name="genes_sbol_file" type="data" format="xml" optional="true" label="Optimised Genes" help="SBOL file with genes associated with different RBS. Provided by the tool PartsGenie"/>
+            <param argument="--ref_parts_file" type="data" format="csv" value="None" optional="true" label="Ref Parts (default: E. coli compliant)" help="Provide fixed parts"/>
+            <param argument="--libsize" type="integer" value="32" min="32" label="Maximal library size" help="Maximal number of contructs in the output"/>
+            <param argument="--get_sequences" type="boolean" truevalue="--get_sequences true" falsevalue="--get_sequences false" checked="true" label="Get sequences" help="Grab the sequence for each part of the SBOL output"/>
+            <param argument="--backtranslate" type="boolean" truevalue="--backtranslate true" falsevalue="--backtranslate false" checked="true" label="Back translate" help="Translate the protein sequence given by its Uniprot ID to a DNA sequence"/>
+            <param argument="--codon_table" type="select" label="Codon Table">
+                <option value="Eecoli.cut">Escherichia coli K12</option>
+                <option value="Ehuman.cut">Homo sapiens</option>
+                <option value="Eacica.cut">Acinetobacter calcoaceticus</option>
+                <option value="Eagrtu.cut">Agrobacterium tumefaciens str C58</option>
+                <option value="Eanasp.cut">Anabaena sp</option>
+                <option value="Earath.cut">Arabidopsis thaliana</option>
+                <option value="Eazovi.cut">Azotobacter vinelandii</option>
+                <option value="Ebacme.cut">Bacillus megaterium</option>
+                <option value="Ebacst.cut">Geobacillus stearothermophilus</option>
+                <option value="Ebacsu.cut">Bacillus subtilis</option>
+                <option value="Ebacsu_high.cut">Bacillus subtilis (high)</option>
+                <option value="Ebommo.cut">Bombyx mori (silkmoth)</option>
+                <option value="Ebovin.cut">Bos taurus</option>
+                <option value="Ebpphx.cut">Bacteriophage phi-X174</option>
+                <option value="Ebraja.cut">Bradyrhizobium japonicum USDA 110</option>
+                <option value="Ebrana.cut">Brassica napus (rape)</option>
+                <option value="Ebrare.cut">Brachydanio rerio (zebrafish)</option>
+                <option value="Ecaeel.cut">Caenorhabditis elegans (nematode)</option>
+                <option value="Ecanal.cut">Candida albicans</option>
+                <option value="Ecanfa.cut">Canis familiaris (dog)</option>
+                <option value="Ecaucr.cut">Caulobacter crescentus CB15</option>
+                <option value="Echick.cut">Gallus gallus (chicken)</option>
+                <option value="Echlre.cut">Chlamydomonas reinhardtii</option>
+                <option value="Echltr.cut">Chlamydia trachomatis D UW-3 CX</option>
+                <option value="Ecloab.cut">Clostridium acetobutylicum ATCC 824</option>
+                <option value="Ecrigr.cut">Cricetulus greseus (hamster)</option>
+                <option value="Ecyapa.cut">Cyanophora paradoxa</option>
+                <option value="Edicdi.cut">Dictyostelium discoideum</option>
+                <option value="Edicdi_high.cut">Dictyostelium discoideum (high)</option>
+                <option value="Edrome.cut">Drosophila melanogaster</option>
+                <option value="Edrome_high.cut">Drosophila melanogaster (high)</option>
+                <option value="Eecoli_high.cut">Escherichia coli K12 (high)</option>
+                <option value="Eemeni.cut">Emericella nidulans</option>
+                <option value="Eemeni_mit.cut">Emericella nidulans mitochondrion</option>
+                <option value="Eemeni_high.cut">Emericella nidulans highly expressed</option>
+                <option value="Eerwct.cut">Erwinia carotovora subsp atroseptica SCRI1043</option>
+                <option value="Ehaein.cut">Haemophilus influenzae Rd KW20</option>
+                <option value="Ehalma.cut">Haloarcula marismortui ATCC 43049 (Halobacterium marismortui)</option>
+                <option value="Ehalsa.cut">Halobacterium salinarum NRC-1 (Halobacterium halobium)</option>
+                <option value="Ehorvu.cut">Hordeum vulgare subsp vulgare (Barley)</option>
+                <option value="Eklepn.cut">Klebsiella pneumoniae</option>
+                <option value="Eklula.cut">Kluyveromyces lactis NRRL Y-1140</option>
+                <option value="Elacdl.cut">Lactococcus lactis subsp lactis Il1403 L. delbrueckii subsp lactis</option>
+                <option value="Elyces.cut">Lycopersicon esculentum (tomato)</option>
+                <option value="Emacfa.cut">Macaca fascicularis</option>
+                <option value="Emaize.cut">Zea mays</option>
+                <option value="Emaize_chl.cut">Zea mays chloroplast</option>
+                <option value="Emammal_high.cut">Mammalian species (high)</option>
+                <option value="Emanse.cut">Manduca sexta</option>
+                <option value="Emarpo_chl.cut">Marchantia polymorpha chloroplast (liverwort)</option>
+                <option value="Emedsa.cut">Medicago sativa</option>
+                <option value="Emetth.cut">Methanothermobacter thermautotrophicus str Delta H</option>
+                <option value="Emouse.cut">Mus musculus</option>
+                <option value="Emyctu.cut">Mycobacterium tuberculosis H37Rv</option>
+                <option value="Eneigo.cut">Neisseria gonorrhoeae</option>
+                <option value="Eneucr.cut">Neurospora crassa</option>
+                <option value="Eoncmy.cut">Oncorhynchus mykiss (rainbow trout)</option>
+                <option value="Eorysa.cut">Oryza sativa (rice)</option>
+                <option value="Eorysa_chl.cut">Oryza sativa chloroplast (rice)</option>
+                <option value="Epea.cut">Pisum sativum (pea)</option>
+                <option value="Epethy.cut">Petunia x hybrida</option>
+                <option value="Ephavu.cut">Phaseolus vulgaris</option>
+                <option value="Epig.cut">Sus scrofa (pig)</option>
+                <option value="Eplafa.cut">Plasmodium falciparum 3D7</option>
+                <option value="Eprovu.cut">Proteus vulgaris</option>
+                <option value="Epseae.cut">Pseudomonas aeruginosa PAO1</option>
+                <option value="Epsepu.cut">Pseudomonas putida KT2440</option>
+                <option value="Epsesm.cut">Pseudomonas syringae pv tomato str DC3000</option>
+                <option value="Erabit.cut">Oryctolagus cuniculus (rabbit)</option>
+                <option value="Erat.cut">Rattus norvegicus</option>
+                <option value="Erhile.cut">Rhizobium leguminosarum</option>
+                <option value="Erhime.cut">Sinorhizobium meliloti 1021</option>
+                <option value="Erhoca.cut">Rhodobacter capsulatus</option>
+                <option value="Erhosh.cut">Rhodobacter sphaeroides</option>
+                <option value="Esalsa.cut">Salmo salar (Atlantic salmon)</option>
+                <option value="Esalty.cut">Salmonella typhimurium LT2</option>
+                <option value="Eschma.cut">Schistosoma mansoni</option>
+                <option value="Eschpo.cut">Schizosaccharomyces pombe</option>
+                <option value="Eschpo_cai.cut">Schizosaccharomyces pombe (CAI equivalent)</option>
+                <option value="Eschpo_high.cut">Schizosaccharomyces pombe (high)</option>
+                <option value="Eserma.cut">Serratia marcescens</option>
+                <option value="Esheep.cut">Ovis aries (sheep)</option>
+                <option value="Esoltu.cut">Solanum tuberosum (potato)</option>
+                <option value="Esoybn.cut">Glycine max (soybean)</option>
+                <option value="Espiol.cut">Spinacia oleracea (spinach)</option>
+                <option value="Estaau.cut">Staphylococcus aureus</option>
+                <option value="Estrco.cut">Streptomyces coelicolor A3 2</option>
+                <option value="Estrmu.cut">Streptococcus mutans UA159</option>
+                <option value="Estrpn.cut">Streptococcus pneumoniae R6</option>
+                <option value="Estrpu.cut">Strongylocentrotus purpuratus (sea urchin)</option>
+                <option value="Esv40.cut">Simian Virus 40</option>
+                <option value="Esynco.cut">Synechococcus sp WH 8102</option>
+                <option value="Esyncy.cut">Synechocystis sp PCC 6803</option>
+                <option value="Etetth.cut">Tetrahymena thermophila</option>
+                <option value="Etheth.cut">Thermus thermophilus HB8</option>
+                <option value="Etobac.cut">Nicotiana tabacum (tobacco)</option>
+                <option value="Etobac_chl.cut">Nicotiana tabacum chloroplast (tobacco)</option>
+                <option value="Etrybr.cut">Trypanosoma brucei</option>
+                <option value="Etrycr.cut">Trypanosoma cruzi</option>
+                <option value="Evibch.cut">Vibrio cholerae O1 biovar eltor str N16961</option>
+                <option value="Ewheat.cut">Triticum aestivum (wheat)</option>
+                <option value="Exenla.cut">Xenopus laevis</option>
+                <option value="Eyeast.cut">Saccharomyces cerevisiae</option>
+                <option value="Eyeast_cai.cut">Saccharomyces cerevisiae original CAI set</option>
+                <option value="Eyeast_high.cut">Saccharomyces cerevisiae (high)</option>
+                <option value="Eyeast_mit.cut">Saccharomyces cerevisiae mitochondrion</option>
+                <option value="Eyeren.cut">Yersinia enterocolitica</option>
+            </param>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="constructs" format="xml" label="Constructs" />
+    </outputs>
+    <tests>
+        <test>
+        <!-- test 1: check if identical outputs are produced with default parameters  -->
+            <param name="genes_file" value="genes_lycopene.csv" />
+            <output name="constructs" >
+                <assert_contents>
+                    <is_valid_xml />
+                    <has_text text='rdf:RDF xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:xsd="http://www.w3.org/2001/XMLSchema#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:sbol="http://sbols.org/v2#" xmlns:xsd1="http://www.w3.org/2001/XMLSchema#dateTime/" xmlns:om="http://www.ontology-of-units-of-measure.org/resource/om-2/" xmlns:synbiohub="http://synbiohub.org#" xmlns:sbh="http://wiki.synbiohub.org/wiki/Terms/synbiohub#" xmlns:sybio="http://www.sybio.ncl.ac.uk#" xmlns:ncbi="http://www.ncbi.nlm.nih.gov#" xmlns:igem="http://wiki.synbiohub.org/wiki/Terms/igem#" xmlns:genbank="http://www.ncbi.nlm.nih.gov/genbank#" xmlns:gbconv="http://sbols.org/genBankConversion#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:obo="http://purl.obolibrary.org/obo/"'/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+OptDoE
+===========
+
+An optimal design of experiments is performed by using OptBioDes based on logistic regression analysis with an assumed linear model for the response. The design is evaluated by its D-efficiency, defined as:
+
+.. math:: D_{eff} = \frac{1}{n}\abs{X^{T}X}^{\frac{1}{p}}
+
+|
+
+Where n is the number of experimental runs or library size, p is the number of independent variables, and X is the model matrix, i.e., a row for each experimental run and a column for each term in the model. The library size n can be selected and should be above a minimal threshold depending on the number of combinatorial complexity of the library.
+The experimental design can be evaluated through the provided diagnostics for D-efficiency, power analysis, and relative prediction variance, allowing the optimal selection of library size.
+The resulting design is provided as an SBOL collection containing the definition of each DNA component and the combinatorial library of constructs.
+
+.. image:: https://raw.githubusercontent.com/brsynth/synbiocad-galaxy-wrappers/master/optdoe/img/image3.png
+	:width: 80 %
+	:align: center
+
+
+.. image:: https://raw.githubusercontent.com/brsynth/synbiocad-galaxy-wrappers/master/optdoe/img/image46.png
+	:width: 80 %
+	:align: center
+
+|
+
+Above are examples of two constructs generated through optimal combinatorial design and represented using SBOL Visual in SynBioHub.
+Note: if advanced parameter "input_parts" is left empty, the following parts are used:
+
+
+    +---------+------------+--------------------------------------------------+
+    | Name    | Type       | SynBioHub                                        |
+    +=========+============+==================================================+
+    | PlacUV5 | Promoter   | https://synbiohub.org/public/igem/BBa_K1847014/1 |
+    +---------+------------+--------------------------------------------------+
+    | Ptrc    | Promoter   | https://synbiohub.org/public/igem/BBa_J56012/1   |
+    +---------+------------+--------------------------------------------------+
+    | BBR1    | Origin     | https://synbiohub.org/public/igem/BBa_I50041/1   |
+    +---------+------------+--------------------------------------------------+
+    | p15A    | Origin     | https://synbiohub.org/public/igem/BBa_I50032/1   |
+    +---------+------------+--------------------------------------------------+
+    | ColE1   | Origin     | https://synbiohub.org/public/igem/BBa_J64101/1   |
+    +---------+------------+--------------------------------------------------+
+    | res1    | Resistance | https://synbiohub.org/public/igem/BBa_I13800/1   |
+    +---------+------------+--------------------------------------------------+
+    | Ter     | Terminator | https://synbiohub.org/public/igem/BBa_B1006/1    |
+    +---------+------------+--------------------------------------------------+
+
+The source of this file can be found here as a model: https://raw.githubusercontent.com/pablocarb/doebase/master/doebase/data/ref_parts.csv
+
+Input
+-----
+
+Required:
+
+* **Genes**\ : Output of Selenzyme. The CSV file contains four columnds: Name,Type,Part,Step. The column "Name" contains the Uniprot ID found by Selenzyme. The column "Type" represents the type of the "Part", a *gene*. The column "Part" has the same information as the column "Name". The column "Step" contains the order of the position in the pathway of the gene.
+
+Advanced options:
+
+* **Optimised Genes**\ : SBOL file containing optimized versions of the genes associated with different RBS. Output of the PartsGenie tool.
+* **Ref Parts**\ : CSV with the genetic parts. Default (if left empty) is the above table. To generate a new compatible CSV file, please use the "OptDoE Parts Reference Generator" tool
+* **Maximal library size**\ : (integer, default: 32) Maximal number of constructs in the SBOL output.
+* **Get sequences**\ : (boolean, default: True) Grab the sequence for each part of the SBOL output.
+* **Back translate**\ : (boolean, default: True) Translate the protein sequence given by its Uniprot ID to a DNA sequence.
+* **Codon Table**\ : (text, default: Eecoli.cut) Codon Usage Table. See `codon_table <https://www.ebi.ac.uk/seqdb/confluence/display/JDSAT/EMBOSS+backtranseq+Help+and+Documentation#EMBOSSbacktranseqHelpandDocumentation-WebServices>`_ for more codons.
+
+Output
+------
+
+* **Constructs**\ : Output SBOL file. It contains some combinations of constructs according to the algorithm described above.
+    ]]></help>
+    <expand macro="creator"/>
+    <citations>
+        <citation type="bibtex">
+        @unpublished{pablocarb
+            author = {Pablo Carbonell},
+            title = {{doebase}},
+            url = {https://github.com/pablocarb/doebase/},
+        }
+        </citation>
+    </citations>
+</tool>