Mercurial > repos > tduigou > doe_synbio_optdoe
diff optdoe.xml @ 0:cae2975d264f draft
planemo upload for repository ['https://github.com/brsynth/icfree-ml', 'https://github.com/pablocarb/doebase'] commit 6c01728900e4ebd1a833a7e7d697f61561d86dc6-dirty
author | tduigou |
---|---|
date | Tue, 28 Mar 2023 12:32:52 +0000 |
parents | |
children | e56802e12fea |
line wrap: on
line diff
<tool id="doe_synbio_optdoe"
      name="Optimal Design of Experiment"
      version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
      license="@LICENCE@">
    <!-- Galaxy wrapper around the doebase Python module: runs its doeGetSBOL
         function on a CSV list of genes and writes the resulting constructs
         to a single XML (SBOL) dataset. -->
    <description>Combine selected genetic parts and enzyme variants for the desired pathways</description>
    <macros>
        <!-- NOTE(review): macros.xml is presumably where the "stdio" and "creator"
             macros and the @VERSION_SUFFIX@ / @LICENCE@ tokens are defined; it is
             not visible from this file. -->
        <import>macros.xml</import>
        <token name="@TOOL_VERSION@">v2.0.2</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">doebase</requirement>
    </requirements>
    <expand macro="stdio"/>
    <!-- The sbol_file and ref_parts_file flags are emitted only when the matching
         optional dataset was supplied. The two boolean parameters expand to a
         complete "flag value" pair through their truevalue/falsevalue attributes.
         The genes CSV and the output path are the trailing positional arguments. -->
    <command detect_errors="exit_code"><![CDATA[
        python -m doebase
            --func doeGetSBOL
            #if $adv.genes_sbol_file
                --sbol_file '$adv.genes_sbol_file'
            #end if
            #if $adv.ref_parts_file
                --ref_parts_file '$adv.ref_parts_file'
            #end if
            --libsize '$adv.libsize'
            $adv.get_sequences
            $adv.backtranslate
            --codon_table '$adv.codon_table'
            '$genes_file'
            '$constructs'
    ]]></command>
    <inputs>
        <param name="genes_file"
               type="data"
               format="csv"
               label="Genes"
               help="CSV file with header: Name,Type,Part,Step. Provided by the tool Selenzyme"/>
        <section name="adv" title="Advanced Options" expanded="false">
            <param name="genes_sbol_file"
                   type="data"
                   format="xml"
                   optional="true"
                   label="Optimised Genes"
                   help="SBOL file with genes associated with different RBS. Provided by the tool PartsGenie"/>
            <!-- No "value" attribute: an optional dataset input has no literal default.
                 When it is left empty the flag is simply omitted from the command line. -->
            <param argument="--ref_parts_file"
                   type="data"
                   format="csv"
                   optional="true"
                   label="Ref Parts (default: E. coli compliant)"
                   help="Provide fixed parts"/>
            <param argument="--libsize"
                   type="integer"
                   value="32"
                   min="32"
                   label="Maximal library size"
                   help="Maximal number of constructs in the output"/>
            <param argument="--get_sequences"
                   type="boolean"
                   truevalue="--get_sequences true"
                   falsevalue="--get_sequences false"
                   checked="true"
                   label="Get sequences"
                   help="Grab the sequence for each part of the SBOL output"/>
            <param argument="--backtranslate"
                   type="boolean"
                   truevalue="--backtranslate true"
                   falsevalue="--backtranslate false"
                   checked="true"
                   label="Back translate"
                   help="Translate the protein sequence given by its Uniprot ID to a DNA sequence"/>
            <!-- Option values are passed verbatim to the codon_table flag. No option is
                 marked as selected, so the first one (Eecoli.cut) is the default, as the
                 help text states. -->
            <param argument="--codon_table" type="select" label="Codon Table">
                <option value="Eecoli.cut">Escherichia coli K12</option>
                <option value="Ehuman.cut">Homo sapiens</option>
                <option value="Eacica.cut">Acinetobacter calcoaceticus</option>
                <option value="Eagrtu.cut">Agrobacterium tumefaciens str C58</option>
                <option value="Eanasp.cut">Anabaena sp</option>
                <option value="Earath.cut">Arabidopsis thaliana</option>
                <option value="Eazovi.cut">Azotobacter vinelandii</option>
                <option value="Ebacme.cut">Bacillus megaterium</option>
                <option value="Ebacst.cut">Geobacillus stearothermophilus</option>
                <option value="Ebacsu.cut">Bacillus subtilis</option>
                <option value="Ebacsu_high.cut">Bacillus subtilis (high)</option>
                <option value="Ebommo.cut">Bombyx mori (silkmoth)</option>
                <option value="Ebovin.cut">Bos taurus</option>
                <option value="Ebpphx.cut">Bacteriophage phi-X174</option>
                <option value="Ebraja.cut">Bradyrhizobium japonicum USDA 110</option>
                <option value="Ebrana.cut">Brassica napus (rape)</option>
                <option value="Ebrare.cut">Brachydanio rerio (zebrafish)</option>
                <option value="Ecaeel.cut">Caenorhabditis elegans (nematode)</option>
                <option value="Ecanal.cut">Candida albicans</option>
                <option value="Ecanfa.cut">Canis familiaris (dog)</option>
                <option value="Ecaucr.cut">Caulobacter crescentus CB15</option>
                <option value="Echick.cut">Gallus gallus (chicken)</option>
                <option value="Echlre.cut">Chlamydomonas reinhardtii</option>
                <option value="Echltr.cut">Chlamydia trachomatis D UW-3 CX</option>
                <option value="Ecloab.cut">Clostridium acetobutylicum ATCC 824</option>
                <option value="Ecrigr.cut">Cricetulus griseus (hamster)</option>
                <option value="Ecyapa.cut">Cyanophora paradoxa</option>
                <option value="Edicdi.cut">Dictyostelium discoideum</option>
                <option value="Edicdi_high.cut">Dictyostelium discoideum (high)</option>
                <option value="Edrome.cut">Drosophila melanogaster</option>
                <option value="Edrome_high.cut">Drosophila melanogaster (high)</option>
                <option value="Eecoli_high.cut">Escherichia coli K12 (high)</option>
                <option value="Eemeni.cut">Emericella nidulans</option>
                <option value="Eemeni_mit.cut">Emericella nidulans mitochondrion</option>
                <option value="Eemeni_high.cut">Emericella nidulans highly expressed</option>
                <option value="Eerwct.cut">Erwinia carotovora subsp atroseptica SCRI1043</option>
                <option value="Ehaein.cut">Haemophilus influenzae Rd KW20</option>
                <option value="Ehalma.cut">Haloarcula marismortui ATCC 43049 (Halobacterium marismortui)</option>
                <option value="Ehalsa.cut">Halobacterium salinarum NRC-1 (Halobacterium halobium)</option>
                <option value="Ehorvu.cut">Hordeum vulgare subsp vulgare (Barley)</option>
                <option value="Eklepn.cut">Klebsiella pneumoniae</option>
                <option value="Eklula.cut">Kluyveromyces lactis NRRL Y-1140</option>
                <option value="Elacdl.cut">Lactococcus lactis subsp lactis Il1403 L. delbrueckii subsp lactis</option>
                <option value="Elyces.cut">Lycopersicon esculentum (tomato)</option>
                <option value="Emacfa.cut">Macaca fascicularis</option>
                <option value="Emaize.cut">Zea mays</option>
                <option value="Emaize_chl.cut">Zea mays chloroplast</option>
                <option value="Emammal_high.cut">Mammalian species (high)</option>
                <option value="Emanse.cut">Manduca sexta</option>
                <option value="Emarpo_chl.cut">Marchantia polymorpha chloroplast (liverwort)</option>
                <option value="Emedsa.cut">Medicago sativa</option>
                <option value="Emetth.cut">Methanothermobacter thermautotrophicus str Delta H</option>
                <option value="Emouse.cut">Mus musculus</option>
                <option value="Emyctu.cut">Mycobacterium tuberculosis H37Rv</option>
                <option value="Eneigo.cut">Neisseria gonorrhoeae</option>
                <option value="Eneucr.cut">Neurospora crassa</option>
                <option value="Eoncmy.cut">Oncorhynchus mykiss (rainbow trout)</option>
                <option value="Eorysa.cut">Oryza sativa (rice)</option>
                <option value="Eorysa_chl.cut">Oryza sativa chloroplast (rice)</option>
                <option value="Epea.cut">Pisum sativum (pea)</option>
                <option value="Epethy.cut">Petunia x hybrida</option>
                <option value="Ephavu.cut">Phaseolus vulgaris</option>
                <option value="Epig.cut">Sus scrofa (pig)</option>
                <option value="Eplafa.cut">Plasmodium falciparum 3D7</option>
                <option value="Eprovu.cut">Proteus vulgaris</option>
                <option value="Epseae.cut">Pseudomonas aeruginosa PAO1</option>
                <option value="Epsepu.cut">Pseudomonas putida KT2440</option>
                <option value="Epsesm.cut">Pseudomonas syringae pv tomato str DC3000</option>
                <option value="Erabit.cut">Oryctolagus cuniculus (rabbit)</option>
                <option value="Erat.cut">Rattus norvegicus</option>
                <option value="Erhile.cut">Rhizobium leguminosarum</option>
                <option value="Erhime.cut">Sinorhizobium meliloti 1021</option>
                <option value="Erhoca.cut">Rhodobacter capsulatus</option>
                <option value="Erhosh.cut">Rhodobacter sphaeroides</option>
                <option value="Esalsa.cut">Salmo salar (Atlantic salmon)</option>
                <option value="Esalty.cut">Salmonella typhimurium LT2</option>
                <option value="Eschma.cut">Schistosoma mansoni</option>
                <option value="Eschpo.cut">Schizosaccharomyces pombe</option>
                <option value="Eschpo_cai.cut">Schizosaccharomyces pombe (CAI equivalent)</option>
                <option value="Eschpo_high.cut">Schizosaccharomyces pombe (high)</option>
                <option value="Eserma.cut">Serratia marcescens</option>
                <option value="Esheep.cut">Ovis aries (sheep)</option>
                <option value="Esoltu.cut">Solanum tuberosum (potato)</option>
                <option value="Esoybn.cut">Glycine max (soybean)</option>
                <option value="Espiol.cut">Spinacia oleracea (spinach)</option>
                <option value="Estaau.cut">Staphylococcus aureus</option>
                <option value="Estrco.cut">Streptomyces coelicolor A3 2</option>
                <option value="Estrmu.cut">Streptococcus mutans UA159</option>
                <option value="Estrpn.cut">Streptococcus pneumoniae R6</option>
                <option value="Estrpu.cut">Strongylocentrotus purpuratus (sea urchin)</option>
                <option value="Esv40.cut">Simian Virus 40</option>
                <option value="Esynco.cut">Synechococcus sp WH 8102</option>
                <option value="Esyncy.cut">Synechocystis sp PCC 6803</option>
                <option value="Etetth.cut">Tetrahymena thermophila</option>
                <option value="Etheth.cut">Thermus thermophilus HB8</option>
                <option value="Etobac.cut">Nicotiana tabacum (tobacco)</option>
                <option value="Etobac_chl.cut">Nicotiana tabacum chloroplast (tobacco)</option>
                <option value="Etrybr.cut">Trypanosoma brucei</option>
                <option value="Etrycr.cut">Trypanosoma cruzi</option>
                <option value="Evibch.cut">Vibrio cholerae O1 biovar eltor str N16961</option>
                <option value="Ewheat.cut">Triticum aestivum (wheat)</option>
                <option value="Exenla.cut">Xenopus laevis</option>
                <option value="Eyeast.cut">Saccharomyces cerevisiae</option>
                <option value="Eyeast_cai.cut">Saccharomyces cerevisiae original CAI set</option>
                <option value="Eyeast_high.cut">Saccharomyces cerevisiae (high)</option>
                <option value="Eyeast_mit.cut">Saccharomyces cerevisiae mitochondrion</option>
                <option value="Eyeren.cut">Yersinia enterocolitica</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <data name="constructs" format="xml" label="Constructs" />
    </outputs>
    <tests>
        <test>
            <!-- test 1: run with default parameters and check that the output is valid XML
                 containing the expected RDF root element with its namespace declarations -->
            <param name="genes_file" value="genes_lycopene.csv" />
            <output name="constructs" >
                <assert_contents>
                    <is_valid_xml />
                    <has_text text='rdf:RDF xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:xsd="http://www.w3.org/2001/XMLSchema#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:sbol="http://sbols.org/v2#" xmlns:xsd1="http://www.w3.org/2001/XMLSchema#dateTime/" xmlns:om="http://www.ontology-of-units-of-measure.org/resource/om-2/" xmlns:synbiohub="http://synbiohub.org#" xmlns:sbh="http://wiki.synbiohub.org/wiki/Terms/synbiohub#" xmlns:sybio="http://www.sybio.ncl.ac.uk#" xmlns:ncbi="http://www.ncbi.nlm.nih.gov#" xmlns:igem="http://wiki.synbiohub.org/wiki/Terms/igem#" xmlns:genbank="http://www.ncbi.nlm.nih.gov/genbank#" xmlns:gbconv="http://sbols.org/genBankConversion#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:obo="http://purl.obolibrary.org/obo/"'/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <!-- The help body is reStructuredText: it must stay at column 0 inside the CDATA
         section because indentation is significant in that format. -->
    <help><![CDATA[
OptDoE
===========

An optimal design of experiments is performed by using OptBioDes based on logistic regression analysis with an assumed linear model for the response. The design is evaluated by its D-efficiency, defined as:

.. math:: D_{eff} = \frac{1}{n}\left|X^{T}X\right|^{\frac{1}{p}}

|

Where n is the number of experimental runs or library size, p is the number of independent variables, and X is the model matrix, i.e., a row for each experimental run and a column for each term in the model. The library size n can be selected and should be above a minimal threshold depending on the combinatorial complexity of the library.
The experimental design can be evaluated through the provided diagnostics for D-efficiency, power analysis, and relative prediction variance, allowing the optimal selection of library size.
The resulting design is provided as an SBOL collection containing the definition of each DNA component and the combinatorial library of constructs.

.. image:: https://raw.githubusercontent.com/brsynth/synbiocad-galaxy-wrappers/master/optdoe/img/image3.png
    :width: 80 %
    :align: center


.. image:: https://raw.githubusercontent.com/brsynth/synbiocad-galaxy-wrappers/master/optdoe/img/image46.png
    :width: 80 %
    :align: center

|

Above are examples of two constructs generated through optimal combinatorial design and represented using SBOL Visual in SynBioHub.
Note: if the advanced parameter "Ref Parts" is left empty, the following parts are used:


    +---------+------------+--------------------------------------------------+
    | Name    | Type       | SynBioHub                                        |
    +=========+============+==================================================+
    | PlacUV5 | Promoter   | https://synbiohub.org/public/igem/BBa_K1847014/1 |
    +---------+------------+--------------------------------------------------+
    | Ptrc    | Promoter   | https://synbiohub.org/public/igem/BBa_J56012/1   |
    +---------+------------+--------------------------------------------------+
    | BBR1    | Origin     | https://synbiohub.org/public/igem/BBa_I50041/1   |
    +---------+------------+--------------------------------------------------+
    | p15A    | Origin     | https://synbiohub.org/public/igem/BBa_I50032/1   |
    +---------+------------+--------------------------------------------------+
    | ColE1   | Origin     | https://synbiohub.org/public/igem/BBa_J64101/1   |
    +---------+------------+--------------------------------------------------+
    | res1    | Resistance | https://synbiohub.org/public/igem/BBa_I13800/1   |
    +---------+------------+--------------------------------------------------+
    | Ter     | Terminator | https://synbiohub.org/public/igem/BBa_B1006/1    |
    +---------+------------+--------------------------------------------------+

The source of this file can be found here as a model: https://raw.githubusercontent.com/pablocarb/doebase/master/doebase/data/ref_parts.csv

Input
-----

Required:

* **Genes**\ : Output of Selenzyme. The CSV file contains four columns: Name,Type,Part,Step. The column "Name" contains the Uniprot ID found by Selenzyme. The column "Type" represents the type of the "Part", a *gene*. The column "Part" has the same information as the column "Name". The column "Step" contains the order of the position in the pathway of the gene.

Advanced options:

* **Optimised Genes**\ : SBOL file containing optimized versions of the genes associated with different RBS. Output of the PartsGenie tool.
* **Ref Parts**\ : CSV with the genetic parts. Default (if left empty) is the above table. To generate a new compatible CSV file, please use the "OptDoE Parts Reference Generator" tool.
* **Maximal library size**\ : (integer, default: 32) Maximal number of constructs in the SBOL output.
* **Get sequences**\ : (boolean, default: True) Grab the sequence for each part of the SBOL output.
* **Back translate**\ : (boolean, default: True) Translate the protein sequence given by its Uniprot ID to a DNA sequence.
* **Codon Table**\ : (select, default: Eecoli.cut) Codon Usage Table. See `codon_table <https://www.ebi.ac.uk/seqdb/confluence/display/JDSAT/EMBOSS+backtranseq+Help+and+Documentation#EMBOSSbacktranseqHelpandDocumentation-WebServices>`_ for more codons.

Output
------

* **Constructs**\ : Output SBOL file. It contains some combinations of constructs according to the algorithm described above.
    ]]></help>
    <expand macro="creator"/>
    <citations>
        <!-- A BibTeX entry needs a comma after its citation key. -->
        <citation type="bibtex">
            @unpublished{pablocarb,
                author = {Pablo Carbonell},
                title = {{doebase}},
                url = {https://github.com/pablocarb/doebase/},
            }
        </citation>
    </citations>
</tool>