Previous changeset 20:d88f0e7350f9 (2018-09-10) Next changeset 22:efc60c8dc54b (2018-09-10) |
Commit message:
Uploaded |
added:
phage_promoter.xml |
b |
diff -r d88f0e7350f9 -r 41f097d98765 phage_promoter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phage_promoter.xml Mon Sep 10 05:16:55 2018 -0400 |
[ |
<!--
    Galaxy tool wrapper for phage_promoter.py.
    Runs the script on a GenBank or FASTA genome and collects the two files
    named in <outputs> (output.html and output.fasta) from the job working
    directory.
-->
<tool id="get_proms" name="PhagePromoter" version="0.1.0">
    <description>Get promoters of phage genomes</description>

    <!-- Packages the script needs at run time; names must not carry stray whitespace. -->
    <requirements>
        <requirement type="package">biopython</requirement>
        <requirement type="package">scikit-learn</requirement>
        <requirement type="package">numpy</requirement>
        <requirement type="package">pandas</requirement>
    </requirements>

    <!--
        The script is called explicitly through $__tool_directory__ instead of the
        deprecated interpreter="python" attribute. All seven arguments are passed
        positionally, in this order, and single-quoted so that values containing
        spaces (e.g. "Escherichia coli") or an empty string (unchecked "both")
        still occupy exactly one argument slot.
    -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/phage_promoter.py'
            '$input_type.genome_format'
            '$input_type.genome'
            '$both'
            '$threshold'
            '$family'
            '$bacteria'
            '$lifecycle'
    ]]></command>

    <inputs>
        <!-- The selected file format decides which datatype the genome input accepts. -->
        <conditional name="input_type">
            <param name="genome_format" type="select" label="file format">
                <option value="genbank" selected="true">genbank</option>
                <option value="fasta">fasta</option>
            </param>
            <when value="genbank">
                <param name="genome" type="data" format="genbank" label="genome"/>
            </when>
            <when value="fasta">
                <param name="genome" type="data" format="fasta" label="genome"/>
            </when>
        </conditional>

        <!-- Yields the literal "-both" when checked and an empty string otherwise. -->
        <param name="both" type="boolean" label="Search both strands"
               checked="false" truevalue="-both" falsevalue=""/>

        <!-- Bounded to [0, 1] because the value is documented as a probability. -->
        <param name="threshold" type="float" value="0.50" min="0" max="1"
               label="Threshold"
               help="Probability of being a promoter (float between 0 and 1)"/>

        <param name="family" type="select" label="Phage family">
            <option value="Podoviridae" selected="true">Podoviridae</option>
            <option value="Siphoviridae">Siphoviridae</option>
            <option value="Myoviridae">Myoviridae</option>
        </param>

        <param name="bacteria" type="select" label="Host bacteria Genus">
            <option value="Escherichia coli" selected="true">Escherichia coli</option>
            <option value="Salmonella">Salmonella</option>
            <option value="Pseudomonas">Pseudomonas</option>
            <option value="Yersinia">Yersinia</option>
            <option value="Morganella">Morganella</option>
            <option value="Cronobacter">Cronobacter</option>
            <option value="Staphylococcus">Staphylococcus</option>
            <option value="Streptococcus">Streptococcus</option>
            <option value="Lactococcus">Lactococcus</option>
            <option value="Streptomyces">Streptomyces</option>
            <option value="Klebsiella">Klebsiella</option>
            <option value="Bacillus">Bacillus</option>
            <option value="Pectobacterium">Pectobacterium</option>
            <option value="other">other</option>
        </param>

        <param name="lifecycle" type="select" label="Phage type">
            <option value="virulent" selected="true">virulent</option>
            <option value="temperate">temperate</option>
        </param>
    </inputs>

    <!-- Distinct labels keep the two result datasets distinguishable in the history. -->
    <outputs>
        <data name="output1" format="html" from_work_dir="output.html"
              label="${tool.name} on ${on_string}: promoters (HTML table)"/>
        <data name="output2" format="fasta" from_work_dir="output.fasta"
              label="${tool.name} on ${on_string}: promoters (FASTA)"/>
    </outputs>

    <tests>
        <test>
            <conditional name="input_type">
                <param name="genome_format" value="genbank"/>
                <param name="genome" value="NC_015264.gb"/>
            </conditional>
            <param name="both" value="false"/>
            <param name="threshold" value="0.50"/>
            <param name="family" value="Podoviridae"/>
            <param name="bacteria" value="Pseudomonas"/>
            <param name="lifecycle" value="virulent"/>
            <output name="output1" file="output.html"/>
            <output name="output2" file="output.fasta"/>
        </test>
    </tests>

    <help><![CDATA[

===============
PhagePromoter
===============

Get promoters of phage genomes

PhagePromoter is a python script that predicts promoter sequences in phage genomes, using a machine learning SVM model. This model was built from a training dataset with 19 features and 3200 examples (800 positives and 2400 negatives), each representing a 65 bp sequence of a phage genome. The positive cases represent the phage sequences that are already identified as promoters.

**Inputs:**

* genome format: fasta vs genbank;
* genome file: accepts both genbank and fasta formats;
* both strands (yes or no): allows the search in both DNA strands;
* threshold: represents the probability of the test sequence being a promoter (float between 0 and 1, default 0.50). For example, if threshold=0.90, the model only returns the predicted sequences with more than 90% probability of being a promoter. The larger the genome, the higher the threshold should be.
* family: The family of the testing phage - Podoviridae, Siphoviridae or Myoviridae;
* Bacteria: The host of the phage. The training dataset includes the following hosts: Bacillus, Escherichia coli, Salmonella, Pseudomonas, Yersinia, Klebsiella, Pectobacterium, Morganella, Cronobacter, Staphylococcus, Streptococcus, Streptomyces, Lactococcus. If the testing phage has a different host, select the option 'other', and it is recommended the use of a higher threshold value for more accurate results.
* phage type: The type of the phage, according to its lifecycle: virulent or temperate;

**Outputs:**

This tool outputs two files: a FASTA file and a table in HTML, with the locations, sequence, score and type (recognized by host or phage RNAP) of the predicted promoters.

**Requirements:**

* Biopython
* Sklearn
* Numpy
* Pandas

    ]]></help>
</tool>