Mercurial > repos > cpt > cpt_putative_osp
diff generate-putative-osp.xml @ 1:05b97a4dce94 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:51:44 +0000 |
parents | |
children | 05244a021b80 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/generate-putative-osp.xml Mon Jun 05 02:51:44 2023 +0000 @@ -0,0 +1,74 @@ +<tool id="edu.tamu.cpt2.spanin.generate-putative-osp" name="OSP candidates" version="1.0"> + <description>constructs a putative list of potential o-spanin from an input genomic FASTA</description> + <macros> + <import>macros.xml</import> + <import>cpt-macros.xml</import> + </macros> + <expand macro="requirements"> + </expand> + <command detect_errors="aggressive"><![CDATA[ +'$__tool_directory__/generate-putative-osp.py' +'$fasta_file' +--strand '$strand' +--switch '$switch' +--osp_on '$osp_on' +--osp_op '$osp_op' +--osp_ob '$osp_ob' +--osp_og '$osp_og' +--osp_min_len '$osp_min_len' +--putative_osp '$putative_osp' +--summary_osp_txt '$summary_osp' +--putative_osp_gff '$putative_osp_gff' +--min_lipo_after '$lipo_min' +--max_lipo_after '$lipo_max' +--osp_max '$osp_max' +]]></command> + <inputs> + <param type="select" label="Strand Choice" name="strand"> + <option value="both">both</option> + <option value="forward">+</option> + <option value="reverse">-</option> + </param> + <param label="Single Genome FASTA" name="fasta_file" type="data" format="fasta"/> + <param label="o-spanin minimal length" name="osp_min_len" type="integer" value="45"/> + <param label="o-spanin maximum length" name="osp_max" type="integer" value="200"/> + <param label="Range Selection; default is all; for a specific range to check for a spanin input integers separated by a colon (eg. 1234:4321)" type="text" name="switch" value="all"/> + <param label="Lipobox minimal distance from start codon" name="osp_min_dist" type="integer" value="10"/> + <param label="Lipobox maximum distance from start codon" name="osp_max_dist" type="integer" value="60" help="Searches for a Lipobox between Lipoboxmin and Lipoboxmax ie [Lipoboxmin,Lipoboxmax]"/> + <param label="Minimum amount of residues after lipobox is found" name="lipo_min" type="integer" value="25"/> + <param label="Maximum amount of residues after lipobox is found" name="lipo_max" type="integer" value="170"/> + </inputs> + <outputs> + <data format="fasta" name="osp_on" label="NucSequences.fa" hidden="true"/> + <data format="fasta" name="osp_op" label="ProtSequences.fa" hidden="true"/> + <data format="bed" name="osp_ob" label="BED_Output.bed" hidden="true"/> + <data format="gff3" name="osp_og" label="GFF_Output.gff" hidden="true"/> + <data format="fasta" name="putative_osp" label="putative_osp.fa"/> + <data format="txt" name="summary_osp" label="summary_osp.txt"/> + <data format="gff3" name="putative_osp_gff" label="putative_osp.gff3"/> + </outputs> + <help><![CDATA[ + +**What it does** +Searches a genome for candidate o-spanins (OSPs), a phage protein involved in outer membrane disruption during Gram-negative bacterial host cell lysis. + + +**METHODOLOGY** + +Locates ALL potential start sequences, based on TTG / ATG / GTG (M / L / V). This list is pared down to those within the user-set min/max lengths. That filtered list generates a set of files with the ORFs in FASTA (nt and aa), BED, and GFF3 file formats. + +For each sequence in the protein FASTA, the tool then checks within the user-specified range (min/max distance from start codon) for a regular expression (RegEx) to identify a potential lipobox. The following residues are allowed for the potential lipobox: + + * [ILMFTV][^REKD][GAS]C + * AW[AGS]C + +Finally, the protein list is filtered for size with user-set periplasmic length parameters, calculated as the number of residues after the putative lipobox. + +**INPUT** --> Genomic FASTA +*NOTE: This tool only takes a SINGLE genomic fasta. It does not work with multiFASTAs.* + +**OUTPUT** --> putative_osp.fa (FASTA) file, putative_osp.gff3, and basic summary statistics file as sumamry_osp.txt +Protein sequences which passed the above filters are returned as the candidate OSPs. +]]></help> + <expand macro="citations-crr"/> +</tool>