view searchFile.xml @ 1:6e3a843b6304 draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:53:18 +0000
parents
children 21ae0e340d80
line wrap: on
line source

<tool id="edu.tamu.cpt.proximity.searchFile" name="Search File" version="1.0">
    <!-- One-line summary of what the tool does. -->
    <description>Queries a gff3, genbank, fasta, or blastxml file for a user defined set of terms</description>
    <!-- Macro files imported by this wrapper; their contents are not visible in this file. -->
    <macros>
        <import>macros.xml</import>
        <import>cpt-macros.xml</import>
    </macros>
    <!-- Expands the "requirements" macro (presumably defined in one of the imported macro files;
         confirm there) and supplies gffutils 0.10.1 as an additional package requirement. -->
    <expand macro="requirements">
        <requirement type="package" version="0.10.1">gffutils</requirement>
    </expand>
    <command detect_errors="aggressive"><![CDATA[
## Run the search script that sits next to this wrapper. Every substituted value is
## single-quoted so the shell never performs expansion on user-controlled text.
python2.7 '$__tool_directory__/searchFile.py'
    ## Curated database term families (comma-separated list of the selected option values).
    ## NOTE(review): $check is a parameter wrapper object, so "is not None" is always true and an
    ## empty selection is passed through as the literal text None -- confirm searchFile.py
    ## expects that before tightening this condition.
    #if $check is not None:
    --dbaseTerms '$check'
    #end if
    ## Optional user-supplied terms; the "nocustom" choice adds nothing to the command line.
    #if $term_add.term_selector == "customtxt":
    --custom_txt '$term_add.custom_txt'
    #elif $term_add.term_selector == "customfile":
    --custom_file '$term_add.custom_file'
    #end if
    ## Each "Input Files" repeat contributes one flag (chosen by file type) followed by the
    ## paths of every dataset selected in that repeat.
    #for $input_select in $input_files:
        #if $input_select.file_selector.file_select == "gff3selection":
            --gff3_files
            #for $gff3_file in $input_select.file_selector.gff3_files:
                '${gff3_file}'
            #end for
        #end if
        #if $input_select.file_selector.file_select == "gbkselection":
            --gbk_files
            #for $gbk_file in $input_select.file_selector.gbk_files:
                '${gbk_file}'
            #end for
        #end if
        #if $input_select.file_selector.file_select == "faselection":
            --fa_files
            #for $fa_file in $input_select.file_selector.fa_files:
                '${fa_file}'
            #end for
        #end if
        #if $input_select.file_selector.file_select == "blastselection":
            --blast_files
            #for $blast_file in $input_select.file_selector.blast_files:
                '${blast_file}'
            #end for
        #end if
    #end for
    ## Proximity-to-lysis pipeline mode.
    #if $prox:
        --prox
    #end if
    --output '$output'
    ]]></command>
    <inputs>
        <!-- Pipeline switch: when checked, the command section adds the prox flag and the
             output dataset is typed as gff3 instead of txt. -->
        <param label="Using Proximity to Lysis Pipeline?" name="prox" type="boolean" truevalue="--prox" falsevalue="" checked="false" help="required GFF3 input"/>
        <!-- Curated term families shown as checkboxes; all six are selected by default and the
             selection may be left empty because the parameter is optional. -->
        <param name="check" type="select" optional="true" label="Family terms to search" multiple="true" help="Terms available to query from the Lysis-family synonym database (see terms in Shared Data/Lysis family Terms)" display="checkboxes">
            <option value="endolysins" selected="true">Endolysins</option>
            <option value="holins" selected="true">Holins and Anti-holins</option>
            <option value="spanins" selected="true">Spanins</option>
            <option value="endolysin_domains" selected="true">Endolysin Associated Domains</option>
            <option value="spanin_domains" selected="true">Spanin Associated Domains</option>
            <option value="holin_domains" selected="true">Holin Associated Domains</option>
        </param>
        <!-- Optional custom search terms, entered as text or uploaded as a file. -->
        <conditional name="term_add">
            <param name="term_selector" type="select" label="Choose if you'd like to add custom terms">
                <option value="nocustom" selected="false">No Custom Terms</option>
                <option value="customtxt" selected="false">Custom Text</option>
                <option value="customfile" selected="false">Custom File</option>
            </param>
            <!-- Every selector option needs a matching branch; this one has no extra inputs. -->
            <when value="nocustom"/>
            <when value="customtxt">
                <param name="custom_txt" label="Custom Text" optional="true" type="text" area="true" help="Custom text box for search terms, must be separated by newline (enter)"/>
            </when>
            <when value="customfile">
                <param name="custom_file" label="Custom File" optional="true" type="data" format="txt" help="Custom search terms, uploaded via file, where terms must be separated by newline"/>
            </when>
        </conditional>
        <!-- One repeat per group of input datasets; each group picks a file type and then
             one or more datasets of that type. -->
        <repeat name="input_files" title="Input Files">
            <conditional name="file_selector">
                <param name="file_select" type="select" label="Choose the type of file(s) you'd like to query">
                    <option value="gff3selection" selected="false">GFF3</option>
                    <option value="gbkselection" selected="false">Genbank</option>
                    <option value="faselection" selected="false">FASTA</option>
                    <option value="blastselection" selected="false">BLAST-XML</option>
                </param>
                <when value="gff3selection">
                    <param name="gff3_files" label="GFF3 Input" optional="true" multiple="true" type="data" format="gff3"/>
                </when>
                <when value="gbkselection">
                    <param name="gbk_files" label="Genbank Input" optional="true" multiple="true" type="data" format="genbank"/>
                </when>
                <when value="faselection">
                    <param name="fa_files" label="FASTA Input" optional="true" multiple="true" type="data" format="fasta"/>
                </when>
                <when value="blastselection">
                    <param name="blast_files" label="BLAST-xml Input" optional="true" multiple="true" type="data" format="xml"/>
                </when>
            </conditional>
        </repeat>
    </inputs>
    <outputs>
        <!-- Single result dataset: plain text by default, typed as gff3 when the prox
             switch renders as its truevalue. -->
        <data name="output" label="termHits" format="txt">
            <change_format>
                <when format="gff3" input="prox" value="--prox"/>
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Pipeline mode on a single GFF3 input, searching only the spanin term family.
             The input datatype is stated explicitly instead of relying on sniffing, and the
             output datatype is asserted so the txt to gff3 switch is covered by the test. -->
        <test>
            <param name="prox" value="true"/>
            <param name="check" value="spanins"/>
            <repeat name="input_files">
                <conditional name="file_selector">
                    <param name="file_select" value="gff3selection"/>
                    <param name="gff3_files" value="lambda_spaninBLAST.gff3" ftype="gff3"/>
                </conditional>
            </repeat>
            <output name="output" file="termHits" ftype="gff3"/>
        </test>
    </tests>
    <help><![CDATA[
INPUT : One or more files of interest (gff3, gbk, fasta, or blast-xml), which are queried with a chosen set of search terms. The search terms can come from the curated lysis synonym database and/or from custom input supplied via file or text box. Additionally, if this tool is being run as part of the proximity to lysis workflow, the selector needs to be set to true, and only gff3 files will be able to complete the job.

OUTPUT : The "termHits.txt" output file reports, separated by file type, the features or lines that contained hits to a query term. See "File Search Areas" below for the regions of each file type that are searched. If used for the proximity to lysis workflow, the output will instead be a proxHits.gff3 file that can be fed to the next stages of the pipeline.

DBase Family Term Descriptions :

- Endolysins: enzymes that attack one of the structural bonds of the peptidoglycan. The endolysin step requires the hole-forming function of the holins.

- Holins and Anti-holins: small cytoplasmic membrane proteins that control the timing of lysis by forming a lethal membrane hole at a programmed time (programmed into the holin itself).

- Spanins: A periplasm-spanning protein complex that disrupts the outer membrane of the host in phage lysis. These complexes are currently identified as either a unimolecular spanin (u-spanin) or two-component system (i-spanin and o-spanin pair).

- Endolysin Domains: information pulled from PMID: 30873139

- Spanin Domains: information pulled from http://www.ebi.ac.uk/interpro/search/text/spanin/

- Holin Domains: information pulled from http://www.ebi.ac.uk/interpro/search/text/holin/

Additionally, some terms were added by querying QuickGO, located at https://www.ebi.ac.uk/QuickGO/

File Search Areas:

- gff3 : entire row

- genbank : product and note qualifier of each feature

- fasta : header field

- blast-xml : hit description
    ]]></help>
    <citations>
        <!-- Publication cited by DOI. -->
        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
        <!-- BibTeX entry for the CPT Galaxy Tools repository. -->
        <citation type="bibtex">
                @unpublished{galaxyTools,
                author = {C. Ross},
                title = {CPT Galaxy Tools},
                year = {2020-},
                note = {https://github.com/tamu-cpt/galaxy-tools/}
                }
            </citation>
    </citations>
</tool>