<!--
view tapscan.xml @ 1:c4f865bd101a draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/tapscan commit af8605d266717ed3453bd2a0947fed79c7098fb3
author bgruening
date Thu, 22 Feb 2024 10:07:53 +0000
parents 196795831b6a
children
line wrap: on
line source
-->

<tool id="tapscan_classify" name="TAPScan Classify" version="4.76+galaxy0" profile="23.0">
    <!-- Short summary of what the tool does. -->
    <description>Detect Transcription Associated Proteins (TAPs)</description>
    <!-- EDAM ontology topic annotation for the tool.
         NOTE(review): topic_0121 should be "Proteomics" in EDAM; confirm against the ontology. -->
    <edam_topics>
        <edam_topic>topic_0121</edam_topic>
    </edam_topics>
    <!-- Software packages the command section calls, pinned to fixed versions. -->
    <requirements>
        <!-- Provides hmmsearch, used for the profile HMM scan. -->
        <requirement type="package" version="3.3.2">hmmer</requirement>
        <!-- Interpreter for the bundled tapscan_classify.pl script. -->
        <requirement type="package" version="5.26">perl</requirement>
        <!-- Used to rewrite the script outputs in place (field separators and header lines). -->
        <requirement type="package" version="4.8">sed</requirement>
    </requirements>
    <!-- Files shipped alongside this wrapper that the command section reads from the tool directory. -->
    <required_files>
        <!-- Classification script run after the HMM scan. -->
        <include type="literal" path="tapscan_classify.pl"/>
        <!-- Gzip-compressed profile HMM collection passed to hmmsearch. -->
        <include type="literal" path="tapscan_domains_v13.txt.gz"/>
        <!-- Rules file passed to the classification script. -->
        <include type="literal" path="tapscan_rules_v82.txt"/>
        <!-- Coverage values file passed to the classification script. -->
        <include type="literal" path="tapscan_coverage_values_v11.txt"/>
    </required_files>
    <command detect_errors="aggressive"><![CDATA[

## Step 1: scan every input protein against the bundled TAPscan profile HMMs.
##   cpu        restrict hmmsearch worker threads to the slots Galaxy allocated to this job
##   domtblout  write the per-domain hits table that step 2 reads
##   cut_ga     apply the gathering (GA) thresholds stored in each profile
hmmsearch
  --cpu "\${GALAXY_SLOTS:-1}"
  --domtblout domtblout.txt
  --cut_ga
  '${__tool_directory__}/tapscan_domains_v13.txt.gz'
  '$protein_fasta_in'

&&

## Step 2: classify the domain hits into TAP families.
## Positional arguments: domain hits table, rules file, the three output
## datasets, and the coverage values file.
perl '${__tool_directory__}/tapscan_classify.pl'
  domtblout.txt
  '${__tool_directory__}/tapscan_rules_v82.txt'
  '$taps_detected'
  '$taps_family_counts'
  '$taps_detected_extra'
  '${__tool_directory__}/tapscan_coverage_values_v11.txt'

&&

## Step 3: make the outputs tab-separated for Galaxy compatibility and give
## them clear header lines, one sed pass per file.
## Only the leading separators are converted for the two "detected" files
## (the first three resp. four semicolons), presumably because the trailing
## domains column may itself contain semicolons.
## Line 1 is assumed to be the header written by the script; it is replaced
## in place instead of being deleted and re-inserted afterwards, so the
## header is still present when the script reports no data rows at all.
sed -i -e 's/;/\t/' -e 's/;/\t/' -e 's/;/\t/' -e '1s/.*/sequence ID\tTAP family\tnumber of classifications\tdomains/' '$taps_detected' &&
sed -i -e 's/;/\t/g' -e '1s/.*/TAP family\tnumber of detected proteins/' '$taps_family_counts' &&
sed -i -e 's/;/\t/4' -e 's/;/\t/' -e 's/;/\t/' -e 's/;/\t/' -e '1s/.*/sequence ID\tTAP family\tsubfamily\tnumber of classifications\tdomains/' '$taps_detected_extra'

    ]]></command>
    <inputs>
        <!-- Mandatory protein sequence dataset that is scanned for TAPs. -->
        <param name="protein_fasta_in"
               type="data"
               format="fasta"
               optional="false"
               label="Proteins in FASTA format"
               help=""/>
        <!-- Opt-in switch; when set, the hmmsearch domain hits table is kept as an extra output. -->
        <param name="domtblout"
               type="boolean"
               checked="false"
               label="Output the HMMer domain hits table?"/>
    </inputs>
    <outputs>
        <!-- The column_names metadata of the three classification outputs mirrors
             the header line that the command section writes into each file; keep both in sync. -->

        <!-- Detected domains and the assigned TAP family for each sequence. -->
        <data name="taps_detected" format="tabular" label="${tool.name} on ${on_string}: Detected TAPs - domains and assigned TAP family for each gene ID">
            <actions>
                <action name="column_names" type="metadata" default="sequence ID,TAP family,number of classifications,domains" />
            </actions>
        </data>
        <!-- Number of detected proteins per TAP family. -->
        <data name="taps_family_counts" format="tabular" label="${tool.name} on ${on_string}: Count - Summary of the number of members for each TAP family">
            <actions>
                <action name="column_names" type="metadata" default="TAP family,number of detected proteins" />
            </actions>
        </data>
        <!-- Same as taps_detected, with an additional subfamily column. -->
        <data name="taps_detected_extra" format="tabular" label="${tool.name} on ${on_string}: Detected TAPs Extra - with subfamily information">
            <actions>
                <action name="column_names" type="metadata" default="sequence ID,TAP family,subfamily,number of classifications,domains" />
            </actions>
        </data>
        <!-- Optional: the hmmsearch table picked up from the working directory,
             only created when the "domtblout" input is enabled. -->
        <data name="domtbl" format="tabular" from_work_dir="domtblout.txt" label="${tool.name} on ${on_string}: HMMer Domain Hits Table">
            <filter>domtblout</filter>
        </data>
    </outputs>
    <tests>
        <!-- Default settings: the domain hits table is not requested, so three datasets are expected. -->
        <test expect_num_outputs="3">
            <param name="protein_fasta_in" ftype="fasta" value="PUBLIC_Ectocarpus-sp7_proteins_head.fa"/>
            <output name="taps_detected" ftype="tabular" file="output.1.tsv" lines_diff="2"/>
            <output name="taps_family_counts" ftype="tabular" file="output.2.tsv" lines_diff="2"/>
            <output name="taps_detected_extra" ftype="tabular" file="output.3.tsv" lines_diff="2"/>
        </test>
        <!-- test with domtblout -->
        <test expect_num_outputs="4">
            <param name="protein_fasta_in" ftype="fasta" value="PUBLIC_Ectocarpus-sp7_proteins_head.fa"/>
            <param name="domtblout" value="true"/>
            <output name="taps_detected" ftype="tabular" file="output.1.tsv"/>
            <output name="taps_family_counts" ftype="tabular" file="output.2.tsv"/>
            <output name="taps_detected_extra" ftype="tabular" file="output.3.tsv"/>
            <!-- NOTE(review): the tolerance of 10 differing lines presumably absorbs
                 run-specific lines in the hmmsearch table; confirm. -->
            <output name="domtbl" ftype="tabular" file="output.domtbl.tsv" lines_diff="10"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

TAPscan is a comprehensive tool for annotating Transcription Associated Proteins (TAPs),
with a special focus on species belonging to the Archaeplastida. In general, TAPs are
detected based on the presence of highly conserved protein domains.

In the first step, each sequence in a species' protein set is scanned for protein domains
(stored as profile Hidden Markov Models) using hmmsearch. The domain list consists of 154 profile
HMMs and serves as the domain reference for the hmmsearch run.

In the second step, TAPscan applies specialized rules to assign the protein sequences
to TAP families based on the domains detected in the previous step.
With the latest TAPscan v4, a protein set can be scanned for 137 different TAP families with high
accuracy by applying GA thresholds and coverage values.

**Output Files**

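TAPscan provides the user with three different output files. Each of these output files is
tab-separated. Optionally, the HMMer domain hits table produced by hmmsearch can be
returned as a fourth output.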

- **Output 1: "Detected TAPs"** - contains the detected domains and the finally assigned TAP family
  for each gene ID. If domains are assigned to a sequence but not all rules are fulfilled, the
  sequence is assigned to “0_no_family_found”.
- **Output 2: "Count"** - is a summary of the number of members for each TAP family.
- **Output 3: "Detected TAPs Extra"** - is similar to output 1 but contains additional
  information about subfamilies.

    ]]></help>
    <!-- Attribution metadata: the organization credited for this tool. -->
    <creator>
        <organization name="Rensing Lab" url="https://plantco.de"/>
    </creator>
    <!-- Publications to cite when using this tool. -->
    <citations>
        <!-- TODO: add citation for TAPscan v4 paper when published -->
        <!-- NOTE(review): presumably the DOI below refers to an earlier TAPscan-related publication; confirm it is the intended reference. -->
        <citation type="doi">10.3390/genes12071055</citation>
    </citations>
</tool>