<!--
view gffcompare.xml @ 2:f99d7825a501 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gffcompare commit a5352fb7f073b0083b750c5cf7d283ebcc16b30d
author iuc
date Tue, 05 Feb 2019 15:51:44 -0500
parents c80cdc2eac6d
children 2bb86e2c417f
line wrap: on
line source
-->

<tool id="gffcompare" name="GffCompare" version="0.10.6">
    <description>compare assembled transcripts to a reference annotation</description>
    <!-- Runtime dependency: the gffcompare package, pinned to the same version string
         (0.10.6) that the tool element declares in its version attribute. -->
    <requirements>
        <requirement type="package" version="0.10.6">gffcompare</requirement>
    </requirements>
    <!-- Runs "gffcompare -v" and keeps only the second whitespace-separated field of what it
         prints (presumably the version number; confirm against the binary's actual output). -->
    <version_command>gffcompare -v | awk '{print $2}'</version_command>
    <!--
        Command template (Cheetah). Steps, in order:
          1. Build one sanitized name per dataset in gffinputs: the element identifier with
             every character other than a word character or "-" replaced by "_". Each
             dataset is symlinked into the working directory under that name.
          2. When sequence data is enabled, symlink the chosen reference as ref_seq.fa:
             the history dataset, or the "path" field of the selected data table entry.
          3. Run gffcompare. The reference annotation flags are added only when
             use_ref_annotation is "Yes"; "-T" is added when the refmap_tmap selection is
             empty. The sanitized input names are passed last, as positional arguments.
        The select and boolean parameters are inserted verbatim, so their option values and
        truevalue/falsevalue strings are the literal command line flags.
        No output prefix option is passed; the outputs section reads files named gffcmp.*.
    -->
    <command detect_errors="aggressive"><![CDATA[
#import re
#set escaped_element_identifiers = [re.sub('[^\w\-]', '_', str(_.element_identifier)) for _ in $gffinputs]
#for $input, $escaped_element_identifier in zip($gffinputs, $escaped_element_identifiers):
    ln -s '$input' '$escaped_element_identifier' &&
#end for
#if $seq_data.use_seq_data == "Yes":
    #if $seq_data.seq_source.index_source == "history":
        ln -s '$seq_data.seq_source.ref_file' ref_seq.fa &&
    #else:
        ln -s '${seq_data.seq_source.index.fields.path}' ref_seq.fa &&
    #end if
#end if

gffcompare
## Use annotation reference?
#if $annotation.use_ref_annotation == "Yes":
    -r '$annotation.reference_annotation'
    $annotation.ignore_nonoverlapping_reference
    $annotation.ignore_nonoverlapping_transfrags
    #if not $annotation.refmap_tmap:
        -T
    #end if
#end if

## Use sequence data?
#if $seq_data.use_seq_data == "Yes":
    -s ref_seq.fa
#end if

$discard_single_exon
-e $max_dist_exon
-d $max_dist_group
-p '$adv_output.p'
$adv_output.A
$adv_output.C
$adv_output.X
$adv_output.K

#for $escaped_element_identifier in $escaped_element_identifiers:
    '$escaped_element_identifier'
#end for

    ]]></command>
    <inputs>
        <!-- Query transcript files in GTF format; several may be selected (multiple="true").
             The command template symlinks each one under a sanitized name and passes those
             names to gffcompare as positional arguments. -->
        <param format="gtf" name="gffinputs" type="data" label="GTF inputs for comparison" help="" multiple="true" />
        <!-- Optional reference annotation. Choosing "Yes" exposes the reference file (-r), the
             -R/-Q overlap filters and the choice of per-input refmap/tmap reports; "No" (the
             first option) exposes nothing. -->
        <conditional name="annotation">
            <param name="use_ref_annotation" type="select" label="Use Reference Annotation">
                <option value="No">No</option>
                <option value="Yes">Yes</option>
            </param>
            <when value="Yes">
                <param name="reference_annotation"
                       argument="-r"
                       type="data"
                       format="gff3,gtf"
                       label="Reference Annotation"
                       help="Requires an annotation file in GFF3 or GTF format." />
                <param name="ignore_nonoverlapping_reference"
                       argument="-R"
                       type="boolean"
                       truevalue="-R"
                       falsevalue=""
                       label="Ignore reference transcripts that are not overlapped by any input transfrags"
                       help="consider only the reference transcripts that overlap any of the input transfrags (Sn correction)" />
                <param name="ignore_nonoverlapping_transfrags"
                       argument="-Q"
                       type="boolean"
                       truevalue="-Q"
                       falsevalue=""
                       label="Ignore input transcripts that are not overlapped by any reference transcripts"
                       help="consider only the input transcripts that overlap any of the reference transcripts (Sp correction). Warning: this will discard all 'novel' loci!" />
                <!-- Both report types are selected by default; the command template adds -T
                     when nothing is selected here. -->
                <param name="refmap_tmap"
                       argument="-T"
                       type="select"
                       multiple="True"
                       label="Generate tmap and refmap files for each input file">
                    <option value="refmap" selected="True">refmap</option>
                    <option value="tmap" selected="True">tmap</option>
                </param>
            </when>
            <when value="No" />
        </conditional>
        <!-- Optional genome sequence passed to gffcompare via -s. The reference comes either
             from the fasta_indexes data table (entries filtered by the dbkey of gffinputs) or
             from a FASTA dataset in the history. "Yes" is the first option listed. -->
        <conditional name="seq_data">
            <param name="use_seq_data"
                   type="select"
                   label="Use Sequence Data"
                   help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff.">
                <option value="Yes">Yes</option>
                <option value="No">No</option>
            </param>
            <when value="No" />
            <when value="Yes">
                <conditional name="seq_source">
                    <param name="index_source"
                           type="select"
                           label="Choose the source for the reference list">
                        <option value="cached">Locally cached</option>
                        <option value="history">History</option>
                    </param>
                    <when value="cached">
                        <param name="index"
                               argument="-s"
                               type="select"
                               label="Using reference genome">
                            <options from_data_table="fasta_indexes">
                                <filter type="data_meta" ref="gffinputs" key="dbkey" column="1" />
                                <validator type="no_options"
                                           message="No reference genome is available for the build associated with the selected input dataset" />
                            </options>
                        </param>
                    </when>
                    <when value="history">
                        <param name="ref_file"
                               argument="-s"
                               type="data"
                               format="fasta"
                               label="Using reference file" />
                    </when>
                </conditional>
            </when>
        </conditional>
        <!-- Single-exon handling: the selected option value is itself the flag (empty, -M or
             -N) that the command template inserts verbatim. -->
        <param name="discard_single_exon"
               argument="-M/-N"
               type="select"
               label="discard (ignore) single-exon transcripts">
            <option value="" selected="True">No</option>
            <option value="-M">Discard single-exon transfrags and reference transcripts</option>
            <option value="-N">Discard single-exon reference transcripts</option>
        </param>
        <!-- Distance thresholds, passed as "-e" and "-d"; both start at 100. -->
        <param name="max_dist_exon"
               argument="-e"
               type="integer"
               value="100"
               label="Max. Distance for assessing exon accuracy"
               help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100" />
        <param name="max_dist_group"
               argument="-d"
               type="integer"
               value="100"
               label="Max distance for transcript grouping"
               help="max. distance (range) for grouping transcript start sites. Default: 100" />
        <!-- Options for the combined/annotated GTF: the transcript name prefix (-p) and the
             flags that decide which "contained" (redundant) transfrags are dropped. The
             params have no name attribute; the command template reads them as
             $adv_output.p, .C, .A, .X and .K. Each boolean yields its truevalue when checked
             and an empty string otherwise, and that string is placed on the command line
             verbatim, so every truevalue must be exactly the flag named in "argument". -->
        <section name="adv_output" title="Options for the annotated/combined GTF output file">
            <param argument="-p"  type="text" value="TCONS" label="name prefix for consensus transcripts" help="for combined.gtf (default: 'TCONS')" />
            <param argument="-C"  type="boolean" checked="false" truevalue="-C" falsevalue=""  label="discard the 'contained' transfrags" help="i.e. collapse intron-redundant transfrags across all query files" />
            <param argument="-A"  type="boolean" checked="false" truevalue="-A" falsevalue=""  label="discard the 'contained' transfrags except intron-redundant transfrags starting with a different 5' exon" help="like -C but does not discard intron-redundant transfrags if they start with a different 5' exon" />
            <param argument="-X"  type="boolean" checked="false" truevalue="-X" falsevalue=""  label="discard the 'contained' transfrags also if ends stick out within the container's introns" help="like -C but also discard contained transfrags if transfrag ends stick out within the container's introns" />
            <!-- Must emit -K. With truevalue set to -A, checking this box would enable -A
                 and the "keep transfrags matching a reference" modifier would never be
                 passed to gffcompare. -->
            <param argument="-K"  type="boolean" checked="false" truevalue="-K" falsevalue=""  label="do NOT discard any redundant transfrag matching a reference" help="for -C/-A/-X" />
        </section>
    </inputs>
    <!-- All outputs are picked up from files named gffcmp.* in the job working directory. -->
    <outputs>
        <!-- Produced on every run (no filter): accuracy statistics, loci and tracking tables. -->
        <data format="txt" from_work_dir="gffcmp.stats" label="${tool.name} on ${on_string}: transcript accuracy" name="transcripts_stats" />
        <data format="tabular" from_work_dir="gffcmp.loci" label="${tool.name} on ${on_string}: loci" name="transcripts_loci" />
        <data format="tabular" from_work_dir="gffcmp.tracking" label="${tool.name} on ${on_string}: data ${gffinputs[0].hid} tracking file" name="transcripts_tracking" />
        <!-- The two GTF outputs have complementary filters, so exactly one is kept: the
             combined GTF when gffinputs evaluates to a list, the annotated GTF otherwise.
             The tests pair two input files with "combined" and a single input file with
             "annotated". -->
        <data format="gtf" from_work_dir="gffcmp.combined.gtf" label="${tool.name} on ${on_string}: combined transcripts" name="transcripts_combined">
            <filter>isinstance(gffinputs, list)</filter>
        </data>
        <data format="gtf" from_work_dir="gffcmp.annotated.gtf" label="${tool.name} on ${on_string}: annotated transcripts" name="transcripts_annotated">
            <filter>not isinstance(gffinputs, list)</filter>
        </data>
        <!-- Per-input report collections, discovered by file name: the text between
             "gffcmp." and the ".refmap" / ".tmap" suffix becomes the element designation.
             Each collection is kept only when a reference annotation is in use and the
             matching option is selected in refmap_tmap. -->
        <collection name="refmap_output" type="list" label="${tool.name} on ${on_string}: refmap">
            <discover_datasets pattern="gffcmp\.(?P&lt;designation&gt;.+)\.refmap" ext="tabular" />
            <filter>annotation['use_ref_annotation'] == 'Yes' and annotation['refmap_tmap'] != None and 'refmap' in annotation['refmap_tmap']</filter>
        </collection>
        <collection name="tmap_output" type="list" label="${tool.name} on ${on_string}: tmap">
            <discover_datasets pattern="gffcmp\.(?P&lt;designation&gt;.+)\.tmap" ext="tabular" />
            <filter>annotation['use_ref_annotation'] == 'Yes' and annotation['refmap_tmap'] != None and 'tmap' in annotation['refmap_tmap']</filter>
        </collection>
    </outputs>
    <tests>
        <!-- Two query GTFs compared against a reference annotation, sequence data off.
             refmap_tmap is not set here, so its defaults apply (both options are marked
             selected in the inputs). Six outputs are expected: stats, loci, tracking, the
             combined GTF, and the refmap and tmap collections with one element per input,
             named after the sanitized input file names. -->
        <test expect_num_outputs="6">
            <param ftype="gtf" name="gffinputs" value="gffcompare_in1.gtf,gffcompare_in2.gtf" />
            <param name="use_ref_annotation" value="Yes" />
            <conditional name="annotation">
                <param ftype="gtf" name="reference_annotation" value="gffcompare_in3.gtf" />
                <param name="ignore_nonoverlapping_reference" value="Yes" />
                <param name="ignore_nonoverlapping_transfrags" value="No" />
            </conditional>
            <param name="use_seq_data" value="No" />
            <param name="discard_single_exon" value="" />
            <param name="max_dist_exon" value="100" />
            <param name="max_dist_group" value="100" />
            <output file="gffcompare_out1.stats" name="transcripts_stats" lines_diff="6" />
            <output file="gffcompare_out1.loci" name="transcripts_loci" lines_diff="2" />
            <output file="gffcompare_out1.tracking" name="transcripts_tracking" />
            <output file="gffcompare_out1.gtf" name="transcripts_combined" />
            <output_collection name="refmap_output" type="list" count="2">
                <element name="gffcompare_in1_gtf" file="gffcompare_out1-1.refmap" ftype="tabular" />
                <element name="gffcompare_in2_gtf" file="gffcompare_out1-2.refmap" ftype="tabular" />
            </output_collection>
            <output_collection name="tmap_output" type="list" count="2">
                <element name="gffcompare_in1_gtf" file="gffcompare_out1-1.tmap" ftype="tabular" />
                <element name="gffcompare_in2_gtf" file="gffcompare_out1-2.tmap" ftype="tabular" />
            </output_collection>
        </test>
        <!-- Single query GTF against a reference annotation, with refmap_tmap explicitly
             emptied. Four outputs are expected: stats, loci, tracking and the annotated GTF;
             no refmap or tmap collection is checked. -->
        <test expect_num_outputs="4">
            <param ftype="gtf" name="gffinputs" value="gffcompare_in4.gtf" />
            <param name="use_ref_annotation" value="Yes" />
            <conditional name="annotation">
                <param ftype="gtf" name="reference_annotation" value="gffcompare_in5.gtf" />
                <param name="ignore_nonoverlapping_reference" value="Yes" />
                <param name="ignore_nonoverlapping_transfrags" value="No" />
                <param name="refmap_tmap" value="" />
            </conditional>
            <param name="use_seq_data" value="No" />
            <param name="discard_single_exon" value="" />
            <param name="max_dist_exon" value="100" />
            <param name="max_dist_group" value="100" />
            <output file="gffcompare_out2.stats" name="transcripts_stats" lines_diff="6" />
            <output file="gffcompare_out2.loci" name="transcripts_loci" />
            <output file="gffcompare_out2.tracking" name="transcripts_tracking" />
            <output file="gffcompare_out2.gtf" name="transcripts_annotated" />
        </test>
    </tests>
    <help>
<![CDATA[
**GffCompare Overview**

GffCompare can be used to:

* compare and evaluate the accuracy of RNA-Seq transcript assemblers (Cufflinks, StringTie).
* collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g. resulting from assembly of different samples)
* classify transcripts from one or multiple GTF/GFF3 files as they relate to reference transcripts provided in an
  annotation file (also in GTF/GFF3 format)

The original form of this program is also distributed as part of the Cufflinks suite, under the name "CuffCompare"
(see manual: http://cole-trapnell-lab.github.io/cufflinks/cuffcompare/). Most of the options and parameters of CuffCompare
are supported by GffCompare, while new features will likely be added to GffCompare in the future.

A notable difference of GffCompare is that when a single query GTF/GFF file is given as input, along with a reference annotation (-r option),
gffcompare switches into "annotation mode" and it generates a .annotated.gtf file instead of the .merged.gtf produced by CuffCompare with the
same parameters. This file has the same general format as CuffCompare's .merged.gtf file (with "class codes" assigned to transcripts as per
their relationship with the matching/overlapping reference transcript), but the original transcript IDs are preserved, so gffcompare can
be used as a simple way of annotating a set of transcripts.

Another important difference is that the input transcripts are no longer discarded when they are found to be "intron redundant", i.e.
contained within other, longer isoforms. CuffCompare had the -G option to prevent collapsing of such intron redundant isoforms into
their longer "containers", but GffCompare has made this the default mode of operation (hence the -G option is no longer needed
and is simply ignored when given).
    ]]>
    </help>
    <!-- Citation by DOI. NOTE(review): this DOI presumably points to the Cufflinks
         publication, which the help text names as the origin of CuffCompare; confirm. -->
    <citations>
        <citation type="doi">10.1038/nbt.1621</citation>
    </citations>
</tool>