view cuffnorm_wrapper.xml @ 4:6cbfede05833 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/cufflinks/cuffnorm commit a0b0845a9d1b3e7ecdeacd1e606133617e3918bd"
author iuc
date Tue, 16 Jun 2020 13:01:30 -0400
parents f5b2ee725d34
children 9a854107dbb2
line wrap: on
line source

<tool id="cuffnorm" name="Cuffnorm" version="@VERSION@.2">
    <!-- Short summary shown next to the tool name. -->
    <description>Create normalized expression levels</description>
    <!-- Shared definitions are imported from cuff_macros.xml. The "requirements",
         "condition_inputs" and "citations" macros and the @...@ tokens used in this
         file are not defined locally, so they are presumably provided by that import. -->
    <macros>
      <import>cuff_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Version probe: cuffnorm is run without arguments, stderr is merged into
         stdout, and only the first line of the combined output is kept. -->
    <version_command><![CDATA[cuffnorm 2>&1 | head -n 1]]></version_command>
    <!-- Command template for cuffnorm.
         * The thread count is taken from the GALAXY_SLOTS environment variable with a
           fallback of 4; the dollar sign is backslash-escaped so the expression reaches
           the shell instead of being interpolated by the template.
         * The library normalization method and the output format come straight from
           the select parameters of the same names.
         * Advanced options are emitted only when sAdvanced is "Yes"; the library type
           flag is omitted when the selection is 'auto', and the hits_norm select value
           is itself the command-line flag.
         * The positional arguments are the transcript file followed by whatever
           @CONDITION_SAMPLES@ expands to.
         NOTE(review): @CONDITION_LABELS@ and @CONDITION_SAMPLES@ are not defined in this
         file; presumably they are tokens from cuff_macros.xml. Confirm there. -->
    <command detect_errors="aggressive"><![CDATA[
        cuffnorm
            --no-update-check
            --num-threads="\${GALAXY_SLOTS:-4}"
            --library-norm-method=$library_norm_method
            --output-format=$output_format

            @CONDITION_LABELS@

            ## Set advanced parameters for cufflinks
            #if $advanced_settings.sAdvanced == "Yes":
                #if str($advanced_settings.library_type) != 'auto':
                        --library-type=$advanced_settings.library_type
                #end if
                $advanced_settings.hits_norm
            #end if
            ## Inputs.
            '$gtf_input'

            @CONDITION_SAMPLES@
    ]]></command>
    <inputs>
        <!-- Transcript annotation passed to cuffnorm as its first positional argument. -->
        <param format="gtf,gff3" name="gtf_input" type="data" label="Transcripts" help="A transcript GFF3 or GTF file produced by cufflinks, cuffcompare, or other source."/>

        <!-- NOTE(review): defined outside this file (presumably in cuff_macros.xml);
             expected to supply the per-condition sample parameters. -->
        <expand macro="condition_inputs" />

        <!-- Value is substituted directly into the library normalization option. -->
        <param name="library_norm_method" type="select" label="Library normalization method">
            <option value="geometric" selected="True">geometric</option>
            <option value="classic-fpkm">classic-fpkm</option>
            <option value="quartile">quartile</option>
        </param>

        <!-- Not used on the command line; it only gates the optional
             read group / attribute outputs through their filters. -->
        <param name="include_read_group_files" type="select" label="Include Read_Group/Attribute Datasets"
            help="Read group/attribute datasets provide information on replicates. One of the two is provided, depending on the output format.">
            <option value="No" selected="true">No</option>
            <option value="Yes">Yes</option>
        </param>

        <!-- Selects the output layout and, through the output filters,
             which family of datasets is collected. -->
        <param name="output_format" type="select" label="Output format"
            help="By default, Cuffnorm reports expression levels in the 'simple-table' tab-delimited text files. The program also reports information about your samples and about the genes, transcripts, TSS groups, and CDS groups as tab delimited text files. Note that these files have a different format than the files used by Cuffdiff. However, you can direct Cuffnorm to report its output in the same format as used by Cuffdiff if you wish" >
            <option value="simple-table" selected="True">Simple Table</option>
            <option value="cuffdiff">Cuffdiff format</option>
        </param>

        <!-- Advanced options are only added to the command when "Yes" is chosen. -->
        <conditional name="advanced_settings">
            <param name="sAdvanced" type="select" label="Set Advanced Cuffnorm parameters? ">
                <option value="No" selected="True">No</option>
                <option value="Yes">Yes</option>
            </param>
            <when value="No"></when>
            <when value="Yes">
                <!-- "auto" means no library type flag is passed at all. -->
                <param type="select" name="library_type" label="Library prep used for input reads" help="">
                    <option value="auto" selected="True">Auto Detect</option>
                    <option value="ff-firststrand">ff-firststrand</option>
                    <option value="ff-secondstrand">ff-secondstrand</option>
                    <option value="ff-unstranded">ff-unstranded</option>
                    <option value="fr-firststrand">fr-firststrand</option>
                    <option value="fr-secondstrand">fr-secondstrand</option>
                    <option value="fr-unstranded" >fr-unstranded</option>
                    <option value="transfrags">transfrags</option>
                </param>
                <!-- The option values are the literal command-line flags. -->
                <param name="hits_norm" type="select" label="Hits included in normalization" help="All Hits: With this option, Cufflinks counts all fragments, including those not compatible with any reference transcript, towards the number of mapped fragments used in the FPKM denominator. Compatible Hits: With this option, Cufflinks counts only those fragments compatible with some reference transcript towards the number of mapped fragments used in the FPKM denominator. Using this mode is generally recommended in Cuffdiff to reduce certain types of bias caused by differential amounts of ribosomal reads which can create the impression of falsely differentially expressed genes. It is active by default." >
                    <option value="--compatible-hits-norm" selected="True">Compatible Hits</option>
                    <option value="--total-hits-norm">All Hits</option>
                </param>
            </when>
        </conditional>
    </inputs>

    <outputs>
        <!-- Every output is collected from the job working directory via from_work_dir
             and is shown only when all of its filters evaluate to true. Output names
             must be unique within the tool: the simple-table attribute outputs use the
             "_attr" suffix so they no longer collide with the cuffdiff "_read_group"
             outputs. -->

        <!-- Optional read group datasets (cuffdiff format). -->
        <data format="tabular" name="isoforms_read_group" label="${tool.name} on ${on_string}: isoforms read group tracking" from_work_dir="isoforms.read_group_tracking" >
            <filter>(include_read_group_files == 'Yes')</filter>
            <filter>(output_format == 'cuffdiff')</filter>
        </data>
        <data format="tabular" name="genes_read_group" label="${tool.name} on ${on_string}: genes read group tracking" from_work_dir="genes.read_group_tracking" >
            <filter>(include_read_group_files == 'Yes')</filter>
            <filter>(output_format == 'cuffdiff')</filter>
        </data>
        <data format="tabular" name="cds_read_group" label="${tool.name} on ${on_string}: CDs read group tracking" from_work_dir="cds.read_group_tracking" >
            <filter>(include_read_group_files == 'Yes')</filter>
            <filter>(output_format == 'cuffdiff')</filter>
        </data>
        <data format="tabular" name="tss_groups_read_group" label="${tool.name} on ${on_string}: TSS groups read group tracking" from_work_dir="tss_groups.read_group_tracking" >
            <filter>(include_read_group_files == 'Yes')</filter>
            <filter>(output_format == 'cuffdiff')</filter>
        </data>
        <data format="txt" name="read_groups_info" label="${tool.name} on ${on_string}: Read Groups" from_work_dir="read_groups.info" >
            <filter>(include_read_group_files == 'Yes')</filter>
            <filter>(output_format == 'cuffdiff')</filter>
        </data>

        <!-- Optional attribute datasets (simple-table format). -->
        <data format="tabular" name="isoforms_attr" label="${tool.name} on ${on_string}: isoforms attribute table" from_work_dir="isoforms.attr_table" >
            <filter>(include_read_group_files == 'Yes')</filter>
            <filter>(output_format == 'simple-table')</filter>
        </data>
        <data format="tabular" name="genes_attr" label="${tool.name} on ${on_string}: genes attribute table" from_work_dir="genes.attr_table" >
            <filter>(include_read_group_files == 'Yes')</filter>
            <filter>(output_format == 'simple-table')</filter>
        </data>
        <data format="tabular" name="cds_attr" label="${tool.name} on ${on_string}: CDS attribute table" from_work_dir="cds.attr_table" >
            <filter>(include_read_group_files == 'Yes')</filter>
            <filter>(output_format == 'simple-table')</filter>
        </data>
        <data format="tabular" name="tss_groups_attr" label="${tool.name} on ${on_string}: TSS groups attribute table" from_work_dir="tss_groups.attr_table" >
            <filter>(include_read_group_files == 'Yes')</filter>
            <filter>(output_format == 'simple-table')</filter>
        </data>

        <!-- Cuffdiff format datasets. -->
        <data format="tabular" name="cds_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM tracking" from_work_dir="cds.fpkm_tracking" >
            <filter>(output_format == 'cuffdiff')</filter>
        </data>
        <data format="tabular" name="cds_count_tracking" label="${tool.name} on ${on_string}: CDS count tracking" from_work_dir="cds.count_tracking" >
            <filter>(output_format == 'cuffdiff')</filter>
        </data>
        <data format="tabular" name="tss_groups_fpkm_tracking" label="${tool.name} on ${on_string}: TSS groups FPKM tracking" from_work_dir="tss_groups.fpkm_tracking" >
            <filter>(output_format == 'cuffdiff')</filter>
        </data>
        <data format="tabular" name="tss_groups_count_tracking" label="${tool.name} on ${on_string}: TSS groups count tracking" from_work_dir="tss_groups.count_tracking" >
            <filter>(output_format == 'cuffdiff')</filter>
        </data>
        <data format="tabular" name="genes_fpkm_tracking" label="${tool.name} on ${on_string}: gene FPKM tracking" from_work_dir="genes.fpkm_tracking" >
            <filter>(output_format == 'cuffdiff')</filter>
        </data>
        <data format="tabular" name="genes_count_tracking" label="${tool.name} on ${on_string}: gene count tracking" from_work_dir="genes.count_tracking" >
            <filter>(output_format == 'cuffdiff')</filter>
        </data>
        <data format="tabular" name="isoforms_fpkm_tracking" label="${tool.name} on ${on_string}: transcript FPKM tracking" from_work_dir="isoforms.fpkm_tracking" >
            <filter>(output_format == 'cuffdiff')</filter>
        </data>
        <data format="tabular" name="isoforms_count_tracking" label="${tool.name} on ${on_string}: transcript count tracking" from_work_dir="isoforms.count_tracking" >
            <filter>(output_format == 'cuffdiff')</filter>
        </data>

        <!-- Simple-table format datasets. -->
        <data format="tabular" name="cds_fpkm_table" label="${tool.name} on ${on_string}: CDS FPKM table" from_work_dir="cds.fpkm_table" >
            <filter>(output_format == 'simple-table')</filter>
        </data>
        <data format="tabular" name="cds_count_table" label="${tool.name} on ${on_string}: CDS count table" from_work_dir="cds.count_table" >
            <filter>(output_format == 'simple-table')</filter>
        </data>
        <data format="tabular" name="tss_groups_fpkm_table" label="${tool.name} on ${on_string}: TSS groups FPKM table" from_work_dir="tss_groups.fpkm_table" >
            <filter>(output_format == 'simple-table')</filter>
        </data>
        <data format="tabular" name="tss_groups_count_table" label="${tool.name} on ${on_string}: TSS groups count table" from_work_dir="tss_groups.count_table" >
            <filter>(output_format == 'simple-table')</filter>
        </data>
        <data format="tabular" name="genes_fpkm_table" label="${tool.name} on ${on_string}: gene FPKM table" from_work_dir="genes.fpkm_table" >
            <filter>(output_format == 'simple-table')</filter>
        </data>
        <data format="tabular" name="genes_count_table" label="${tool.name} on ${on_string}: gene count table" from_work_dir="genes.count_table" >
            <filter>(output_format == 'simple-table')</filter>
        </data>
        <data format="tabular" name="isoforms_fpkm_table" label="${tool.name} on ${on_string}: transcript FPKM table" from_work_dir="isoforms.fpkm_table" >
            <filter>(output_format == 'simple-table')</filter>
        </data>
        <data format="tabular" name="isoforms_count_table" label="${tool.name} on ${on_string}: transcript count table" from_work_dir="isoforms.count_table" >
            <filter>(output_format == 'simple-table')</filter>
        </data>
    </outputs>
    <tests>
        <!-- Two-condition run (one SAM file each). Neither output_format nor
             include_read_group_files is set, so the test relies on their defaults
             (simple-table, No) and expects exactly the eight simple-table
             expression tables. -->
        <test expect_num_outputs="8">
            <param name="gtf_input" ftype="gtf" value="cuffcompare_out5.gtf"/>
            <conditional name="in_type">
                <param name="set_in_type" value="BAM"/>
                <repeat name="conditions">
                    <param name="name" value="in1"/>
                    <param name="samples" ftype="sam" value="cuffdiff_in1.sam"/>
                </repeat>
                <repeat name="conditions">
                    <param name="name" value="in2"/>
                    <param name="samples" ftype="sam" value="cuffdiff_in2.sam"/>
                </repeat>
            </conditional>
            <!-- CDS level -->
            <output name="cds_fpkm_table" ftype="tabular" value="cds.fpkm_table"/>
            <output name="cds_count_table" ftype="tabular" value="cds.count_table"/>
            <!-- TSS group level -->
            <output name="tss_groups_fpkm_table" ftype="tabular" value="tss_groups.fpkm_table"/>
            <output name="tss_groups_count_table" ftype="tabular" value="tss_groups.count_table"/>
            <!-- Gene level -->
            <output name="genes_fpkm_table" ftype="tabular" value="genes.fpkm_table"/>
            <output name="genes_count_table" ftype="tabular" value="genes.count_table"/>
            <!-- Transcript level -->
            <output name="isoforms_fpkm_table" ftype="tabular" value="isoforms.fpkm_table"/>
            <output name="isoforms_count_table" ftype="tabular" value="isoforms.count_table"/>
        </test>
    </tests>

    <help>
**Cuffnorm Overview**

Cuffnorm is part of Cufflinks_. Running Cuffnorm is very similar to running Cuffdiff. Cuffnorm takes a GTF2/GFF3 file of transcripts as input, along with two or more SAM, BAM, or CXB files for two or more samples. It produces a number of output files that contain expression levels and normalized fragment counts at the level of transcripts, primary transcripts, and genes. It also tracks changes in the relative abundance of transcripts sharing a common transcription start site, and in the relative abundances of the primary transcripts of each gene. Tracking the former allows one to see changes in splicing, and the latter lets one see changes in relative promoter use within a gene. Please cite: Trapnell C, Williams BA, Pertea G, Mortazavi AM, Kwan G, van Baren MJ, Salzberg SL, Wold B, Pachter L. Transcript assembly and abundance estimation from RNA-Seq reveals thousands of new transcripts and switching among isoforms. Nature Biotechnology doi:10.1038/nbt.1621

.. _Cufflinks: http://cole-trapnell-lab.github.io/cufflinks/

------

**Know what you are doing**

.. class:: warningmark

There is no such thing (yet) as an automated gearshift in expression analysis. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.

.. __: http://cole-trapnell-lab.github.io/cufflinks/cuffnorm/

------

**Input format**

Cuffnorm takes a Cufflinks or Cuffcompare GTF file as input, along with two or more SAM, BAM, or CXB files containing the fragment alignments for two or more samples.

------

**Outputs**

Cuffnorm outputs a set of files containing normalized expression levels for each gene, transcript, TSS group, and CDS group in the experiment. It does not perform differential expression analysis. To assess the significance of changes in expression for genes and transcripts between conditions, use Cuffdiff. Cuffnorm's output files are useful when you have many samples and you simply want to cluster them or plot expression levels of genes important in your study.
By default, Cuffnorm reports expression levels in the "simple-table" tab-delimited text files. The program also reports information about your samples and about the genes, transcripts, TSS groups, and CDS groups as tab delimited text files. Note that these files have a different format than the files used by Cuffdiff. However, you can direct Cuffnorm to report its output in the same format as used by Cuffdiff if you wish. Simply supply the option --output-format cuffdiff at the command line.
Cuffnorm will report both FPKM values and normalized estimates of the number of fragments that originate from each gene, transcript, TSS group, and CDS group. Note that because these counts are already normalized to account for differences in library size, they should not be used with downstream differential expression tools that require raw counts as input.
To see the details of the simple table format used by Cuffnorm, refer to the simple table expression format, simple table sample attribute format, and simple table feature (e.g. gene) attribute format sections below. 
    
-------

**Settings**

All of the options have a default value. You can change any of them. Most of the options in Cuffnorm have been implemented here.

------

**Cuffnorm parameter list**

This is a list of implemented Cuffnorm options::

  --library-norm-method          Library Normalization method : Geometric (default), classic-fpkm, quartile
  --library-type                 ff-firststrand,ff-secondstrand,ff-unstranded,fr-firststrand,fr-secondstrand,fr-unstranded,transfrags
  --compatible-hits-norm         With this option, Cufflinks counts only those fragments compatible with some reference transcript towards the number of mapped fragments used in the FPKM denominator. Using this mode is generally recommended in Cuffdiff to reduce certain types of bias caused by differential amounts of ribosomal reads which can create the impression of falsely differentially expressed genes.
  --total-hits-norm              With this option, Cufflinks counts all fragments, including those not compatible with any reference transcript, towards the number of mapped fragments used in the FPKM denominator
    </help>
    <expand macro="citations"/>
</tool>