view trinityrnaseq.xml @ 4:f98f80675d60 draft

Corrected perl module name and improved output handling for better performance from Phil Blood.
author nate
date Fri, 10 Mar 2017 10:42:24 -0500
parents 3d072b99688e
children 1de64ac61b4e
line wrap: on
line source

<tool id="trinity_psc" name="Trinity" version="0.0.1">

    <!-- Written by Jeremy Goecks, now maintained here by bhaas and additional
         modifications by Nate Coraor -->
    <description>(Beta) De novo assembly of RNA-Seq data Using Trinity on PSC's Bridges</description>
    <requirements>
        <!-- Package versions below match the modules installed on Bridges. -->
        <requirement version="1.1.2" type="package">bowtie</requirement>
        <requirement version="1.3" type="package">samtools</requirement>
        <requirement version="jre7" type="package">java</requirement>
        <requirement version="5.18.4-threads" type="package">perl</requirement>
        <requirement version="2.2.0" type="package">trinity</requirement>
    </requirements>
    <command>
        ## Value for --max_max_memory is computed in the shell as
        ## (GALAXY_SLOTS, default 16) * 48 - 16 and handed to Trinity in gigabytes.
        MEM=`expr "\${GALAXY_SLOTS:-16}" \* 48 - 16` ;

        ## Remember the job working directory, then run Trinity from LOCAL
        ## (presumably node-local scratch on Bridges; confirm with the site docs).
        workdir=`pwd`;
        echo "workdir is \$workdir";
        cd \$LOCAL;
        echo "Running Trinity from `pwd`";

        Trinity --max_memory "\${MEM}G"
                --CPU "\${GALAXY_SLOTS:-16}"
                --bflyHeapSpaceMax "32G"
                --bflyHeapSpaceInit "2G"
                --bflyGCThreads "6"

        ## str() keeps these comparisons consistent with the other conditionals below.
        #if str($additional_params.use_additional) == "yes" and str($additional_params.normalize_reads) == "yes":
            --normalize_reads
        #end if

        ## Inputs.
        #if str($inputs.paired_or_single) == "paired":
            --left $inputs.left_input --right $inputs.right_input
            ## Galaxy reports FASTA datasets with the extension "fasta"; "fa" is kept for compatibility.
            #if str($inputs.left_input.ext) in ('fa', 'fasta'):
                --seqType fa
            #else:
                --seqType fq
            #end if
            #if str($inputs.library_type) != "undefined":
                --SS_lib_type $inputs.library_type
            #end if
            --group_pairs_distance $inputs.group_pairs_distance
        #else:
            --single $inputs.input
            #if str($inputs.input.ext) in ('fa', 'fasta'):
                --seqType fa
            #else:
                --seqType fq
            #end if
            #if str($inputs.library_type) != "undefined":
                --SS_lib_type $inputs.library_type
            #end if
        #end if

        ## Defined in both branches of the inputs conditional, so it is always available here.
        --path_reinforcement_distance $inputs.path_reinforcement_distance

        ## Additional parameters.
        #if str($additional_params.use_additional) == "yes":
            --min_kmer_cov $additional_params.min_kmer_cov
            --max_reads_per_graph $additional_params.max_reads_per_graph
            --min_contig_length $additional_params.min_contig_length
            ## "None" is the form default and means that no Butterfly options were given.
            #if str($additional_params.bfly_opts).strip() not in ('', 'None'):
                --bfly_opts " $additional_params.bfly_opts "
            #end if
        #end if

        ## Send everything Trinity prints to the log dataset and keep its exit status.
        > $trinity_log 2>&amp;1 ;
        ec=\$? ;

        if [ "\$ec" -ne 0 ] ; then
            ## Trinity failed: copy the log back, replay it on stderr for Galaxy,
            ## and touch the expected output file for Pulsar.
            cp -p $trinity_log \$workdir ;
            cat $trinity_log >&amp;2 ;
            mkdir -p \$workdir/trinity_out_dir ;
            touch \$workdir/trinity_out_dir/Trinity.fasta ;
        else
            ## Trinity succeeded: copy the assembly files back to the job working directory.
            mkdir -p \$workdir/trinity_out_dir ;
            cp -p trinity_out_dir/Trinity* \$workdir/trinity_out_dir ;
        fi ;

        cd \$workdir ;

        ## Make the exit status of Trinity the status of the whole command line
        ## without terminating the surrounding shell.
        (exit "\$ec")
    </command>
    <stdio>
        <!-- Both positive and negative exit codes are treated as fatal. -->
        <exit_code level="fatal" range="1:" description="Program failed" />
        <exit_code level="fatal" range=":-1" description="DRM killed job" />
    </stdio>
    <inputs>
        <!-- Read inputs: the available parameters depend on paired versus single-end data. -->
        <conditional name="inputs">
            <param name="paired_or_single" type="select" label="Paired or Single-end data?">
                <option value="paired">Paired</option>
                <option value="single">Single</option>
            </param>
            <when value="paired">
                <param format="fasta,fastq" name="left_input" type="data" label="Left/Forward strand reads" help=""/>
                <param format="fasta,fastq" name="right_input" type="data" label="Right/Reverse strand reads" help=""/>
                <!-- "undefined" means that no strand-specific library type is set. -->
                <param name="library_type" type="select" label="Strand-specific Library Type">
                    <option value="undefined">Not set</option>
                    <option value="FR">FR</option>
                    <option value="RF">RF</option>
                </param>
                <param name="group_pairs_distance" type="integer" value="500" min="1" label="Group pairs distance" help="Maximum length expected between fragment pairs"/>
                <param name="path_reinforcement_distance" type="integer" value="75" min="1" label="Path reinforcement distance" help="Minimum read overlap required for path extension in the graph" />
            </when>
            <when value="single">
                <param format="fasta,fastq" name="input" type="data" label="Single-end reads" help=""/>
                <!-- "undefined" means that no strand-specific library type is set. -->
                <param name="library_type" type="select" label="Strand-specific Library Type">
                    <option value="undefined">Not set</option>
                    <option value="F">F</option>
                    <option value="R">R</option>
                </param>
                <param name="path_reinforcement_distance" type="integer" value="40" min="1" label="Path reinforcement distance" help="Minimum read overlap required for path extension in the graph" />
            </when>
        </conditional>

        <!-- Optional tuning parameters, shown only when the user selects "Yes". -->
        <conditional name="additional_params">
            <param name="use_additional" type="select" label="Use Additional Params?">
                <option value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no">
            </when>
            <when value="yes">
                <param name="min_kmer_cov" type="integer" value="1" min="1" label="inchworm_min_kmer_cov" help="Minimum kmer coverage required by Inchworm for initial contig construction" />
                <param name="max_reads_per_graph" type="integer" value="20000000" min="10000" label="chrysalis_max_reads_per_graph" help="Maximum number of reads to be anchored within each transcript graph by Chrysalis" />
                <!-- The default text "None" stands for "no Butterfly options". -->
                <param name="bfly_opts" type="text" value="None" label="bfly_opts" help="Options to pass on to Butterfly" />
                <param name="min_contig_length" type="integer" value="200" min="1" label="Minimum Contig Length" help=""/>
                <!-- A label is given so the checkbox is identified in the form like every other parameter. -->
                <param name="normalize_reads" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Normalize reads" help="(--normalize_reads) Normalize reads, can decrease runtime and memory requirements for datasets exceeding 300M pairs"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Log dataset; the command line redirects Trinity's stdout and stderr into it. -->
        <data name="trinity_log" format="txt" label="${tool.name} on ${on_string}: log" />
        <!-- Assembly collected from trinity_out_dir/Trinity.fasta in the job working directory. -->
        <data name="assembled_transcripts" format="fasta" from_work_dir="trinity_out_dir/Trinity.fasta" label="${tool.name} on ${on_string}: Assembled Transcripts"/>
    </outputs>
    <tests>
        <!-- No functional tests are defined for this tool. -->
    </tests>
    <help>
Trinity is a de novo transcript assembler that uses RNA-seq data as input. This tool runs all Trinity_ commands--Inchworm, Chrysalis, and Butterfly--in a single pass.  This version of Trinity runs on Bridges_ at the `Pittsburgh Supercomputing Center`_ using a version of Trinity 2.2.0 optimized for the unique memory profile of that system.
        
.. _Trinity: http://trinityrnaseq.github.io
.. _Pittsburgh Supercomputing Center: http://www.psc.edu
.. _Bridges: http://www.psc.edu/bridges 
    </help>
</tool>