view sm_STAR2_V2.xml @ 1:e8dbc8b9a59a draft

Uploaded
author sarahinraauzeville
date Tue, 12 Dec 2017 10:08:21 -0500
parents
children
line wrap: on
line source

<!--# Copyright (C) 2014 INRA
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see http://www.gnu.org/licenses/.
#-->
<tool id="sm_star_single_V2" name="Map with STAR 2.4.0i">
    <description> with GTF and reference</description>
          <!-- Cheetah template that builds the sm_STAR2_V2.pl command line.
               Parameters declared inside a conditional element must be referenced
               through the conditional's name (ref.*, reads.*); the paired-read
               datasets are therefore addressed as reads.Read1fastqgz and
               reads.Read2fastqgz, like reads.inputread in the single-read branch.
               Do not place XML comments inside the command element itself. -->
          <command interpreter="perl">sm_STAR2_V2.pl
     --runThreadN $Nthreads
     --refselector $ref.ref_selector
     #if $ref.ref_selector == "genotoul":
         --genomeDir $ref.reffile.fields.path
     #else:
         --refownfastaref $ref.ownfastaref
         --refowngtf $ref.owngtf
     #end if
     --readsselector $reads.reads_selector
     #if $reads.reads_selector == "paired":
         --readFilesIn1 $reads.Read1fastqgz
         --readFilesIn2 $reads.Read2fastqgz
     #else:
         --readsinputread $reads.inputread
     #end if
     --compress $compress
     --alignIntronMin $alignIntronMin
     --alignIntronMax $alignIntronMax
     --outFilterMismatchNmax $outFilterMismatchNmax
     --orientation $orientation
     --cufflinks $cufflinks
     --outputfile $outputfile
     --outputfileT $outputfileT
     --outputlogSJ $outputlogSJ
     --outputlogfinal $outputlogfinal
          </command>
                  <inputs>
                      <!-- Read input: two gzipped FASTQ datasets for paired reads, or one FASTQ dataset for single reads. -->
                      <conditional name="reads">
                          <param name="reads_selector" type="select" label="Paired or single reads">
                              <option value="paired">Paired reads</option>
                              <option value="single">Single reads</option>
                          </param>
                          <when value="paired">
                              <param format="fastq.gz" name="Read1fastqgz" type="data" label="First input fastq gzipped file (read1.fastq.gz)"/>
                              <param format="fastq.gz" name="Read2fastqgz" type="data" label="Second input fastq gzipped file (read2.fastq.gz)"/>
                          </when>
                          <when value="single">
                              <param format="fastq, fastqsanger, fastqillumina" name="inputread" type="data" label="Your single read RNA-Seq FASTQ file"/>
                          </when>
                      </conditional>

                      <!-- Tells the wrapper script whether the FASTQ input is compressed. -->
                      <param name="compress" type="select" help="fastq files are compressed or not" label="compressed fastq file">
                          <option value="compress">Yes, compressed</option>
                          <option value="notcompress">Not compressed</option>
                      </param>

                      <!-- Numeric settings are typed as integers so that non-numeric input is
                           rejected in the form instead of being passed to the command line. -->
                      <param name="Nthreads" size="30" type="integer" min="1" value="8" label="Threads number"/>
                      <param name="alignIntronMin" size="30" type="integer" min="0" value="20" label="alignIntronMin"/>
                      <param name="alignIntronMax" size="30" type="integer" min="0" value="1000000" label="alignIntronMax"/>
                      <param name="outFilterMismatchNmax" size="30" type="integer" min="0" value="10" label="outFilterMismatchNmax"/>
                      <param name="orientation" type="select" help="Instead, you need to run Cufflinks with the library option --library-type options. For example, cufflinks -library-type fr-firststrand should be used for the “standard” dUTP protocol. This option has to be used only for Cufflinks runs and not for STAR runs." label="RNAseq oriented (default : oriented for STAR - Option to be set only for cufflinks runs)">
                          <option value="Yes">Yes</option>
                          <option value="No">No</option>
                      </param>

                      <!-- Reference: a prebuilt index from the STAR_indexes data table, or the user's own FASTA and GTF datasets. -->
                      <conditional name="ref">
                          <param name="ref_selector" type="select" label="Genotoul reference genome or your own fasta file">
                              <option value="genotoul">Genotoul reference genome</option>
                              <option value="ownfasta">Your own fasta file</option>
                          </param>
                          <when value="ownfasta">
                              <param format="fasta, fa" name="ownfastaref" type="data" label="Your own reference genome"/>
                              <param format="gtf" name="owngtf" type="data" label="Your own GTF file"/>
                          </when>
                          <when value="genotoul">
                              <param name="reffile" type="select" label="Using reference genome" help="Select genome from the list">
                                  <options from_data_table="STAR_indexes">
                                      <filter type="sort_by" column="2" />
                                      <validator type="no_options" message="No indexes are available" />
                                  </options>
                                  <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
                              </param>
                          </when>
                      </conditional>

                      <!-- Value is forwarded to the wrapper script as its cufflinks option. -->
                      <param name="cufflinks" type="select" label="For Cufflinks-like strand field flag and types of quantification requested">
                          <option value="cuff">Yes</option>
                          <option value="nocuff">No</option>
                      </param>
                  </inputs>
                  <!-- Output datasets; each name is handed to the wrapper script as the
                       matching output option on the command line. Keep labels as short as possible. -->
                  <outputs>
                      <data name="outputfile" format="bam" label="Aligned.sortedByCoord.out.bam"/>
                      <data name="outputfileT" format="bam" label="Aligned.toTranscriptome.out.bam"/>
                      <data name="outputlogSJ" format="txt" label="SJ.out.tab"/>
                      <data name="outputlogfinal" format="txt" label="Log.final.out"/>
                  </outputs>
  <help>

.. class:: infomark

 What it does : STAR aligns RNA-seq reads to a reference genome using uncompressed suffix arrays.

.. class:: warningmark

**The command line changes depending on whether the RNA-seq reads are oriented or not.**


Command line :  
  
STAR --readFilesIn R1.fastq R2.fastq --genomeDir /path/to/STARindex/ --sjdbGTFfile ref.gtf  --alignIntronMin 20 --alignIntronMax 1000000 --outFilterMismatchNmax 10 --outSAMtype BAM SortedByCoordinate --runThreadN 4 --outFileNamePrefix  galaxyName

If fastq input files are compressed, add this option:

--readFilesCommand zcat  for fastq.gz files.

For Cufflinks-like strand field flag and types of quantification requested, add these options:

--outSAMstrandField intronMotif --outFilterIntronMotifs RemoveNoncanonical --outFilterType BySJout --quantMode TranscriptomeSAM  



**Parameters**
    
--readFilesIn : name(s) (with path) of the files containing the sequences to be mapped (e.g. RNA-seq FASTQ files). If using Illumina paired-end reads, the read1 and read2 files have to be supplied. STAR can process both FASTA and FASTQ files. Multi-line (i.e. sequence split in multiple lines) FASTA files are supported.

--sjdbGTFfile : specifies the path to the file with annotated transcripts in the standard GTF format. STAR will extract splice junctions from this file and use them to greatly improve accuracy of the mapping. While this is optional, and STAR can be run without annotations, using annotations is highly recommended whenever they are available.


**If fastq input files are compressed :**

--readFilesCommand : UncompressionCommand option, where UncompressionCommand is the un-compression command that takes the file name as input parameter, and sends the uncompressed output to stdout. For example, for gzipped files (*.gz) use --readFilesCommand zcat OR --readFilesCommand gzip -c. For bzip2-compressed files, use --readFilesCommand bzip2 -c


**Other parameters**

--alignIntronMin (default: 21) : minimum intron size: genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion

--alignIntronMax (default: 0) : maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins

--outFilterMismatchNmax (default: 10) : alignment will be output only if it has fewer mismatches than this value.

--outSAMtype BAM SortedByCoordinate : output sorted by coordinate Aligned.sortedByCoord.out.bam file, similar to samtools sort command.

--outFileNamePrefix : output files name prefix (including full or relative path). Can only be defined on the command line.


**For Cufflinks-like strand field flag**

--outSAMstrandField intronMotif : strand derived from the intron motif. Reads with inconsistent and/or non-canonical introns are filtered out.

In addition, it is recommended to remove the non-canonical junctions for Cufflinks runs using --outFilterIntronMotifs RemoveNoncanonical.

--outFilterType BySJout  : keep only those reads that contain junctions that passed filtering into SJ.out.tab


**Types of quantification requested**

--quantMode TranscriptomeSAM  : output SAM/BAM alignments to transcriptome into a separate file


**job**

--runThreadN option defines the number of threads to be used for genome generation; it has to be set to the number of available cores on the server node.



----

Version Galaxy Tool : V2.0

Versions of bioinformatics tools used :
    
    A. Dobin et al, Bioinformatics 2012; doi: 10.1093/bioinformatics/bts635
    
    "STAR: ultrafast universal RNA-seq aligner" 
    
    Important announcements from the author: https://groups.google.com/d/forum/rna-star-announce
    
    General user mailing list (recommended): https://groups.google.com/d/forum/rna-star
    
    Email: dobin@cshl.edu 
    
    STAR_2.4.0i

----

Contacts (names and emails) : sigenae-support@listes.inra.fr

E-learning available : Not yet.

Please cite :
    
    Depending on the help provided you can cite us in acknowledgements, references or both.
    
    Examples :
    Acknowledgements
    We wish to thank the SIGENAE group for ....
    
    References
    X. SIGENAE [http://www.sigenae.org/]

  </help>
</tool>