comparison sm_STAR2_V2.xml @ 1:e8dbc8b9a59a draft

Uploaded
author sarahinraauzeville
date Tue, 12 Dec 2017 10:08:21 -0500
parents
children
comparison
equal deleted inserted replaced
0:1a0416feb74b 1:e8dbc8b9a59a
1 <!--# Copyright (C) 2014 INRA
2 # This program is free software: you can redistribute it and/or modify
3 # it under the terms of the GNU General Public License as published by
4 # the Free Software Foundation, either version 3 of the License, or
5 # (at your option) any later version.
6 #
7 # This program is distributed in the hope that it will be useful,
8 # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 # GNU General Public License for more details.
11 #
12 # You should have received a copy of the GNU General Public License
13 # along with this program. If not, see http://www.gnu.org/licenses/.
14 #-->
15 <tool id="sm_star_single_V2" name="Map with STAR 2.4.0i">
16 <description> with GTF and reference</description>
17 <command interpreter="perl">sm_STAR2_V2.pl
18 --runThreadN $Nthreads
19 --refselector $ref.ref_selector
20 #if $ref.ref_selector =="genotoul":
21 --genomeDir $ref.reffile.fields.path
22 #else:
23 --refownfastaref $ref.ownfastaref
24 --refowngtf $ref.owngtf
25 #end if
26 #if $reads.reads_selector =="paired":
27 --readsselector $reads.reads_selector
28 --readFilesIn1 $reads.Read1fastqgz
29 --readFilesIn2 $reads.Read2fastqgz
30 #else:
31 --readsselector $reads.reads_selector
32 --readsinputread $reads.inputread
33 #end if
34 --compress $compress
35 --alignIntronMin $alignIntronMin
36 --alignIntronMax $alignIntronMax
37 --outFilterMismatchNmax $outFilterMismatchNmax
38 --orientation $orientation
39 --cufflinks $cufflinks
40 --outputfile $outputfile
41 --outputfileT $outputfileT
42 --outputlogSJ $outputlogSJ
43 --outputlogfinal $outputlogfinal
44 </command> <!-- Command template for the sm_STAR2_V2.pl wrapper. Parameters declared inside a conditional are referenced through the conditional name ($reads.NAME, $ref.NAME). -->
45 <inputs>
46 <!-- Read input: two fastq.gz datasets when "paired" is selected, one FASTQ dataset when "single" is selected. -->
47 <conditional name="reads">
48 <param name="reads_selector" label="Paired or single reads" type="select">
49 <option value="paired">Paired reads</option>
50 <option value="single">Single reads</option>
51 </param>
52 <when value="paired">
53 <param name="Read1fastqgz" type="data" format="fastq.gz" label="First input fastq gzipped file (read1.fastq.gz)"/>
54 <param name="Read2fastqgz" type="data" format="fastq.gz" label="Second input fastq gzipped file (read2.fastq.gz)"/>
55 </when>
56 <when value="single">
57 <param name="inputread" type="data" format="fastq, fastqsanger, fastqillumina" label="Your single read RNA-Seq FASTQ file"/>
58 </when>
59 </conditional>
60 <!-- States whether the FASTQ inputs are compressed; the chosen value is passed to the wrapper script as its compress option. -->
61 <param name="compress" type="select" label="compressed fastq file" help="fastq files are compressed or not">
62 <option value="compress">Yes, compressed</option>
63 <option value="notcompress">Not compressed</option>
64 </param>
65 <!-- Numeric options: declared as integers with a lower bound so that non-numeric or negative input is rejected by the form instead of reaching the command line. -->
66
67 <param name="Nthreads" type="integer" value="8" min="1" label="Threads number"/>
68 <param name="alignIntronMin" type="integer" value="20" min="0" label="alignIntronMin"/>
69 <param name="alignIntronMax" type="integer" value="1000000" min="0" label="alignIntronMax"/>
70 <param name="outFilterMismatchNmax" type="integer" value="10" min="0" label="outFilterMismatchNmax"/>
71 <param name="orientation" type="select" label="RNAseq oriented (default : oriented for STAR - Option to be set only for cufflinks runs)" help="Instead, you need to run Cufflinks with the library option --library-type options. For example, cufflinks -library-type fr-firststrand should be used for the “standard” dUTP protocol. This option has to be used only for Cufflinks runs and not for STAR runs.">
72 <option value="Yes">Yes</option>
73 <option value="No">No</option>
74 </param> <!-- Whether the RNA-seq reads are oriented; the chosen value is passed to the wrapper script as its orientation option. -->
75
76
77
78 <!-- Reference: an entry of the STAR_indexes data table ("genotoul"), or a user-supplied FASTA genome together with its GTF file ("ownfasta"). -->
79 <conditional name="ref">
80 <param name="ref_selector" label="Genotoul reference genome or your own fasta file" type="select">
81 <option value="genotoul">Genotoul reference genome</option>
82 <option value="ownfasta">Your own fasta file</option>
83 </param>
84 <when value="ownfasta">
85 <param name="ownfastaref" type="data" format="fasta, fa" label="Your own reference genome"/>
86 <param name="owngtf" type="data" format="gtf" label="Your own GTF file"/>
87 </when>
88 <when value="genotoul">
89 <param name="reffile" type="select" help="Select genome from the list" label="Using reference genome">
90 <options from_data_table="STAR_indexes">
91 <filter column="2" type="sort_by"/>
92 <validator message="No indexes are available" type="no_options"/>
93 </options>
94 <validator message="A built-in reference genome is not available for the build associated with the selected input file" type="no_options"/>
95 </param>
96 </when>
97 </conditional>
98 <!-- Yes/No choice passed to the wrapper script as its cufflinks option (values "cuff" / "nocuff"). -->
99 <param name="cufflinks" label="For Cufflinks-like strand field flag and types of quantification requested" type="select">
100 <option value="cuff">Yes</option>
101 <option value="nocuff">No</option>
102 </param>
103
104
105
106 </inputs>
107 <outputs>
108 <data name="outputfile" format="bam" label="Aligned.sortedByCoord.out.bam"/> <!-- keep the label as short as possible -->
109 <data name="outputfileT" format="bam" label="Aligned.toTranscriptome.out.bam"/> <!-- keep the label as short as possible -->
110 <data name="outputlogSJ" format="txt" label="SJ.out.tab"/>
111 <data name="outputlogfinal" format="txt" label="Log.final.out"/>
112 </outputs> <!-- Each dataset name matches the wrapper-script option that receives its path in the command template. -->
113 <help>
114
115 .. class:: infomark
116
117 What it does : STAR aligns RNA-seq reads to a reference genome using uncompressed suffix arrays.
118
119 .. class:: warningmark
120
121 **The command line changes depending on whether the RNA-seq reads are oriented or not.**
122
123
124 Command line :
125
126 STAR --readFilesIn R1.fastq R2.fastq --genomeDir /path/to/STARindex/ --sjdbGTFfile ref.gtf --alignIntronMin 20 --alignIntronMax 1000000 --outFilterMismatchNmax 10 --outSAMtype BAM SortedByCoordinate --runThreadN 4 --outFileNamePrefix galaxyName
127
128 If fastq input files are compressed, add this option:
129
130 --readFilesCommand zcat for fastq.gz files.
131
132 For Cufflinks-like strand field flag and types of quantification requested, add these options:
133
134 --outSAMstrandField intronMotif --outFilterIntronMotifs RemoveNoncanonical --outFilterType BySJout --quantMode TranscriptomeSAM
135
136
137
138 **Parameters**
139
140 --readFilesIn : name(s) (with path) of the files containing the sequences to be mapped (e.g. RNA-seq FASTQ files). If using Illumina paired-end reads, the read1 and read2 files have to be supplied. STAR can process both FASTA and FASTQ files. Multi-line (i.e. sequence split in multiple lines) FASTA files are supported.
141
142 --sjdbGTFfile : specifies the path to the file with annotated transcripts in the standard GTF format. STAR will extract splice junctions from this file and use them to greatly improve accuracy of the mapping. While this is optional, and STAR can be run without annotations, using annotations is highly recommended whenever they are available.
143
144
145 **If fastq input files are compressed :**
146
147 --readFilesCommand : UncompressionCommand option, where UncompressionCommand is the un-compression command that takes the file name as input parameter, and sends the uncompressed output to stdout. For example, for gzipped files (``*.gz``) use --readFilesCommand zcat OR --readFilesCommand gzip -c. For bzip2-compressed files, use --readFilesCommand bzip2 -c
148
149
150 **Other parameters**
151
152 --alignIntronMin (default: 21) : minimum intron size: genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion
153
154 --alignIntronMax (default: 0) : maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins
155
156 --outFilterMismatchNmax (default: 10) alignment will be output only if it has fewer mismatches than this value.
157
158 --outSAMtype BAM SortedByCoordinate : output sorted by coordinate Aligned.sortedByCoord.out.bam file, similar to samtools sort command.
159
160 --outFileNamePrefix : output files name prefix (including full or relative path). Can only be defined on the command line.
161
162
163 **For Cufflinks-like strand field flag**
164
165 --outSAMstrandField intronMotif : strand derived from the intron motif. Reads with inconsistent and/or non-canonical introns are filtered out.
166
167 In addition, it is recommended to remove the non-canonical junctions for Cufflinks runs using --outFilterIntronMotifs RemoveNoncanonical.
168
169 --outFilterType BySJout : keep only those reads that contain junctions that passed filtering into SJ.out.tab
170
171
172 **Types of quantification requested**
173
174 --quantMode TranscriptomeSAM : output SAM/BAM alignments to transcriptome into a separate file
175
176
177 **job**
178
179 --runThreadN option defines the number of threads to be used for genome generation, it has to be set to the number of available cores on the server node.
180
181
182
183 ---
184
185 Version Galaxy Tool : V2.0
186
187 Versions of bioinformatics tools used :
188
189 A. Dobin et al, Bioinformatics 2012; doi: 10.1093/bioinformatics/bts635
190
191 "STAR: ultrafast universal RNA-seq aligner"
192
193 Important announcements from the author: https://groups.google.com/d/forum/rna-star-announce
194
195 General user mailing list (recommended): https://groups.google.com/d/forum/rna-star
196
197 Email: dobin@cshl.edu
198
199 STAR_2.4.0i
200
201 ---
202
203 Contacts (noms et emails) : sigenae-support@listes.inra.fr
204
205 E-learning available : Not yet.
206
207 Please cite :
208
209 Depending on the help provided you can cite us in acknowledgements, references or both.
210
211 Examples :
212 Acknowledgements
213 We wish to thank the SIGENAE group for ....
214
215 References
216 X. SIGENAE [http://www.sigenae.org/]
217
218 </help>
219 </tool>