comparison rg_rnaStarSolo.xml @ 0:e3c94e2933c9 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgrnastar commit 686574b0392e554b75035a9b79bc919dfda9ab97"
author iuc
date Thu, 15 Aug 2019 01:53:49 -0400
parents
children 3a1253ee137b
comparison
equal deleted inserted replaced
-1:000000000000 0:e3c94e2933c9
1 <tool id="rna_starsolo" name="RNA STARSolo" version="@VERSION@@WRAPPER@" profile="17.01">
2 <description>mapping, demultiplexing and gene quantification for single cell RNA-seq</description>
<!-- Shared definitions are imported from macros.xml. @WRAPPER@ is defined empty here and is appended to @VERSION@ in the version attribute of the tool element. -->
3 <macros>
4 <import>macros.xml</import>
5 <token name="@WRAPPER@"></token>
6 </macros>
<!-- The requirements and stdio macros are not defined in this file (presumably they come from macros.xml; confirm there). -->
7 <expand macro="requirements"/>
8 <expand macro="stdio" />
9
<!-- Cheetah command template that runs STAR in STARsolo Droplet mode. input2 is passed to readFilesIn ahead of input1 (per the inline note, the cDNA read goes first and the barcode read second). The gzip option token is emitted when input1 has a gzipped FASTQ datatype. The cell barcode / UMI layout is hard-coded for the CR2 and CR3 presets and taken from the user's values for custom. @TEMPINDEX@, @REFGENOMEHANDLING@ and @FASTQ_GZ_OPTION@ are tokens defined outside this file (presumably in macros.xml). NOTE(review): only input1's datatype is tested for gzip; input2 is assumed to be compressed the same way. -->
10 <command><![CDATA[
11 @TEMPINDEX@
12 STAR
13 @REFGENOMEHANDLING@
14
15 ## cDNA sequence always goes first, then barcode
16 --readFilesIn
17 '$input2' '$input1'
18 #if $input1.is_of_type('fastq.gz', 'fastqsanger.gz'):
19 @FASTQ_GZ_OPTION@
20 #end if
21
22 ## Droplet is the only mode available for now
23 --soloType Droplet
24
25 ## 1 - check length of barcode, 0 - do not check
26 ## Good for checking custom chemistries
27 --soloBarcodeReadLength 1
28 --soloCBwhitelist '$soloCBwhitelist'
29
30 #if str($solo.params.chemistry) == "CR2":
31 --soloCBstart 1
32 --soloCBlen 16
33 --soloUMIstart 17
34 --soloUMIlen 10
35 #else if str($solo.params.chemistry) == "CR3":
36 --soloCBstart 1
37 --soloCBlen 16
38 --soloUMIstart 17
39 --soloUMIlen 12
40 #else if str($solo.params.chemistry) == "custom":
41 --soloCBstart '$solo.params.soloCBstart'
42 --soloCBlen '$solo.params.soloCBlen'
43 --soloUMIstart '$solo.params.soloUMIstart'
44 --soloUMIlen '$solo.params.soloUMIlen'
45 #end if
46
47 --soloStrand '$solo.soloStrand'
48 --soloFeatures '$solo.soloFeatures'
49 --soloUMIdedup '$solo.soloUMIdedup'
50 ]]></command>
51 <inputs>
<!-- Read inputs. The command template passes input2 first and input1 second to STAR, and STARsolo expects the cDNA read ahead of the barcode read, so input1 holds the barcode (cell barcode + UMI) reads and input2 the cDNA reads. Parameter names are unchanged; only the labels are corrected to match that order. -->
52 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" label="RNA-Seq FASTQ/FASTA file, Barcode reads"/>
53 <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" label="RNA-Seq FASTQ/FASTA file, cDNA reads"/>
<!-- Cell barcode whitelist dataset, handed to STAR's soloCBwhitelist option. -->
54 <param format="txt,tsv" argument="--soloCBwhitelist" type="data" label="RNA-Seq Cell Barcode Whitelist" />
55
56 <expand macro="refgenomehandling" />
<!-- STARsolo settings exposed to the user. The section is named solo, so the command template and the output filters address these values through it. -->
57 <section name="solo" title="Advanced Settings" expanded="true">
<!-- Chemistry preset. CR2 and CR3 take no extra inputs because their barcode/UMI offsets are hard-coded in the command template; custom exposes the four offsets, with defaults equal to the CR2 values. -->
58 <conditional name="params">
59 <param name="chemistry" type="select" label="Configure Chemistry Options">
60 <option value="CR2" selected="true">Cell Ranger v2</option>
61 <option value="CR3">Cell Ranger v3</option>
62 <option value="custom">Custom</option>
63 </param>
64 <when value="CR2" />
65 <when value="CR3" />
66 <when value="custom" >
67 <param argument="--soloCBstart" type="integer" min="1" value="1" label="Cell Barcode Start Base" />
68 <param argument="--soloCBlen" type="integer" min="1" value="16" label="Cell Barcode Length" />
69 <param argument="--soloUMIstart" type="integer" min="1" value="17" label="UMI Start Base" />
70 <param argument="--soloUMIlen" type="integer" min="1" value="10" label="UMI Length" />
71 </when>
72 </conditional>
73 <param argument="--soloStrand" type="select" label="Strandedness of Library" help="Unstranded has no strand information, Forward has the read strand the same as the original RNA molecule, Reverse has the read strand opposite to the original RNA molecule">
74 <option value="Unstranded" />
75 <option value="Forward" selected="true" />
76 <option value="Reverse" />
77 </param>
<!-- Feature type to quantify. The label and help previously duplicated the UMI deduplication text; they now describe the feature choices listed in the options. -->
78 <param argument="--soloFeatures" type="select" label="Collect UMI counts for these genomic features" help="Gene counts reads matching the gene transcript, Splice Junctions counts reads at exon-intron junctions, Full counts all reads overlapping genes' exons and introns">
79 <option value="Gene" selected="true">Gene: Count reads matching the Gene Transcript</option>
80 <option value="SJ" >Splice Junctions: Count reads at exon-intron junctions</option>
81 <option value="GeneFull" >Full: Count all reads overlapping genes' exons and introns</option>
82 </param>
83 <param argument="--soloUMIdedup" type="select" label="UMI deduplication (collapsing) algorithm" help="All has all UMIs with 1 mismatch distance to each other collapsed, Directional follows the 'directional' method given in UMI-tools, None has UMIs with 1 mismatch distance to others not collapsed">
84 <option value="1MM_All" selected="true">All</option>
85 <option value="1MM_Directional" >Directional</option>
86 <option value="1MM_NotCollapsed" >None</option>
87 </param>
88 </section>
89 </inputs>
<!-- Datasets collected from STAR's working directory. The three matrix outputs carry mutually exclusive filters on the soloFeatures value, so a run yields five datasets: log, genes, barcodes, one matrix and the stats summary. -->
90 <outputs>
91 <data format="txt" name="output_log" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out">
92 <expand macro="dbKeyActions" />
93 </data>
94 <data format="tsv" name="output_genes" label="${tool.name} on ${on_string}: Genes" from_work_dir="Solo.out/genes.tsv" />
95 <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes" from_work_dir="Solo.out/barcodes.tsv" />
96 <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Gene Counts" from_work_dir="Solo.out/matrix.mtx" >
97 <filter>solo['soloFeatures'] == "Gene" </filter>
98 </data>
99 <data format="mtx" name="output_matrixSJ" label="${tool.name} on ${on_string}: Matrix Splice Junction Counts" from_work_dir="Solo.out/matrixSJ.mtx" >
100 <filter>solo['soloFeatures'] == "SJ" </filter>
101 </data>
102 <data format="mtx" name="output_matrixGeneFull" label="${tool.name} on ${on_string}: Matrix Full Gene Counts" from_work_dir="Solo.out/matrixGeneFull.mtx" >
103 <filter>solo['soloFeatures'] == "GeneFull" </filter>
104 </data>
<!-- NOTE(review): the stats output always reads Solo.out/Gene.stats, whatever soloFeatures is set to; confirm STAR writes that file for the SJ and GeneFull choices. -->
105 <data format="txt" name="output_stats" label="${tool.name} on ${on_string}: Feature Statistic Summaries" from_work_dir="Solo.out/Gene.stats" />
106 </outputs>
107 <tests>
<!-- Test 1: CR2 chemistry preset with Gene features and 1MM_All deduplication. The reference genome is taken from the history (SNORD83B.22.fa plus its GTF annotation). Checks the genes table, the gene-count matrix and two lines of the stats summary. -->
108 <test expect_num_outputs="5">
109 <param name="input1" value="41737_R1.fastq.sub240k.gz" ftype="fastqsanger.gz" />
110 <param name="input2" value="41737_R2.fastq.sub240k.gz" ftype="fastqsanger.gz" />
111 <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" />
112 <conditional name="refGenomeSource">
113 <param name="geneSource" value="history" />
114 <param name="genomeFastaFiles" value="SNORD83B.22.fa" />
115 <param name="genomeSAindexNbases" value="5" />
116 <conditional name="GTFconditional">
117 <param name="GTFselect" value="with-gtf" />
118 <param name="sjdbOverhang" value="75"/>
119 <param name="sjdbGTFfile" value="SNORD83B.22.gtf" ftype="gtf"/>
120 </conditional>
121 </conditional>
122 <section name="solo" >
123 <conditional name="params">
124 <param name="chemistry" value="CR2" />
125 </conditional>
126 <param name="soloStrand" value="Forward" />
127 <param name="soloFeatures" value="Gene" />
128 <param name="soloUMIdedup" value="1MM_All" />
129 </section>
130 <output name="output_genes">
131 <assert_contents>
132 <has_line_matching expression="ENSG00000209480\sSNORD83B" />
133 </assert_contents>
134 </output>
135 <output name="output_matrix" >
136 <assert_contents>
137 <has_line_matching expression="1\s137281\s0" />
138 </assert_contents>
139 </output>
140 <output name="output_stats" >
141 <assert_contents>
142 <has_line_matching expression="\s+nNoFeature\s+3253" />
143 <has_line_matching expression="\s+nUMIs\s+0" />
144 </assert_contents>
145 </output>
146 </test>
<!-- Test 2: custom chemistry using the same offsets as the CR2 preset (cell barcode 1/16, UMI 17/10), with GeneFull features and 1MM_Directional deduplication. Same read files and history reference as the first test; only the barcodes output is checked, for two expected barcode lines. -->
147 <test expect_num_outputs="5">
148 <param name="input1" value="41737_R1.fastq.sub240k.gz" ftype="fastqsanger.gz" />
149 <param name="input2" value="41737_R2.fastq.sub240k.gz" ftype="fastqsanger.gz" />
150 <param name="soloCBwhitelist" value="737K-august-2016.small.txt.gz" />
151 <conditional name="refGenomeSource">
152 <param name="geneSource" value="history" />
153 <param name="genomeFastaFiles" value="SNORD83B.22.fa" />
154 <param name="genomeSAindexNbases" value="5" />
155 <conditional name="GTFconditional">
156 <param name="GTFselect" value="with-gtf" />
157 <param name="sjdbOverhang" value="75" />
158 <param name="sjdbGTFfile" value="SNORD83B.22.gtf" ftype="gtf"/>
159 </conditional>
160 </conditional>
161 <section name="solo" >
162 <conditional name="params">
163 <param name="chemistry" value="custom" />
164 <param name="soloCBstart" value="1" />
165 <param name="soloCBlen" value="16" />
166 <param name="soloUMIstart" value="17" />
167 <param name="soloUMIlen" value="10" />
168 </conditional>
169 <param name="soloStrand" value="Forward" />
170 <param name="soloFeatures" value="GeneFull" />
171 <param name="soloUMIdedup" value="1MM_Directional" />
172 </section>
173 <output name="output_barcodes" >
174 <assert_contents>
175 <has_line line="TTTGTCATCTTAGAGC" />
176 <has_line line="TTTGTCATCTTTCCTC" />
177 </assert_contents>
178 </output>
179 </test>
180 </tests>
181 <help><![CDATA[
182 **What it does**
183
184 **STARsolo** is a turnkey solution for analyzing droplet single cell RNA sequencing data (e.g. 10X Genomics Chromium System) built directly into STAR code. STARsolo takes the raw FASTQ read files as input and performs the following operations:
185
186 * Error correction and demultiplexing of cell barcodes using user-input whitelist
187 * Mapping the reads to the reference genome using the standard STAR spliced read alignment algorithm
188 * Error correction and collapsing (deduplication) of Unique Molecular Identifiers (UMIs)
189 * Quantification of per-cell gene expression by counting the number of reads per gene
190
191 STARsolo output is designed to be a drop-in replacement for 10X CellRanger gene quantification output. It follows CellRanger logic for cell barcode whitelisting and UMI deduplication, and produces nearly identical gene counts in the same format. At the same time STARsolo is 10 times faster than CellRanger.
192
193 ]]></help>
194 <expand macro="citations"/>
195 </tool>