Mercurial > repos > bgruening > alevin
comparison alevin.xml @ 0:908a8d400fa2 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/salmon commit 8f432498890670fd03a197bd3d1aa2638d1ff2b3"
author | bgruening |
---|---|
date | Mon, 09 Sep 2019 11:12:51 -0400 |
parents | |
children | e53f19161c59 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:908a8d400fa2 |
---|---|
1 <tool id="alevin" name="Alevin" version="@VERSION@"> | |
2 <description>Quantification and analysis of 3’ tagged-end single-cell sequencing data</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements"/> | |
7 <!-- Builds a salmon index (or uses a cached one), stages the reads as plain FASTQ in the working directory, then runs salmon alevin. NOTE(review): optional.noDedup and optional.minScoreFraction are declared under inputs but are not passed to salmon by this template; confirm whether that is intended. --> <command detect_errors="exit_code"><![CDATA[ | |
8 mkdir ./index | |
9 && mkdir ./output | |
10 #if '${refTranscriptSource.TranscriptSource}' != "indexed": | |
11 && salmon index -i ./index | |
12 --kmerLen '${refTranscriptSource.s_index.kmer}' | |
13 --gencode | |
14 --transcripts '${refTranscriptSource.s_index.fasta}' | |
15 #set $index_path = './index' | |
16 #else | |
17 #set $index_path = $refTranscriptSource.index.fields.path | |
18 #end if | |
19 #if $pairstraight.readselect == 'paired': | |
20 #if $pairstraight.file1.is_of_type("fastq.gz"): | |
21 && cp '${pairstraight.file1}' ./mate1.fastq.gz | |
22 && gunzip ./mate1.fastq.gz | |
23 && cp '${pairstraight.file2}' ./mate2.fastq.gz | |
24 && gunzip ./mate2.fastq.gz | |
25 #else if $pairstraight.file1.is_of_type("fastq.bz2"): | |
26 && cp '${pairstraight.file1}' ./mate1.fastq.bz2 | |
27 && bzip2 -d ./mate1.fastq.bz2 | |
28 && cp '${pairstraight.file2}' ./mate2.fastq.bz2 | |
29 && bzip2 -d ./mate2.fastq.bz2 | |
30 #else: | |
31 && ln -s '${pairstraight.file1}' ./mate1.fastq | |
32 && ln -s '${pairstraight.file2}' ./mate2.fastq | |
33 #end if | |
34 #else: | |
35 #if $pairstraight.unmatedreads.is_of_type("fastq.gz"): | |
36 && cp '${pairstraight.unmatedreads}' ./unmate.fastq.gz | |
37 && gunzip ./unmate.fastq.gz | |
38 #else if $pairstraight.unmatedreads.is_of_type("fastq.bz2"): | |
39 && cp '${pairstraight.unmatedreads}' ./unmate.fastq.bz2 | |
40 && bzip2 -d ./unmate.fastq.bz2 | |
41 #else: | |
42 && ln -s '${pairstraight.unmatedreads}' ./unmate.fastq | |
43 #end if | |
44 #end if | |
45 | |
46 && ln -s '${tgmap}' ./alevinmap.tsv | |
47 && salmon alevin -l | |
48 #if $pairstraight.readselect == 'paired': | |
49 ${pairstraight.orientation}${pairstraight.strandedness} | |
50 -i $index_path | |
51 -1 ./mate1.fastq | |
52 -2 ./mate2.fastq | |
53 #else: | |
54 '${pairstraight.strandedness}' | |
55 -i $index_path | |
56 -r ./unmate.fastq | |
57 #end if | |
58 -o ./output | |
59 -p "\${GALAXY_SLOTS:-4}" | |
60 ${protocol} | |
61 --tgMap ./alevinmap.tsv | |
62 #if $optional.whitelist: | |
63 --whitelist '${optional.whitelist}' | |
64 #end if | |
65 #if $optional.numCellBootstraps: | |
66 --numCellBootstraps '${optional.numCellBootstraps}' | |
67 #end if | |
68 #if $optional.forceCells: | |
69 --forceCells '${optional.forceCells}' | |
70 #end if | |
71 #if $optional.expectCells: | |
72 --expectCells '${optional.expectCells}' | |
73 #end if | |
74 #if $optional.mrna: | |
75 --mrna '${optional.mrna}' | |
76 #end if | |
77 #if $optional.rrna: | |
78 --rrna '${optional.rrna}' | |
79 #end if | |
80 #if $optional.keepCBFraction: | |
81 --keepCBFraction '${optional.keepCBFraction}' | |
82 #end if | |
83 ${optional.dumpBfh} | |
84 ${optional.dumpFeatures} | |
85 ${optional.dumpUmiGraph} | |
86 ${optional.dumpMtx} | |
87 #if $optional.lowRegionMinNumBarcodes: | |
88 --lowRegionMinNumBarcodes '${optional.lowRegionMinNumBarcodes}' | |
89 #end if | |
90 #if $optional.maxNumBarcodes: | |
91 --maxNumBarcodes '${optional.maxNumBarcodes}' | |
92 #end if | |
93 ]]> | |
94 </command> | |
95 <inputs> <!-- Reference index (from macro), read layout, protocol, transcript-to-gene map, and the optional Alevin settings. --> | |
96 <expand macro="index"/> | |
97 <conditional name="pairstraight"> | |
98 <param name="readselect" label="Single or paired-end reads?" type="select"> | |
99 <option value="paired">Paired-end</option> | |
100 <option value="unmated">Single-end</option> | |
101 </param> | |
102 <when value="paired"> | |
103 <param name="file1" type="data" format="fastq,fastq.gz,fastqsanger.gz,fastq.bz2" help="CB+UMI raw sequence file(s)"/> | |
104 <param name="file2" type="data" format="fastq,fastq.gz,fastqsanger.gz,fastq.bz2" help="Read-sequence file(s)"/> | |
105 <expand macro="orient"/> | |
106 <expand macro="stranded"/> | |
107 </when> | |
108 <when value="unmated"> | |
109 <param name="unmatedreads" type="data" format="fastq,fastq.gz,fastqsanger.gz,fastq.bz2" label="Unmated reads files"/> | |
110 <expand macro="stranded"/> | |
111 </when> | |
112 </conditional> | |
113 <param name="protocol" type="select"> | |
114 <option value="--dropseq">DropSeq Single Cell protocol</option> | |
115 <option value="--chromium">10x chromium v2 Single Cell protocol</option> | |
116 <option value="--chromiumV3">10x chromium v3 Single Cell protocol</option> | |
117 <option value="--gemcode">Gemcode v1 Single Cell protocol</option> | |
118 <option value="--celseq">CEL-Seq Single Cell protocol</option> | |
119 <option value="--celseq2">CEL-Seq2 Single Cell protocol</option> | |
120 </param> | |
121 <param name="tgmap" type="data" format="tsv,tabular" label="Transcript to gene map file" help="Tsv with no header, containing two columns mapping each transcript present in the reference to the corresponding gene (the first column is a transcript and the second is the corresponding gene)."/> | |
122 <param name="allout" type="boolean" label="Retrieve all output files" truevalue="Yes" falsevalue="No" checked="false" help="If not selected, all log, info.txt, and json files output by Alevin will not be retrieved"/> | |
123 <section name="optional" title="Optional commands" expanded="false"> | |
124 <param name="whitelist" type="data" format="tsv,tabular" optional="true" label="Whitelist file" help="Explicitly specify whitelist CB for cell detection and CB sequence correction. If not specified, putative CBs are generated."/> | |
125 <param name="noDedup" type="boolean" truevalue="Yes" falsevalue="No" checked="false" help="Causes pipeline to only perform CB correction, then maps the read-sequences to the transcriptome generating the interim data-structure of CB-EqClass-UMI-count. Used in parallel with --dumpBarcodeEq or --dumpBfh for the purposes of obtaining raw information or debugging."/> | |
126 <param name="mrna" type="data" format="tsv" optional="true" help="Single column tsv of mitochondrial genes which are to be used as a feature for CB whitelisting naive Bayes classification."/> | |
127 <param name="rrna" type="data" format="tsv" optional="true" help="Single column tsv of ribosomal genes which are to be used as a feature for CB whitelisting naive Bayes classification."/> | |
128 <param name="dumpBfh" type="boolean" truevalue="--dumpBfh" falsevalue="" checked="false" help="Dumps the full CB-EqClass-UMI-count data-structure for the purposes of allowing raw data analysis and debugging."/> | |
129 <param name="dumpFeatures" type="boolean" truevalue="--dumpFeatures" falsevalue="" checked="false" help="Dumps all features used by the CB classification and their counts at each cell level. Generally, this is used for the purposes of debugging."/> | |
130 <param name="dumpUmiGraph" type="boolean" truevalue="--dumpUmiGraph" falsevalue="" checked="false" help="Dump the per-cell level umi graph"/> | |
131 <param name="dumpMtx" type="boolean" truevalue="--dumpMtx" falsevalue="" checked="false" help="Converts the default binary format of alevin for gene-count matrix into a human readable mtx (matrix market exchange) sparse format."/> | |
132 <param name="forceCells" type="integer" optional="true" help="Explicitly specify the number of cells."/> | |
133 <param name="expectCells" type="integer" optional="true" help="Define a close upper bound on expected number of cells."/> | |
134 <param name="numCellBootstraps" type="integer" optional="true" help="Performs the given number of bootstraps and generates the mean and variance of the count matrix"/> | |
135 <param name="minScoreFraction" type="float" optional="true" help="This value controls the minimum allowed score for a mapping to be considered valid. It matters only when --validateMappings has been passed to Salmon. The maximum possible score for a fragment is ms = read_len * ma (or ms = (left_read_len + right_read_len) * ma for paired-end reads). The argument to --minScoreFraction determines what fraction of the maximum score ms a mapping must achieve to be potentially retained. For a minimum score fraction of f, only mappings with a score greater than (f * ms) will be kept. Mappings with lower scores will be considered as low-quality, and will be discarded."/> | |
136 <param name="keepCBFraction" type="float" optional="true" help="Fraction of cellular barcodes to keep (Between 0 and 1)."/> | |
137 <param name="lowRegionMinNumBarcodes" type="integer" optional="true" help="Minimum number of cell barcodes to use for learning low confidence region (defaults to 200)"/> | |
138 <param name="maxNumBarcodes" type="integer" optional="true" help="Maximum allowable limit to process the cell barcodes. Defaults to 100000"/> | |
139 </section> | |
140 </inputs> | |
141 <outputs> <!-- Each dataset is collected from the job working directory via from_work_dir; filter expressions decide which datasets are created. --> | |
142 <data name="quants_mat.gz" label="quants_mat.gz" format="txt" from_work_dir="output/alevin/quants_mat.gz"> | |
143 <filter>optional["dumpMtx"] != "Yes"</filter> <!-- NOTE(review): dumpMtx is a boolean parameter, so this comparison with the string "Yes" presumably always passes; left unchanged because the test below expects 11 outputs. Confirm the intended behaviour. --> | |
144 </data> | |
145 <data name="quants_mat.mtx.gz" label="quants_mat.mtx.gz" format="mtx" from_work_dir="output/alevin/quants_mat.mtx.gz"/> | |
146 <data name="quants_mat_cols.txt" label="quants_mat_cols.txt" format="txt" from_work_dir="output/alevin/quants_mat_cols.txt"/> | |
147 <data name="quants_mat_rows.txt" label="quants_mat_rows.txt" format="txt" from_work_dir="output/alevin/quants_mat_rows.txt"/> | |
148 <data name="quants_tier_mat.gz" label="quants_tier_mat.gz" format="mtx" from_work_dir="output/alevin/quants_tier_mat.gz"/> | |
149 <data name="alevin.log" label="alevin.log" format="txt" from_work_dir="output/alevin/alevin.log"> | |
150 <filter>allout</filter> | |
151 </data> | |
152 <data name="featureDump.txt" label="featureDump.txt" format="txt" from_work_dir="output/alevin/featureDump.txt"/> | |
153 <data name="whitelist.txt" label="whitelist.txt" format="txt" from_work_dir="output/alevin/whitelist.txt"/> | |
154 <data name="bfh.txt" label="bfh.txt" format="txt" from_work_dir="output/alevin/bfh.txt"> | |
155 <filter>optional["dumpBfh"]</filter> | |
156 </data> | |
157 <data name="quants_mean_mat.gz" label="quants_mean_mat.gz" format="mtx" from_work_dir="output/alevin/quants_mean_mat.gz"> | |
158 <filter>optional["numCellBootstraps"]</filter> | |
159 </data> | |
160 <data name="quants_var_mat.gz" label="quants_var_mat.gz" format="mtx" from_work_dir="output/alevin/quants_var_mat.gz"> | |
161 <filter>optional["numCellBootstraps"]</filter> | |
162 </data> | |
163 <data name="quants_boot_rows.txt" label="quants_boot_rows.txt" format="txt" from_work_dir="output/alevin/quants_boot_rows.txt"> | |
164 <filter>optional["numCellBootstraps"]</filter> | |
165 </data> | |
166 <data name="alevinmeta_info.json" format="json" label="alevin_meta_info.json" from_work_dir="output/aux_info/alevin_meta_info.json"> | |
167 <filter>allout</filter> | |
168 </data> | |
169 <data name="ambig_info.tsv" format="tsv" label="ambig_info.tsv" from_work_dir="output/aux_info/ambig_info.tsv"> | |
170 <filter>allout</filter> | |
171 </data> | |
172 <data name="meta_info.json" format="json" label="meta_info.json" from_work_dir="output/aux_info/meta_info.json"> | |
173 <filter>allout</filter> | |
174 </data> | |
175 <data name="expected_bias.gz" format="txt" label="expected_bias.gz" from_work_dir="output/aux_info/fld.gz"/> | |
176 <data name="observed_bias.gz" format="txt" label="observed_bias.gz" from_work_dir="output/aux_info/observed_bias.gz"/> | |
177 <data name="observed_bias_3p.gz" format="txt" label="observed_bias_3p.gz" from_work_dir="output/aux_info/observed_bias_3p.gz"/> | |
178 <data name="flenDist.txt" format="txt" label="flenDist.txt" from_work_dir="output/libParams/flenDist.txt"/> | |
179 <data name="salmon_quant.log" format="txt" label="salmon_quant.log" from_work_dir="output/logs/salmon_quant.log"> | |
180 <filter>allout</filter> | |
181 </data> | |
182 </outputs> | |
183 <tests> <!-- Single functional test: index built from a history FASTA, paired-end gzipped reads, the chromium protocol and dumpMtx enabled; expects 11 outputs and compares quants_mat.mtx.gz by size (sim_size). --> | |
184 <test expect_num_outputs="11"> | |
185 <conditional name="refTranscriptSource"> | |
186 <param name="TranscriptSource" value="history"/> | |
187 <section name="s_index"> | |
188 <param name="fasta" value="minitranscript.fa"/> | |
189 </section> | |
190 </conditional> | |
191 <conditional name="pairstraight"> | |
192 <param name="readselect" value="paired"/> | |
193 <param name="file1" value="fastqs/moreminifastq1.fastq.gz"/> | |
194 <param name="file2" value="fastqs/moreminifastq2.fastq.gz"/> | |
195 <param name="orientation" value="I"/> | |
196 <param name="strandedness" value="SR"/> | |
197 </conditional> | |
198 <param name="protocol" value="--chromium"/> | |
199 <param name="tgmap" value="minitxp.tsv"/> | |
200 <param name="dumpMtx" value="Yes"/> | |
201 <output name="quants_mat.mtx.gz" file="alevin_mat.mtx.gz" ftype="mtx" compare="sim_size"/> | |
202 </test> | |
203 </tests> | |
204 <help><![CDATA[ | |
205 @salmonhelp@ | |
206 @alevinhelp@ | |
207 ]]></help> | |
208 <expand macro="citations"/> | |
209 </tool> |