comparison alevin.xml @ 0:908a8d400fa2 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/salmon commit 8f432498890670fd03a197bd3d1aa2638d1ff2b3"
author bgruening
date Mon, 09 Sep 2019 11:12:51 -0400
parents
children e53f19161c59
comparison
equal deleted inserted replaced
-1:000000000000 0:908a8d400fa2
1 <tool id="alevin" name="Alevin" version="@VERSION@">
<!-- Galaxy wrapper around "salmon alevin". The macros imported below supply the
     "requirements" expansion used here; the @VERSION@ placeholder is presumably a
     token defined in macros.xml as well (confirm there). -->
2 <description>Quantification and analysis of 3’ tagged-end single-cell sequencing data</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
<!-- Cheetah template that assembles the shell command line. Steps: create scratch
     directories, build a salmon index unless a pre-built one was selected, stage
     the reads as uncompressed FASTQ, then run salmon alevin into ./output. -->
7 <command detect_errors="exit_code"><![CDATA[
8 mkdir ./index
9 && mkdir ./output
## Build an index from the supplied FASTA unless a pre-built index was chosen.
10 #if '${refTranscriptSource.TranscriptSource}' != "indexed":
11 && salmon index -i ./index
12 --kmerLen '${refTranscriptSource.s_index.kmer}'
13 --gencode
14 --transcripts '${refTranscriptSource.s_index.fasta}'
15 #set $index_path = './index'
16 #else
17 #set $index_path = $refTranscriptSource.index.fields.path
18 #end if
## Stage reads under fixed names: compressed inputs are copied and decompressed,
## plain FASTQ inputs are symlinked. The format of file1 decides for both mates.
19 #if $pairstraight.readselect == 'paired':
20 #if $pairstraight.file1.is_of_type("fastq.gz"):
21 && cp '${pairstraight.file1}' ./mate1.fastq.gz
22 && gunzip ./mate1.fastq.gz
23 && cp '${pairstraight.file2}' ./mate2.fastq.gz
24 && gunzip ./mate2.fastq.gz
25 #else if $pairstraight.file1.is_of_type("fastq.bz2"):
26 && cp '${pairstraight.file1}' ./mate1.fastq.bz2
27 && bzip2 -d ./mate1.fastq.bz2
28 && cp '${pairstraight.file2}' ./mate2.fastq.bz2
29 && bzip2 -d ./mate2.fastq.bz2
30 #else:
31 && ln -s '${pairstraight.file1}' ./mate1.fastq
32 && ln -s '${pairstraight.file2}' ./mate2.fastq
33 #end if
34 #else:
35 #if $pairstraight.unmatedreads.is_of_type("fastq.gz"):
36 && cp '${pairstraight.unmatedreads}' ./unmate.fastq.gz
37 && gunzip ./unmate.fastq.gz
38 #else if $pairstraight.unmatedreads.is_of_type("fastq.bz2"):
39 && cp '${pairstraight.unmatedreads}' ./unmate.fastq.bz2
40 && bzip2 -d unmate.fastq.bz2
41 #else:
42 && ln -s '${pairstraight.unmatedreads}' ./unmate.fastq
43 #end if
44 #end if
45
46 && ln -s '${tgmap}' ./alevinmap.tsv
## The library type string follows -l: orientation plus strandedness for paired
## reads, strandedness alone for single-end reads.
47 && salmon alevin -l
48 #if $pairstraight.readselect == 'paired':
49 ${pairstraight.orientation}${pairstraight.strandedness}
50 -i $index_path
51 -1 ./mate1.fastq
52 -2 ./mate2.fastq
53 #else:
54 '${pairstraight.strandedness}'
55 -i $index_path
## The single-end file was already decompressed (or symlinked) above, so it is
## passed directly; a stray "zcat" word here would be taken as a read file name.
56 -r ./unmate.fastq
57 #end if
58 -o ./output
59 -p "\${GALAXY_SLOTS:-4}"
60 ${protocol}
61 --tgMap ./alevinmap.tsv
## whitelist lives in the "optional" section, so it must be addressed through it.
62 #if $optional.whitelist:
63 --whitelist '${optional.whitelist}'
64 #end if
65 #if $optional.numCellBootstraps:
66 --numCellBootstraps '${optional.numCellBootstraps}'
67 #end if
68 #if $optional.forceCells:
69 --forceCells '${optional.forceCells}'
70 #end if
71 #if $optional.expectCells:
72 --expectCells '${optional.expectCells}'
73 #end if
74 #if $optional.mrna:
75 --mrna '${optional.mrna}'
76 #end if
77 #if $optional.rrna:
78 --rrna '${optional.rrna}'
79 #end if
80 #if $optional.keepCBFraction:
81 --keepCBFraction '${optional.keepCBFraction}'
82 #end if
## noDedup renders as Yes/No (its declared truevalue/falsevalue), so compare the
## string form and emit the flag explicitly.
#if str($optional.noDedup) == 'Yes':
--noDedup
#end if
## NOTE(review): assumes the packaged salmon accepts this option under "alevin".
#if $optional.minScoreFraction:
--minScoreFraction '${optional.minScoreFraction}'
#end if
## These booleans render directly as their flag (or as an empty string when off).
83 ${optional.dumpBfh}
84 ${optional.dumpFeatures}
85 ${optional.dumpUmiGraph}
86 ${optional.dumpMtx}
87 #if $optional.lowRegionMinNumBarcodes:
88 --lowRegionMinNumBarcodes '${optional.lowRegionMinNumBarcodes}'
89 #end if
90 #if $optional.maxNumBarcodes:
91 --maxNumBarcodes '${optional.maxNumBarcodes}'
92 #end if
93 ]]>
94 </command>
<!-- User-facing parameters. The reference index selector comes from the "index"
     macro; read layout is chosen through the "pairstraight" conditional; rarely
     needed Alevin switches are grouped in the collapsed "optional" section. -->
95 <inputs>
96 <expand macro="index"/>
97 <conditional name="pairstraight">
98 <param name="readselect" label="Single or paired-end reads?" type="select">
99 <option value="paired">Paired-end</option>
100 <option value="unmated">Single-end</option>
101 </param>
102 <when value="paired">
103 <param name="file1" type="data" format="fastq,fastq.gz,fastqsanger.gz,fastq.bz2" help="CB+UMI raw sequence file(s)"/>
104 <param name="file2" type="data" format="fastq,fastq.gz,fastqsanger.gz,fastq.bz2" help="Read-sequence file(s)"/>
105 <expand macro="orient"/>
106 <expand macro="stranded"/>
107 </when>
108 <when value="unmated">
109 <param name="unmatedreads" type="data" format="fastq,fastq.gz,fastqsanger.gz,fastq.bz2" label="Unmated reads files"/>
110 <expand macro="stranded"/>
111 </when>
112 </conditional>
<!-- Option values are the literal salmon alevin protocol flags; the command
     template inserts the selected value verbatim. -->
113 <param name="protocol" type="select">
114 <option value="--dropseq">DropSeq Single Cell protocol</option>
115 <option value="--chromium">10x chromium v2 Single Cell protocol</option>
116 <option value="--chromiumV3">10x chromium v3 Single Cell protocol</option>
117 <option value="--gemcode">Gemcode v1 Single Cell protocol</option>
118 <option value="--celseq">CEL-Seq Single Cell protocol</option>
119 <option value="--celseq2">CEL-Seq2 Single Cell protocol</option>
120 </param>
121 <param name="tgmap" type="data" format="tsv,tabular" label="Transcript to gene map file" help="Tsv with no header, containing two columns mapping each transcript present in the reference to the corresponding gene (the first column is a transcript and the second is the corresponding gene)."/>
122 <param name="allout" type="boolean" label="Retrieve all output files" truevalue="Yes" falsevalue="No" checked="false" help="If not selected, all log, info.txt, and json files output by Alevin will not be retrieved"/>
123 <section name="optional" title="Optional commands" expanded="false">
124 <param name="whitelist" type="data" format="tsv,tabular" optional="true" label="Whitelist file" help="Explicitly specify whitelist CB for cell detection and CB sequence correction. If not specified, putative CBs generated."/>
125 <param name="noDedup" type="boolean" truevalue="Yes" falsevalue="No" checked="false" help="Causes pipeline to only perform CB correction, then maps the read-sequences to the transcriptome generating the interim data-structure of CB-EqClass-UMI-count. Used in parallel with --dumpBarcodeEq or --dumpBfh for the purposes of obtaining raw information or debugging."/>
<!-- mrna and rrna accept the same tabular formats as the other table inputs. -->
126 <param name="mrna" type="data" format="tsv,tabular" optional="true" help="Single column tsv of mitochondrial genes which are to be used as a feature for CB whitelisting naive Bayes classification."/>
127 <param name="rrna" type="data" format="tsv,tabular" optional="true" help="Single column tsv of ribosomal genes which are to be used as a feature for CB whitelisting naive Bayes classification."/>
128 <param name="dumpBfh" type="boolean" truevalue="--dumpBfh" falsevalue="" checked="false" help="Dumps the full CB-EqClass-UMI-count data-structure for the purpose of allowing raw data analysis and debugging."/>
129 <param name="dumpFeatures" type="boolean" truevalue="--dumpFeatures" falsevalue="" checked="false" help="Dumps all features used by the CB classification and their counts at each cell level. Generally, this is used for the purposes of debugging."/>
130 <param name="dumpUmiGraph" type="boolean" truevalue="--dumpUmiGraph" falsevalue="" checked="false" help="Dump the per-cell level umi graph"/>
131 <param name="dumpMtx" type="boolean" truevalue="--dumpMtx" falsevalue="" checked="false" help="Converts the default binary format of alevin for gene-count matrix into a human readable mtx (matrix market exchange) sparse format."/>
132 <param name="forceCells" type="integer" optional="true" help="Explicitly specify the number of cells."/>
133 <param name="expectCells" type="integer" optional="true" help="define a close upper bound on expected number of cells."/>
134 <param name="numCellBootstraps" type="integer" optional="true" help="Performs certain number of bootstrap and generate the mean and variance of the count matrix"/>
135 <param name="minScoreFraction" type="float" optional="true" help="This value controls the minimum allowed score for a mapping to be considered valid. It matters only when --validateMappings has been passed to Salmon. The maximum possible score for a fragment is ms = read_len * ma (or ms = (left_read_len + right_read_len) * ma for paired-end reads). The argument to --minScoreFraction determines what fraction of the maximum score s a mapping must achieve to be potentially retained. For a minimum score fraction of f, only mappings with a score greater than (f * s) will be kept. Mappings with lower scores will be considered as low-quality, and will be discarded."/>
136 <param name="keepCBFraction" type="float" optional="true" help="Fraction of cellular barcodes to keep (Between 0 and 1)."/>
137 <param name="lowRegionMinNumBarcodes" type="integer" optional="true" help="Minimum number of cell barcodes to use for learning low confidence region (defaults to 200)"/>
138 <param name="maxNumBarcodes" type="integer" optional="true" help="Maximum allowable limit to process the cell barcodes. Defaults to 100000"/>
139 </section>
140 </inputs>
<!-- Each data element collects one file written below ./output (see from_work_dir)
     into the history. Elements with a filter are only created when the filter
     expression is true for the submitted parameters.
     NOTE(review): the dumpMtx and dumpBfh filters compare against "Yes", but those
     booleans declare their flag string as truevalue, not "Yes". Confirm what value
     Galaxy exposes to filters; as written these comparisons may never match.
     NOTE(review): the output named expected_bias.gz reads aux_info/fld.gz. Confirm
     this is intended.
     NOTE(review): alevinmeta_info.json and meta_info.json share the same label. -->
141 <outputs>
142 <data name="quants_mat.gz" label="quants_mat.gz" format="txt" from_work_dir="output/alevin/quants_mat.gz">
143 <filter>optional["dumpMtx"] != "Yes"</filter>
144 </data>
145 <data name="quants_mat.mtx.gz" label="quants_mat.mtx.gz" format="mtx" from_work_dir="output/alevin/quants_mat.mtx.gz"/>
146 <data name="quants_mat_cols.txt" label="quants_mat_cols.txt" format="txt" from_work_dir="output/alevin/quants_mat_cols.txt"/>
147 <data name="quants_mat_rows.txt" label="quants_mat_rows.txt" format="txt" from_work_dir="output/alevin/quants_mat_rows.txt"/>
148 <data name="quants_tier_mat.gz" label="quants_tier_mat.gz" format="mtx" from_work_dir="output/alevin/quants_tier_mat.gz"/>
<!-- Log and JSON outputs are gated on the top-level "allout" checkbox. -->
149 <data name="alevin.log" label="alevin.log" format="txt" from_work_dir="output/alevin/alevin.log">
150 <filter>allout</filter>
151 </data>
152 <data name="featureDump.txt" label="featureDump.txt" format="txt" from_work_dir="output/alevin/featureDump.txt"/>
153 <data name="whitelist.txt" label="whitelist.txt" format="txt" from_work_dir="output/alevin/whitelist.txt"/>
154 <data name="bfh.txt" label="bfh.txt" format="txt" from_work_dir="output/alevin/bfh.txt">
155 <filter>optional["dumpBfh"] == "Yes"</filter>
156 </data>
<!-- Bootstrap summaries are only collected when numCellBootstraps is set. -->
157 <data name="quants_mean_mat.gz" label="quants_mean_mat.gz" format="mtx" from_work_dir="output/alevin/quants_mean_mat.gz">
158 <filter>optional["numCellBootstraps"]</filter>
159 </data>
160 <data name="quants_var_mat.gz" label="quants_var_mat.gz" format="mtx" from_work_dir="output/alevin/quants_var_mat.gz">
161 <filter>optional["numCellBootstraps"]</filter>
162 </data>
163 <data name="quants_boot_rows.txt" label="quants_boot_rows.txt" format="txt" from_work_dir="output/alevin/quants_boot_rows.txt">
164 <filter>optional["numCellBootstraps"]</filter>
165 </data>
166 <data name="alevinmeta_info.json" format="json" label="meta_info.json" from_work_dir="output/aux_info/alevin_meta_info.json">
167 <filter>allout</filter>
168 </data>
169 <data name="ambig_info.tsv" format="tsv" label="ambig_info.tsv" from_work_dir="output/aux_info/ambig_info.tsv">
170 <filter>allout</filter>
171 </data>
172 <data name="meta_info.json" format="json" label="meta_info.json" from_work_dir="output/aux_info/meta_info.json">
173 <filter>allout</filter>
174 </data>
175 <data name="expected_bias.gz" format="txt" label="expected_bias.gz" from_work_dir="output/aux_info/fld.gz"/>
176 <data name="observed_bias.gz" format="txt" label="observed_bias.gz" from_work_dir="output/aux_info/observed_bias.gz"/>
177 <data name="observed_bias_3p.gz" format="txt" label="observed_bias_3p.gz" from_work_dir="output/aux_info/observed_bias_3p.gz"/>
178 <data name="flenDist.txt" format="txt" label="flenDist.txt" from_work_dir="output/libParams/flenDist.txt"/>
179 <data name="salmon_quant.log" format="txt" label="salmon_quant.log" from_work_dir="output/logs/salmon_quant.log">
180 <filter>allout</filter>
181 </data>
182 </outputs>
<!-- Single functional test: builds the index from a history FASTA, runs paired-end
     gzipped reads with the chromium protocol and the mtx dump requested, expects
     11 output datasets, and compares quants_mat.mtx.gz to the reference by size
     only (sim_size). -->
183 <tests>
184 <test expect_num_outputs="11">
185 <conditional name="refTranscriptSource">
186 <param name="TranscriptSource" value="history"/>
187 <section name="s_index">
188 <param name="fasta" value="minitranscript.fa"/>
189 </section>
190 </conditional>
191 <conditional name="pairstraight">
192 <param name="readselect" value="paired"/>
193 <param name="file1" value="fastqs/moreminifastq1.fastq.gz"/>
194 <param name="file2" value="fastqs/moreminifastq2.fastq.gz"/>
195 <param name="orientation" value="I"/>
196 <param name="strandedness" value="SR"/>
197 </conditional>
198 <param name="protocol" value="--chromium"/>
199 <param name="tgmap" value="minitxp.tsv"/>
<!-- NOTE(review): dumpMtx is declared inside the "optional" section but is set
     here at the top level; confirm the test framework resolves it as intended. -->
200 <param name="dumpMtx" value="Yes"/>
201 <output name="quants_mat.mtx.gz" file="alevin_mat.mtx.gz" ftype="mtx" compare="sim_size"/>
202 </test>
203 </tests>
<!-- The help body consists only of two placeholders; their text is presumably
     supplied as tokens by the imported macros.xml (confirm there), as is the
     "citations" macro expanded below. -->
204 <help><![CDATA[
205 @salmonhelp@
206 @alevinhelp@
207 ]]></help>
208 <expand macro="citations"/>
209 </tool>