comparison preprocessing.xml @ 6:20c4011e1458 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/snapatac2 commit e0f59fae19e57f54ae0c351a16dd1805d12aba1d
author iuc
date Tue, 25 Nov 2025 16:40:47 +0000
parents c5c94e01a1b5
children
comparison
equal deleted inserted replaced
5:c5c94e01a1b5 6:20c4011e1458
6 <expand macro="xrefs"/> 6 <expand macro="xrefs"/>
7 <requirements> 7 <requirements>
8 <expand macro="requirements"/> 8 <expand macro="requirements"/>
9 </requirements> 9 </requirements>
10 <command detect_errors="exit_code"><![CDATA[ 10 <command detect_errors="exit_code"><![CDATA[
11 #if $method.method != 'pp.make_fragment_file' and $method.method != 'pp.import_data' 11 #if $method.method != 'pp.make_fragment_file' and $method.method != 'pp.import_contacts' and $method.method != 'pp.import_fragments'
12 @PREP_ADATA@ 12 @CMD_PREP_ADATA@
13 #end if 13 #end if
14 @CMD@ 14 @CMD@
15 ]]></command> 15 ]]></command>
16 <configfiles> 16 <configfiles>
17 <configfile name="script_file"><![CDATA[ 17 <configfile name="script_file"><![CDATA[
18 @CMD_imports@ 18 @CONF_IMPORTS@
19 19
20 #if $method.method != 'pp.make_fragment_file' and $method.method != 'pp.import_data' 20 #if $method.method != 'pp.make_fragment_file' and $method.method != 'pp.import_contacts' and $method.method != 'pp.import_fragments'
21 @CMD_read_inputs@ 21 @CONF_READ_INPUTS@
22 #end if 22 #end if
23 23
24 #if $method.method == 'pp.make_fragment_file' 24 #if $method.method == 'pp.make_fragment_file'
25 sa.pp.make_fragment_file( 25 snap.pp.make_fragment_file(
26 bam_file = '$method.bam_file', 26 bam_file = '$method.bam_file',
27 is_paired = $method.is_paired, 27 is_paired = $method.is_paired,
28 #if $method.barcode.extract_type == 'from_tag' 28 #if $method.barcode.extract_type == 'from_tag'
29 #if $method.barcode.barcode_tag != '' 29 #if $method.barcode.barcode_tag != ''
30 barcode_tag = '$method.barcode.barcode_tag', 30 barcode_tag = '$method.barcode.barcode_tag',
31 #end if 31 #end if
32 #elif $method.barcode.extract_type == 'from_read_names' 32 #else if $method.barcode.extract_type == 'from_read_names'
33 #if $method.barcode.barcode_regex != '' 33 #if $method.barcode.barcode_regex != ''
34 barcode_regex = '$method.barcode.barcode_regex', 34 barcode_regex = '$method.barcode.barcode_regex',
35 #end if 35 #end if
36 #end if 36 #end if
37 #if $method.umi_tag != '' 37 #if $method.umi_tag != ''
47 compression = 'gzip', 47 compression = 'gzip',
48 output_file = '$fragments_out', 48 output_file = '$fragments_out',
49 tempdir = "." 49 tempdir = "."
50 ) 50 )
51 51
52 #else if $method.method == 'pp.import_data' 52 #else if $method.method == 'pp.import_fragments'
53 import csv 53
54 with open('$method.chrom_sizes') as f: 54 chrom_sizes = {}
55 chr_sizes = {x[0]:int(x[1]) for x in csv.reader(f, delimiter='\t')} 55 with open('$method.chrom_sizes', 'r') as f:
56 56 for line in f:
57 sa.pp.import_data( 57 chrom, size = line.strip().split('\t')
58 fragment_file = '$method.fragment_file', 58 chrom_sizes[chrom] = int(size)
59 chrom_sizes = chr_sizes, 59
60 min_num_fragments = $method.min_num_fragments, 60 ## suggested by authors: https://github.com/scverse/SnapATAC2/blob/5a87c5ad4e0c4008fa9b58907a85b542073287b4/snapatac2-python/python/snapatac2/preprocessing/_basic.py#L244
61 if __name__ == '__main__':
62 adata = snap.pp.import_fragments(
63 fragment_file = '$method.fragment_file',
64 chrom_sizes = chrom_sizes,
65 min_num_fragments = $method.min_num_fragments,
66 sorted_by_barcode = $method.sorted_by_barcode,
67 #if $method.whitelist:
68 whitelist = '$method.whitelist',
69 #end if
70 #if $method.chrM != ''
71 #set $chrM = ([x.strip() for x in str($method.chrM).split(',')])
72 chrM = $chrM,
73 #end if
74 shift_left = $method.shift_left,
75 shift_right = $method.shift_right,
76 chunk_size = $method.chunk_size,
77 tempdir = ".",
78 backend = 'hdf5',
79 n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
80 )
81 ## Not sure how this should work and there is no documentation for it.
82 ## #else if $method.method == 'pp.import_values'
83 ## chrom_sizes = {}
84 ## with open('$method.chrom_sizes', 'r') as f:
85 ## for line in f:
86 ## chrom, size = line.strip().split('\t')
87 ## chrom_sizes[chrom] = int(size)
88 ##
89 ## snap.pp.import_values(
90 ## input_dir = 'input',
91 ## file = '$anndata_out',
92 ## chrom_sizes = chrom_sizes,
93 ## chunk_size = $method.chunk_size,
94 ## backend = 'hdf5'
95 ## )
96
97 #else if $method.method == 'pp.import_contacts'
98 chrom_sizes = {}
99 with open('$method.chrom_sizes', 'r') as f:
100 for line in f:
101 chrom, size = line.strip().split('\t')
102 chrom_sizes[chrom] = int(size)
103
104 adata = snap.pp.import_contacts(
105 contact_file = '$method.contact_file',
106 chrom_sizes = chrom_sizes,
61 sorted_by_barcode = $method.sorted_by_barcode, 107 sorted_by_barcode = $method.sorted_by_barcode,
62 #if str($method.whitelist) != 'None' 108 bin_size = $method.bin_size,
63 whitelist = '$method.whitelist',
64 #end if
65 shift_left = $method.shift_left,
66 shift_right = $method.shift_right,
67 #set $chr_mt = ([x.strip() for x in str($method.chrM).split(',')])
68 chrM = $chr_mt,
69 chunk_size = $method.chunk_size, 109 chunk_size = $method.chunk_size,
70 file = 'anndata.h5ad', 110 tempdir = ".",
71 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) 111 backend = 'hdf5'
72 ) 112 )
113 ## Not sure how this should work and there is no documentation for it.
114 ## #else if $method.method == 'pp.call_cells'
115 ## snap.pp.call_cells(
116 ## adata,
117 ## use_rep = $method.use_rep,
118 ## inplace = True,
119 ## n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
120 ## )
73 121
74 #else if $method.method == 'pp.add_tile_matrix' 122 #else if $method.method == 'pp.add_tile_matrix'
75 sa.pp.add_tile_matrix( 123 snap.pp.add_tile_matrix(
76 adata, 124 adata,
77 bin_size = $method.bin_size, 125 bin_size = $method.bin_size,
126 inplace = True,
78 chunk_size = $method.chunk_size, 127 chunk_size = $method.chunk_size,
79 #if $method.exclude_chroms != '' 128 #if $method.exclude_chroms != ''
80 #set $excl_chroms = ([x.strip() for x in str($method.exclude_chroms).split(',')]) 129 #set $excl_chroms = ([x.strip() for x in str($method.exclude_chroms).split(',')])
81 exclude_chroms = $excl_chroms, 130 exclude_chroms = $excl_chroms,
82 #end if 131 #end if
88 #end if 137 #end if
89 counting_strategy = '$method.counting_strategy', 138 counting_strategy = '$method.counting_strategy',
90 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) 139 n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
91 ) 140 )
92 141
142
143
93 #else if $method.method == 'pp.make_gene_matrix' 144 #else if $method.method == 'pp.make_gene_matrix'
94 adata = sa.pp.make_gene_matrix( 145 adata = snap.pp.make_gene_matrix(
95 adata, 146 adata,
96 gene_anno = '$method.gene_anno', 147 #if $method.gff_file_condi.gffSource == 'cached':
148 gene_anno = '$method.gff_file_condi.gff_pre_installed.fields.path',
149 #else:
150 gene_anno = '$method.gff_file_condi.gff_history',
151 #end if
152 inplace = False,
97 chunk_size = $method.chunk_size, 153 chunk_size = $method.chunk_size,
98 use_x = $method.use_x, 154 use_x = $method.use_x,
99 id_type = '$method.id_type', 155 id_type = '$method.id_type',
156 upstream = $method.upstream,
157 downstream = $method.downstream,
158 include_gene_body = $method.include_gene_body,
100 transcript_name_key = '$method.transcript_name_key', 159 transcript_name_key = '$method.transcript_name_key',
101 transcript_id_key = '$method.transcript_id_key', 160 transcript_id_key = '$method.transcript_id_key',
102 gene_name_key = '$method.gene_name_key', 161 gene_name_key = '$method.gene_name_key',
103 gene_id_key = '$method.gene_id_key', 162 gene_id_key = '$method.gene_id_key',
104 #if $method.min_frag_size 163 #if $method.min_frag_size
109 #end if 168 #end if
110 counting_strategy = '$method.counting_strategy' 169 counting_strategy = '$method.counting_strategy'
111 ) 170 )
112 171
113 #else if $method.method == 'pp.filter_cells' 172 #else if $method.method == 'pp.filter_cells'
114 sa.pp.filter_cells( 173 snap.pp.filter_cells(
115 adata, 174 adata,
116 min_counts = $method.min_counts, 175 min_counts = $method.min_counts,
117 min_tsse = $method.min_tsse, 176 min_tsse = $method.min_tsse,
118 #if $method.max_counts 177 #if str($method.max_counts) != '':
119 max_counts = $method.max_counts, 178 max_counts = $method.max_counts,
120 #end if 179 #end if
121 #if $method.max_tsse 180 #if str($method.max_tsse) != '':
122 max_tsse = $method.max_tsse, 181 max_tsse = $method.max_tsse,
123 #end if 182 #end if
124 inplace = True, 183 inplace = True,
125 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) 184 n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
126 ) 185 )
127 186
128 #else if $method.method == 'pp.select_features' 187 #else if $method.method == 'pp.select_features'
129 sa.pp.select_features( 188 snap.pp.select_features(
130 adata, 189 adata,
131 n_features = $method.n_features, 190 n_features = $method.n_features,
132 filter_lower_quantile = $method.filter_lower_quantile, 191 filter_lower_quantile = $method.filter_lower_quantile,
133 filter_upper_quantile = $method.filter_upper_quantile, 192 filter_upper_quantile = $method.filter_upper_quantile,
134 #if str($method.whitelist) != 'None' 193 #if str($method.whitelist) != 'None'
141 inplace = True, 200 inplace = True,
142 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) 201 n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
143 ) 202 )
144 203
145 #else if $method.method == 'pp.scrublet' 204 #else if $method.method == 'pp.scrublet'
146 sa.pp.scrublet( 205 ## somewhere in the SnapATAC2 code, a pandas Series is being passed where a numpy array is expected.
206 ## This is a workaround to add the nonzero method back to pandas Series.
207 ## Add the nonzero method back to pandas Series
208 import pandas as pd
209 def series_nonzero(self):
210 return (self != 0).values.nonzero()
211
212 pd.Series.nonzero = series_nonzero
213
214 snap.pp.scrublet(
147 adata, 215 adata,
148 #if $method.features 216 #if $method.features
149 features = '$method.features', 217 features = '$method.features',
150 #end if 218 #end if
151 n_comps = $method.n_comps, 219 n_comps = $method.n_comps,
159 inplace = True, 227 inplace = True,
160 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) 228 n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
161 ) 229 )
162 230
163 #else if $method.method == 'pp.filter_doublets' 231 #else if $method.method == 'pp.filter_doublets'
164 sa.pp.filter_doublets( 232 snap.pp.filter_doublets(
165 adata, 233 adata,
166 #if $method.probability_threshold 234 #if $method.probability_threshold
167 probability_threshold = $method.probability_threshold, 235 probability_threshold = $method.probability_threshold,
168 #end if 236 #end if
169 #if $method.score_threshold 237 #if $method.score_threshold
172 inplace = True, 240 inplace = True,
173 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) 241 n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
174 ) 242 )
175 243
176 #else if $method.method == 'pp.mnc_correct' 244 #else if $method.method == 'pp.mnc_correct'
177 sa.pp.mnc_correct( 245 snap.pp.mnc_correct(
178 adata, 246 adata,
179 batch = '$method.batch', 247 batch = '$method.batch',
180 n_neighbors = $method.n_neighbors, 248 n_neighbors = $method.n_neighbors,
181 n_clusters = $method.n_clusters, 249 n_clusters = $method.n_clusters,
182 n_iter = $method.n_iter, 250 n_iter = $method.n_iter,
183 @CMD_params_data_integration@ 251 @CONF_PARAMS_DATA_INTEGRATION@
184 inplace = True, 252 inplace = True,
185 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) 253 n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
186 ) 254 )
187 255
188 #else if $method.method == 'pp.harmony' 256 #else if $method.method == 'pp.harmony'
189 sa.pp.harmony( 257 snap.pp.harmony(
190 adata, 258 adata,
191 batch = '$method.batch', 259 batch = '$method.batch',
192 @CMD_params_data_integration@ 260 @CONF_PARAMS_DATA_INTEGRATION@
193 inplace = True 261 inplace = True
194 ) 262 )
195 263
196 #else if $method.method == 'pp.scanorama_integrate' 264 #else if $method.method == 'pp.scanorama_integrate'
197 sa.pp.scanorama_integrate( 265 snap.pp.scanorama_integrate(
198 adata, 266 adata,
199 batch = '$method.batch', 267 batch = '$method.batch',
200 n_neighbors = $method.n_neighbors, 268 n_neighbors = $method.n_neighbors,
201 @CMD_params_data_integration@ 269 @CONF_PARAMS_DATA_INTEGRATION@
202 inplace = True 270 inplace = True
203 ) 271 )
204 272
205 #else if $method.method == 'metrics.frag_size_distr' 273 #else if $method.method == 'ex.export_fragments'
206 sa.metrics.frag_size_distr( 274 snap.ex.export_fragments(
207 adata, 275 adata,
208 max_recorded_size = $method.max_recorded_size, 276 groupby = '$method.groupby',
209 add_key = '$method.add_key', 277 #if $method.min_frag_length:
210 inplace = True, 278 min_frag_length = $method.min_frag_length,
211 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) 279 #end if
280 #if $method.max_frag_length:
281 max_frag_length = $method.max_frag_length,
282 #end if
283 out_dir='./fragments',
284 suffix = '.bed.gz',
285 compression = 'gzip'
212 ) 286 )
213 287
214 #else if $method.method == 'metrics.tsse' 288 #else if $method.method == 'ex.export_coverage'
215 sa.metrics.tsse( 289 snap.ex.export_coverage(
216 adata, 290 adata,
217 gene_anno = '$method.gene_anno', 291 groupby = '$method.groupby',
218 inplace = True, 292 selections = None, # will add if requested by users
293 bin_size = $method.bin_size,
294 #if $method.blacklist:
295 blacklist = $method.blacklist,
296 #end if
297 normalization = '$method.normalization',
298 #if $method.include_for_norm:
299 include_for_norm = '$method.include_for_norm',
300 #end if
301 #if $method.exclude_for_norm:
302 exclude_for_norm = '$method.exclude_for_norm',
303 #end if
304 #if $method.min_frag_length:
305 min_frag_length = $method.min_frag_length,
306 #end if
307 max_frag_length = $method.max_frag_length,
308 counting_strategy = '$method.counting_strategy',
309 #if $method.smooth_base:
310 smooth_base = $method.smooth_base,
311 #end if
312 out_dir = './coverage',
313 #if str($method.output_format) == 'bedgraph':
314 suffix = '.bedgraph.gz',
315 #else
316 suffix = '.bigwig',
317 #end if
318 output_format = '$method.output_format',
319 #if $method.output_format == 'bedgraph':
320 compression = 'gzip',
321 #end if
322 tempdir = '.',
219 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) 323 n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
220 ) 324 )
221 #end if 325 #end if
222 326
223 #if $method.method != 'pp.make_fragment_file' and $method.method != 'pp.import_data' 327 #if $method.method != 'pp.make_fragment_file' and $method.method != 'ex.export_fragments' and $method.method != 'ex.export_coverage'
224 @CMD_anndata_write_outputs@ 328 @CONF_ANNDATA_WRITE_OUTPUTS@
225 #end if 329 #end if
226 ]]></configfile> 330 ]]></configfile>
227 </configfiles> 331 </configfiles>
228 <inputs> 332 <inputs>
229 <conditional name="method"> 333 <conditional name="method">
230 <param name="method" type="select" label="Method used for preprocessing"> 334 <param name="method" type="select" label="Method used for preprocessing">
231 <option value="pp.make_fragment_file">Convert a BAM file to a fragment file, using 'pp.make_fragment_file'</option> 335 <option value="pp.make_fragment_file">Convert a BAM file to a fragment file, using 'pp.make_fragment_file'</option>
232 <option value="pp.import_data">Import data fragment files and compute basic QC metrics, using 'pp.import_data'</option> 336 <option value="pp.import_fragments">Import data fragment files and compute basic QC metrics, using 'pp.import_fragments'</option>
337 <!-- Not sure how this should work and there is no documentation for it. -->
338 <!-- <option value="pp.import_values">Import values associated with base pairs, using 'pp.import_values'</option> -->
339 <option value="pp.import_contacts">Import chromatin contacts, using 'pp.import_contacts'</option>
340 <!-- Not sure how this should work and there is no documentation for it. -->
341 <!-- <option value="pp.call_cells">Calling cells based on the number of feature counts, using 'pp.call_cells'</option> -->
233 <option value="pp.add_tile_matrix">Generate cell by bin count matrix, using 'pp.add_tile_matrix'</option> 342 <option value="pp.add_tile_matrix">Generate cell by bin count matrix, using 'pp.add_tile_matrix'</option>
234 <option value="pp.make_gene_matrix">Generate cell by gene activity matrix, using 'pp.make_gene_matrix'</option> 343 <option value="pp.make_gene_matrix">Generate cell by gene activity matrix, using 'pp.make_gene_matrix'</option>
235 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using 'pp.filter_cells'</option> 344 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using 'pp.filter_cells'</option>
236 <option value="pp.select_features">Perform feature selection, using 'pp.select_features'</option> 345 <option value="pp.select_features">Perform feature selection, using 'pp.select_features'</option>
237 <option value="pp.scrublet">Compute probability of being a doublet using the scrublet algorithm, using 'pp.scrublet'</option> 346 <option value="pp.scrublet">Compute probability of being a doublet using the scrublet algorithm, using 'pp.scrublet'</option>
238 <option value="pp.filter_doublets">Remove doublets according to the doublet probability or doublet score, using 'pp.filter_doublets'</option> 347 <option value="pp.filter_doublets">Remove doublets according to the doublet probability or doublet score, using 'pp.filter_doublets'</option>
239 <option value="pp.mnc_correct">A modified MNN-Correct algorithm based on cluster centroid, using 'pp.mnc_correct'</option> 348 <option value="pp.mnc_correct">A modified MNN-Correct algorithm based on cluster centroid, using 'pp.mnc_correct'</option>
240 <option value="pp.harmony">Use harmonypy to integrate different experiments,using 'pp.harmony'</option> 349 <option value="pp.harmony">Use harmonypy to integrate different experiments,using 'pp.harmony'</option>
241 <option value="pp.scanorama_integrate">Use Scanorama [Hie19] to integrate different experiments, using 'pp.scanorama_integrate'</option> 350 <option value="pp.scanorama_integrate">Use Scanorama [Hie19] to integrate different experiments, using 'pp.scanorama_integrate'</option>
242 <option value="metrics.frag_size_distr">Compute the fragment size distribution of the dataset, using 'metrics.frag_size_distr'</option> 351 <option value="ex.export_fragments">Export and save fragments in a BED format file, using 'ex.export_fragments'</option>
243 <option value="metrics.tsse">Compute the TSS enrichment score (TSSe) for each cell, using 'metrics.tsse'</option> 352 <option value="ex.export_coverage">Export and save coverage information in a bedgraph or bigwig format file, using 'ex.export_coverage'</option>
244 </param> 353 </param>
245 <when value="pp.make_fragment_file"> 354 <when value="pp.make_fragment_file">
246 <param argument="bam_file" type="data" format="bam" label="File name of the BAM file"/> 355 <param argument="bam_file" type="data" format="bam" label="File name of the BAM file"/>
247 <param argument="is_paired" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Indicate whether the BAM file contain paired-end reads"/> 356 <param argument="is_paired" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Indicate whether the BAM file contain paired-end reads"/>
248 <conditional name="barcode"> 357 <conditional name="barcode">
249 <param name="extract_type" type="select" label="How to extract barcodes from BAM records?"> 358 <param name="extract_type" type="select" label="How to extract barcodes from BAM records?">
250 <option value="from_tag">From TAG fileds</option> 359 <option value="from_tag">From TAG fields</option>
251 <option value="from_read_names">From read names using regular expressions</option> 360 <option value="from_read_names">From read names using regular expressions</option>
252 </param> 361 </param>
253 <when value="from_tag"> 362 <when value="from_tag">
254 <param argument="barcode_tag" type="text" value="CB" optional="true" label="Extract barcodes from TAG fields of BAM records"/> 363 <param argument="barcode_tag" type="text" value="CB" optional="true" label="Extract barcodes from TAG fields of BAM records"/>
255 </when> 364 </when>
257 <param argument="barcode_regex" type="text" value="" optional="true" label="Extract barcodes from read names of BAM records using regular expressions" help="`(..:..:..:..):\w+$` extracts `bd:69:Y6:10` from `A01535:24:HW2MMDSX2:2:1359:8513:3458:bd:69:Y6:10:TGATAGGTT``"/> 366 <param argument="barcode_regex" type="text" value="" optional="true" label="Extract barcodes from read names of BAM records using regular expressions" help="`(..:..:..:..):\w+$` extracts `bd:69:Y6:10` from `A01535:24:HW2MMDSX2:2:1359:8513:3458:bd:69:Y6:10:TGATAGGTT``"/>
258 </when> 367 </when>
259 </conditional> 368 </conditional>
260 <param argument="umi_tag" type="text" value="" optional="true" label="Extract UMI from TAG fields of BAM records"/> 369 <param argument="umi_tag" type="text" value="" optional="true" label="Extract UMI from TAG fields of BAM records"/>
261 <param argument="umi_regex" type="text" value="" optional="true" label="Extract UMI from read names of BAM records using regular expressions"/> 370 <param argument="umi_regex" type="text" value="" optional="true" label="Extract UMI from read names of BAM records using regular expressions"/>
262 <expand macro="param_shift"/> 371 <expand macro="param_shift" varname="shift_left" label="Shift left" value="4"/>
372 <expand macro="param_shift" varname="shift_right" label="Shift right" value="-5"/>
263 <param argument="min_mapq" type="integer" min="0" value="30" label="Filter the reads based on MAPQ"/> 373 <param argument="min_mapq" type="integer" min="0" value="30" label="Filter the reads based on MAPQ"/>
264 <expand macro="param_chunk_size" size="50000000"/> 374 <expand macro="param_chunk_size" size="50000000"/>
265 </when> 375 </when>
266 <when value="pp.import_data"> 376 <when value="pp.import_fragments">
267 <param argument="fragment_file" type="data" format="interval" label="Fragment file, optionally compressed with gzip or zstd"/> 377 <param argument="fragment_file" type="data" format="bed" label="Fragment file to import" help=" A fragment file must contain at least 5 columns: chromosome, start, end, barcode, count"/>
268 <param argument="chrom_sizes" type="data" format="tabular" label="A tabular file containing chromosome names and sizes"/> 378 <expand macro="param_chrom_sizes"/>
269 <param argument="min_num_fragments" type="integer" value="200" label="Number of unique fragments threshold used to filter cells"/> 379 <param argument="min_num_fragments" type="integer" value="200" label="Minimum number of fragments required for a cell to pass filtering"/>
270 <param argument="sorted_by_barcode" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether the fragment file has been sorted by cell barcodes"/> 380 <param argument="sorted_by_barcode" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether the fragment file is sorted by barcode"/>
271 <param argument="whitelist" type="data" format="txt" optional="True" label="Whitelist file with a list of barcodes" help="Each line must contain a valid barcode. When provided, only barcodes in the whitelist will be retained."/> 381 <param argument="whitelist" type="data" format="bed" optional="true" label="A user provided bed file containing genome-wide whitelist regions"/>
272 <param argument="chrM" type="text" value="chrM, M" label="A list of chromosome names that are considered mitochondrial DNA"> 382 <param argument="chrM" type="text" value="" optional="true" label="A list of chromosomes to be treated as mitochondrial chromosomes, e.g. chrM, M, mt, mtDNA">
273 <expand macro="sanitize_query"/> 383 <expand macro="sanitize_query"/>
274 </param> 384 </param>
275 <param argument="shift_left" type="integer" value="0" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/> 385 <expand macro="param_shift" varname="shift_left" label="Shift left" value="0"/>
276 <param argument="shift_right" type="integer" value="0" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/> 386 <expand macro="param_shift" varname="shift_right" label="Shift right" value="0"/>
277 <expand macro="param_chunk_size" size="2000"/> 387 <expand macro="param_chunk_size" size="2000"/>
278 </when> 388 </when>
389 <!-- Not sure how this should work and there is no documentation for it. -->
390 <!-- <when value="pp.import_values">
391 <expand macro="param_inputs_anndata" multiple="true"/>
392 <expand macro="param_chrom_sizes"/>
393 <expand macro="param_chunk_size" size="200"/>
394 </when> -->
395 <when value="pp.import_contacts">
396 <param argument="contact_file" type="data" format="bed" label="Contact file to import"/>
397 <expand macro="param_chrom_sizes"/>
398 <param argument="sorted_by_barcode" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether the contact file is sorted by barcode"/>
399 <param argument="bin_size" type="integer" value="500000" label="The size of consecutive genomic regions used to record the counts"/>
400 <expand macro="param_chunk_size" size="200"/>
401 </when>
402 <!-- Not sure how this should work and there is no documentation for it. -->
403 <!-- <when value="pp.call_cells">
404 <expand macro="param_inputs_anndata"/>
405 <param argument="use_rep" type="float" value="" optional="true" label="The representation to use for filtering"/>
406 </when> -->
279 <when value="pp.add_tile_matrix"> 407 <when value="pp.add_tile_matrix">
280 <expand macro="inputs_anndata"/> 408 <expand macro="param_inputs_anndata"/>
281 <param argument="bin_size" type="integer" value="500" label="The size of consecutive genomic regions used to record the counts"/> 409 <param argument="bin_size" type="integer" value="500" label="The size of consecutive genomic regions used to record the counts"/>
282 <expand macro="param_chunk_size" size="500"/> 410 <expand macro="param_chunk_size" size="500"/>
283 <param argument="exclude_chroms" type="text" value="chrM, chrY, M, Y" optional="true" label="A list of chromosomes to exclude"> 411 <param argument="exclude_chroms" type="text" value="chrM, chrY, M, Y" optional="true" label="A list of chromosomes to exclude">
284 <expand macro="sanitize_query"/> 412 <expand macro="sanitize_query"/>
285 </param> 413 </param>
286 <expand macro="min_max_frag_size"/> 414 <expand macro="param_min_max_frag_size"/>
287 <expand macro="param_counting_strategy"/> 415 <expand macro="param_counting_strategy"/>
288 </when> 416 </when>
289 <when value="pp.make_gene_matrix"> 417 <when value="pp.make_gene_matrix">
290 <expand macro="inputs_anndata"/> 418 <expand macro="param_inputs_anndata"/>
291 <param argument="gene_anno" type="data" format="gtf,gff3" label="GTF/GFF file containing the gene annotation"/> 419 <expand macro="param_gene_anno"/>
292 <expand macro="param_chunk_size" size="500"/> 420 <expand macro="param_chunk_size" size="500"/>
293 <param argument="use_x" type="boolean" truevalue="True" falsevalue="False" checked="false" label="If True, use the matrix stored in .X as raw counts"/> 421 <param argument="use_x" type="boolean" truevalue="True" falsevalue="False" checked="false" label="If True, use the matrix stored in .X as raw counts"/>
294 <param argument="id_type" type="select" label="Id type, 'gene' or 'transcript'"> 422 <param argument="id_type" type="select" label="Id type, 'gene' or 'transcript'">
295 <option value="gene" selected="true">gene</option> 423 <option value="gene" selected="true">gene</option>
296 <option value="transcript">transcript</option> 424 <option value="transcript">transcript</option>
297 </param> 425 </param>
426 <param argument="upstream" type="integer" value="2000" label="Number of base pairs upstream of the regulatory domain"/>
427 <param argument="downstream" type="integer" value="0" label="Number of base pairs downstream of regulatory domain"/>
428 <param argument="include_gene_body" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to include the gene body in the regulatory domain. If False, the TSS is used as the regulatory domain."/>
298 <param argument="transcript_name_key" type="text" value="transcript_name" label="The key of the transcript name in the gene annotation file"/> 429 <param argument="transcript_name_key" type="text" value="transcript_name" label="The key of the transcript name in the gene annotation file"/>
299 <param argument="transcript_id_key" type="text" value="transcript_id" label="The key of the transcript id in the gene annotation file"/> 430 <param argument="transcript_id_key" type="text" value="transcript_id" label="The key of the transcript id in the gene annotation file"/>
300 <param argument="gene_name_key" type="text" value="gene_name" label="The key of the gene name in the gene annotation file"/> 431 <param argument="gene_name_key" type="text" value="gene_name" label="The key of the gene name in the gene annotation file"/>
301 <param argument="gene_id_key" type="text" value="gene_id" label="The key of the gene id in the gene annotation file"/> 432 <param argument="gene_id_key" type="text" value="gene_id" label="The key of the gene id in the gene annotation file"/>
302 <expand macro="min_max_frag_size"/> 433 <expand macro="param_min_max_frag_size"/>
303 <expand macro="param_counting_strategy"/> 434 <expand macro="param_counting_strategy"/>
304 </when> 435 </when>
305 <when value="pp.filter_cells"> 436 <when value="pp.filter_cells">
306 <expand macro="inputs_anndata"/> 437 <expand macro="param_inputs_anndata"/>
307 <param argument="min_counts" type="integer" value="1000" label="Minimum number of counts required for a cell to pass filtering"/> 438 <param argument="min_counts" type="integer" value="1000" label="Minimum number of counts required for a cell to pass filtering"/>
308 <param argument="min_tsse" type="float" value="5.0" label="Minimum TSS enrichemnt score required for a cell to pass filtering"/> 439 <param argument="min_tsse" type="float" value="5.0" label="Minimum TSS enrichment score required for a cell to pass filtering"/>
309 <param argument="max_counts" type="integer" value="" optional="true" label="Maximum number of counts required for a cell to pass filtering"/> 440 <param argument="max_counts" type="integer" value="" optional="true" label="Maximum number of counts required for a cell to pass filtering"/>
310 <param argument="max_tsse" type="float" value="" optional="true" label="Maximum TSS enrichment score expressed required for a cell to pass filtering"/> 441 <param argument="max_tsse" type="float" value="" optional="true" label="Maximum TSS enrichment score expressed required for a cell to pass filtering"/>
311 </when> 442 </when>
312 <when value="pp.select_features"> 443 <when value="pp.select_features">
313 <expand macro="inputs_anndata"/> 444 <expand macro="param_inputs_anndata"/>
314 <param argument="n_features" type="integer" min="1" value="500000" label="Number of features to keep"/> 445 <param argument="n_features" type="integer" min="1" value="500000" label="Number of features to keep"/>
315 <param argument="filter_lower_quantile" type="float" min="0" value="0.005" label="Lower quantile of the feature count distribution to filter out"/> 446 <param argument="filter_lower_quantile" type="float" min="0" value="0.005" label="Lower quantile of the feature count distribution to filter out"/>
316 <param argument="filter_upper_quantile" type="float" min="0" value="0.005" label="Upper quantile of the feature count distribution to filter out"/> 447 <param argument="filter_upper_quantile" type="float" min="0" value="0.005" label="Upper quantile of the feature count distribution to filter out"/>
317 <param argument="whitelist" type="data" format="bed" optional="true" label="A user provided bed file containing genome-wide whitelist regions"/> 448 <param argument="whitelist" type="data" format="bed" optional="true" label="A user provided bed file containing genome-wide whitelist regions"/>
318 <param argument="blacklist" type="data" format="bed" optional="true" label="A user provided bed file containing genome-wide blacklist regions"/> 449 <param argument="blacklist" type="data" format="bed" optional="true" label="A user provided bed file containing genome-wide blacklist regions"/>
319 <param argument="max_iter" type="integer" value="1" label="If greater than 1, this function will perform iterative clustering and feature selection"/> 450 <param argument="max_iter" type="integer" value="1" label="If greater than 1, this function will perform iterative clustering and feature selection"/>
320 </when> 451 </when>
321 <when value="pp.scrublet"> 452 <when value="pp.scrublet">
322 <expand macro="inputs_anndata"/> 453 <expand macro="param_inputs_anndata"/>
323 <param argument="features" type="text" value="" optional="true" label="Boolean index mask, where True means that the feature is kept, and False means the feature is removed."/> 454 <param argument="features" type="text" value="" optional="true" label="Boolean index mask, where True means that the feature is kept, and False means the feature is removed."/>
324 <param argument="n_comps" type="integer" value="15" label="Number of components" help="15 is usually sufficient. The algorithm is not sensitive to this parameter"/> 455 <expand macro="param_n_comps" value="15" label="Number of components" help="15 is usually sufficient. The algorithm is not sensitive to this parameter"/>
325 <param argument="sim_doublet_ratio" type="float" value="2.0" label="Number of doublets to simulate relative to the number of observed cells"/> 456 <param argument="sim_doublet_ratio" type="float" value="2.0" label="Number of doublets to simulate relative to the number of observed cells"/>
326 <param argument="expected_doublet_rate" type="float" value="0.1" label="Expected doublet rate"/> 457 <param argument="expected_doublet_rate" type="float" value="0.1" label="Expected doublet rate"/>
327 <param argument="n_neighbors" type="integer" value="" optional="true" label="Number of neighbors used to construct the KNN graph of observed cells and simulated doublets"/> 458 <param argument="n_neighbors" type="integer" value="" optional="true" label="Number of neighbors used to construct the KNN graph of observed cells and simulated doublets"/>
328 <param argument="use_approx_neighbors" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Whether to use approximate search"/> 459 <param argument="use_approx_neighbors" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Whether to use approximate search"/>
329 <param argument="random_state" type="integer" value="0" label="Random state"/> 460 <param argument="random_state" type="integer" value="0" label="Random state"/>
330 </when> 461 </when>
331 <when value="pp.filter_doublets"> 462 <when value="pp.filter_doublets">
332 <expand macro="inputs_anndata"/> 463 <expand macro="param_inputs_anndata"/>
333 <param argument="probability_threshold" type="float" value="0.5" label="Threshold for doublet probability"/> 464 <param argument="probability_threshold" type="float" value="0.5" label="Threshold for doublet probability"/>
334 <param argument="score_threshold" type="float" value="" optional="true" label="Threshold for doublet score"/> 465 <param argument="score_threshold" type="float" value="" optional="true" label="Threshold for doublet score"/>
335 </when> 466 </when>
336 <when value="pp.mnc_correct"> 467 <when value="pp.mnc_correct">
337 <expand macro="inputs_anndata"/> 468 <expand macro="param_inputs_anndata"/>
338 <param argument="batch" type="text" value="batch" label="Batch labels for cells"> 469 <param argument="batch" type="text" value="batch" label="Batch labels for cells">
339 <expand macro="sanitize_query"/> 470 <expand macro="sanitize_query"/>
340 </param> 471 </param>
341 <param argument="n_neighbors" type="integer" value="5" label="Number of mutual nearest neighbors"/> 472 <param argument="n_neighbors" type="integer" value="5" label="Number of mutual nearest neighbors"/>
342 <param argument="n_clusters" type="integer" value="40" label="Number of clusters"/> 473 <param argument="n_clusters" type="integer" value="40" label="Number of clusters"/>
343 <param argument="n_iter" type="integer" value="1" label="Number of iterations"/> 474 <param argument="n_iter" type="integer" value="1" label="Number of iterations"/>
344 <expand macro="params_data_integration"/> 475 <expand macro="param_data_integration"/>
345 </when> 476 </when>
346 <when value="pp.harmony"> 477 <when value="pp.harmony">
347 <expand macro="inputs_anndata"/> 478 <expand macro="param_inputs_anndata"/>
348 <param argument="batch" type="text" value="batch" label="Batch labels for cells"> 479 <param argument="batch" type="text" value="batch" label="Batch labels for cells">
349 <expand macro="sanitize_query"/> 480 <expand macro="sanitize_query"/>
350 </param> 481 </param>
351 <expand macro="params_data_integration"/> 482 <expand macro="param_data_integration"/>
352 </when> 483 </when>
353 <when value="pp.scanorama_integrate"> 484 <when value="pp.scanorama_integrate">
354 <expand macro="inputs_anndata"/> 485 <expand macro="param_inputs_anndata"/>
355 <param argument="batch" type="text" value="batch" label="Batch labels for cells"> 486 <param argument="batch" type="text" value="batch" label="Batch labels for cells">
356 <expand macro="sanitize_query"/> 487 <expand macro="sanitize_query"/>
357 </param> 488 </param>
358 <param argument="n_neighbors" type="integer" value="20" label="Number of mutual nearest neighbors"/> 489 <param argument="n_neighbors" type="integer" value="20" label="Number of mutual nearest neighbors"/>
359 <expand macro="params_data_integration"/> 490 <expand macro="param_data_integration"/>
360 </when> 491 </when>
361 <when value="metrics.frag_size_distr"> 492 <when value="ex.export_fragments">
362 <!-- TODO move this to plotting --> 493 <expand macro="param_inputs_anndata"/>
363 <expand macro="inputs_anndata"/> 494 <expand macro="param_groupby"/>
364 <param argument="max_recorded_size" type="integer" min="1" value="1000" label="The maximum fragment size to record in the result"/> 495 <param argument="--min_frag_length" type="integer" optional="true" min="0" value="" label="Minimum fragment length to be included in the computation"/>
365 <param argument="add_key" type="text" value="frag_size_distr" label="Key used to store the result in `adata.uns`"/> 496 <param argument="--max_frag_length" type="integer" optional="true" min="0" value="" label="Maximum fragment length to be included in the computation"/>
366 </when> 497 </when>
367 <when value="metrics.tsse"> 498 <when value="ex.export_coverage">
368 <!-- TODO move this to plotting --> 499 <expand macro="param_inputs_anndata"/>
369 <expand macro="inputs_anndata"/> 500 <expand macro="param_groupby"/>
370 <param argument="gene_anno" type="data" format="gtf,gff3" label="GTF/GFF file containing the gene annotation"/> 501 <param argument="--bin_size" type="integer" min="0" value="10" label="Size of the bins, in bases, for the output of the bigwig/bedgraph file"/>
502 <param argument="--blacklist" type="data" format="bed" optional="true" label="A BED file containing the blacklisted regions"/>
503 <param argument="--normalization" type="select" label="Normalization method for coverage calculation">
504 <option value="RPKM" selected="true">RPKM (per bin) = #reads per bin / (#mapped_reads (in millions) * bin length (kb))</option>
505 <option value="None">No normalization</option>
506 <option value="CPM">CPM (per bin) = #reads per bin / #mapped_reads (in millions)</option>
507 <option value="BPM">BPM (per bin) = #reads per bin / sum of all reads per bin (in millions)</option>
508 </param>
509 <param argument="--include_for_norm" type="data" format="bed" optional="true" label="A BED file containing the genomic loci to include for normalization"/>
510 <param argument="--exclude_for_norm" type="data" format="bed" optional="true" label="A BED file containing the genomic loci to exclude for normalization"/>
511 <param argument="--min_frag_length" type="integer" optional="true" min="0" value="" label="Minimum fragment length to be included in the computation"/>
512 <param argument="--max_frag_length" type="integer" min="0" value="2000" label="Maximum fragment length to be included in the computation"/>
513 <param argument="--counting_strategy" type="select" label="The strategy to compute feature counts">
514 <option value="fragment" selected="true">fragment - the feature counts are assigned based on the number of fragments that overlap with a region of interest</option>
515 <option value="insertion">insertion - the feature counts are assigned based on the number of insertions that overlap with a region of interest</option>
516 <option value="paired-insertion">paired-insertion - same as insertion but it only counts the insertions once if the pair of insertions of a fragment are both within the same region of interest</option>
517 </param>
518 <param argument="--smooth_base" type="integer" optional="true" min="0" value="" label="Length of the smoothing window in bases for the output of the bigwig/bedgraph file"/>
519 <param argument="--output_format" type="select" label="The output format">
520 <option value="bigwig" selected="true">bigwig</option>
521 <option value="bedgraph">bedgraph</option>
522 </param>
371 </when> 523 </when>
372 </conditional> 524 </conditional>
373 <expand macro="inputs_common_advanced"/> 525 <expand macro="param_common_advanced"/>
374 </inputs> 526 </inputs>
375 <outputs> 527 <outputs>
376 <data name="fragments_out" format="interval" label="${tool.name} (${method.method}) on ${on_string}: Fragment file"> 528 <data name="fragments_out" format="interval" label="${tool.name} (${method.method}) on ${on_string}: Fragment file">
377 <filter>method['method'] == 'pp.make_fragment_file'</filter> 529 <filter>method['method'] == 'pp.make_fragment_file'</filter>
378 </data> 530 </data>
379 <data name="anndata_out" format="h5ad" from_work_dir="anndata.h5ad" label="${tool.name} (${method.method}) on ${on_string}: Annotated data matrix"> 531 <data name="anndata_out" format="h5ad" from_work_dir="anndata.h5ad.gz" label="${tool.name} (${method.method}) on ${on_string}: Annotated data matrix">
380 <filter>method['method'] != 'pp.make_fragment_file'</filter> 532 <filter>method['method'] != 'pp.make_fragment_file' and method['method'] != 'ex.export_fragments' and method['method'] != 'ex.export_coverage'</filter>
533 <change_format>
534 <when input="method.method" value="pp.import_fragments" format="h5" />
535 </change_format>
381 </data> 536 </data>
537 <collection name="export_fragment" type="list" format="bed" label="${tool.name} (${method.method}) on ${on_string}: exported fragments">
538 <discover_datasets pattern="__name_and_ext__" directory="fragments"/>
539 <filter>method['method'] == 'ex.export_fragments'</filter>
540 </collection>
541 <collection name="export_coverage" type="list" label="${tool.name} (${method.method}) on ${on_string}: exported coverage">
542 <discover_datasets pattern="__name_and_ext__" directory="coverage"/>
543 <filter>method['method'] == 'ex.export_coverage'</filter>
544 </collection>
382 <data name="hidden_output" format="txt" label="Log file"> 545 <data name="hidden_output" format="txt" label="Log file">
383 <filter>advanced_common['show_log']</filter> 546 <filter>advanced_common['show_log']</filter>
384 </data> 547 </data>
385 </outputs> 548 </outputs>
386 <tests> 549 <tests>
387 <test expect_num_outputs="1"> 550 <test expect_num_outputs="1">
388 <!-- pp.make_fragment_file --> 551 <!-- pp.make_fragment_file -->
389 <conditional name="method"> 552 <conditional name="method">
390 <param name="method" value="pp.make_fragment_file"/> 553 <param name="method" value="pp.make_fragment_file"/>
391 <param name="bam_file" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21_subsample.bam"/> 554 <param name="bam_file" location="https://zenodo.org/records/17512085/files/pbmc_500_chr21_subsample.bam"/>
392 <param name="is_paired" value="true"/> 555 <param name="is_paired" value="true"/>
393 <conditional name="barcode"> 556 <conditional name="barcode">
394 <param name="extract_type" value="from_tag"/> 557 <param name="extract_type" value="from_tag"/>
395 <param name="barcode_tag" value="CB"/> 558 <param name="barcode_tag" value="CB"/>
396 </conditional> 559 </conditional>
397 <param name="shift_left" value="4"/> 560 <param name="shift_left" value="4"/>
398 <param name="shift_right" value="-5"/> 561 <param name="shift_right" value="-5"/>
399 <param name="min_mapq" value="10"/> 562 <param name="min_mapq" value="10"/>
400 <param name="chunk_size" value="50000000"/> 563 <param name="chunk_size" value="50000000"/>
401 </conditional> 564 </conditional>
402 <output name="fragments_out" location="https://zenodo.org/records/11260316/files/pp.make_fragment_file.pbmc_500_chr21.tsv.gz" ftype="interval" compare="sim_size" delta_frac="0.1"/> 565 <output name="fragments_out" location="https://zenodo.org/records/17512085/files/pp.make_fragment_file.pbmc_500_chr21.tsv.gz" ftype="interval" compare="sim_size" delta_frac="0.1"/>
403 </test> 566 </test>
404 <test expect_num_outputs="2"> 567 <test expect_num_outputs="2">
405 <!-- pp.pp.import_data --> 568 <!-- pp.import_fragments -->
406 <conditional name="method"> 569 <conditional name="method">
407 <param name="method" value="pp.import_data"/> 570 <param name="method" value="pp.import_fragments"/>
408 <param name="fragment_file" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.tsv.gz"/> 571 <param name="fragment_file" location="https://zenodo.org/records/17512085/files/pp.make_fragment_file.pbmc_500_chr21.tsv.gz"/>
409 <param name="chrom_sizes" location="https://zenodo.org/records/11260316/files/chr21_size.tabular"/> 572 <param name="chrom_sizes" location="https://zenodo.org/records/17512085/files/chrom_size.tabular"/>
410 <param name="min_num_fragments" value="1"/> 573 <param name="min_num_fragments" value="200"/>
411 <param name="sorted_by_barcode" value="False"/> 574 <param name="sorted_by_barcode" value="true"/>
575 <param name="chrM" value="" />
412 <param name="shift_left" value="0"/> 576 <param name="shift_left" value="0"/>
413 <param name="chrM" value="chrM, M"/>
414 <param name="shift_right" value="0"/> 577 <param name="shift_right" value="0"/>
415 <param name="chunk_size" value="1000"/> 578 <param name="chunk_size" value="2000"/>
416 </conditional> 579 </conditional>
417 <section name="advanced_common"> 580 <section name="advanced_common">
418 <param name="show_log" value="true"/> 581 <param name="show_log" value="true" />
419 </section> 582 </section>
420 <output name="hidden_output"> 583 <output name="hidden_output">
421 <assert_contents> 584 <assert_contents>
422 <has_text_matching expression="sa.pp.import_data"/> 585 <has_text_matching expression="snap.pp.import_fragments"/>
423 <has_text_matching expression="min_num_fragments = 1"/> 586 <has_text_matching expression="chrom_sizes"/>
424 <has_text_matching expression="sorted_by_barcode = False"/>
425 <has_text_matching expression="shift_left = 0"/> 587 <has_text_matching expression="shift_left = 0"/>
426 <has_text_matching expression="chrM = \['chrM', 'M'\]"/>
427 <has_text_matching expression="shift_right = 0"/> 588 <has_text_matching expression="shift_right = 0"/>
428 <has_text_matching expression="chunk_size = 1000"/> 589 <has_text_matching expression="chunk_size = 2000"/>
429 </assert_contents> 590 </assert_contents>
430 </output> 591 </output>
431 <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.import_data.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/> 592 <output name="anndata_out" ftype="h5">
432 </test> 593 <assert_contents>
433 <test expect_num_outputs="2"> 594 <has_h5_keys keys="obs/n_fragment,obs/frac_dup,obs/frac_mito"/>
434 <!-- pp.make_gene_matrix --> 595 <has_h5_keys keys="uns/reference_sequences"/>
596 <has_h5_keys keys="obsm/fragment_paired"/>
597 </assert_contents>
598 </output>
599 </test>
600 <test expect_num_outputs="2">
601 <!-- pp.make_gene_matrix - history -->
435 <conditional name="method"> 602 <conditional name="method">
436 <param name="method" value="pp.make_gene_matrix"/> 603 <param name="method" value="pp.make_gene_matrix"/>
437 <param name="adata" location="https://zenodo.org/records/11260316/files/tl.leiden.modularity.pbmc_500_chr21.h5ad"/> 604 <param name="adata" location="https://zenodo.org/records/17512085/files/tl.leiden.modularity.pbmc_500_chr21.h5ad"/>
438 <param name="gene_anno" location="https://zenodo.org/records/11260316/files/chr21.gff3.gz"/> 605 <conditional name="gff_file_condi">
606 <param name="gffSource" value="history"/>
607 <param name="gff_history" location="https://zenodo.org/records/17512085/files/chr21.gff3.gz"/>
608 </conditional>
439 <param name="chunk_size" value="500"/> 609 <param name="chunk_size" value="500"/>
440 <param name="use_x" value="False"/> 610 <param name="use_x" value="False"/>
441 <param name="id_type" value="gene"/> 611 <param name="id_type" value="gene"/>
442 <param name="transcript_name_key" value="transcript_name"/> 612 <param name="transcript_name_key" value="transcript_name"/>
443 <param name="transcript_id_key" value="transcript_id"/> 613 <param name="transcript_id_key" value="transcript_id"/>
448 <section name="advanced_common"> 618 <section name="advanced_common">
449 <param name="show_log" value="true" /> 619 <param name="show_log" value="true" />
450 </section> 620 </section>
451 <output name="hidden_output"> 621 <output name="hidden_output">
452 <assert_contents> 622 <assert_contents>
453 <has_text_matching expression="sa.pp.make_gene_matrix"/> 623 <has_text_matching expression="snap.pp.make_gene_matrix"/>
454 <has_text_matching expression="chunk_size = 500"/> 624 <has_text_matching expression="chunk_size = 500"/>
455 <has_text_matching expression="use_x = False"/> 625 <has_text_matching expression="use_x = False"/>
456 <has_text_matching expression="id_type = 'gene'"/> 626 <has_text_matching expression="id_type = 'gene'"/>
457 <has_text_matching expression="transcript_name_key = 'transcript_name'"/> 627 <has_text_matching expression="transcript_name_key = 'transcript_name'"/>
458 <has_text_matching expression="transcript_id_key = 'transcript_id'"/> 628 <has_text_matching expression="transcript_id_key = 'transcript_id'"/>
459 <has_text_matching expression="gene_name_key = 'gene_name'"/> 629 <has_text_matching expression="gene_name_key = 'gene_name'"/>
460 <has_text_matching expression="gene_id_key = 'gene_id'"/> 630 <has_text_matching expression="gene_id_key = 'gene_id'"/>
461 <has_text_matching expression="counting_strategy = 'insertion'"/> 631 <has_text_matching expression="counting_strategy = 'insertion'"/>
462 </assert_contents> 632 </assert_contents>
463 </output> 633 </output>
464 <output name="anndata_out" location="https://zenodo.org/records/12548681/files/pp.make_gene_matrix.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1" /> 634 <output name="anndata_out" ftype="h5ad">
465 </test> 635 <assert_contents>
466 <test expect_num_outputs="2"> 636 <has_h5_keys keys="obs/n_fragment,obs/frac_dup,obs/tsse"/>
467 <!-- metrics.tsse --> 637 </assert_contents>
468 <conditional name="method"> 638 </output>
469 <param name="method" value="metrics.tsse"/> 639 </test>
470 <param name="adata" location="https://zenodo.org/records/11260316/files/pp.import_data.pbmc_500_chr21.h5ad"/> 640 <test expect_num_outputs="2">
471 <param name="gene_anno" location="https://zenodo.org/records/11260316/files/chr21.gff3.gz"/> 641 <!-- pp.make_gene_matrix - cached -->
642 <conditional name="method">
643 <param name="method" value="pp.make_gene_matrix"/>
644 <param name="adata" location="https://zenodo.org/records/17512085/files/tl.leiden.modularity.pbmc_500_chr21.h5ad"/>
645 <conditional name="gff_file_condi">
646 <param name="gffSource" value="cached"/>
647 <param name="gff_pre_installed" value="hg38"/>
472 </conditional> 648 </conditional>
473 <section name="advanced_common"> 649 <param name="chunk_size" value="500"/>
474 <param name="show_log" value="true" /> 650 <param name="use_x" value="False"/>
475 </section> 651 <param name="id_type" value="gene"/>
476 <output name="hidden_output"> 652 <param name="transcript_name_key" value="transcript_name"/>
477 <assert_contents> 653 <param name="transcript_id_key" value="transcript_id"/>
478 <has_text_matching expression="sa.metrics.tsse"/> 654 <param name="gene_name_key" value="gene_name"/>
479 </assert_contents> 655 <param name="gene_id_key" value="gene_id"/>
480 </output> 656 <param name="counting_strategy" value="insertion"/>
481 <output name="anndata_out" location="https://zenodo.org/records/11260316/files/metrics.tsse.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1" /> 657 </conditional>
658 <section name="advanced_common">
659 <param name="show_log" value="true" />
660 </section>
661 <output name="hidden_output">
662 <assert_contents>
663 <has_text_matching expression="snap.pp.make_gene_matrix"/>
664 <has_text_matching expression="chunk_size = 500"/>
665 <has_text_matching expression="use_x = False"/>
666 <has_text_matching expression="id_type = 'gene'"/>
667 <has_text_matching expression="transcript_name_key = 'transcript_name'"/>
668 <has_text_matching expression="transcript_id_key = 'transcript_id'"/>
669 <has_text_matching expression="gene_name_key = 'gene_name'"/>
670 <has_text_matching expression="gene_id_key = 'gene_id'"/>
671 <has_text_matching expression="counting_strategy = 'insertion'"/>
672 </assert_contents>
673 </output>
674 <output name="anndata_out" ftype="h5ad">
675 <assert_contents>
676 <has_h5_keys keys="obs/n_fragment,obs/frac_dup,obs/tsse"/>
677 </assert_contents>
678 </output>
482 </test> 679 </test>
483 <test expect_num_outputs="2"> 680 <test expect_num_outputs="2">
484 <!-- pp.filter_cells --> 681 <!-- pp.filter_cells -->
485 <conditional name="method"> 682 <conditional name="method">
486 <param name="method" value="pp.filter_cells"/> 683 <param name="method" value="pp.filter_cells"/>
487 <param name="adata" location="https://zenodo.org/records/11260316/files/metrics.tsse.pbmc_500_chr21.h5ad"/> 684 <param name="adata" location="https://zenodo.org/records/17512085/files/pp.make_gene_matrix.pbmc_500_chr21.h5ad"/>
488 <param name="min_counts" value="200"/> 685 <param name="min_counts" value="500"/>
489 <param name="min_tsse" value="5"/> 686 <param name="min_tsse" value="1"/>
490 <param name="max_counts" value="10000"/> 687 <param name="max_counts" value="10000"/>
491 </conditional> 688 </conditional>
492 <section name="advanced_common"> 689 <section name="advanced_common">
493 <param name="show_log" value="true" /> 690 <param name="show_log" value="true" />
494 </section> 691 </section>
495 <output name="hidden_output"> 692 <assert_stdout>
496 <assert_contents> 693 <has_text_matching expression="6 × 9342"/>
497 <has_text_matching expression="sa.pp.filter_cells"/> 694 </assert_stdout>
498 <has_text_matching expression="min_counts = 200"/> 695 <output name="hidden_output">
499 <has_text_matching expression="min_tsse = 5"/> 696 <assert_contents>
697 <has_text_matching expression="snap.pp.filter_cells"/>
698 <has_text_matching expression="min_counts = 500"/>
699 <has_text_matching expression="min_tsse = 1"/>
500 <has_text_matching expression="max_counts = 10000"/> 700 <has_text_matching expression="max_counts = 10000"/>
501 </assert_contents> 701 </assert_contents>
502 </output> 702 </output>
503 <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.filter_cells.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1" /> 703 <output name="anndata_out" ftype="h5ad">
704 <assert_contents>
705 <has_h5_keys keys="obs/n_fragment"/>
706 </assert_contents>
707 </output>
504 </test> 708 </test>
505 <test expect_num_outputs="2"> 709 <test expect_num_outputs="2">
506 <!-- pp.add_tile_matrix --> 710 <!-- pp.add_tile_matrix -->
507 <conditional name="method"> 711 <conditional name="method">
508 <param name="method" value="pp.add_tile_matrix"/> 712 <param name="method" value="pp.add_tile_matrix"/>
509 <param name="adata" location="https://zenodo.org/records/11260316/files/pp.filter_cells.pbmc_500_chr21.h5ad"/> 713 <param name="adata" location="https://zenodo.org/records/17512085/files/pp.filter_cells.pbmc_500_chr21.h5ad"/>
510 <param name="bin_size" value="5000"/> 714 <param name="bin_size" value="5000"/>
511 <param name="chunk_size" value="500"/> 715 <param name="chunk_size" value="500"/>
512 <param name="exclude_chroms" value="chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr22, chrX, chrY"/> 716 <param name="exclude_chroms" value="chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr22, chrX, chrY"/>
513 <param name="counting_strategy" value="insertion"/> 717 <param name="counting_strategy" value="insertion"/>
514 </conditional> 718 </conditional>
515 <section name="advanced_common"> 719 <section name="advanced_common">
516 <param name="show_log" value="true" /> 720 <param name="show_log" value="true" />
517 </section> 721 </section>
518 <output name="hidden_output"> 722 <output name="hidden_output">
519 <assert_contents> 723 <assert_contents>
520 <has_text_matching expression="sa.pp.add_tile_matrix"/> 724 <has_text_matching expression="snap.pp.add_tile_matrix"/>
521 <has_text_matching expression="bin_size = 5000"/> 725 <has_text_matching expression="bin_size = 5000"/>
522 <has_text_matching expression="chunk_size = 500"/> 726 <has_text_matching expression="chunk_size = 500"/>
523 <has_text_matching expression="exclude_chroms = \['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr20', 'chr22', 'chrX', 'chrY'\]"/> 727 <has_text_matching expression="exclude_chroms = \['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr20', 'chr22', 'chrX', 'chrY'\]"/>
524 <has_text_matching expression="counting_strategy = 'insertion'"/> 728 <has_text_matching expression="counting_strategy = 'insertion'"/>
525 </assert_contents> 729 </assert_contents>
526 </output> 730 </output>
527 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.add_tile_matrix.pbmc_500_chr21.h5ad"/> 731 <output name="anndata_out" ftype="h5ad">
732 <assert_contents>
733 <has_h5_keys keys="obs/n_fragment,obs/frac_dup,obs/frac_mito,obs/tsse"/>
734 <has_h5_keys keys="uns/reference_sequences"/>
735 <has_h5_keys keys="obsm/fragment_paired"/>
736 </assert_contents>
737 </output>
528 </test> 738 </test>
529 <test expect_num_outputs="2"> 739 <test expect_num_outputs="2">
530 <!-- pp.add_tile_matrix counting_strategy fragment --> 740 <!-- pp.add_tile_matrix counting_strategy fragment -->
531 <conditional name="method"> 741 <conditional name="method">
532 <param name="method" value="pp.add_tile_matrix"/> 742 <param name="method" value="pp.add_tile_matrix"/>
533 <param name="adata" location="https://zenodo.org/records/11260316/files/pp.filter_cells.pbmc_500_chr21.h5ad"/> 743 <param name="adata" location="https://zenodo.org/records/17512085/files/pp.filter_cells.pbmc_500_chr21.h5ad"/>
534 <param name="bin_size" value="5000"/> 744 <param name="bin_size" value="5000"/>
535 <param name="chunk_size" value="500"/> 745 <param name="chunk_size" value="500"/>
536 <param name="exclude_chroms" value="chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr22, chrX, chrY"/> 746 <param name="exclude_chroms" value="chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr22, chrX, chrY"/>
537 <param name="counting_strategy" value="fragment"/> 747 <param name="counting_strategy" value="fragment"/>
538 </conditional> 748 </conditional>
539 <section name="advanced_common"> 749 <section name="advanced_common">
540 <param name="show_log" value="true" /> 750 <param name="show_log" value="true" />
541 </section> 751 </section>
542 <output name="hidden_output"> 752 <output name="hidden_output">
543 <assert_contents> 753 <assert_contents>
544 <has_text_matching expression="sa.pp.add_tile_matrix"/> 754 <has_text_matching expression="snap.pp.add_tile_matrix"/>
545 <has_text_matching expression="bin_size = 5000"/> 755 <has_text_matching expression="bin_size = 5000"/>
546 <has_text_matching expression="chunk_size = 500"/> 756 <has_text_matching expression="chunk_size = 500"/>
547 <has_text_matching expression="exclude_chroms = \['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr20', 'chr22', 'chrX', 'chrY'\]"/> 757 <has_text_matching expression="exclude_chroms = \['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr20', 'chr22', 'chrX', 'chrY'\]"/>
548 <has_text_matching expression="counting_strategy = 'fragment'"/> 758 <has_text_matching expression="counting_strategy = 'fragment'"/>
549 </assert_contents> 759 </assert_contents>
550 </output> 760 </output>
551 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.add_tile_matrix.pbmc_500_chr21.h5ad"/> 761 <output name="anndata_out" ftype="h5ad">
762 <assert_contents>
763 <has_h5_keys keys="obs/n_fragment,obs/frac_dup,obs/tsse"/>
764 <has_h5_keys keys="obsm/fragment_paired"/>
765 </assert_contents>
766 </output>
552 </test> 767 </test>
553 <test expect_num_outputs="2"> 768 <test expect_num_outputs="2">
554 <!-- pp.select_features --> 769 <!-- pp.select_features -->
555 <conditional name="method"> 770 <conditional name="method">
556 <param name="method" value="pp.select_features"/> 771 <param name="method" value="pp.select_features"/>
557 <param name="adata" location="https://zenodo.org/records/11260316/files/pp.add_tile_matrix.pbmc_500_chr21.h5ad"/> 772 <param name="adata" location="https://zenodo.org/records/17512085/files/pp.add_tile_matrix.pbmc_500_chr21.h5ad"/>
558 <param name="n_features" value="15000"/> 773 <param name="n_features" value="15000"/>
559 </conditional> 774 </conditional>
560 <section name="advanced_common"> 775 <section name="advanced_common">
561 <param name="show_log" value="true" /> 776 <param name="show_log" value="true" />
562 </section> 777 </section>
563 <output name="hidden_output"> 778 <output name="hidden_output">
564 <assert_contents> 779 <assert_contents>
565 <has_text_matching expression="sa.pp.select_features"/> 780 <has_text_matching expression="snap.pp.select_features"/>
566 <has_text_matching expression="n_features = 15000"/> 781 <has_text_matching expression="n_features = 15000"/>
567 </assert_contents> 782 </assert_contents>
568 </output> 783 </output>
569 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.select_features.pbmc_500_chr21.h5ad"/> 784 <output name="anndata_out" ftype="h5ad">
785 <assert_contents>
786 <has_h5_keys keys="var/count,var/selected"/>
787 </assert_contents>
788 </output>
570 </test> 789 </test>
571 <test expect_num_outputs="2"> 790 <test expect_num_outputs="2">
572 <!-- pp.scrublet --> 791 <!-- pp.scrublet -->
573 <conditional name="method"> 792 <conditional name="method">
574 <param name="method" value="pp.scrublet"/> 793 <param name="method" value="pp.scrublet"/>
575 <param name="adata" location="https://zenodo.org/records/11260316/files/pp.select_features.pbmc_500_chr21.h5ad"/> 794 <param name="adata" location="https://zenodo.org/records/17512085/files/pp.select_features.pbmc_500_chr21.h5ad"/>
576 <param name="n_comps" value="15"/> 795 <param name="n_comps" value="15"/>
577 <param name="sim_doublet_ratio" value="2.0"/> 796 <param name="sim_doublet_ratio" value="2.0"/>
578 <param name="expected_doublet_rate" value="0.1"/> 797 <param name="expected_doublet_rate" value="0.1"/>
579 <param name="random_state" value="0"/> 798 <param name="random_state" value="0"/>
580 </conditional> 799 </conditional>
581 <section name="advanced_common"> 800 <section name="advanced_common">
582 <param name="show_log" value="true" /> 801 <param name="show_log" value="true" />
583 </section> 802 </section>
584 <output name="hidden_output"> 803 <assert_stdout>
585 <assert_contents> 804 <has_text_matching expression="158 × 9342"/>
586 <has_text_matching expression="sa.pp.scrublet"/> 805 </assert_stdout>
806 <output name="hidden_output">
807 <assert_contents>
808 <has_text_matching expression="snap.pp.scrublet"/>
587 <has_text_matching expression="n_comps = 15"/> 809 <has_text_matching expression="n_comps = 15"/>
588 <has_text_matching expression="sim_doublet_ratio = 2.0"/> 810 <has_text_matching expression="sim_doublet_ratio = 2.0"/>
589 <has_text_matching expression="expected_doublet_rate = 0.1"/> 811 <has_text_matching expression="expected_doublet_rate = 0.1"/>
590 <has_text_matching expression="random_state = 0"/> 812 <has_text_matching expression="random_state = 0"/>
591 </assert_contents> 813 </assert_contents>
592 </output> 814 </output>
593 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.scrublet.pbmc_500_chr21.h5ad"/> 815 <output name="anndata_out" ftype="h5ad">
816 <assert_contents>
817 <has_h5_keys keys="obs/doublet_probability,obs/doublet_score"/>
818 </assert_contents>
819 </output>
594 </test> 820 </test>
595 <test expect_num_outputs="2"> 821 <test expect_num_outputs="2">
596 <!-- pp.filter_doublets --> 822 <!-- pp.filter_doublets -->
597 <conditional name="method"> 823 <conditional name="method">
598 <param name="method" value="pp.filter_doublets"/> 824 <param name="method" value="pp.filter_doublets"/>
599 <param name="adata" location="https://zenodo.org/records/11260316/files/pp.scrublet.pbmc_500_chr21.h5ad"/> 825 <param name="adata" location="https://zenodo.org/records/17512085/files/pp.scrublet.pbmc_500_chr21.h5ad"/>
600 <param name="probability_threshold" value="0.1"/> 826 <param name="probability_threshold" value="0.1"/>
601 </conditional> 827 </conditional>
602 <section name="advanced_common"> 828 <section name="advanced_common">
603 <param name="show_log" value="true" /> 829 <param name="show_log" value="true" />
604 </section> 830 </section>
605 <output name="hidden_output"> 831 <assert_stdout>
606 <assert_contents> 832 <has_text_matching expression="156 × 9342"/>
607 <has_text_matching expression="sa.pp.filter_doublets"/> 833 </assert_stdout>
608 </assert_contents> 834 <output name="hidden_output">
609 </output> 835 <assert_contents>
610 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.filter_doublets.pbmc_500_chr21.h5ad"/> 836 <has_text_matching expression="snap.pp.filter_doublets"/>
837 </assert_contents>
838 </output>
839 <output name="anndata_out" ftype="h5ad">
840 <assert_contents>
841 <has_h5_keys keys="obs/doublet_probability,obs/doublet_score"/>
842 <has_h5_keys keys="uns/doublet_rate"/>
843 </assert_contents>
844 </output>
611 </test> 845 </test>
612 <test expect_num_outputs="2"> 846 <test expect_num_outputs="2">
613 <!-- pp.mnc_correct --> 847 <!-- pp.mnc_correct -->
614 <conditional name="method"> 848 <conditional name="method">
615 <param name="method" value="pp.mnc_correct"/> 849 <param name="method" value="pp.mnc_correct"/>
616 <param name="adata" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.batch.h5ad"/> 850 <param name="adata" location="https://zenodo.org/records/17512085/files/pbmc_500_chr21.batch.h5ad"/>
617 <param name="batch" value="batch"/> 851 <param name="batch" value="batch"/>
618 <param name="n_neighbors" value="3"/> 852 <param name="n_neighbors" value="3"/>
619 <param name="n_clusters" value="10"/> 853 <param name="n_clusters" value="10"/>
620 <param name="use_rep" value="X_spectral"/> 854 <param name="use_rep" value="X_spectral"/>
621 </conditional> 855 </conditional>
622 <section name="advanced_common"> 856 <section name="advanced_common">
623 <param name="show_log" value="true" /> 857 <param name="show_log" value="true" />
624 </section> 858 </section>
625 <output name="hidden_output"> 859 <output name="hidden_output">
626 <assert_contents> 860 <assert_contents>
627 <has_text_matching expression="sa.pp.mnc_correct"/> 861 <has_text_matching expression="snap.pp.mnc_correct"/>
628 <has_text_matching expression="batch = 'batch'"/> 862 <has_text_matching expression="batch = 'batch'"/>
629 <has_text_matching expression="n_neighbors = 3"/> 863 <has_text_matching expression="n_neighbors = 3"/>
630 <has_text_matching expression="n_clusters = 10"/> 864 <has_text_matching expression="n_clusters = 10"/>
631 <has_text_matching expression="batch = 'batch'"/> 865 <has_text_matching expression="batch = 'batch'"/>
632 <has_text_matching expression="use_rep = 'X_spectral'"/> 866 <has_text_matching expression="use_rep = 'X_spectral'"/>
633 </assert_contents> 867 </assert_contents>
634 </output> 868 </output>
635 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.mnc_correct.pbmc_500_chr21.h5ad"/> 869 <output name="anndata_out" ftype="h5ad">
870 <assert_contents>
871 <has_h5_keys keys="obsm/X_spectral_mnn"/>
872 </assert_contents>
873 </output>
636 </test> 874 </test>
637 <test expect_num_outputs="2"> 875 <test expect_num_outputs="2">
638 <!-- pp.harmony --> 876 <!-- pp.harmony -->
639 <conditional name="method"> 877 <conditional name="method">
640 <param name="method" value="pp.harmony"/> 878 <param name="method" value="pp.harmony"/>
641 <param name="adata" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.batch.h5ad"/> 879 <param name="adata" location="https://zenodo.org/records/17512085/files/pbmc_500_chr21.batch.h5ad"/>
642 <param name="batch" value="batch"/> 880 <param name="batch" value="batch"/>
643 <param name="use_rep" value="X_spectral"/> 881 <param name="use_rep" value="X_spectral"/>
644 </conditional> 882 </conditional>
645 <section name="advanced_common"> 883 <section name="advanced_common">
646 <param name="show_log" value="true" /> 884 <param name="show_log" value="true" />
647 </section> 885 </section>
648 <output name="hidden_output"> 886 <output name="hidden_output">
649 <assert_contents> 887 <assert_contents>
650 <has_text_matching expression="sa.pp.harmony"/> 888 <has_text_matching expression="snap.pp.harmony"/>
651 <has_text_matching expression="batch = 'batch'"/> 889 <has_text_matching expression="batch = 'batch'"/>
652 <has_text_matching expression="use_rep = 'X_spectral'"/> 890 <has_text_matching expression="use_rep = 'X_spectral'"/>
653 </assert_contents> 891 </assert_contents>
654 </output> 892 </output>
655 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.harmony.pbmc_500_chr21.h5ad"/> 893 <output name="anndata_out" ftype="h5ad">
894 <assert_contents>
895 <has_h5_keys keys="obsm/X_spectral_harmony"/>
896 </assert_contents>
897 </output>
656 </test> 898 </test>
657 <test expect_num_outputs="2"> 899 <test expect_num_outputs="2">
658 <!-- pp.scanorama_integrate --> 900 <!-- pp.scanorama_integrate -->
659 <conditional name="method"> 901 <conditional name="method">
660 <param name="method" value="pp.scanorama_integrate"/> 902 <param name="method" value="pp.scanorama_integrate"/>
661 <param name="adata" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.batch.h5ad"/> 903 <param name="adata" location="https://zenodo.org/records/17512085/files/pbmc_500_chr21.batch.h5ad"/>
662 <param name="batch" value="batch"/> 904 <param name="batch" value="batch"/>
663 <param name="use_rep" value="X_spectral"/> 905 <param name="use_rep" value="X_spectral"/>
664 </conditional> 906 </conditional>
665 <section name="advanced_common"> 907 <section name="advanced_common">
666 <param name="show_log" value="true" /> 908 <param name="show_log" value="true" />
667 </section> 909 </section>
668 <output name="hidden_output"> 910 <output name="hidden_output">
669 <assert_contents> 911 <assert_contents>
670 <has_text_matching expression="sa.pp.scanorama_integrate"/> 912 <has_text_matching expression="snap.pp.scanorama_integrate"/>
671 <has_text_matching expression="batch = 'batch'"/> 913 <has_text_matching expression="batch = 'batch'"/>
672 <has_text_matching expression="use_rep = 'X_spectral'"/> 914 <has_text_matching expression="use_rep = 'X_spectral'"/>
673 </assert_contents> 915 </assert_contents>
674 </output> 916 </output>
675 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.scanorama_integrate.pbmc_500_chr21.h5ad"/> 917 <output name="anndata_out" ftype="h5ad">
676 </test> 918 <assert_contents>
677 <test expect_num_outputs="2"> 919 <has_h5_keys keys="obsm/X_spectral_scanorama"/>
678 <!-- metrics.frag_size_distr --> 920 </assert_contents>
679 <conditional name="method"> 921 </output>
680 <param name="method" value="metrics.frag_size_distr"/> 922 </test>
681 <param name="adata" location="https://zenodo.org/records/11260316/files/pp.import_data.pbmc_500_chr21.h5ad"/> 923 <!-- pp.import_contacts -->
682 <param name="max_recorded_size" value="500"/> 924 <test expect_num_outputs="2">
683 <param name="add_key" value="frag_size_distr"/> 925 <conditional name="method">
926 <param name="method" value="pp.import_contacts"/>
927 <param name="contact_file" location="https://zenodo.org/records/17512085/files/pp.make_fragment_file.pbmc_500_chr21.tsv.gz"/>
928 <param name="chrom_sizes" location="https://zenodo.org/records/17512085/files/chrom_size.tabular"/>
929 </conditional>
930 <section name="advanced_common">
931 <param name="show_log" value="true" />
932 </section>
933 <output name="hidden_output">
934 <assert_contents>
935 <has_text_matching expression="snap.pp.import_contacts"/>
936 <has_text_matching expression="chrom_sizes"/>
937 <has_text_matching expression="sorted_by_barcode = True"/>
938 <has_text_matching expression="bin_size = 500000"/>
939 <has_text_matching expression="chunk_size = 200"/>
940 </assert_contents>
941 </output>
942 <output name="anndata_out" ftype="h5ad">
943 <assert_contents>
944 <has_h5_keys keys="uns/reference_sequences"/>
945 </assert_contents>
946 </output>
947 </test>
948 <test expect_num_outputs="2">
949 <!-- ex.export_fragments -->
950 <conditional name="method">
951 <param name="method" value="ex.export_fragments"/>
952 <param name="adata" location="https://zenodo.org/records/17512085/files/tl.leiden.modularity.pbmc_500_chr21.h5ad"/>
953 <param name="groupby" value="leiden"/>
684 </conditional> 954 </conditional>
685 <section name="advanced_common"> 955 <section name="advanced_common">
686 <param name="show_log" value="true" /> 956 <param name="show_log" value="true" />
687 </section> 957 </section>
688 <output name="hidden_output"> 958 <output name="hidden_output">
689 <assert_contents> 959 <assert_contents>
690 <has_text_matching expression="sa.metrics.frag_size_distr"/> 960 <has_text_matching expression="snap.ex.export_fragments"/>
691 <has_text_matching expression="add_key = 'frag_size_distr'"/> 961 <has_text_matching expression="groupby = &apos;leiden&apos;"/>
692 </assert_contents> 962 </assert_contents>
693 </output> 963 </output>
694 <output name="anndata_out" location="https://zenodo.org/records/11260316/files/metrics.frag_size_distr.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1" /> 964 <output_collection name="export_fragment" type="list" count="8">
965 <element name="-1.bed">
966 <assert_contents>
967 <has_n_lines n="1018"/>
968 </assert_contents>
969 </element>
970 <element name="0.bed">
971 <assert_contents>
972 <has_n_lines n="2973"/>
973 </assert_contents>
974 </element>
975 <element name="1.bed">
976 <assert_contents>
977 <has_n_lines n="1661"/>
978 </assert_contents>
979 </element>
980 <element name="2.bed">
981 <assert_contents>
982 <has_n_lines n="1605"/>
983 </assert_contents>
984 </element>
985 <element name="3.bed">
986 <assert_contents>
987 <has_n_lines n="1457"/>
988 </assert_contents>
989 </element>
990 <element name="4.bed">
991 <assert_contents>
992 <has_n_lines n="620"/>
993 </assert_contents>
994 </element>
995 <element name="5.bed">
996 <assert_contents>
997 <has_n_lines n="2618"/>
998 </assert_contents>
999 </element>
1000 <element name="6.bed">
1001 <assert_contents>
1002 <has_n_lines n="225"/>
1003 </assert_contents>
1004 </element>
1005 </output_collection>
1006 </test>
1007 <test expect_num_outputs="2">
1008 <!-- ex.export_coverage -->
1009 <conditional name="method">
1010 <param name="method" value="ex.export_coverage"/>
1011 <param name="adata" location="https://zenodo.org/records/17512085/files/tl.leiden.modularity.pbmc_500_chr21.h5ad"/>
1012 <param name="groupby" value="leiden"/>
1013 </conditional>
1014 <section name="advanced_common">
1015 <param name="show_log" value="true" />
1016 </section>
1017 <output name="hidden_output">
1018 <assert_contents>
1019 <has_text_matching expression="snap.ex.export_coverage"/>
1020 <has_text_matching expression="groupby = &apos;leiden&apos;"/>
1021 <has_text_matching expression="output_format = &apos;bigwig&apos;"/>
1022 </assert_contents>
1023 </output>
1024 <output_collection name="export_coverage" type="list" count="8">
1025 <element name="-1">
1026 <assert_contents>
1027 <has_n_lines n="981"/>
1028 </assert_contents>
1029 </element>
1030 <element name="0">
1031 <assert_contents>
1032 <has_n_lines n="2447"/>
1033 </assert_contents>
1034 </element>
1035 <element name="1">
1036 <assert_contents>
1037 <has_n_lines n="1571"/>
1038 </assert_contents>
1039 </element>
1040 <element name="2">
1041 <assert_contents>
1042 <has_n_lines n="1499"/>
1043 </assert_contents>
1044 </element>
1045 <element name="3">
1046 <assert_contents>
1047 <has_n_lines n="1269"/>
1048 </assert_contents>
1049 </element>
1050 <element name="4">
1051 <assert_contents>
1052 <has_n_lines n="640"/>
1053 </assert_contents>
1054 </element>
1055 <element name="5">
1056 <assert_contents>
1057 <has_n_lines n="2420"/>
1058 </assert_contents>
1059 </element>
1060 <element name="6">
1061 <assert_contents>
1062 <has_n_lines n="177"/>
1063 </assert_contents>
1064 </element>
1065 </output_collection>
695 </test> 1066 </test>
696 </tests> 1067 </tests>
697 <help><![CDATA[ 1068 <help><![CDATA[
698 Convert a BAM file`to a fragment file, using `pp.make_fragment_file` 1069 Convert a BAM file to a fragment file, using `pp.make_fragment_file`
699 ==================================================================== 1070 ====================================================================
700 1071
701 Convert a BAM file to a fragment file. 1072 Convert a BAM file to a fragment file.
702 1073
703 Convert a BAM file to a fragment file by performing the following steps: 1074 Convert a BAM file to a fragment file by performing the following steps:
709 - Output: Convert BAM records to fragments (if paired-end) or single-end reads. 1080 - Output: Convert BAM records to fragments (if paired-end) or single-end reads.
710 1081
711 The bam file needn’t be sorted or filtered. 1082 The bam file needn’t be sorted or filtered.
712 1083
713 More details on the `SnapATAC2 documentation 1084 More details on the `SnapATAC2 documentation
714 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.make_fragment_file.html>`__ 1085 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.make_fragment_file.html>`__
715 1086
716 Import data fragment file` and compute basic QC metrics, using `pp.import_data` 1087 Generate cell by bin count matrix, using `pp.import_fragments`
717 =============================================================================== 1088 ==============================================================
718 1089
719 Import data fragment files and compute basic QC metrics. 1090 Import data fragment files and compute basic QC metrics.
720 1091
721 A fragment refers to the sequence data originating from a distinct location in the genome. In single-ended sequencing, one read equates to a fragment. However, in paired-ended sequencing, a fragment is defined by a pair of reads. This function is designed to handle, store, and process input files with fragment data, further yielding a range of basic Quality Control (QC) metrics. These metrics include the total number of unique fragments, duplication rates, and the percentage of mitochondrial DNA detected. 1092 This function is used to generate and add a cell by bin count matrix to the AnnData object.
722 1093 This function accepts both single-end and paired-end reads. If the records in the fragment file contain 6 columns with the last column representing the strand of the fragment, the fragments are considered single-ended. Otherwise, the fragments are considered paired-ended.
723 How fragments are stored is dependent on the sequencing approach utilized. For single-ended sequencing, fragments are found in `.obsm['fragment_single']`. In contrast, for paired-ended sequencing, they are located in `.obsm['fragment_paired']`. 1094
724 1095 More details on the `SnapATAC2 documentation
725 More details on the `SnapATAC2 documentation 1096 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.import_fragments.html>`__
726 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.import_data.html>`__ 1097
1098 Import chromatin contacts, using `pp.import_contacts`
1099 =============================================================
1100
1101 Import chromatin contacts.
1102
1103 More details on the `SnapATAC2 documentation
1104 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.import_contacts.html>`__
727 1105
728 Generate cell by bin count matrix, using `pp.add_tile_matrix` 1106 Generate cell by bin count matrix, using `pp.add_tile_matrix`
729 ============================================================= 1107 =============================================================
730 1108
731 Generate cell by bin count matrix. 1109 Generate cell by bin count matrix.
732 1110
733 This function is used to generate and add a cell by bin count matrix to the AnnData object. 1111 This function is used to generate and add a cell by bin count matrix to the AnnData object.
734 1112
735 `import_data` must be ran first in order to use this function. 1113 More details on the `SnapATAC2 documentation
736 1114 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.add_tile_matrix.html>`__
737 More details on the `SnapATAC2 documentation
738 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.add_tile_matrix.html>`__
739 1115
740 Generate cell by gene activity matrix, using `pp.make_gene_matrix` 1116 Generate cell by gene activity matrix, using `pp.make_gene_matrix`
741 ================================================================== 1117 ==================================================================
742 1118
743 Generate cell by gene activity matrix. 1119 Generate cell by gene activity matrix.
744 1120
745 Generate cell by gene activity matrix by counting the TN5 insertions in gene body regions. The result will be stored in a new file and a new AnnData object will be created. 1121 Generate cell by gene activity matrix by counting the TN5 insertions in gene body regions. The result will be stored in a new file and a new AnnData object will be created.
746 1122
747 `import_data` must be ran first in order to use this function. 1123 More details on the `SnapATAC2 documentation
748 1124 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.make_gene_matrix.html>`__
749 More details on the `SnapATAC2 documentation
750 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.make_gene_matrix.html>`__
751 1125
752 Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells` 1126 Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells`
753 ============================================================================================ 1127 ============================================================================================
754 1128
755 Filter cell outliers based on counts and numbers of genes expressed. For instance, only keep cells with at least `min_counts` counts or `min_ts`` TSS enrichment scores. This is to filter measurement outliers, i.e. “unreliable” observations. 1129 Filter cell outliers based on counts and numbers of genes expressed. For instance, only keep cells with at least `min_counts` counts or `min_tsse` TSS enrichment scores. This is to filter measurement outliers, i.e. “unreliable” observations.
756 1130
757 More details on the `SnapATAC2 documentation 1131 More details on the `SnapATAC2 documentation
758 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.filter_cells.html>`__ 1132 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.filter_cells.html>`__
759 1133
760 Perform feature selection, using `pp.select_features` 1134 Perform feature selection, using `pp.select_features`
761 ===================================================== 1135 =====================================================
762 1136
763 Perform feature selection by selecting the most accessibile features across all cells unless `max_iter` > 1 1137 Perform feature selection by selecting the most accessible features across all cells unless `max_iter` > 1
764 1138
765 More details on the `SnapATAC2 documentation 1139 More details on the `SnapATAC2 documentation
766 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.select_features.html>`__ 1140 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.select_features.html>`__
767 1141
768 Compute probability of being a doublet using the scrublet algorithm, using `pp.scrublet` 1142 Compute probability of being a doublet using the scrublet algorithm, using `pp.scrublet`
769 ======================================================================================== 1143 ========================================================================================
770 1144
771 Compute probability of being a doublet using the scrublet algorithm. 1145 Compute probability of being a doublet using the scrublet algorithm.
772 1146
773 This function identifies doublets by generating simulated doublets using randomly pairing chromatin accessibility profiles of individual cells. The simulated doublets are then embedded alongside the original cells using the spectral embedding algorithm in this package. A k-nearest-neighbor classifier is trained to distinguish between the simulated doublets and the authentic cells. This trained classifier produces a “doublet score” for each cell. The doublet scores are then converted into probabilities using a Gaussian mixture model. 1147 This function identifies doublets by generating simulated doublets using randomly pairing chromatin accessibility profiles of individual cells. The simulated doublets are then embedded alongside the original cells using the spectral embedding algorithm in this package. A k-nearest-neighbor classifier is trained to distinguish between the simulated doublets and the authentic cells. This trained classifier produces a “doublet score” for each cell. The doublet scores are then converted into probabilities using a Gaussian mixture model.
774 1148
775 More details on the `SnapATAC2 documentation 1149 More details on the `SnapATAC2 documentation
776 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.scrublet.html>`__ 1150 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.scrublet.html>`__
777 1151
778 Remove doublets according to the doublet probability or doublet score, using `pp.filter_doublets` 1152 Remove doublets according to the doublet probability or doublet score, using `pp.filter_doublets`
779 ================================================================================================= 1153 =================================================================================================
780 1154
781 Remove doublets according to the doublet probability or doublet score. 1155 Remove doublets according to the doublet probability or doublet score.
782 1156
783 The user can choose to remove doublets by either the doublet probability or the doublet score. `scrublet` must be ran first in order to use this function. 1157 The user can choose to remove doublets by either the doublet probability or the doublet score. `scrublet` must be run first in order to use this function.
784 1158
785 More details on the `SnapATAC2 documentation 1159 More details on the `SnapATAC2 documentation
786 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.filter_doublets.html>`__ 1160 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.filter_doublets.html>`__
787 1161
788 A modified MNN-Correct algorithm based on cluster centroid, using `pp.mnc_correct` 1162 A modified MNN-Correct algorithm based on cluster centroid, using `pp.mnc_correct`
789 ================================================================================== 1163 ==================================================================================
790 1164
791 A modified MNN-Correct algorithm based on cluster centroid. 1165 A modified MNN-Correct algorithm based on cluster centroid.
792 1166
793 More details on the `SnapATAC2 documentation 1167 More details on the `SnapATAC2 documentation
794 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.mnc_correct.html>`__ 1168 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.mnc_correct.html>`__
795 1169
796 Use harmonypy to integrate different experiments,using `pp.harmony` 1170 Use harmonypy to integrate different experiments,using `pp.harmony`
797 =================================================================== 1171 ===================================================================
798 1172
799 Use harmonypy to integrate different experiments. 1173 Use harmonypy to integrate different experiments.
800 1174
801 Harmony is an algorithm for integrating single-cell data from multiple experiments. This function uses the python port of Harmony, `harmonypy`, to integrate single-cell data stored in an AnnData object. This function should be run after performing dimension reduction. 1175 Harmony is an algorithm for integrating single-cell data from multiple experiments. This function uses the python port of Harmony, `harmonypy`, to integrate single-cell data stored in an AnnData object. This function should be run after performing dimension reduction.
802 1176
803 More details on the `SnapATAC2 documentation 1177 More details on the `SnapATAC2 documentation
804 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.harmony.html>`__ 1178 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.harmony.html>`__
805 1179
806 Use Scanorama to integrate different experiments, using `pp.scanorama_integrate` 1180 Use Scanorama to integrate different experiments, using `pp.scanorama_integrate`
807 ======================================================================================== 1181 ================================================================================
808 1182
809 Use Scanorama to integrate different experiments. 1183 Use Scanorama to integrate different experiments.
810 1184
811 Scanorama is an algorithm for integrating single-cell data from multiple experiments stored in an AnnData object. This function should be run after performing `tl.spectral` but before computing the neighbor graph. 1185 Scanorama is an algorithm for integrating single-cell data from multiple experiments stored in an AnnData object. This function should be run after performing `tl.spectral` but before computing the neighbor graph.
812 1186
813 More details on the `SnapATAC2 documentation 1187 More details on the `SnapATAC2 documentation
814 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.scanorama_integrate.html>`__ 1188 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.scanorama_integrate.html>`__
815 1189
816 Compute the fragment size distribution of the dataset, using `metrics.frag_size_distr` 1190 Export fragments for each group of cells, using `ex.export_fragments`
817 ====================================================================================== 1191 =====================================================================
818 1192
819 Compute the fragment size distribution of the dataset. 1193 Export and save fragments for a group of cells in a BED format file.
820 1194
821 This function computes the fragment size distribution of the dataset. Note that it does not operate at the single-cell level. The result is stored in a vector where each element represents the number of fragments and the index represents the fragment length. The first posision of the vector is reserved for fragments with size larger than the `max_recorded_size` parameter. `import_data` must be ran first in order to use this function. 1195 More details on the `SnapATAC2 documentation
822 1196 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.ex.export_fragments.html>`__
823 More details on the `SnapATAC2 documentation 1197
824 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.metrics.frag_size_distr.html>`__ 1198 Export coverage for each group of cells, using `ex.export_coverage`
825 1199 =====================================================================
826 Compute the TSS enrichment score (TSSe) for each cell, using `metrics.tsse` 1200
827 =========================================================================== 1201 Export and save coverage for a group of cells in a bedgraph or bigwig format file.
828 1202
829 Compute the TSS enrichment score (TSSe) for each cell. 1203 More details on the `SnapATAC2 documentation
830 1204 <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.ex.export_coverage.html>`__
831 `import_data` must be ran first in order to use this function.
832
833 More details on the `SnapATAC2 documentation
834 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.metrics.tsse.html>`__
835 1205
836 ]]></help> 1206 ]]></help>
837 <expand macro="citations"/> 1207 <expand macro="citations"/>
838 </tool> 1208 </tool>