Mercurial > repos > iuc > snapatac2_preprocessing

--- a/macros.xml	Thu May 16 13:17:14 2024 +0000
+++ b/macros.xml	Thu May 23 15:20:02 2024 +0000
@@ -1,6 +1,6 @@
 <macros>
-	<token name="@TOOL_VERSION@">2.5.3</token>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@TOOL_VERSION@">2.5.3</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     <token name="@PROFILE@">23.0</token>
     <xml name="requirements">
         <requirement type="package" version="@TOOL_VERSION@">snapatac2</requirement>
@@ -12,6 +12,9 @@
         <requirement type="package" version="0.8.33">hdbscan</requirement>
         <requirement type="package" version="0.0.9">harmonypy</requirement>
         <requirement type="package" version="1.7.4">scanorama</requirement>
+        <requirement type="package" version="3.0.1">macs3</requirement>
+        <requirement type="package" version="0.70.16">multiprocess</requirement>
+        <requirement type="package" version="0.10.2">leidenalg</requirement>
         <yield />
     </xml>

@@ -23,8 +26,8 @@
     <token name="@CMD@"><![CDATA[
         cat '$script_file' > '$hidden_output' &&
         python '$script_file' >> '$hidden_output' &&
-		touch 'anndata_info.txt' &&
-		cat 'anndata_info.txt' @CMD_prettify_stdout@
+        touch 'anndata_info.txt' &&
+        cat 'anndata_info.txt' @CMD_prettify_stdout@
         ]]>
     </token>

@@ -56,7 +59,7 @@

     <xml name="dimentions_plot">
         <param argument="width" type="integer" value="500" label="Width of the plot"/>
-		<param argument="height" type="integer" value="400" label="Height of the plot"/>
+        <param argument="height" type="integer" value="400" label="Height of the plot"/>
     </xml>

     <xml name="param_groupby">
@@ -66,11 +69,11 @@
     </xml>

     <xml name="out_file">
-        <param name="out_file" type="select" optional="true" label="Type of output file">
+        <param name="out_file" type="select" optional="true" label="Type of output plot">
             <option value="png" selected="true">PNG</option>
             <option value="svg">SVG</option>
             <option value="pdf">PDF</option>
-		</param>
+        </param>
     </xml>
     <token name="@CMD_anndata_write_outputs@"><![CDATA[
 adata.write('anndata.h5ad')
@@ -89,28 +92,28 @@
         <expand macro="out_file"/>
     </xml>
     <xml name="param_shift">
-    	<param argument="shift_left" type="integer" value="4" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/>
-    	<param argument="shift_right" type="integer" value="-5" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/>
+        <param argument="shift_left" type="integer" value="4" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/>
+        <param argument="shift_right" type="integer" value="-5" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/>
     </xml>
     <xml name="param_chunk_size" tokens="size">
-    	<param argument="chunk_size" type="integer" value="@SIZE@" label="chunk size"/>
+        <param argument="chunk_size" type="integer" value="@SIZE@" label="chunk size"/>
+    </xml>
+    <xml name="min_max_frag_size">
+        <param argument="min_frag_size" type="integer" optional="true" value="" label="Minimum fragment size to include"/>
+        <param argument="max_frag_size" type="integer" optional="true" value="" label="Maximum fragment size to include"/>
     </xml>
-	<xml name="min_max_frag_size">
-		<param argument="min_frag_size" type="integer" optional="true" value="" label="Minimum fragment size to include"/>
-		<param argument="max_frag_size" type="integer" optional="true" value="" label="Maximum fragment size to include"/>
-	</xml>
-	<xml name="params_data_integration">
-		<param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/>
-		<param argument="use_dims" type="text" optional="true" value="" label="The dimensions used for computation">
-			<expand macro="sanitize_query"/>
-		</param>
-		<param argument="groupby" type="text" optional="true" value="" label="The key of the observation grouping to consider">
+    <xml name="params_data_integration">
+        <param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/>
+        <param argument="use_dims" type="text" optional="true" value="" label="The dimensions used for computation">
+            <expand macro="sanitize_query"/>
+        </param>
+        <param argument="groupby" type="text" optional="true" value="" label="The key of the observation grouping to consider">
             <expand macro="sanitize_query" />
         </param>
-		<param argument="key_added" type="text" optional="true" value="" label="If specified, add the result to adata.obsm with this key"/>
-	</xml>
+        <param argument="key_added" type="text" optional="true" value="" label="If specified, add the result to adata.obsm with this key"/>
+    </xml>
     <xml name="param_n_comps">
-s        <param argument="n_comps" type="integer" value="30" label="Number of dimensions to keep" help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30."/>
+        <param argument="n_comps" type="integer" value="30" label="Number of dimensions to keep" help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30."/>
     </xml>
     <xml name="param_random_state">
         <param argument="random_state" type="integer" value="0" label="Seed of the random state generator"/>
@@ -126,12 +129,12 @@
     </xml>
     <xml name="background">
         <param argument="background" type="text" optional="true" value="" label="A list of regions to be used as the background">
-			<expand macro="sanitize_query"/>
-		</param>
+            <expand macro="sanitize_query"/>
+        </param>
     </xml>
     <xml name="mat">
         <param argument="peak_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by peak count matrix"/>
-		<param argument="gene_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by gene count matrix"/>
+        <param argument="gene_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by gene count matrix"/>
     </xml>
     <xml name="param_network">
         <param argument="network" type="text" label="network"/>
@@ -147,11 +150,11 @@
         </citations>
     </xml>
     <xml name="render_plot_test">
-    	<param name="width" value="650"/>
+        <param name="width" value="650"/>
         <param name="height" value="450"/>
     </xml>
     <xml name="render_plot_matching_text">
-    	<has_text_matching expression="width = 650"/>
+        <has_text_matching expression="width = 650"/>
         <has_text_matching expression="height = 450"/>
     </xml>
     <xml name="param_counting_strategy">
--- a/preprocessing.xml	Thu May 16 13:17:14 2024 +0000
+++ b/preprocessing.xml	Thu May 23 15:20:02 2024 +0000
@@ -22,30 +22,30 @@

 #if $method.method == 'pp.make_fragment_file'
 sa.pp.make_fragment_file(
-	bam_file = '$method.bam_file',
-	is_paired = $method.is_paired,
-	#if $method.barcode.extract_type == 'from_tag'
-		#if $method.barcode.barcode_tag != ''
-	barcode_tag = '$method.barcode.barcode_tag',
-		#end if
-	#elif $method.barcode.extract_type == 'from_read_names'
-		#if $method.barcode.barcode_regex != ''
-	barcode_regex = '$method.barcode.barcode_regex',
-		#end if
-	#end if
-	#if $method.umi_tag != ''
-	umi_tag = '$method.umi_tag',
-	#end if
-	#if $method.umi_regex != ''
-	umi_regex = '$method.umi_regex',
-	#end if
-	shift_right = $method.shift_right,
-	shift_left = $method.shift_left,
-	min_mapq = $method.min_mapq,
-	chunk_size = $method.chunk_size,
-	compression = 'gzip',
-	output_file = '$fragments_out',
-	tempdir = "."
+    bam_file = '$method.bam_file',
+    is_paired = $method.is_paired,
+    #if $method.barcode.extract_type == 'from_tag'
+        #if $method.barcode.barcode_tag != ''
+    barcode_tag = '$method.barcode.barcode_tag',
+        #end if
+    #elif $method.barcode.extract_type == 'from_read_names'
+        #if $method.barcode.barcode_regex != ''
+    barcode_regex = '$method.barcode.barcode_regex',
+        #end if
+    #end if
+    #if $method.umi_tag != ''
+    umi_tag = '$method.umi_tag',
+    #end if
+    #if $method.umi_regex != ''
+    umi_regex = '$method.umi_regex',
+    #end if
+    shift_right = $method.shift_right,
+    shift_left = $method.shift_left,
+    min_mapq = $method.min_mapq,
+    chunk_size = $method.chunk_size,
+    compression = 'gzip',
+    output_file = '$fragments_out',
+    tempdir = "."
 )

 #else if $method.method == 'pp.import_data'
@@ -54,179 +54,177 @@
     chr_sizes = {x[0]:int(x[1]) for x in csv.reader(f, delimiter='\t')}

 sa.pp.import_data(
-	fragment_file = '$method.fragment_file',
-	chrom_sizes = chr_sizes,
-	min_num_fragments = $method.min_num_fragments,
-	sorted_by_barcode = $method.sorted_by_barcode,
-	#if str($method.whitelist) != 'None'
-	whitelist = '$method.whitelist',
-	#end if
-	shift_left = $method.shift_left,
-	shift_right = $method.shift_right,
-	#set $chr_mt = ([x.strip() for x in str($method.chrM).split(',')])
-	chrM = $chr_mt,
-	chunk_size = $method.chunk_size,
-	file = 'anndata.h5ad',
-	n_jobs = os.getenv("GALAXY_SLOTS", 4)
+    fragment_file = '$method.fragment_file',
+    chrom_sizes = chr_sizes,
+    min_num_fragments = $method.min_num_fragments,
+    sorted_by_barcode = $method.sorted_by_barcode,
+    #if str($method.whitelist) != 'None'
+    whitelist = '$method.whitelist',
+    #end if
+    shift_left = $method.shift_left,
+    shift_right = $method.shift_right,
+    #set $chr_mt = ([x.strip() for x in str($method.chrM).split(',')])
+    chrM = $chr_mt,
+    chunk_size = $method.chunk_size,
+    file = 'anndata.h5ad',
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
 )

 #else if $method.method == 'pp.add_tile_matrix'
 sa.pp.add_tile_matrix(
-	adata,
-	bin_size = $method.bin_size,
-	inplace = True,
-	chunk_size = $method.chunk_size,
-	#if $method.exclude_chroms != ''
-	#set $excl_chroms = ([x.strip() for x in str($method.exclude_chroms).split(',')])
-	exclude_chroms = $excl_chroms,
-	#end if
-	#if $method.min_frag_size
-	min_frag_size = $method.min_frag_size,
-	#end if
-	#if $method.max_frag_size
-	max_frag_size = $method.max_frag_size,
-	#end if
-	##counting_strategy = '$method.counting_strategy',
-	count_frag_as_reads = $method.count_frag_as_reads,
-	n_jobs = os.getenv("GALAXY_SLOTS", 4)
+    adata,
+    bin_size = $method.bin_size,
+    chunk_size = $method.chunk_size,
+    #if $method.exclude_chroms != ''
+    #set $excl_chroms = ([x.strip() for x in str($method.exclude_chroms).split(',')])
+    exclude_chroms = $excl_chroms,
+    #end if
+    #if $method.min_frag_size
+    min_frag_size = $method.min_frag_size,
+    #end if
+    #if $method.max_frag_size
+    max_frag_size = $method.max_frag_size,
+    #end if
+    ##counting_strategy = '$method.counting_strategy',
+    count_frag_as_reads = $method.count_frag_as_reads,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
 )

 #else if $method.method == 'pp.make_gene_matrix'
 sa.pp.make_gene_matrix(
-	adata,
-	gene_anno = '$method.gene_anno',
-	inplace = True,
-	chunk_size = $method.chunk_size,
-	use_x = $method.use_x,
-	id_type = '$method.id_type',
-	transcript_name_key = '$method.transcript_name_key',
-	transcript_id_key = '$method.transcript_id_key',
-	gene_name_key = '$method.gene_name_key',
-	gene_id_key = '$method.gene_id_key',
-	#if $method.min_frag_size
-	min_frag_size = $method.min_frag_size,
-	#end if
-	#if $method.max_frag_size
-	max_frag_size = $method.max_frag_size,
-	#end if
-	##counting_strategy = '$method.counting_strategy'
-	count_frag_as_reads = $method.count_frag_as_reads
+    adata,
+    gene_anno = '$method.gene_anno',
+    chunk_size = $method.chunk_size,
+    use_x = $method.use_x,
+    id_type = '$method.id_type',
+    transcript_name_key = '$method.transcript_name_key',
+    transcript_id_key = '$method.transcript_id_key',
+    gene_name_key = '$method.gene_name_key',
+    gene_id_key = '$method.gene_id_key',
+    #if $method.min_frag_size
+    min_frag_size = $method.min_frag_size,
+    #end if
+    #if $method.max_frag_size
+    max_frag_size = $method.max_frag_size,
+    #end if
+    ##counting_strategy = '$method.counting_strategy'
+    count_frag_as_reads = $method.count_frag_as_reads
 )

 #else if $method.method == 'pp.filter_cells'
 sa.pp.filter_cells(
-	adata,
-	min_counts = $method.min_counts,
-	min_tsse = $method.min_tsse,
-	#if $method.max_counts
-	max_counts = $method.max_counts,
-	#end if
-	#if $method.max_tsse
-	max_tsse = $method.max_tsse,
-	#end if
-	inplace = True,
-	n_jobs = os.getenv("GALAXY_SLOTS", 4)
+    adata,
+    min_counts = $method.min_counts,
+    min_tsse = $method.min_tsse,
+    #if $method.max_counts
+    max_counts = $method.max_counts,
+    #end if
+    #if $method.max_tsse
+    max_tsse = $method.max_tsse,
+    #end if
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
 )

 #else if $method.method == 'pp.select_features'
 sa.pp.select_features(
-	adata,
-	n_features = $method.n_features,
-	filter_lower_quantile = $method.filter_lower_quantile,
-	filter_upper_quantile = $method.filter_upper_quantile,
-	#if str($method.whitelist) != 'None'
-	whitelist = '$method.whitelist',
-	#end if
-	#if str($method.blacklist) != 'None'
-	blacklist = '$method.blacklist',
-	#end if
-	max_iter = $method.max_iter,
-	inplace = True,
-	n_jobs = os.getenv("GALAXY_SLOTS", 4)
+    adata,
+    n_features = $method.n_features,
+    filter_lower_quantile = $method.filter_lower_quantile,
+    filter_upper_quantile = $method.filter_upper_quantile,
+    #if str($method.whitelist) != 'None'
+    whitelist = '$method.whitelist',
+    #end if
+    #if str($method.blacklist) != 'None'
+    blacklist = '$method.blacklist',
+    #end if
+    max_iter = $method.max_iter,
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
 )

 #else if $method.method == 'pp.scrublet'
 sa.pp.scrublet(
-	adata,
-	#if $method.features
-	features = '$method.features',
-	#end if
-	n_comps = $method.n_comps,
-	sim_doublet_ratio = $method.sim_doublet_ratio,
-	expected_doublet_rate = $method.expected_doublet_rate,
-	#if $method.n_neighbors
-	n_neighbors = $method.n_neighbors,
-	#end if
-	use_approx_neighbors = $method.use_approx_neighbors,
-	random_state = $method.random_state,
-	inplace = True,
-	n_jobs = os.getenv("GALAXY_SLOTS", 4)
+    adata,
+    #if $method.features
+    features = '$method.features',
+    #end if
+    n_comps = $method.n_comps,
+    sim_doublet_ratio = $method.sim_doublet_ratio,
+    expected_doublet_rate = $method.expected_doublet_rate,
+    #if $method.n_neighbors
+    n_neighbors = $method.n_neighbors,
+    #end if
+    use_approx_neighbors = $method.use_approx_neighbors,
+    random_state = $method.random_state,
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
 )

 #else if $method.method == 'pp.filter_doublets'
 sa.pp.filter_doublets(
-	adata,
-	#if $method.probability_threshold
-	probability_threshold = $method.probability_threshold,
-	#end if
-	#if $method.score_threshold
-	score_threshold = $method.score_threshold,
-	#end if
-	inplace = True,
-	n_jobs = os.getenv("GALAXY_SLOTS", 4)
+    adata,
+    #if $method.probability_threshold
+    probability_threshold = $method.probability_threshold,
+    #end if
+    #if $method.score_threshold
+    score_threshold = $method.score_threshold,
+    #end if
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
 )

 #else if $method.method == 'pp.mnc_correct'
 sa.pp.mnc_correct(
-	adata,
-	batch = '$method.batch',
-	n_neighbors = $method.n_neighbors,
-	n_clusters = $method.n_clusters,
-	n_iter = $method.n_iter,
-	@CMD_params_data_integration@
-	inplace = True,
-	n_jobs = os.getenv("GALAXY_SLOTS", 4)
+    adata,
+    batch = '$method.batch',
+    n_neighbors = $method.n_neighbors,
+    n_clusters = $method.n_clusters,
+    n_iter = $method.n_iter,
+    @CMD_params_data_integration@
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
 )

 #else if $method.method == 'pp.harmony'
 sa.pp.harmony(
-	adata,
-	batch = '$method.batch',
-	@CMD_params_data_integration@
-	inplace = True
+    adata,
+    batch = '$method.batch',
+    @CMD_params_data_integration@
+    inplace = True
 )

 #else if $method.method == 'pp.scanorama_integrate'
 sa.pp.scanorama_integrate(
-	adata,
-	batch = '$method.batch',
-	n_neighbors = $method.n_neighbors,
-	@CMD_params_data_integration@
-	inplace = True
+    adata,
+    batch = '$method.batch',
+    n_neighbors = $method.n_neighbors,
+    @CMD_params_data_integration@
+    inplace = True
 )

 #else if $method.method == 'metrics.frag_size_distr'
 sa.metrics.frag_size_distr(
-	adata,
-	max_recorded_size = $method.max_recorded_size,
-	add_key = '$method.add_key',
-	inplace = True,
-	n_jobs = os.getenv("GALAXY_SLOTS", 4)
+    adata,
+    max_recorded_size = $method.max_recorded_size,
+    add_key = '$method.add_key',
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
 )

 #else if $method.method == 'metrics.tsse'
 sa.metrics.tsse(
-	adata,
-	gene_anno = '$method.gene_anno',
-	inplace = True,
-	n_jobs = os.getenv("GALAXY_SLOTS", 4)
+    adata,
+    gene_anno = '$method.gene_anno',
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
 )
 #end if

 #if $method.method != 'pp.make_fragment_file' and $method.method != 'pp.import_data'
 @CMD_anndata_write_outputs@
 #end if
-	]]></configfile>
+    ]]></configfile>
     </configfiles>
     <inputs>
         <conditional name="method">
@@ -257,7 +255,7 @@
                         <param argument="barcode_tag" type="text" value="CB" optional="true" label="Extract barcodes from TAG fields of BAM records"/>
                     </when>
                     <when value="from_read_names">
-                        <param argument="barcode_regex" type="text" value="" optional="true" label="Extract barcodes from read names of BAM records using regular expressions" help="`(..:..:..:..):w+$` extracts `bd:69:Y6:10` from `A01535:24:HW2MMDSX2:2:1359:8513:3458:bd:69:Y6:10:TGATAGGTT``"/>
+                        <param argument="barcode_regex" type="text" value="" optional="true" label="Extract barcodes from read names of BAM records using regular expressions" help="`(..:..:..:..):\w+$` extracts `bd:69:Y6:10` from `A01535:24:HW2MMDSX2:2:1359:8513:3458:bd:69:Y6:10:TGATAGGTT`"/>
                     </when>
                 </conditional>
                 <param argument="umi_tag" type="text" value="" optional="true" label="Extract UMI from TAG fields of BAM records"/>
@@ -393,7 +391,7 @@
             <!-- pp.make_fragment_file -->
             <conditional name="method">
                 <param name="method" value="pp.make_fragment_file"/>
-                <param name="bam_file" location="https://zenodo.org/records/11199963/files/pbmc_500_chr21_subsample.bam"/>
+                <param name="bam_file" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21_subsample.bam"/>
                 <param name="is_paired" value="true"/>
                 <conditional name="barcode">
                     <param name="extract_type" value="from_tag"/>
@@ -404,14 +402,14 @@
                 <param name="min_mapq" value="10"/>
                 <param name="chunk_size" value="50000000"/>
             </conditional>
-            <output name="fragments_out" location="https://zenodo.org/records/11199963/files/pp.make_fragment_file.pbmc_500_chr21.tsv.gz" ftype="interval" compare="sim_size" delta_frac="0.1"/>
+            <output name="fragments_out" location="https://zenodo.org/records/11260316/files/pp.make_fragment_file.pbmc_500_chr21.tsv.gz" ftype="interval" compare="sim_size" delta_frac="0.1"/>
         </test>
         <test expect_num_outputs="2">
-            <!-- pp.pp.import_data -->
+            <!-- pp.import_data -->
             <conditional name="method">
                 <param name="method" value="pp.import_data"/>
-                <param name="fragment_file" location="https://zenodo.org/records/11199963/files/pbmc_500_chr21.tsv.gz"/>
-                <param name="chrom_sizes" location="https://zenodo.org/records/11199963/files/chr21_size.tabular"/>
+                <param name="fragment_file" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.tsv.gz"/>
+                <param name="chrom_sizes" location="https://zenodo.org/records/11260316/files/chr21_size.tabular"/>
                 <param name="min_num_fragments" value="1"/>
                 <param name="sorted_by_barcode" value="False"/>
                 <param name="shift_left" value="0"/>
@@ -433,11 +431,250 @@
                     <has_text_matching expression="chunk_size = 1000"/>
                 </assert_contents>
             </output>
-            <output name="anndata_out" location="https://zenodo.org/records/11199963/files/pp.import_data.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/>
+            <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.import_data.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pp.make_gene_matrix: each parameter set below must appear in the rendered script text captured in hidden_output -->
+            <conditional name="method">
+                <param name="method" value="pp.make_gene_matrix"/>
+                <param name="adata" location="https://zenodo.org/records/11260316/files/tl.leiden.modularity.pbmc_500_chr21.h5ad"/>
+                <param name="gene_anno" location="https://zenodo.org/records/11260316/files/chr21.gff3.gz"/>
+                <param name="chunk_size" value="500"/>
+                <param name="use_x" value="False"/>
+                <param name="id_type" value="gene"/>
+                <param name="transcript_name_key" value="transcript_name"/>
+                <param name="transcript_id_key" value="transcript_id"/>
+                <param name="gene_name_key" value="gene_name"/>
+                <param name="gene_id_key" value="gene_id"/>
+                <param name="count_frag_as_reads" value="True"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pp.make_gene_matrix"/>
+                    <has_text_matching expression="chunk_size = 500"/>
+                    <has_text_matching expression="use_x = False"/>
+                    <has_text_matching expression="id_type = 'gene'"/>
+                    <has_text_matching expression="transcript_name_key = 'transcript_name'"/>
+                    <has_text_matching expression="transcript_id_key = 'transcript_id'"/>
+                    <has_text_matching expression="gene_name_key = 'gene_name'"/>
+                    <has_text_matching expression="gene_id_key = 'gene_id'"/>
+                    <has_text_matching expression="count_frag_as_reads = True"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.make_gene_matrix.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- metrics.tsse: only the presence of the call in the rendered script is checked; the AnnData output is compared by size -->
+            <conditional name="method">
+                <param name="method" value="metrics.tsse"/>
+                <param name="adata" location="https://zenodo.org/records/11260316/files/pp.import_data.pbmc_500_chr21.h5ad"/>
+                <param name="gene_anno" location="https://zenodo.org/records/11260316/files/chr21.gff3.gz"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.metrics.tsse"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" location="https://zenodo.org/records/11260316/files/metrics.tsse.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pp.filter_cells: the count and TSSE thresholds set below must appear in the rendered script -->
+            <conditional name="method">
+                <param name="method" value="pp.filter_cells"/>
+                <param name="adata" location="https://zenodo.org/records/11260316/files/metrics.tsse.pbmc_500_chr21.h5ad"/>
+                <param name="min_counts" value="200"/>
+                <param name="min_tsse" value="5"/>
+                <param name="max_counts" value="10000"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pp.filter_cells"/>
+                    <has_text_matching expression="min_counts = 200"/>
+                    <has_text_matching expression="min_tsse = 5"/>
+                    <has_text_matching expression="max_counts = 10000"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.filter_cells.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pp.add_tile_matrix: exclude_chroms is supplied as a comma-separated string and must be rendered as a Python list of stripped names -->
+            <conditional name="method">
+                <param name="method" value="pp.add_tile_matrix"/>
+                <param name="adata" location="https://zenodo.org/records/11260316/files/pp.filter_cells.pbmc_500_chr21.h5ad"/>
+                <param name="bin_size" value="5000"/>
+                <param name="chunk_size" value="500"/>
+                <param name="exclude_chroms" value="chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr22, chrX, chrY"/>
+                <param name="count_frag_as_reads" value="True"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pp.add_tile_matrix"/>
+                    <has_text_matching expression="bin_size = 5000"/>
+                    <has_text_matching expression="chunk_size = 500"/>
+                    <has_text_matching expression="exclude_chroms = \['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr20', 'chr22', 'chrX', 'chrY'\]"/>
+                    <has_text_matching expression="count_frag_as_reads = True"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.add_tile_matrix.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pp.select_features: only n_features is overridden; it must appear in the rendered script -->
+            <conditional name="method">
+                <param name="method" value="pp.select_features"/>
+                <param name="adata" location="https://zenodo.org/records/11260316/files/pp.add_tile_matrix.pbmc_500_chr21.h5ad"/>
+                <param name="n_features" value="15000"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pp.select_features"/>
+                    <has_text_matching expression="n_features = 15000"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.select_features.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pp.scrublet: the component count, doublet ratios and random seed set below must appear in the rendered script -->
+            <conditional name="method">
+                <param name="method" value="pp.scrublet"/>
+                <param name="adata" location="https://zenodo.org/records/11260316/files/pp.select_features.pbmc_500_chr21.h5ad"/>
+                <param name="n_comps" value="15"/>
+                <param name="sim_doublet_ratio" value="2.0"/>
+                <param name="expected_doublet_rate" value="0.1"/>
+                <param name="random_state" value="0"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pp.scrublet"/>
+                    <has_text_matching expression="n_comps = 15"/>
+                    <has_text_matching expression="sim_doublet_ratio = 2.0"/>
+                    <has_text_matching expression="expected_doublet_rate = 0.1"/>
+                    <has_text_matching expression="random_state = 0"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.scrublet.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pp.filter_doublets: probability_threshold is the only parameter overridden, so assert that it reaches the rendered script -->
+            <conditional name="method">
+                <param name="method" value="pp.filter_doublets"/>
+                <param name="adata" location="https://zenodo.org/records/11260316/files/pp.scrublet.pbmc_500_chr21.h5ad"/>
+                <param name="probability_threshold" value="0.1"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pp.filter_doublets"/>
+                    <!-- Without this check the test would still pass if the threshold were silently dropped from the template -->
+                    <has_text_matching expression="probability_threshold = 0.1"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.filter_doublets.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pp.mnc_correct: batch key, neighbour/cluster counts and the representation key must appear in the rendered script -->
+            <conditional name="method">
+                <param name="method" value="pp.mnc_correct"/>
+                <param name="adata" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.batch.h5ad"/>
+                <param name="batch" value="batch"/>
+                <param name="n_neighbors" value="3"/>
+                <param name="n_clusters" value="10"/>
+                <param name="use_rep" value="X_spectral"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <!-- One assertion per parameter; the repeated batch check was redundant and has been dropped -->
+                    <has_text_matching expression="sa.pp.mnc_correct"/>
+                    <has_text_matching expression="batch = 'batch'"/>
+                    <has_text_matching expression="n_neighbors = 3"/>
+                    <has_text_matching expression="n_clusters = 10"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.mnc_correct.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pp.harmony on the batch test file: batch key "batch", representation X_spectral; output compared by size (10% tolerance) -->
+            <conditional name="method">
+                <param name="method" value="pp.harmony"/>
+                <param name="adata" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.batch.h5ad"/>
+                <param name="batch" value="batch"/>
+                <param name="use_rep" value="X_spectral"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pp.harmony"/>
+                    <has_text_matching expression="batch = 'batch'"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.harmony.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pp.scanorama_integrate on the batch test file: batch key "batch", representation X_spectral; output compared by size (10% tolerance) -->
+            <conditional name="method">
+                <param name="method" value="pp.scanorama_integrate"/>
+                <param name="adata" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.batch.h5ad"/>
+                <param name="batch" value="batch"/>
+                <param name="use_rep" value="X_spectral"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pp.scanorama_integrate"/>
+                    <has_text_matching expression="batch = 'batch'"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.scanorama_integrate.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- metrics.frag_size_distr: pp.import_data test file as input, max_recorded_size 500, result key frag_size_distr; output compared by size (10% tolerance) -->
+            <conditional name="method">
+                <param name="method" value="metrics.frag_size_distr"/>
+                <param name="adata" location="https://zenodo.org/records/11260316/files/pp.import_data.pbmc_500_chr21.h5ad"/>
+                <param name="max_recorded_size" value="500"/>
+                <param name="add_key" value="frag_size_distr"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.metrics.frag_size_distr"/>
+                    <has_text_matching expression="add_key = 'frag_size_distr'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/metrics.frag_size_distr.pbmc_500_chr21.h5ad"/>
         </test>
     </tests>
     <help><![CDATA[
-Convert a BAM file`to a fragment file, `using pp.make_fragment_file`
+Convert a BAM file to a fragment file, using `pp.make_fragment_file`
 ====================================================================

 Convert a BAM file to a fragment file.