Mercurial > repos > iuc > snapatac2_preprocessing
changeset 1:cec3e76eaf05 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snapatac2 commit 1e34deee1e39c0c65e1e29a9d28becc7aaf23a4f
author | iuc |
---|---|
date | Thu, 23 May 2024 15:20:02 +0000 |
parents | 00a6721e1f81 |
children | 48d9421bf176 |
files | macros.xml preprocessing.xml |
diffstat | 2 files changed, 422 insertions(+), 182 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Thu May 16 13:17:14 2024 +0000 +++ b/macros.xml Thu May 23 15:20:02 2024 +0000 @@ -1,6 +1,6 @@ <macros> - <token name="@TOOL_VERSION@">2.5.3</token> - <token name="@VERSION_SUFFIX@">0</token> + <token name="@TOOL_VERSION@">2.5.3</token> + <token name="@VERSION_SUFFIX@">1</token> <token name="@PROFILE@">23.0</token> <xml name="requirements"> <requirement type="package" version="@TOOL_VERSION@">snapatac2</requirement> @@ -12,6 +12,9 @@ <requirement type="package" version="0.8.33">hdbscan</requirement> <requirement type="package" version="0.0.9">harmonypy</requirement> <requirement type="package" version="1.7.4">scanorama</requirement> + <requirement type="package" version="3.0.1">macs3</requirement> + <requirement type="package" version="0.70.16">multiprocess</requirement> + <requirement type="package" version="0.10.2">leidenalg</requirement> <yield /> </xml> @@ -23,8 +26,8 @@ <token name="@CMD@"><![CDATA[ cat '$script_file' > '$hidden_output' && python '$script_file' >> '$hidden_output' && - touch 'anndata_info.txt' && - cat 'anndata_info.txt' @CMD_prettify_stdout@ + touch 'anndata_info.txt' && + cat 'anndata_info.txt' @CMD_prettify_stdout@ ]]> </token> @@ -56,7 +59,7 @@ <xml name="dimentions_plot"> <param argument="width" type="integer" value="500" label="Width of the plot"/> - <param argument="height" type="integer" value="400" label="Height of the plot"/> + <param argument="height" type="integer" value="400" label="Height of the plot"/> </xml> <xml name="param_groupby"> @@ -66,11 +69,11 @@ </xml> <xml name="out_file"> - <param name="out_file" type="select" optional="true" label="Type of output file"> + <param name="out_file" type="select" optional="true" label="Type of output plot"> <option value="png" selected="true">PNG</option> <option value="svg">SVG</option> <option value="pdf">PDF</option> - </param> + </param> </xml> <token name="@CMD_anndata_write_outputs@"><![CDATA[ adata.write('anndata.h5ad') @@ -89,28 +92,28 @@ <expand 
macro="out_file"/> </xml> <xml name="param_shift"> - <param argument="shift_left" type="integer" value="4" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/> - <param argument="shift_right" type="integer" value="-5" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/> + <param argument="shift_left" type="integer" value="4" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/> + <param argument="shift_right" type="integer" value="-5" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/> </xml> <xml name="param_chunk_size" tokens="size"> - <param argument="chunk_size" type="integer" value="@SIZE@" label="chunk size"/> + <param argument="chunk_size" type="integer" value="@SIZE@" label="chunk size"/> + </xml> + <xml name="min_max_frag_size"> + <param argument="min_frag_size" type="integer" optional="true" value="" label="Minimum fragment size to include"/> + <param argument="max_frag_size" type="integer" optional="true" value="" label="Maximum fragment size to include"/> </xml> - <xml name="min_max_frag_size"> - <param argument="min_frag_size" type="integer" optional="true" value="" label="Minimum fragment size to include"/> - <param argument="max_frag_size" type="integer" optional="true" value="" label="Maximum fragment size to include"/> - </xml> - <xml name="params_data_integration"> - <param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/> - <param argument="use_dims" type="text" optional="true" value="" label="The dimensions used for computation"> - <expand macro="sanitize_query"/> - </param> - <param argument="groupby" type="text" optional="true" value="" label="The key of the observation grouping to consider"> + <xml name="params_data_integration"> + <param argument="use_rep" type="text" value="X_spectral" label="The key for 
the matrix"/> + <param argument="use_dims" type="text" optional="true" value="" label="The dimensions used for computation"> + <expand macro="sanitize_query"/> + </param> + <param argument="groupby" type="text" optional="true" value="" label="The key of the observation grouping to consider"> <expand macro="sanitize_query" /> </param> - <param argument="key_added" type="text" optional="true" value="" label="If specified, add the result to adata.obsm with this key"/> - </xml> + <param argument="key_added" type="text" optional="true" value="" label="If specified, add the result to adata.obsm with this key"/> + </xml> <xml name="param_n_comps"> -s <param argument="n_comps" type="integer" value="30" label="Number of dimensions to keep" help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30."/> + <param argument="n_comps" type="integer" value="30" label="Number of dimensions to keep" help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 
30."/> </xml> <xml name="param_random_state"> <param argument="random_state" type="integer" value="0" label="Seed of the random state generator"/> @@ -126,12 +129,12 @@ </xml> <xml name="background"> <param argument="background" type="text" optional="true" value="" label="A list of regions to be used as the background"> - <expand macro="sanitize_query"/> - </param> + <expand macro="sanitize_query"/> + </param> </xml> <xml name="mat"> <param argument="peak_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by peak count matrix"/> - <param argument="gene_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by gene count matrix"/> + <param argument="gene_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by gene count matrix"/> </xml> <xml name="param_network"> <param argument="network" type="text" label="network"/> @@ -147,11 +150,11 @@ </citations> </xml> <xml name="render_plot_test"> - <param name="width" value="650"/> + <param name="width" value="650"/> <param name="height" value="450"/> </xml> <xml name="render_plot_matching_text"> - <has_text_matching expression="width = 650"/> + <has_text_matching expression="width = 650"/> <has_text_matching expression="height = 450"/> </xml> <xml name="param_counting_strategy">
--- a/preprocessing.xml Thu May 16 13:17:14 2024 +0000 +++ b/preprocessing.xml Thu May 23 15:20:02 2024 +0000 @@ -22,30 +22,30 @@ #if $method.method == 'pp.make_fragment_file' sa.pp.make_fragment_file( - bam_file = '$method.bam_file', - is_paired = $method.is_paired, - #if $method.barcode.extract_type == 'from_tag' - #if $method.barcode.barcode_tag != '' - barcode_tag = '$method.barcode.barcode_tag', - #end if - #elif $method.barcode.extract_type == 'from_read_names' - #if $method.barcode.barcode_regex != '' - barcode_regex = '$method.barcode.barcode_regex', - #end if - #end if - #if $method.umi_tag != '' - umi_tag = '$method.umi_tag', - #end if - #if $method.umi_regex != '' - umi_regex = '$method.umi_regex', - #end if - shift_right = $method.shift_right, - shift_left = $method.shift_left, - min_mapq = $method.min_mapq, - chunk_size = $method.chunk_size, - compression = 'gzip', - output_file = '$fragments_out', - tempdir = "." + bam_file = '$method.bam_file', + is_paired = $method.is_paired, + #if $method.barcode.extract_type == 'from_tag' + #if $method.barcode.barcode_tag != '' + barcode_tag = '$method.barcode.barcode_tag', + #end if + #elif $method.barcode.extract_type == 'from_read_names' + #if $method.barcode.barcode_regex != '' + barcode_regex = '$method.barcode.barcode_regex', + #end if + #end if + #if $method.umi_tag != '' + umi_tag = '$method.umi_tag', + #end if + #if $method.umi_regex != '' + umi_regex = '$method.umi_regex', + #end if + shift_right = $method.shift_right, + shift_left = $method.shift_left, + min_mapq = $method.min_mapq, + chunk_size = $method.chunk_size, + compression = 'gzip', + output_file = '$fragments_out', + tempdir = "." 
) #else if $method.method == 'pp.import_data' @@ -54,179 +54,177 @@ chr_sizes = {x[0]:int(x[1]) for x in csv.reader(f, delimiter='\t')} sa.pp.import_data( - fragment_file = '$method.fragment_file', - chrom_sizes = chr_sizes, - min_num_fragments = $method.min_num_fragments, - sorted_by_barcode = $method.sorted_by_barcode, - #if str($method.whitelist) != 'None' - whitelist = '$method.whitelist', - #end if - shift_left = $method.shift_left, - shift_right = $method.shift_right, - #set $chr_mt = ([x.strip() for x in str($method.chrM).split(',')]) - chrM = $chr_mt, - chunk_size = $method.chunk_size, - file = 'anndata.h5ad', - n_jobs = os.getenv("GALAXY_SLOTS", 4) + fragment_file = '$method.fragment_file', + chrom_sizes = chr_sizes, + min_num_fragments = $method.min_num_fragments, + sorted_by_barcode = $method.sorted_by_barcode, + #if str($method.whitelist) != 'None' + whitelist = '$method.whitelist', + #end if + shift_left = $method.shift_left, + shift_right = $method.shift_right, + #set $chr_mt = ([x.strip() for x in str($method.chrM).split(',')]) + chrM = $chr_mt, + chunk_size = $method.chunk_size, + file = 'anndata.h5ad', + n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) ) #else if $method.method == 'pp.add_tile_matrix' sa.pp.add_tile_matrix( - adata, - bin_size = $method.bin_size, - inplace = True, - chunk_size = $method.chunk_size, - #if $method.exclude_chroms != '' - #set $excl_chroms = ([x.strip() for x in str($method.exclude_chroms).split(',')]) - exclude_chroms = $excl_chroms, - #end if - #if $method.min_frag_size - min_frag_size = $method.min_frag_size, - #end if - #if $method.max_frag_size - max_frag_size = $method.max_frag_size, - #end if - ##counting_strategy = '$method.counting_strategy', - count_frag_as_reads = $method.count_frag_as_reads, - n_jobs = os.getenv("GALAXY_SLOTS", 4) + adata, + bin_size = $method.bin_size, + chunk_size = $method.chunk_size, + #if $method.exclude_chroms != '' + #set $excl_chroms = ([x.strip() for x in 
str($method.exclude_chroms).split(',')]) + exclude_chroms = $excl_chroms, + #end if + #if $method.min_frag_size + min_frag_size = $method.min_frag_size, + #end if + #if $method.max_frag_size + max_frag_size = $method.max_frag_size, + #end if + ##counting_strategy = '$method.counting_strategy', + count_frag_as_reads = $method.count_frag_as_reads, + n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) ) #else if $method.method == 'pp.make_gene_matrix' sa.pp.make_gene_matrix( - adata, - gene_anno = '$method.gene_anno', - inplace = True, - chunk_size = $method.chunk_size, - use_x = $method.use_x, - id_type = '$method.id_type', - transcript_name_key = '$method.transcript_name_key', - transcript_id_key = '$method.transcript_id_key', - gene_name_key = '$method.gene_name_key', - gene_id_key = '$method.gene_id_key', - #if $method.min_frag_size - min_frag_size = $method.min_frag_size, - #end if - #if $method.max_frag_size - max_frag_size = $method.max_frag_size, - #end if - ##counting_strategy = '$method.counting_strategy' - count_frag_as_reads = $method.count_frag_as_reads + adata, + gene_anno = '$method.gene_anno', + chunk_size = $method.chunk_size, + use_x = $method.use_x, + id_type = '$method.id_type', + transcript_name_key = '$method.transcript_name_key', + transcript_id_key = '$method.transcript_id_key', + gene_name_key = '$method.gene_name_key', + gene_id_key = '$method.gene_id_key', + #if $method.min_frag_size + min_frag_size = $method.min_frag_size, + #end if + #if $method.max_frag_size + max_frag_size = $method.max_frag_size, + #end if + ##counting_strategy = '$method.counting_strategy' + count_frag_as_reads = $method.count_frag_as_reads ) #else if $method.method == 'pp.filter_cells' sa.pp.filter_cells( - adata, - min_counts = $method.min_counts, - min_tsse = $method.min_tsse, - #if $method.max_counts - max_counts = $method.max_counts, - #end if - #if $method.max_tsse - max_tsse = $method.max_tsse, - #end if - inplace = True, - n_jobs = os.getenv("GALAXY_SLOTS", 4) + adata, 
+ min_counts = $method.min_counts, + min_tsse = $method.min_tsse, + #if $method.max_counts + max_counts = $method.max_counts, + #end if + #if $method.max_tsse + max_tsse = $method.max_tsse, + #end if + inplace = True, + n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) ) #else if $method.method == 'pp.select_features' sa.pp.select_features( - adata, - n_features = $method.n_features, - filter_lower_quantile = $method.filter_lower_quantile, - filter_upper_quantile = $method.filter_upper_quantile, - #if str($method.whitelist) != 'None' - whitelist = '$method.whitelist', - #end if - #if str($method.blacklist) != 'None' - blacklist = '$method.blacklist', - #end if - max_iter = $method.max_iter, - inplace = True, - n_jobs = os.getenv("GALAXY_SLOTS", 4) + adata, + n_features = $method.n_features, + filter_lower_quantile = $method.filter_lower_quantile, + filter_upper_quantile = $method.filter_upper_quantile, + #if str($method.whitelist) != 'None' + whitelist = '$method.whitelist', + #end if + #if str($method.blacklist) != 'None' + blacklist = '$method.blacklist', + #end if + max_iter = $method.max_iter, + inplace = True, + n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) ) #else if $method.method == 'pp.scrublet' sa.pp.scrublet( - adata, - #if $method.features - features = '$method.features', - #end if - n_comps = $method.n_comps, - sim_doublet_ratio = $method.sim_doublet_ratio, - expected_doublet_rate = $method.expected_doublet_rate, - #if $method.n_neighbors - n_neighbors = $method.n_neighbors, - #end if - use_approx_neighbors = $method.use_approx_neighbors, - random_state = $method.random_state, - inplace = True, - n_jobs = os.getenv("GALAXY_SLOTS", 4) + adata, + #if $method.features + features = '$method.features', + #end if + n_comps = $method.n_comps, + sim_doublet_ratio = $method.sim_doublet_ratio, + expected_doublet_rate = $method.expected_doublet_rate, + #if $method.n_neighbors + n_neighbors = $method.n_neighbors, + #end if + use_approx_neighbors = 
$method.use_approx_neighbors, + random_state = $method.random_state, + inplace = True, + n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) ) #else if $method.method == 'pp.filter_doublets' sa.pp.filter_doublets( - adata, - #if $method.probability_threshold - probability_threshold = $method.probability_threshold, - #end if - #if $method.score_threshold - score_threshold = $method.score_threshold, - #end if - inplace = True, - n_jobs = os.getenv("GALAXY_SLOTS", 4) + adata, + #if $method.probability_threshold + probability_threshold = $method.probability_threshold, + #end if + #if $method.score_threshold + score_threshold = $method.score_threshold, + #end if + inplace = True, + n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) ) #else if $method.method == 'pp.mnc_correct' sa.pp.mnc_correct( - adata, - batch = '$method.batch', - n_neighbors = $method.n_neighbors, - n_clusters = $method.n_clusters, - n_iter = $method.n_iter, - @CMD_params_data_integration@ - inplace = True, - n_jobs = os.getenv("GALAXY_SLOTS", 4) + adata, + batch = '$method.batch', + n_neighbors = $method.n_neighbors, + n_clusters = $method.n_clusters, + n_iter = $method.n_iter, + @CMD_params_data_integration@ + inplace = True, + n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) ) #else if $method.method == 'pp.harmony' sa.pp.harmony( - adata, - batch = '$method.batch', - @CMD_params_data_integration@ - inplace = True + adata, + batch = '$method.batch', + @CMD_params_data_integration@ + inplace = True ) #else if $method.method == 'pp.scanorama_integrate' sa.pp.scanorama_integrate( - adata, - batch = '$method.batch', - n_neighbors = $method.n_neighbors, - @CMD_params_data_integration@ - inplace = True + adata, + batch = '$method.batch', + n_neighbors = $method.n_neighbors, + @CMD_params_data_integration@ + inplace = True ) #else if $method.method == 'metrics.frag_size_distr' sa.metrics.frag_size_distr( - adata, - max_recorded_size = $method.max_recorded_size, - add_key = '$method.add_key', - inplace = True, - n_jobs = 
os.getenv("GALAXY_SLOTS", 4) + adata, + max_recorded_size = $method.max_recorded_size, + add_key = '$method.add_key', + inplace = True, + n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) ) #else if $method.method == 'metrics.tsse' sa.metrics.tsse( - adata, - gene_anno = '$method.gene_anno', - inplace = True, - n_jobs = os.getenv("GALAXY_SLOTS", 4) + adata, + gene_anno = '$method.gene_anno', + inplace = True, + n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) ) #end if #if $method.method != 'pp.make_fragment_file' and $method.method != 'pp.import_data' @CMD_anndata_write_outputs@ #end if - ]]></configfile> + ]]></configfile> </configfiles> <inputs> <conditional name="method"> @@ -257,7 +255,7 @@ <param argument="barcode_tag" type="text" value="CB" optional="true" label="Extract barcodes from TAG fields of BAM records"/> </when> <when value="from_read_names"> - <param argument="barcode_regex" type="text" value="" optional="true" label="Extract barcodes from read names of BAM records using regular expressions" help="`(..:..:..:..):w+$` extracts `bd:69:Y6:10` from `A01535:24:HW2MMDSX2:2:1359:8513:3458:bd:69:Y6:10:TGATAGGTT``"/> + <param argument="barcode_regex" type="text" value="" optional="true" label="Extract barcodes from read names of BAM records using regular expressions" help="`(..:..:..:..):\w+$` extracts `bd:69:Y6:10` from `A01535:24:HW2MMDSX2:2:1359:8513:3458:bd:69:Y6:10:TGATAGGTT``"/> </when> </conditional> <param argument="umi_tag" type="text" value="" optional="true" label="Extract UMI from TAG fields of BAM records"/> @@ -393,7 +391,7 @@ <!-- pp.make_fragment_file --> <conditional name="method"> <param name="method" value="pp.make_fragment_file"/> - <param name="bam_file" location="https://zenodo.org/records/11199963/files/pbmc_500_chr21_subsample.bam"/> + <param name="bam_file" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21_subsample.bam"/> <param name="is_paired" value="true"/> <conditional name="barcode"> <param name="extract_type" 
value="from_tag"/> @@ -404,14 +402,14 @@ <param name="min_mapq" value="10"/> <param name="chunk_size" value="50000000"/> </conditional> - <output name="fragments_out" location="https://zenodo.org/records/11199963/files/pp.make_fragment_file.pbmc_500_chr21.tsv.gz" ftype="interval" compare="sim_size" delta_frac="0.1"/> + <output name="fragments_out" location="https://zenodo.org/records/11260316/files/pp.make_fragment_file.pbmc_500_chr21.tsv.gz" ftype="interval" compare="sim_size" delta_frac="0.1"/> </test> <test expect_num_outputs="2"> <!-- pp.pp.import_data --> <conditional name="method"> <param name="method" value="pp.import_data"/> - <param name="fragment_file" location="https://zenodo.org/records/11199963/files/pbmc_500_chr21.tsv.gz"/> - <param name="chrom_sizes" location="https://zenodo.org/records/11199963/files/chr21_size.tabular"/> + <param name="fragment_file" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.tsv.gz"/> + <param name="chrom_sizes" location="https://zenodo.org/records/11260316/files/chr21_size.tabular"/> <param name="min_num_fragments" value="1"/> <param name="sorted_by_barcode" value="False"/> <param name="shift_left" value="0"/> @@ -433,11 +431,250 @@ <has_text_matching expression="chunk_size = 1000"/> </assert_contents> </output> - <output name="anndata_out" location="https://zenodo.org/records/11199963/files/pp.import_data.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/> + <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.import_data.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/> + </test> + <test expect_num_outputs="2"> + <!-- pp.make_gene_matrix --> + <conditional name="method"> + <param name="method" value="pp.make_gene_matrix"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/tl.leiden.modularity.pbmc_500_chr21.h5ad"/> + <param name="gene_anno" location="https://zenodo.org/records/11260316/files/chr21.gff3.gz"/> + 
<param name="chunk_size" value="500"/> + <param name="use_x" value="False"/> + <param name="id_type" value="gene"/> + <param name="transcript_name_key" value="transcript_name"/> + <param name="transcript_id_key" value="transcript_id"/> + <param name="gene_name_key" value="gene_name"/> + <param name="gene_id_key" value="gene_id"/> + <param name="count_frag_as_reads" value="True"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sa.pp.make_gene_matrix"/> + <has_text_matching expression="chunk_size = 500"/> + <has_text_matching expression="use_x = False"/> + <has_text_matching expression="id_type = 'gene'"/> + <has_text_matching expression="transcript_name_key = 'transcript_name'"/> + <has_text_matching expression="transcript_id_key = 'transcript_id'"/> + <has_text_matching expression="gene_name_key = 'gene_name'"/> + <has_text_matching expression="gene_id_key = 'gene_id'"/> + <has_text_matching expression="count_frag_as_reads = True"/> + </assert_contents> + </output> + <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.make_gene_matrix.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1" /> + </test> + <test expect_num_outputs="2"> + <!-- metrics.tsse --> + <conditional name="method"> + <param name="method" value="metrics.tsse"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/pp.import_data.pbmc_500_chr21.h5ad"/> + <param name="gene_anno" location="https://zenodo.org/records/11260316/files/chr21.gff3.gz"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sa.metrics.tsse"/> + </assert_contents> + </output> + <output name="anndata_out" 
location="https://zenodo.org/records/11260316/files/metrics.tsse.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1" /> + </test> + <test expect_num_outputs="2"> + <!-- pp.filter_cells --> + <conditional name="method"> + <param name="method" value="pp.filter_cells"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/metrics.tsse.pbmc_500_chr21.h5ad"/> + <param name="min_counts" value="200"/> + <param name="min_tsse" value="5"/> + <param name="max_counts" value="10000"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sa.pp.filter_cells"/> + <has_text_matching expression="min_counts = 200"/> + <has_text_matching expression="min_tsse = 5"/> + <has_text_matching expression="max_counts = 10000"/> + </assert_contents> + </output> + <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.filter_cells.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1" /> + </test> + <test expect_num_outputs="2"> + <!-- pp.add_tile_matrix --> + <conditional name="method"> + <param name="method" value="pp.add_tile_matrix"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/pp.filter_cells.pbmc_500_chr21.h5ad"/> + <param name="bin_size" value="5000"/> + <param name="chunk_size" value="500"/> + <param name="exclude_chroms" value="chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr22, chrX, chrY"/> + <param name="count_frag_as_reads" value="True"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sa.pp.add_tile_matrix"/> + <has_text_matching expression="bin_size = 5000"/> + <has_text_matching expression="chunk_size = 500"/> 
+ <has_text_matching expression="exclude_chroms = \['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr20', 'chr22', 'chrX', 'chrY'\]"/> + <has_text_matching expression="count_frag_as_reads = True"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.add_tile_matrix.pbmc_500_chr21.h5ad"/> + </test> + <test expect_num_outputs="2"> + <!-- pp.select_features --> + <conditional name="method"> + <param name="method" value="pp.select_features"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/pp.add_tile_matrix.pbmc_500_chr21.h5ad"/> + <param name="n_features" value="15000"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sa.pp.select_features"/> + <has_text_matching expression="n_features = 15000"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.select_features.pbmc_500_chr21.h5ad"/> + </test> + <test expect_num_outputs="2"> + <!-- pp.scrublet --> + <conditional name="method"> + <param name="method" value="pp.scrublet"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/pp.select_features.pbmc_500_chr21.h5ad"/> + <param name="n_comps" value="15"/> + <param name="sim_doublet_ratio" value="2.0"/> + <param name="expected_doublet_rate" value="0.1"/> + <param name="random_state" value="0"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sa.pp.scrublet"/> + <has_text_matching expression="n_comps = 
15"/> + <has_text_matching expression="sim_doublet_ratio = 2.0"/> + <has_text_matching expression="expected_doublet_rate = 0.1"/> + <has_text_matching expression="random_state = 0"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.scrublet.pbmc_500_chr21.h5ad"/> + </test> + <test expect_num_outputs="2"> + <!-- pp.filter_doublets --> + <conditional name="method"> + <param name="method" value="pp.filter_doublets"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/pp.scrublet.pbmc_500_chr21.h5ad"/> + <param name="probability_threshold" value="0.1"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sa.pp.filter_doublets"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.filter_doublets.pbmc_500_chr21.h5ad"/> + </test> + <test expect_num_outputs="2"> + <!-- pp.mnc_correct --> + <conditional name="method"> + <param name="method" value="pp.mnc_correct"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.batch.h5ad"/> + <param name="batch" value="batch"/> + <param name="n_neighbors" value="3"/> + <param name="n_clusters" value="10"/> + <param name="use_rep" value="X_spectral"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sa.pp.mnc_correct"/> + <has_text_matching expression="batch = 'batch'"/> + <has_text_matching expression="n_neighbors = 3"/> + <has_text_matching expression="n_clusters = 10"/> + <has_text_matching expression="batch = 'batch'"/> + <has_text_matching expression="use_rep = 
'X_spectral'"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.mnc_correct.pbmc_500_chr21.h5ad"/> + </test> + <test expect_num_outputs="2"> + <!-- pp.harmony --> + <conditional name="method"> + <param name="method" value="pp.harmony"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.batch.h5ad"/> + <param name="batch" value="batch"/> + <param name="use_rep" value="X_spectral"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sa.pp.harmony"/> + <has_text_matching expression="batch = 'batch'"/> + <has_text_matching expression="use_rep = 'X_spectral'"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.harmony.pbmc_500_chr21.h5ad"/> + </test> + <test expect_num_outputs="2"> + <!-- pp.scanorama_integrate --> + <conditional name="method"> + <param name="method" value="pp.scanorama_integrate"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.batch.h5ad"/> + <param name="batch" value="batch"/> + <param name="use_rep" value="X_spectral"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sa.pp.scanorama_integrate"/> + <has_text_matching expression="batch = 'batch'"/> + <has_text_matching expression="use_rep = 'X_spectral'"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.scanorama_integrate.pbmc_500_chr21.h5ad"/> + </test> + <test expect_num_outputs="2"> + <!-- 
metrics.frag_size_distr --> + <conditional name="method"> + <param name="method" value="metrics.frag_size_distr"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/pp.import_data.pbmc_500_chr21.h5ad"/> + <param name="max_recorded_size" value="500"/> + <param name="add_key" value="frag_size_distr"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sa.metrics.frag_size_distr"/> + <has_text_matching expression="add_key = 'frag_size_distr'"/> + </assert_contents> + </output> + <output name="anndata_out" location="https://zenodo.org/records/11260316/files/metrics.frag_size_distr.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1" /> </test> </tests> <help><![CDATA[ -Convert a BAM file`to a fragment file, `using pp.make_fragment_file` +Convert a BAM file to a fragment file, using `pp.make_fragment_file` ==================================================================== Convert a BAM file to a fragment file.