Mercurial > repos > iuc > scanpy_filter
comparison filter.xml @ 15:aa0059118fb9 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit c21958f44b81d740191999fb6015d5ae69538ee0
author | iuc |
---|---|
date | Wed, 31 Jul 2024 18:10:52 +0000 |
parents | d636ce5cde16 |
children |
comparison
equal
deleted
inserted
replaced
14:d636ce5cde16 | 15:aa0059118fb9 |
---|---|
96 total_counts=$method.total_counts, | 96 total_counts=$method.total_counts, |
97 #end if | 97 #end if |
98 random_state=$method.random_state, | 98 random_state=$method.random_state, |
99 replace=$method.replace, | 99 replace=$method.replace, |
100 copy=False) | 100 copy=False) |
101 | |
102 #else if $method.method == "filter_marker" | |
103 | |
104 #if $method.layer_selection.use_raw == 'False': | |
105 adata.X = adata.layers['$method.layer_selection.layer'] | |
106 #end if | |
107 | |
108 def check_marker(adata, group, gene, thresh_mean, thresh_frac, groupby): | |
109 filtered_data = adata[adata.obs[groupby] == group, adata.var_names == gene] | |
110 mean_expression = np.mean(filtered_data.X) | |
111 frac_cell_mean_expression = len(filtered_data.X[filtered_data.X > mean_expression]) / filtered_data.n_obs | |
112 if ( mean_expression > thresh_mean and frac_cell_mean_expression >= thresh_frac ): | |
113 return(True) | |
114 return(False) | |
115 | |
116 header='infer' | |
117 | |
118 #if $method.header == 'not_included': | |
119 header=None | |
120 #end if | |
121 | |
122 marker_list={key: list(value.values()) for key, value in pd.read_csv('$method.markerfile', sep='\t', index_col=0, header=header).to_dict(orient='index').items()} | |
123 | |
124 for key, value in marker_list.items(): | |
125 marker_list[key] = [x for x in value if check_marker(adata, key, x, $method.thresh_mean, $method.thresh_frac, '$method.groupby')] | |
126 | |
127 # Find the maximum length of lists | |
128 max_len = max(len(lst) for lst in marker_list.values()) | |
129 | |
130 # Fill smaller lists with empty values | |
131 for key, value in marker_list.items(): | |
132 marker_list[key] = value + [''] * (max_len - len(value)) | |
133 | |
134 df = pd.DataFrame(marker_list).T | |
135 df.to_csv('marker.tsv', sep='\t', index=True) | |
101 #end if | 136 #end if |
102 | 137 |
103 @CMD_anndata_write_outputs@ | 138 @CMD_anndata_write_outputs@ |
104 ]]></configfile> | 139 ]]></configfile> |
105 </configfiles> | 140 </configfiles> |
111 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option> | 146 <option value="pp.filter_genes">Filter genes based on number of cells or counts, using 'pp.filter_genes'</option> |
112 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option> | 147 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option> |
113 <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option> | 148 <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option> |
114 <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option> | 149 <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option> |
115 <option value="pp.downsample_counts">Downsample counts from count matrix, using 'pp.downsample_counts'</option> | 150 <option value="pp.downsample_counts">Downsample counts from count matrix, using 'pp.downsample_counts'</option> |
151 <option value="filter_marker">Filter markers from count matrix and marker list</option> | |
116 </param> | 152 </param> |
117 <when value="pp.filter_cells"> | 153 <when value="pp.filter_cells"> |
118 <conditional name="filter"> | 154 <conditional name="filter"> |
119 <param argument="filter" type="select" label="Filter"> | 155 <param argument="filter" type="select" label="Filter"> |
120 <option value="min_counts">Minimum number of counts</option> | 156 <option value="min_counts">Minimum number of counts</option> |
211 <param argument="counts_per_cell" type="integer" min="0" optional="true" label="Target total counts per cell" help="If a cell has more than ‘counts_per_cell’, it will be downsampled to this number. Resulting counts can be specified on a per cell basis by passing an array."/> | 247 <param argument="counts_per_cell" type="integer" min="0" optional="true" label="Target total counts per cell" help="If a cell has more than ‘counts_per_cell’, it will be downsampled to this number. Resulting counts can be specified on a per cell basis by passing an array."/> |
212 <param argument="total_counts" type="integer" min="0" optional="true" label="Target total counts" help="If the count matrix has more than total_counts it will be downsampled to have this number."/> | 248 <param argument="total_counts" type="integer" min="0" optional="true" label="Target total counts" help="If the count matrix has more than total_counts it will be downsampled to have this number."/> |
213 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/> | 249 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/> |
214 <param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/> | 250 <param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/> |
215 </when> | 251 </when> |
252 <when value="filter_marker"> | |
253 <param argument="markerfile" type="data" format="tabular" label="List of markers" help="This should be a tsv where row = group (e.g. celltypes) and columns = markers."></param> | |
254 <param name="header" type="select" label="Header in the list of markers?"> | |
255 <option value="included">Header included</option> | |
256 <option value="not_included">Header not included</option> | |
257 </param> | |
258 <param argument="thresh_mean" type="float" min="0.0" value="1.0" label="Minimal average count of all cells of a group (e.g., celltype) for a particular marker" help="Increasing the threshold will result in a smaller marker set."/> | |
259 <param argument="thresh_frac" type="float" min="0.0" max="1.0" value="0.1" label="Minimal fraction of cells that have a higher count than the average count of all cells of the group for the marker" help="Increasing this threshold might remove marker outliers."/> | |
260 <conditional name="layer_selection"> | |
261 <param name="use_raw" type="select" label="Use .X of adata to perform the filtering" help=""> | |
262 <option value="True">Yes</option> | |
263 <option value="False">No</option> | |
264 </param> | |
265 <when value="False"> | |
266 <param argument="layer" type="text" value="" label="Key from adata.layers whose value will be used to filter" help="If layers specified then use adata.layers[layer]."/> | |
267 </when> | |
268 <when value="True"/> | |
269 </conditional> | |
270 <param argument="groupby" type="text" value="" label="The key of the observation grouping to consider (e.g., celltype)" help=""> | |
271 <expand macro="sanitize_query" /> | |
272 </param> | |
273 </when> | |
216 </conditional> | 274 </conditional> |
217 <expand macro="inputs_common_advanced"/> | 275 <expand macro="inputs_common_advanced"/> |
218 </inputs> | 276 </inputs> |
219 <outputs> | 277 <outputs> |
220 <expand macro="anndata_outputs"/> | 278 <expand macro="anndata_outputs"/> |
279 <data name="marker_out" format="tabular" from_work_dir="marker.tsv" label="${tool.name} on ${on_string}: Markers"> | |
280 <filter>method['method'] == 'filter_marker'</filter> | |
281 </data> | |
221 </outputs> | 282 </outputs> |
222 <tests> | 283 <tests> |
223 <test expect_num_outputs="2"> | 284 <test expect_num_outputs="2"> |
224 <!-- test 1 --> | 285 <!-- test 1 --> |
225 <param name="adata" value="krumsiek11.h5ad" /> | 286 <param name="adata" value="krumsiek11.h5ad" /> |
442 <has_text_matching expression="replace=False"/> | 503 <has_text_matching expression="replace=False"/> |
443 </assert_contents> | 504 </assert_contents> |
444 </output> | 505 </output> |
445 <output name="anndata_out" file="pp.downsample_counts.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="10000000" delta_frac="0.5"/> | 506 <output name="anndata_out" file="pp.downsample_counts.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="10000000" delta_frac="0.5"/> |
446 </test> | 507 </test> |
508 <test expect_num_outputs="3"> | |
509 <!-- test 10 --> | |
510 <param name="adata" value="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad" /> | |
511 <conditional name="method"> | |
512 <param name="method" value="filter_marker"/> | |
513 <param name="markerfile" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_1.tsv"/> | |
514 <param name="thresh_mean" value="1.0"/> | |
515 <param name="thresh_frac" value="0.2"/> | |
516 <param name="layer_selection" value="True"/> | |
517 <param name="groupby" value="bulk_labels"/> | |
518 </conditional> | |
519 <section name="advanced_common"> | |
520 <param name="show_log" value="true" /> | |
521 </section> | |
522 <output name="hidden_output"> | |
523 <assert_contents> | |
524 <has_text_matching expression="adata, key, x, 1.0, 0.2, 'bulk_labels'"/> | |
525 </assert_contents> | |
526 </output> | |
527 <output name="anndata_out" file="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad" ftype="h5ad"> | |
528 <assert_contents> | |
529 <has_h5_keys keys="obs, var, uns" /> | |
530 </assert_contents> | |
531 </output> | |
532 <output name="marker_out" file="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv" ftype="tabular" compare="sim_size"/> | |
533 </test> | |
447 </tests> | 534 </tests> |
448 <help><![CDATA[ | 535 <help><![CDATA[ |
449 | 536 |
450 Filter cells outliers based on counts and numbers of genes expressed (`pp.filter_cells`) | 537 Filter cells outliers based on counts and numbers of genes expressed (`pp.filter_cells`) |
451 ======================================================================================== | 538 ======================================================================================== |
500 ========================================== | 587 ========================================== |
501 | 588 |
502 Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This | 589 Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This |
503 has been implemented by M. D. Luecken. | 590 has been implemented by M. D. Luecken. |
504 | 591 |
592 | |
593 Filter marker genes (`filter_marker`) | |
594 ====================================================================== | |
595 | |
596 This option is specific for celltype marker gene detection. You can generate a celltype marker gene file (tsv) with **COSG** provided at Galaxy. | |
597 | |
598 The marker gene file should have celltypes as rows and marker genes as columns. Each celltype can have a varying number of marker genes. | |
599 | |
600 A marker gene is returned (retained in the list) if its mean expression across the cells of the group is greater than the mean expression threshold (thresh_mean) and if the fraction of cells in the group whose expression of the marker gene is above that group mean is equal to or higher than the cell fraction threshold (thresh_frac). | |
601 | |
505 More details on the `scanpy documentation | 602 More details on the `scanpy documentation |
506 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.downsample_counts.html>`__ | 603 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.downsample_counts.html>`__ |
507 | 604 |
508 | 605 |
509 ]]></help> | 606 ]]></help> |