Previous changeset 3:5407dc697e24 (2023-10-16) |
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_filter_cells commit c5b2e910bd79d92566b2c2e9bad508d090a75841 |
modified:
filter_cells.R filter_cells.xml |
b |
diff -r 5407dc697e24 -r 0b27f323c80b filter_cells.R --- a/filter_cells.R Mon Oct 16 22:33:23 2023 +0000 +++ b/filter_cells.R Thu Nov 07 18:33:17 2024 +0000 |
[ |
b'@@ -3,11 +3,12 @@\n # percentiles or raw values of number of genes detected or\n # total aligned reads\n \n-options(show.error.messages = FALSE,\n- error = function() {\n- cat(geterrmessage(), file = stderr())\n- q("no", 1, FALSE)\n- }\n+options(\n+ show.error.messages = FALSE,\n+ error = function() {\n+ cat(geterrmessage(), file = stderr())\n+ q("no", 1, FALSE)\n+ }\n )\n \n loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")\n@@ -18,38 +19,58 @@\n \n # Arguments\n option_list <- list(\n- make_option(c("-f", "--file"), default = NA, type = "character",\n- help = "Input file that contains values to filter"),\n- make_option("--sep", default = "\\t", type = "character",\n- help = "File column separator [default : \'%default\' ]"),\n- make_option("--percentile_genes", default = 0, type = "integer",\n- help = "nth Percentile of the number of genes detected by a cell distribution [default : \'%default\' ]"),\n- make_option("--percentile_counts", default = 0, type = "integer",\n- help = "nth Percentile of the total counts per cell distribution [default : \'%default\' ]"),\n- make_option("--absolute_genes", default = 0, type = "integer",\n- help = "Remove cells that did not express at least this number of genes [default : \'%default\' ]"),\n- make_option("--absolute_counts", default = 0, type = "integer",\n- help = "Number of transcript threshold for cell filtering [default : \'%default\' ]"),\n- make_option("--manage_cutoffs", default = "intersect", type = "character",\n- help = "combine or intersect cutoffs for filtering"),\n- make_option("--pdfplot", type = "character",\n- help = "Path to pdf file of the plots"),\n- make_option("--output", type = "character",\n- help = "Path to tsv file of filtered cell data"),\n- make_option("--output_metada", type = "character",\n- help = "Path to tsv file of filtered cell metadata")\n+ make_option(c("-f", "--file"),\n+ default = NA, type = "character",\n+ help = "Input file that contains values to filter"\n+ ),\n+ make_option("--sep",\n+ default = "\\t", type = "character",\n+ help = "File column separator [default : \'%default\' ]"\n+ ),\n+ make_option("--percentile_genes",\n+ default = 0, type = "integer",\n+ help = "nth Percentile of the number of genes detected by a cell distribution [default : \'%default\' ]"\n+ ),\n+ make_option("--percentile_counts",\n+ default = 0, type = "integer",\n+ help = "nth Percentile of the total counts per cell distribution [default : \'%default\' ]"\n+ ),\n+ make_option("--absolute_genes",\n+ default = 0, type = "integer",\n+ help = "Remove cells that did not express at least this number of genes [default : \'%default\' ]"\n+ ),\n+ make_option("--absolute_counts",\n+ default = 0, type = "integer",\n+ help = "Number of transcript threshold for cell filtering [default : \'%default\' ]"\n+ ),\n+ make_option("--manage_cutoffs",\n+ default = "intersect", type = "character",\n+ help = "combine or intersect cutoffs for filtering"\n+ ),\n+ make_option("--pdfplot",\n+ type = "character",\n+ help = "Path to pdf file of the plots"\n+ ),\n+ make_option("--output",\n+ type = "character",\n+ help = "Path to tsv file of filtered cell data"\n+ ),\n+ make_option("--output_metada",\n+ type = "character",\n+ help = "Path to tsv file of filtered cell metadata"\n+ )\n )\n opt <- parse_args(OptionParser(option_list = option_list),\n- args = commandArgs(trailingOnly = TRUE)\n+ args = commandArgs(trailingOnly = TRUE)\n )\n if (opt$sep == "tab") {\n- opt$sep <- "\\t"\n+ opt$sep <- "\\t"\n }\n if (opt$sep == "comma") {\n- opt$sep <- ","\n+ opt$sep <- ","\n }\n if (opt$sep == "space") {\n- opt$sep <- " "\n+ opt$sep <- " "\n }\n \n \n@@ -57,56 +78,64 @@\n \n # if input parameters are not consistent (one or either method, not both), '..b'<- (QC_metrics$nGenes < genes_threshold) & (QC_metrics$total_counts < counts_threshold)\n+ QC_metrics$filtered <- (QC_metrics$nGenes < genes_threshold) & (QC_metrics$total_counts < counts_threshold)\n }\n \n ## Plot the results\n \n # Determine title from the parameter logics\n if (opt$percentile_counts > 0) {\n- part_one <- paste0("Cells with aligned reads counts below the ",\n- opt$percentile_counts,\n- "th percentile of aligned read counts")\n+ part_one <- paste0(\n+ "Cells with aligned reads counts below the ",\n+ opt$percentile_counts,\n+ "th percentile of aligned read counts"\n+ )\n } else {\n- part_one <- paste0("Cells with aligned read counts below ",\n- opt$absolute_counts)\n+ part_one <- paste0(\n+ "Cells with aligned read counts below ",\n+ opt$absolute_counts\n+ )\n }\n \n if (opt$percentile_genes > 0) {\n- part_two <- paste0("with number of detected genes below the ",\n- opt$percentile_genes,\n- "th percentile of detected gene counts")\n+ part_two <- paste0(\n+ "with number of detected genes below the ",\n+ opt$percentile_genes,\n+ "th percentile of detected gene counts"\n+ )\n } else {\n- part_two <- paste0("with number of detected genes below ",\n- opt$absolute_genes)\n+ part_two <- paste0(\n+ "with number of detected genes below ",\n+ opt$absolute_genes\n+ )\n }\n \n if (opt$manage_cutoffs == "intersect") {\n- conjunction <- " and\\n"\n+ conjunction <- " and\\n"\n } else {\n- conjunction <- " or\\n"\n+ conjunction <- " or\\n"\n }\n \n # plot with ggplot2\n ggplot(QC_metrics, aes(nGenes, total_counts, colour = filtered)) +\n- geom_point() +\n- scale_y_log10() +\n- scale_colour_discrete(name = "",\n- breaks = c(FALSE, TRUE),\n- labels = c(paste0("Not filtered (", table(QC_metrics$filtered)[1], " cells)"),\n- paste0("Filtered (", table(QC_metrics$filtered)[2], " cells)"))\n- ) +\n- xlab("Detected genes per cell") +\n- ylab("Aligned reads per cell (log10 scale)") +\n- geom_vline(xintercept = genes_threshold) +\n- geom_hline(yintercept = counts_threshold) +\n- ggtitle(paste0(part_one, conjunction, part_two, "\\nwere filtered out")) +\n- theme(plot.title = element_text(size = 8, face = "bold"))\n+ geom_point() +\n+ scale_y_log10() +\n+ scale_colour_discrete(\n+ name = "",\n+ breaks = c(FALSE, TRUE),\n+ labels = c(\n+ paste0("Not filtered (", table(QC_metrics$filtered)[1], " cells)"),\n+ paste0("Filtered (", table(QC_metrics$filtered)[2], " cells)")\n+ )\n+ ) +\n+ xlab("Detected genes per cell") +\n+ ylab("Aligned reads per cell (log10 scale)") +\n+ geom_vline(xintercept = genes_threshold) +\n+ geom_hline(yintercept = counts_threshold) +\n+ ggtitle(paste0(part_one, conjunction, part_two, "\\nwere filtered out")) +\n+ theme(plot.title = element_text(size = 8, face = "bold"))\n \n dev.off()\n \n # Retrieve identifier of kept_cells\n kept_cells <- QC_metrics$cell_id[!QC_metrics$filtered]\n \n-data_counts <- data.frame(Genes = rownames(data_counts[, kept_cells]),\n- data_counts[, kept_cells],\n- check.names = FALSE)\n+data_counts <- data.frame(\n+ Genes = rownames(data_counts[, kept_cells]),\n+ data_counts[, kept_cells],\n+ check.names = FALSE\n+)\n \n # Save filtered cells\n write.table(data_counts,\n- opt$output,\n- sep = "\\t",\n- quote = FALSE,\n- col.names = TRUE,\n- row.names = FALSE\n+ opt$output,\n+ sep = "\\t",\n+ quote = FALSE,\n+ col.names = TRUE,\n+ row.names = FALSE\n )\n \n # Add QC metrics of filtered cells to a metadata file\n@@ -212,9 +260,9 @@\n \n # Save the metadata (QC metrics) file\n write.table(metadata,\n- opt$output_metada,\n- sep = "\\t",\n- quote = FALSE,\n- col.names = TRUE,\n- row.names = FALSE\n+ opt$output_metada,\n+ sep = "\\t",\n+ quote = FALSE,\n+ col.names = TRUE,\n+ row.names = FALSE\n )\n' |
b |
diff -r 5407dc697e24 -r 0b27f323c80b filter_cells.xml --- a/filter_cells.xml Mon Oct 16 22:33:23 2023 +0000 +++ b/filter_cells.xml Thu Nov 07 18:33:17 2024 +0000 |
b |
@@ -1,5 +1,8 @@ -<tool id="filter_cells" name="Filter cells data" version="4.3.1+galaxy0" profile="21.01"> +<tool id="filter_cells" name="Filter cells data" version="4.3.1+galaxy1" profile="21.01"> <description>on total aligned reads and/or number of detected genes</description> + <xrefs> + <xref type="bio.tools">galaxy_single_cell_suite</xref> + </xrefs> <requirements> <requirement type="package" version="1.7.3">r-optparse</requirement> <requirement type="package" version="3.4.4">r-ggplot2</requirement> |