Mercurial > repos > artbio > gsc_filter_genes
changeset 3:c69729b80a66 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_filter_genes commit 023776e31e97af9ca778b47ceafd1e5f04dea15f
author | artbio |
---|---|
date | Thu, 07 Nov 2024 19:06:35 +0000 |
parents | afe949d332b3 |
children | |
files | filter_genes.R filter_genes.xml |
diffstat | 2 files changed, 64 insertions(+), 60 deletions(-) [+] |
line wrap: on
line diff
--- a/filter_genes.R Mon Oct 16 23:26:20 2023 +0000 +++ b/filter_genes.R Thu Nov 07 19:06:35 2024 +0000 @@ -4,83 +4,84 @@ # Rscript filter_genes.R -f <input file> -o <output file> # load packages that are provided in the conda env -options(show.error.messages = FALSE, - error = function() { - cat(geterrmessage(), file = stderr()) - q("no", 1, FALSE) - } +options( + show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } ) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") library(optparse) # Arguments option_list <- list( - make_option( - c("-f", "--input"), - default = NA, - type = "character", - help = "Input file that contains count values to filter" - ), - make_option( - c("-s", "--sep"), - default = "\t", - type = "character", - help = "File separator [default : '%default' ]" - ), - make_option( - c("-c", "--colnames"), - default = TRUE, - type = "logical", - help = "first line is a header [default : '%default' ]" - ), - make_option( - "--percentile_detection", - default = 0, - type = "numeric", - help = "Include genes with detected expression in at least \ + make_option( + c("-f", "--input"), + default = NA, + type = "character", + help = "Input file that contains count values to filter" + ), + make_option( + c("-s", "--sep"), + default = "\t", + type = "character", + help = "File separator [default : '%default' ]" + ), + make_option( + c("-c", "--colnames"), + default = TRUE, + type = "logical", + help = "first line is a header [default : '%default' ]" + ), + make_option( + "--percentile_detection", + default = 0, + type = "numeric", + help = "Include genes with detected expression in at least \ this fraction of cells [default : '%default' ]" - ), - make_option( - "--absolute_detection", - default = 0, - type = "numeric", - help = "Include genes with detected expression in at least \ + ), + make_option( + "--absolute_detection", + default = 0, + type = "numeric", + help = "Include genes with detected expression in at least \ this number of cells [default : '%default' ]" - ), - make_option( - c("-o", "--output"), - default = NA, - type = "character", - help = "Output name [default : '%default' ]" - ) + ), + make_option( + c("-o", "--output"), + default = NA, + type = "character", + help = "Output name [default : '%default' ]" + ) ) opt <- parse_args(OptionParser(option_list = option_list), - args = commandArgs(trailingOnly = TRUE)) + args = commandArgs(trailingOnly = TRUE) +) if (opt$sep == "tab") { - opt$sep <- "\t" + opt$sep <- "\t" } if (opt$sep == "comma") { - opt$sep <- "," + opt$sep <- "," } # Open files data.counts <- read.delim( - opt$input, - h = opt$colnames, - row.names = 1, - sep = opt$sep, - check.names = FALSE + opt$input, + h = opt$colnames, + row.names = 1, + sep = opt$sep, + check.names = FALSE ) # note the [if else] below, to handle percentile_detection=absolute_detection=0 # Search for genes that are expressed in a certain percent of cells if (opt$percentile_detection > 0) { - kept_genes <- rowSums(data.counts != 0) >= (opt$percentile_detection * ncol(data.counts)) + kept_genes <- rowSums(data.counts != 0) >= (opt$percentile_detection * ncol(data.counts)) } else { - - # Search for genes that are expressed in more than an absolute number of cells - kept_genes <- rowSums(data.counts != 0) >= (opt$absolute_detection) + # Search for genes that are expressed in more than an absolute number of cells + kept_genes <- rowSums(data.counts != 0) >= (opt$absolute_detection) } # Filter matrix @@ -89,10 +90,10 @@ # Save filtered matrix write.table( - data.counts, - opt$output, - sep = "\t", - quote = FALSE, - col.names = TRUE, - row.names = FALSE + data.counts, + opt$output, + sep = "\t", + quote = FALSE, + col.names = TRUE, + row.names = FALSE )
--- a/filter_genes.xml Mon Oct 16 23:26:20 2023 +0000 +++ b/filter_genes.xml Thu Nov 07 19:06:35 2024 +0000 @@ -1,5 +1,8 @@ -<tool id="filter_genes" name="Filter genes in single cell data" version="4.3.1+galaxy0" profile="21.01"> +<tool id="filter_genes" name="Filter genes in single cell data" version="4.3.1+galaxy1" profile="21.01"> <description>which are detected in less that a given fraction of the libraries</description> + <xrefs> + <xref type="bio.tools">galaxy_single_cell_suite</xref> + </xrefs> <requirements> <requirement type="package" version="1.7.3">r-optparse</requirement> </requirements>