# HG changeset patch # User artbio # Date 1697498780 0 # Node ID afe949d332b32e64e1c4ca55fd4a5aafc88a2823 # Parent 5d2304b09f584477de255298776c071e7ed620eb planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_filter_genes commit b184054ad6d4230ab0a714c13f9ef32449faa327 diff -r 5d2304b09f58 -r afe949d332b3 filter_genes.R --- a/filter_genes.R Mon Jun 24 18:07:05 2019 -0400 +++ b/filter_genes.R Mon Oct 16 23:26:20 2023 +0000 @@ -1,94 +1,98 @@ -# ######################## -# filter genes # -# ######################## - # Filter out low expressed genes # Example of command (used for generate output file) : # Rscript filter_genes.R -f -o # load packages that are provided in the conda env -options( show.error.messages=F, - error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) +options(show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } +) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") library(optparse) # Arguments -option_list = list( +option_list <- list( make_option( c("-f", "--input"), default = NA, - type = 'character', + type = "character", help = "Input file that contains count values to filter" ), make_option( c("-s", "--sep"), - default = '\t', - type = 'character', + default = "\t", + type = "character", help = "File separator [default : '%default' ]" ), make_option( c("-c", "--colnames"), default = TRUE, - type = 'logical', + type = "logical", help = "first line is a header [default : '%default' ]" ), make_option( "--percentile_detection", default = 0, - type = 'numeric', + type = "numeric", help = "Include genes with detected expression in at least \ this fraction of cells [default : '%default' ]" ), make_option( "--absolute_detection", default = 0, - type = 'numeric', + type = "numeric", help = "Include genes with detected expression in at least \ this number of cells [default : '%default' ]" ), make_option( c("-o", "--output"), default = NA, - type = 'character', + type = "character", help = "Output name [default : '%default' ]" ) ) -opt = parse_args(OptionParser(option_list = option_list), - args = commandArgs(trailingOnly = TRUE)) -if (opt$sep == "tab") {opt$sep = "\t"} -if (opt$sep == "comma") {opt$sep = ","} +opt <- parse_args(OptionParser(option_list = option_list), + args = commandArgs(trailingOnly = TRUE)) +if (opt$sep == "tab") { + opt$sep <- "\t" +} +if (opt$sep == "comma") { + opt$sep <- "," +} # Open files -data.counts <- read.table( +data.counts <- read.delim( opt$input, h = opt$colnames, row.names = 1, sep = opt$sep, - check.names = F + check.names = FALSE ) # note the [if else] below, to handle percentile_detection=absolute_detection=0 # Search for genes that are expressed in a certain percent of cells if (opt$percentile_detection > 0) { -kept_genes <- rowSums(data.counts != 0) >= (opt$percentile_detection * ncol(data.counts)) + kept_genes <- rowSums(data.counts != 0) >= (opt$percentile_detection * ncol(data.counts)) } else { -# Search for genes that are expressed in more than an absolute number of cells -kept_genes <- rowSums(data.counts != 0) >= (opt$absolute_detection) + # Search for genes that are expressed in more than an absolute number of cells + kept_genes <- rowSums(data.counts != 0) >= (opt$absolute_detection) } # Filter matrix -data.counts <- data.counts[kept_genes,] -data.counts <- cbind(Genes=rownames(data.counts), data.counts) +data.counts <- data.counts[kept_genes, ] +data.counts <- cbind(Genes = rownames(data.counts), data.counts) # Save filtered matrix write.table( data.counts, opt$output, sep = "\t", - quote = F, - col.names = T, - row.names = F -) \ No newline at end of file + quote = FALSE, + col.names = TRUE, + row.names = FALSE +) diff -r 5d2304b09f58 -r afe949d332b3 filter_genes.xml --- a/filter_genes.xml Mon Jun 24 18:07:05 2019 -0400 +++ b/filter_genes.xml Mon Oct 16 23:26:20 2023 +0000 @@ -1,7 +1,7 @@ - + which are detected in less that a given fraction of the libraries - r-optparse + r-optparse