Mercurial > repos > artbio > gsc_filter_genes
view filter_genes.R @ 0:f689c4ea8c43 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_filter_genes commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe
author | artbio |
---|---|
date | Mon, 24 Jun 2019 13:38:10 -0400 |
parents | |
children | 5d2304b09f58 |
line wrap: on
line source
# ########################
#      filter genes      #
# ########################

# Filter out low expressed genes: keep only the genes (rows) whose expression
# is detected (count != 0) in enough cells (columns).
#
# Example of command (used for generate output file) :
# Rscript filter_genes.R -f <input file> -o <output file>
#
# Options:
#   -f / --input             count table; its first column is used as row names
#   -s / --sep               input field separator ("tab" and "comma" are
#                            accepted as aliases for "\t" and ",")
#   -c / --colnames          whether the first line of the input is a header
#   --percentile_detection   minimal fraction of cells in which a gene must be
#                            detected; used when > 0
#   --absolute_detection     minimal number of cells in which a gene must be
#                            detected; used when --percentile_detection is 0
#   -o / --output            path of the filtered table (tab-separated)

# On any error: print the message on stderr and quit with exit status 1.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

# load packages that are provided in the conda env
library(optparse)

# Arguments
option_list <- list(
  make_option(
    c("-f", "--input"),
    default = NA,
    type = "character",
    help = "Input file that contains count values to filter"
  ),
  make_option(
    c("-s", "--sep"),
    default = "\t",
    type = "character",
    help = "File separator [default : '%default' ]"
  ),
  make_option(
    c("-c", "--colnames"),
    default = TRUE,
    type = "logical",
    help = "first line is a header [default : '%default' ]"
  ),
  make_option(
    "--percentile_detection",
    default = 0,
    type = "numeric",
    help = paste(
      "Include genes with detected expression in at least",
      "this fraction of cells [default : '%default' ]"
    )
  ),
  make_option(
    "--absolute_detection",
    default = 0,
    type = "numeric",
    help = paste(
      "Include genes with detected expression in at least",
      "this number of cells [default : '%default' ]"
    )
  ),
  make_option(
    c("-o", "--output"),
    default = NA,
    type = "character",
    help = "Output name [default : '%default' ]"
  )
)

opt <- parse_args(
  OptionParser(option_list = option_list),
  args = commandArgs(trailingOnly = TRUE)
)

# Fail early, with an explicit message, when a mandatory path is missing
# (both options default to NA).
if (is.na(opt$input)) {
  stop("No input file provided (use -f/--input)", call. = FALSE)
}
if (is.na(opt$output)) {
  stop("No output file provided (use -o/--output)", call. = FALSE)
}

# Translate the separator aliases into the actual characters
if (opt$sep == "tab") {
  opt$sep <- "\t"
}
if (opt$sep == "comma") {
  opt$sep <- ","
}

# Open files
data_counts <- read.table(
  opt$input,
  header = opt$colnames,
  row.names = 1,
  sep = opt$sep,
  check.names = FALSE
)

# Number of cells in which each gene is detected. A missing value is counted
# as "not detected" so that it cannot turn the row selector into NA.
n_cells <- ncol(data_counts)
n_detected <- rowSums(data_counts != 0, na.rm = TRUE)

# note the [if else] below, to handle percentile_detection=absolute_detection=0
if (opt$percentile_detection > 0) {
  # Search for genes that are expressed in a certain percent of cells.
  # The fraction is converted into a whole number of cells; the small
  # tolerance absorbs floating point error in the product (for instance
  # 0.07 * 100 evaluates to 7.000000000000001, which would otherwise reject
  # a gene detected in exactly 7 cells out of 100).
  min_cells <- ceiling(
    opt$percentile_detection * n_cells - sqrt(.Machine$double.eps)
  )
} else {
  # Search for genes that are expressed in at least an absolute number of cells
  min_cells <- opt$absolute_detection
}
kept_genes <- n_detected >= min_cells

# Filter matrix
# drop = FALSE keeps a data frame (with its row and column names) even when
# the table holds a single cell column.
data_counts <- data_counts[kept_genes, , drop = FALSE]

# Save filtered matrix
write.table(
  data_counts,
  opt$output,
  sep = "\t",
  quote = FALSE,
  col.names = TRUE,
  row.names = TRUE
)