comparison filter_genes.R @ 2:afe949d332b3 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_filter_genes commit b184054ad6d4230ab0a714c13f9ef32449faa327
author artbio
date Mon, 16 Oct 2023 23:26:20 +0000
parents 5d2304b09f58
children
comparison
equal deleted inserted replaced
1:5d2304b09f58 2:afe949d332b3
1 # ########################
2 # filter genes #
3 # ########################
4
5 # Filter out low expressed genes 1 # Filter out low expressed genes
6 2
7 # Example of command (used to generate the output file) : 3 # Example of command (used to generate the output file) :
8 # Rscript filter_genes.R -f <input file> -o <output file> 4 # Rscript filter_genes.R -f <input file> -o <output file>
9 5
10 # load packages that are provided in the conda env 6 # load packages that are provided in the conda env
11 options( show.error.messages=F, 7 options(show.error.messages = FALSE,
12 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) 8 error = function() {
9 cat(geterrmessage(), file = stderr())
10 q("no", 1, FALSE)
11 }
12 )
13 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 13 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
14 library(optparse) 14 library(optparse)
15 15
16 # Arguments 16 # Arguments
17 option_list = list( 17 option_list <- list(
18 make_option( 18 make_option(
19 c("-f", "--input"), 19 c("-f", "--input"),
20 default = NA, 20 default = NA,
21 type = 'character', 21 type = "character",
22 help = "Input file that contains count values to filter" 22 help = "Input file that contains count values to filter"
23 ), 23 ),
24 make_option( 24 make_option(
25 c("-s", "--sep"), 25 c("-s", "--sep"),
26 default = '\t', 26 default = "\t",
27 type = 'character', 27 type = "character",
28 help = "File separator [default : '%default' ]" 28 help = "File separator [default : '%default' ]"
29 ), 29 ),
30 make_option( 30 make_option(
31 c("-c", "--colnames"), 31 c("-c", "--colnames"),
32 default = TRUE, 32 default = TRUE,
33 type = 'logical', 33 type = "logical",
34 help = "first line is a header [default : '%default' ]" 34 help = "first line is a header [default : '%default' ]"
35 ), 35 ),
36 make_option( 36 make_option(
37 "--percentile_detection", 37 "--percentile_detection",
38 default = 0, 38 default = 0,
39 type = 'numeric', 39 type = "numeric",
40 help = "Include genes with detected expression in at least \ 40 help = "Include genes with detected expression in at least \
41 this fraction of cells [default : '%default' ]" 41 this fraction of cells [default : '%default' ]"
42 ), 42 ),
43 make_option( 43 make_option(
44 "--absolute_detection", 44 "--absolute_detection",
45 default = 0, 45 default = 0,
46 type = 'numeric', 46 type = "numeric",
47 help = "Include genes with detected expression in at least \ 47 help = "Include genes with detected expression in at least \
48 this number of cells [default : '%default' ]" 48 this number of cells [default : '%default' ]"
49 ), 49 ),
50 make_option( 50 make_option(
51 c("-o", "--output"), 51 c("-o", "--output"),
52 default = NA, 52 default = NA,
53 type = 'character', 53 type = "character",
54 help = "Output name [default : '%default' ]" 54 help = "Output name [default : '%default' ]"
55 ) 55 )
56 ) 56 )
57 57
58 opt = parse_args(OptionParser(option_list = option_list), 58 opt <- parse_args(OptionParser(option_list = option_list),
59 args = commandArgs(trailingOnly = TRUE)) 59 args = commandArgs(trailingOnly = TRUE))
60 if (opt$sep == "tab") {opt$sep = "\t"} 60 if (opt$sep == "tab") {
61 if (opt$sep == "comma") {opt$sep = ","} 61 opt$sep <- "\t"
62 }
63 if (opt$sep == "comma") {
64 opt$sep <- ","
65 }
62 66
63 # Open files 67 # Open files
64 data.counts <- read.table( 68 data.counts <- read.delim(
65 opt$input, 69 opt$input,
66 h = opt$colnames, 70 h = opt$colnames,
67 row.names = 1, 71 row.names = 1,
68 sep = opt$sep, 72 sep = opt$sep,
69 check.names = F 73 check.names = FALSE
70 ) 74 )
71 75
72 # note the [if else] below, to handle percentile_detection=absolute_detection=0 76 # note the [if else] below, to handle percentile_detection=absolute_detection=0
73 # Keep genes that are detected (non-zero count) in at least the given fraction of cells 77 # Keep genes that are detected (non-zero count) in at least the given fraction of cells
74 if (opt$percentile_detection > 0) { 78 if (opt$percentile_detection > 0) {
75 kept_genes <- rowSums(data.counts != 0) >= (opt$percentile_detection * ncol(data.counts)) 79 kept_genes <- rowSums(data.counts != 0) >= (opt$percentile_detection * ncol(data.counts))
76 } else { 80 } else {
77 81
78 # Keep genes that are detected (non-zero count) in at least an absolute number of cells 82 # Keep genes that are detected (non-zero count) in at least an absolute number of cells
79 kept_genes <- rowSums(data.counts != 0) >= (opt$absolute_detection) 83 kept_genes <- rowSums(data.counts != 0) >= (opt$absolute_detection)
80 } 84 }
81 85
82 # Filter matrix 86 # Filter matrix
83 data.counts <- data.counts[kept_genes,] 87 data.counts <- data.counts[kept_genes, ]
84 data.counts <- cbind(Genes=rownames(data.counts), data.counts) 88 data.counts <- cbind(Genes = rownames(data.counts), data.counts)
85 89
86 # Save filtered matrix 90 # Save filtered matrix
87 write.table( 91 write.table(
88 data.counts, 92 data.counts,
89 opt$output, 93 opt$output,
90 sep = "\t", 94 sep = "\t",
91 quote = F, 95 quote = FALSE,
92 col.names = T, 96 col.names = TRUE,
93 row.names = F 97 row.names = FALSE
94 ) 98 )