Mercurial > repos > artbio > gsc_filter_genes

--- a/filter_genes.R	Mon Oct 16 23:26:20 2023 +0000
+++ b/filter_genes.R	Thu Nov 07 19:06:35 2024 +0000
@@ -4,83 +4,84 @@
 # Rscript filter_genes.R -f <input file> -o <output file>

 # load packages that are provided in the conda env
-options(show.error.messages = FALSE,
-  error = function() {
-    cat(geterrmessage(), file = stderr())
-    q("no", 1, FALSE)
-  }
+options(
+    show.error.messages = FALSE,
+    error = function() {
+        cat(geterrmessage(), file = stderr())
+        q("no", 1, FALSE)
+    }
 )
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 library(optparse)

 # Arguments
 option_list <- list(
-  make_option(
-    c("-f", "--input"),
-    default = NA,
-    type = "character",
-    help = "Input file that contains count values to filter"
-  ),
-  make_option(
-    c("-s", "--sep"),
-    default = "\t",
-    type = "character",
-    help = "File separator [default : '%default' ]"
-  ),
-  make_option(
-    c("-c", "--colnames"),
-    default = TRUE,
-    type = "logical",
-    help = "first line is a header [default : '%default' ]"
-  ),
-  make_option(
-    "--percentile_detection",
-    default = 0,
-    type = "numeric",
-    help = "Include genes with detected expression in at least \
+    make_option(
+        c("-f", "--input"),
+        default = NA,
+        type = "character",
+        help = "Input file that contains count values to filter"
+    ),
+    make_option(
+        c("-s", "--sep"),
+        default = "\t",
+        type = "character",
+        help = "File separator [default : '%default' ]"
+    ),
+    make_option(
+        c("-c", "--colnames"),
+        default = TRUE,
+        type = "logical",
+        help = "first line is a header [default : '%default' ]"
+    ),
+    make_option(
+        "--percentile_detection",
+        default = 0,
+        type = "numeric",
+        help = "Include genes with detected expression in at least \
     this fraction of cells [default : '%default' ]"
-  ),
-  make_option(
-    "--absolute_detection",
-    default = 0,
-    type = "numeric",
-    help = "Include genes with detected expression in at least \
+    ),
+    make_option(
+        "--absolute_detection",
+        default = 0,
+        type = "numeric",
+        help = "Include genes with detected expression in at least \
     this number of cells [default : '%default' ]"
-  ),
-  make_option(
-    c("-o", "--output"),
-    default = NA,
-    type = "character",
-    help = "Output name [default : '%default' ]"
-  )
+    ),
+    make_option(
+        c("-o", "--output"),
+        default = NA,
+        type = "character",
+        help = "Output name [default : '%default' ]"
+    )
 )

 opt <- parse_args(OptionParser(option_list = option_list),
-                  args = commandArgs(trailingOnly = TRUE))
+    args = commandArgs(trailingOnly = TRUE)
+)
 if (opt$sep == "tab") {
-  opt$sep <- "\t"
+    opt$sep <- "\t"
 }
 if (opt$sep == "comma") {
-  opt$sep <- ","
+    opt$sep <- ","
 }

 # Open files
 data.counts <- read.delim(
-  opt$input,
-  h = opt$colnames,
-  row.names = 1,
-  sep = opt$sep,
-  check.names = FALSE
+    opt$input,
+    h = opt$colnames,
+    row.names = 1,
+    sep = opt$sep,
+    check.names = FALSE
 )

 # note the [if else] below, to handle percentile_detection=absolute_detection=0
 # Search for genes that are expressed in a certain percent of cells
 if (opt$percentile_detection > 0) {
-  kept_genes <- rowSums(data.counts != 0) >= (opt$percentile_detection * ncol(data.counts))
+    kept_genes <- rowSums(data.counts != 0) >= (opt$percentile_detection * ncol(data.counts))
 } else {
-
-  # Search for genes that are expressed in more than an absolute number of cells
-  kept_genes <- rowSums(data.counts != 0) >= (opt$absolute_detection)
+    # Search for genes that are expressed in at least an absolute number of cells
+    kept_genes <- rowSums(data.counts != 0) >= (opt$absolute_detection)
 }

 # Filter matrix
@@ -89,10 +90,10 @@

 # Save filtered matrix
 write.table(
-  data.counts,
-  opt$output,
-  sep = "\t",
-  quote = FALSE,
-  col.names = TRUE,
-  row.names = FALSE
+    data.counts,
+    opt$output,
+    sep = "\t",
+    quote = FALSE,
+    col.names = TRUE,
+    row.names = FALSE
 )
--- a/filter_genes.xml	Mon Oct 16 23:26:20 2023 +0000
+++ b/filter_genes.xml	Thu Nov 07 19:06:35 2024 +0000
@@ -1,5 +1,8 @@
-<tool id="filter_genes" name="Filter genes in single cell data" version="4.3.1+galaxy0" profile="21.01">
+<tool id="filter_genes" name="Filter genes in single cell data" version="4.3.1+galaxy1" profile="21.01">
     <description>which are detected in less that a given fraction of the libraries</description>
+    <xrefs>
+        <xref type="bio.tools">galaxy_single_cell_suite</xref>
+    </xrefs>
     <requirements>
         <requirement type="package" version="1.7.3">r-optparse</requirement>
     </requirements>