Repository 'sleuth'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/sleuth

Changeset 1:d3e447dd52c8 (2023-06-07)
Previous changeset 0:5f1cb4c28d73 (2023-06-01) Next changeset 2:d6b5fc94062c (2023-06-08)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sleuth commit 6fbf73689708cfbdf3d9d783af4988bad7137f93
modified:
macros.xml
sleuth.R
sleuth.xml
added:
test-data/design.tab
test-data/test02_density.pdf
test-data/test02_pca.pdf
b
diff -r 5f1cb4c28d73 -r d3e447dd52c8 macros.xml
--- a/macros.xml Thu Jun 01 07:56:00 2023 +0000
+++ b/macros.xml Wed Jun 07 11:47:30 2023 +0000
b
@@ -8,7 +8,7 @@
         </requirements>
     </xml>
     <token name="@TOOL_VERSION@">0.30.1</token>
-    <token name="@SUFFIX_VERSION@">0</token>
+    <token name="@SUFFIX_VERSION@">1</token>
     <token name="@PROFILE@">20.01</token>
     <xml name="citations">
         <citations>
b
diff -r 5f1cb4c28d73 -r d3e447dd52c8 sleuth.R
--- a/sleuth.R Thu Jun 01 07:56:00 2023 +0000
+++ b/sleuth.R Wed Jun 07 11:47:30 2023 +0000
[
@@ -26,39 +26,67 @@
 # Collect arguments from command line
 parser <- ArgumentParser(description = "Sleuth R script")
 
-parser$add_argument("--factorLevel", action = "append", required = TRUE)
+parser$add_argument("--factorLevel", action = "append", required = FALSE)
 parser$add_argument("--factorLevel_counts",
                     action = "append",
-                    required = TRUE)
-parser$add_argument("--factorLevel_n", action = "append",  required = TRUE)
-parser$add_argument("--cores",  type = "integer", required = TRUE)
+                    required = FALSE)
+parser$add_argument("--factorLevel_n", action = "append",  required = FALSE)
+parser$add_argument("--cores",  type = "integer", required = FALSE)
 parser$add_argument("--normalize", action = "store_true", required = FALSE)
-parser$add_argument("--nbins", type = "integer", required = TRUE)
-parser$add_argument("--lwr", type = "numeric", required = TRUE)
-parser$add_argument("--upr", type = "numeric", required = TRUE)
+parser$add_argument("--nbins", type = "integer", required = FALSE)
+parser$add_argument("--lwr", type = "numeric", required = FALSE)
+parser$add_argument("--upr", type = "numeric", required = FALSE)
+# Complex-design inputs. The design defaults to "single" so the comparison
+# against "complex" below never receives NULL when the flag is omitted.
+parser$add_argument("--metadata_file", action = "append", required = FALSE)
+parser$add_argument("--experiment_design", default = "single")
 
 args <- parser$parse_args()
 
-all_files <- args$factorLevel_counts
+if (args$experiment_design == "complex") {
+  ## Complex experiment design
+  ############################
 
-conditions <- c()
-for (x in seq_along(args$factorLevel)) {
-  temp <- append(conditions, rep(args$factorLevel[[x]]))
-  conditions <- temp
-}
+  s2c  <-
+    read.table(file = args$metadata_file,
+               header = TRUE,
+               sep = "\t")
+  # Locate each sample's kallisto output. data_filename may still carry the
+  # original ".fastq*" suffix; strip it once, then append ".h5".
+  paths <- file.path("./kallisto_outputs", s2c$data_filename)
+  h5_paths <- paste0(sub("\\.fastq.*$", "", paths), ".h5")
+  # Rename every file exactly once so the name on disk always matches the
+  # path handed to sleuth_prep(); fail loudly if any rename did not succeed.
+  renamed <- file.rename(paths, h5_paths)
+  if (!all(renamed)) stop("Cannot rename ", paths[!renamed][1], call. = FALSE)
+  s2c$path <- h5_paths
+
+  so <- sleuth_prep(s2c, full_model = ~ condition, num_cores = 1)
+  so <- sleuth_fit(so)
 
-sample_names <- all_files %>%
-  str_replace(pattern = "\\.tab", "")
+} else {
+  ## Simple experiment design
+  ###########################
+
+  # One condition label per counts file: each --factorLevel is repeated by
+  # its matching --factorLevel_n (a bare rep() gave one label per level).
+  n_files <- as.integer(unlist(args$factorLevel_n))
+  if (length(n_files) == 0) n_files <- 1L
+  conditions <- rep(unlist(args$factorLevel), times = n_files)
 
-design <-
-  data.frame(list(
-    sample = sample_names,
-    condition = conditions,
-    path = all_files
-  ))
-so <- sleuth_prep(design,
-                  cores = args$cores,
-                  normalize = args$normalize)
+  sample_names <-
+    gsub(".fastq.+", "", basename(args$factorLevel_counts))
+
+  design <-
+    data.frame(list(
+      sample = sample_names,
+      condition = conditions,
+      path = args$factorLevel_counts
+    ))
+  so <- sleuth_prep(design,
+                    cores = args$cores,
+                    normalize = args$normalize)
+}
 
 so <- sleuth_fit(
   so,
b
diff -r 5f1cb4c28d73 -r d3e447dd52c8 sleuth.xml
--- a/sleuth.xml Thu Jun 01 07:56:00 2023 +0000
+++ b/sleuth.xml Wed Jun 07 11:47:30 2023 +0000
[
b'@@ -21,23 +21,38 @@\n     </stdio>\n     <version_command><![CDATA[echo $(R --version | grep version | grep -v GNU)", sleuth version" $(R --vanilla --slave -e "library(sleuth); cat(sessionInfo()\\$otherPkgs\\$sleuth\\$Version)" 2> /dev/null | grep -v -i "WARNING: ")]]></version_command>\n     <command><![CDATA[\n-        #set $factor_levels = list()\n+        #import os\n+        mkdir -p \'./kallisto_outputs\' &&\n         #set $cond_files = list()\n-        #set $cond_n_files = list()\n-        #for $level in $rep_factorLevel\n-            $factor_levels.append(str($level.factorLevel))\n-            $cond_n_files.append(len(str($level.countsFile).split(",")))\n-            #for $i, $count in enumerate(str($level.countsFile).split(","))\n-                #set $fname = str($level.factorLevel) + "_"  + str($i) + \'.h5\'\n-                ln -s \'${count}\' "${fname}" &&\n-                $cond_files.append($fname)\n+        #if $experiment_design.selector == "single"\n+            #set $factor_levels = list()\n+            #set $cond_n_files = list()\n+            #for $level in $experiment_design.rep_factorLevel\n+                $factor_levels.append(str($level.factorLevel))\n+                $cond_n_files.append(len(str($level.countsFile).split(",")))\n+                #for $i, $count in enumerate(str($level.countsFile).split(","))\n+                    #set $fname = str($level.factorLevel) + "_"  + str($i) + \'.h5\'\n+                    #set $output_path =  "/".join([\'./kallisto_outputs\',$fname])\n+                    ln -s \'${count}\' $output_path &&\n+                    $cond_files.append($output_path)\n+                #end for\n             #end for\n-        #end for\n+        #else\n+            #for $count in $experiment_design.countsFile\n+                #set $output_path =  "/".join([\'./kallisto_outputs\',$count.element_identifier])\n+                ln -s \'${count}\' $output_path &&\n+                $cond_files.append($output_path)\n+        
    #end for\n+        #end if\n         Rscript \'${__tool_directory__}/sleuth.R\'\n-            #for $i, $factor in enumerate($factor_levels)\n-                --factorLevel $factor\n-                --factorLevel_n $cond_n_files[$i]\n-            #end for\n+            #if $experiment_design.selector == "single"\n+                #for $i, $factor in enumerate($factor_levels)\n+                    --factorLevel $factor\n+                    --factorLevel_n $cond_n_files[$i]\n+                #end for\n+            #else\n+                --metadata_file $experiment_design.metadata_file\n+            #end if\n             #for $file in $cond_files\n                 --factorLevel_counts $file\n             #end for\n@@ -46,17 +61,30 @@\n             --nbins $advanced_options.nbins\n             --lwr $advanced_options.lwr\n             --upr $advanced_options.upr\n+            --experiment_design $experiment_design.selector\n     ]]></command>\n     <inputs>\n-        <repeat name="rep_factorLevel" title="Factor level" min="2" default="2">\n-            <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level, typical values could be \'tumor\', \'normal\', \'treated\' or \'control\'"\n-                help="Only letters, numbers and underscores will be retained in this field">\n-                <sanitizer>\n-                    <valid initial="string.letters,string.digits"><add value="_" /></valid>\n-                </sanitizer>\n+        <conditional name="experiment_design">\n+            <param name="selector" type="select" label="Experiment design" help="If you have multiple experimental conditions, you should use propably the complex design mode. 
In the help section you can find more information.">\n+                <option value="single">Simple design mode (one experimental factor)</option>\n+                <option value="complex">Complex design mode (two experimental factors)</option>\n             </param>\n-            <param name="countsFile" type="data" format="h5" multiple="true" label="Counts file(s)"/>\n-        </repeat'..b'me="countsFile" type="data_collection" format="h5" multiple="true" label="Counts file(s)"/>\n+                <param argument="--metadata_file" type="data" format="txt" label="Input metadata file" help="You can find more details about it in the help section" />\n+            </when>\n+        </conditional>\n         <section name="advanced_options" title="Advanced options" expanded="true">\n             <param argument="normalization" type="boolean" truevalue="--normalize" falsevalue="" checked="true" label="Normalize data" \n                 help="If this is set to false, bootstraps will not be read and transformation of the data will not be done. This should \n@@ -68,6 +96,7 @@\n             <param argument="upr" type="float" min="0" max="1" value="0.75" label="UPR" help="The upper range of variances within each \n                 bin that should be included for the shrinkage procedure." 
/>\n         </section>\n+\n     </inputs>\n     <outputs>\n         <data name="sleuth_table" from_work_dir="sleuth_table.tab" format="tabular" label="${tool.name} on ${on_string}: DE table">\n@@ -104,6 +133,35 @@\n             <output name="pca_plot" file="test01_pca.pdf" ftype="pdf" compare="sim_size"/>\n             <output name="density_plot" file="test01_density.pdf" ftype="pdf" compare="sim_size"/>\n         </test>\n+    <test expect_num_outputs="3">\n+        <conditional name="experiment_design">\n+            <param name="selector" value="complex"/>\n+            <param name="countsFile">\n+                <collection type="list">\n+                    <element name="kallisto_output_01.h5" ftype="h5" value="kallisto_output_01.h5"/>\n+                    <element name="kallisto_output_02.h5" ftype="h5" value="kallisto_output_02.h5"/>\n+                    <element name="kallisto_output_03.h5" ftype="h5" value="kallisto_output_03.h5"/>\n+                    <element name="kallisto_output_04.h5" ftype="h5" value="kallisto_output_04.h5"/>\n+                </collection>\n+            </param>\n+            <param name="metadata_file" value="design.tab"/>\n+        </conditional>\n+        <section name="advanced_options">\n+            <param name="normalization" value="true"/>\n+            <param name="nbins" value="100"/>\n+            <param name="lwr" value="0.25"/>\n+            <param name="upr" value="0.75"/>\n+        </section>\n+        <output name="sleuth_table" ftype="tabular">\n+            <assert_contents>\n+                <has_size value="756310" delta="100"/>\n+                <has_text text="ENST00000394894.8"/>\n+                <has_text text="ENST00000524187.1"/>\n+            </assert_contents>\n+        </output>\n+        <output name="pca_plot" file="test02_pca.pdf" ftype="pdf" compare="sim_size"/>\n+        <output name="density_plot" file="test02_density.pdf" ftype="pdf" compare="sim_size"/>\n+    </test>\n     </tests>\n     
<help><![CDATA[\n \n@@ -123,6 +181,24 @@\n These can serve as proxies for technical replicates, allowing for an ascertainment of the variability in estimates due to the random \n processes underlying RNA-Seq as well as the statistical procedure of read assignment. \n \n+.. class:: infomark\n+\n+**Experimental design tabular input for complex experimental designs**\n+\n+The experimental design input should have this format:\n+\n+        ::\n+        \n+            data_filename\tcondition\tsample\n+            finename_01.fastq.gz\tcondition1\treplicate1\n+            filename_02.fastq.gz\tcondition1\treplicate2\n+            filename_03.fastq.gz\tcondition2\treplicate1\n+            filename_04.fastq.gz\tcondition2\treplicate2\n+\n+\n+The tabular file **requires to have the same column names** as the example (data_file, condition, sample). The data file column correspond to original FASTQ filenames uploaded to Galaxy. \n+Condition includes the information about the first factor, and sample includes information about the second factor. **Only alphanumeric characters, undescores and dots are allowed**.\n+\n     ]]></help>\n     <expand macro="citations" />\n </tool>\n'
b
diff -r 5f1cb4c28d73 -r d3e447dd52c8 test-data/design.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/design.tab Wed Jun 07 11:47:30 2023 +0000
b
@@ -0,0 +1,5 @@
+sample condition data_filename
+CAM_01 CAM kallisto_output_01.h5
+CAM_02 CAM kallisto_output_02.h5
+reC3_01 reC3 kallisto_output_03.h5
+reC3_02 reC3 kallisto_output_04.h5
b
diff -r 5f1cb4c28d73 -r d3e447dd52c8 test-data/test02_density.pdf
b
Binary file test-data/test02_density.pdf has changed
b
diff -r 5f1cb4c28d73 -r d3e447dd52c8 test-data/test02_pca.pdf
b
Binary file test-data/test02_pca.pdf has changed