Repository 'ribowaltz_plot'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ribowaltz_plot

Changeset 0:8e903cb3f919 (2022-09-22)
Next changeset 1:e25d81465c23 (2023-10-23)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ribowaltz commit ff002df702f544829d1b500ac4b517c1e70ad14d
added:
macros.xml
ribowaltz.R
ribowaltz_plot.R
ribowaltz_plot.xml
test-data/rep1.bam
test-data/rep1.rdata
test-data/rep1_annot.gtf.gz
b
diff -r 000000000000 -r 8e903cb3f919 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Sep 22 20:30:20 2022 +0000
b
@@ -0,0 +1,32 @@
+<macros>
+    <!-- Conda packages required by the R wrapper scripts. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.2.0">ribowaltz</requirement>
+            <requirement type="package" version="1.20.3">r-getopt</requirement>
+        </requirements>
+    </xml>
+    <token name="@TOOL_VERSION@">1.2.0</token>
+    <!-- ribowaltz_plot.xml declares version="@VERSION@"; define that token so
+         the tool version expands instead of staying a literal placeholder. -->
+    <token name="@VERSION@">1.2.0</token>
+    <token name="@PROFILE@">21.05</token>
+    <xml name="edam_ontology">
+        <edam_topics>
+            <edam_topic>topic_4027</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_0439</edam_operation>
+        </edam_operations>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1006169</citation>
+        </citations>
+    </xml>
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">riboWaltz</xref>
+        </xrefs>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 8e903cb3f919 ribowaltz.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ribowaltz.R Thu Sep 22 20:30:20 2022 +0000
[
@@ -0,0 +1,160 @@
+# riboWaltz wrapper script: converts BAM alignments into P-site annotated read
+# tables and writes per-sample P-site tables plus codon and CDS coverage tables.
+
+# setup R error handling to go to stderr
+options(show.error.messages = FALSE, error = function() {
+  cat(geterrmessage(), file = stderr())
+  q("no", 1, FALSE)
+})
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+library("getopt")
+# the option name is "stringsAsFactors"; the previous spelling
+# "stringAsFactors" only created an option that nothing reads.
+options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
+args <- commandArgs(trailingOnly = TRUE)
+
+# get options, using the spec as defined by the enclosed list.
+# we read the options from the default: commandArgs(TRUE).
+# the long option names are the command line interface and are kept verbatim,
+# including their historical spelling.
+spec <- matrix(c(
+  "quiet", "q", 0, "logical",
+  "help", "h", 0, "logical",
+  "bamdir", "b", 1, "character",
+  "gtffile", "g", 1, "character",
+  "codon_coverage_info", "Y", 1, "character",
+  "cds_coverage_info", "Z", 1, "character",
+  "psite_info_rdata", "O", 0, "character",
+  "refseq_sep", "s", 0, "character",
+  "params_duplicate_filterting", "d", 0, "character",
+  "params_peridiocity_filterting", "l", 0, "character",
+  "params_custom_filterting", "c", 0, "character",
+  "params_psite_additional", "p", 0, "character",
+  "params_coverage_additional", "C", 0, "character"
+), byrow = TRUE, ncol = 4)
+opt <- getopt(spec)
+
+# if help was asked for print a friendly message
+# and exit with a non-zero error code
+if (!is.null(opt$help)) {
+  cat(getopt(spec, usage = TRUE))
+  q(status = 1)
+}
+
+# TRUE unless --quiet was given (not consulted further down in this script)
+verbose <- is.null(opt$quiet)
+
+# stop early with a readable message when an option the script cannot work
+# without is absent, instead of failing later inside riboWaltz or write.table
+required_opts <- c(
+  "bamdir", "gtffile", "codon_coverage_info", "cds_coverage_info",
+  "params_psite_additional", "params_coverage_additional"
+)
+is_missing <- vapply(required_opts, function(o) is.null(opt[[o]]), logical(1))
+if (any(is_missing)) {
+  stop(
+    "missing required option(s): ",
+    paste0("--", required_opts[is_missing], collapse = ", "),
+    call. = FALSE
+  )
+}
+
+library("riboWaltz")
+library("jsonlite")
+
+# create annotation data table
+annotation_dt <- create_annotation(opt$gtffile)
+
+# an absent or empty separator means "no separator": pass NULL to bamtolist.
+# the is.null() guard avoids "argument is of length zero" when --refseq_sep
+# was not supplied at all.
+sep <- opt$refseq_sep
+if (is.null(sep) || sep == "") {
+  sep <- NULL
+}
+# convert alignments in BAM files into list of data tables
+reads_list <- bamtolist(bamfolder = opt$bamdir, annotation = annotation_dt, refseq_sep = sep)
+
+# remove duplicate reads
+if (!is.null(opt$params_duplicate_filterting)) {
+  json_duplicate_filterting <- fromJSON(opt$params_duplicate_filterting)
+  reads_list <- duplicates_filter(
+    data = reads_list,
+    extremity = json_duplicate_filterting$extremity,
+    keep = json_duplicate_filterting$keep
+  )
+}
+
+# selection of read lengths - periodicity filtering
+if (!is.null(opt$params_peridiocity_filterting)) {
+  json_peridiocity_filterting <- fromJSON(opt$params_peridiocity_filterting)
+  reads_list <- length_filter(
+    data = reads_list,
+    length_filter_mode = "periodicity",
+    periodicity_threshold = json_peridiocity_filterting$periodicity_threshold
+  )
+}
+# selection of read lengths - length range filtering
+if (!is.null(opt$params_custom_filterting)) {
+  json_custom_filterting <- fromJSON(opt$params_custom_filterting)
+  reads_list <- length_filter(
+    data = reads_list,
+    length_filter_mode = "custom",
+    length_range = json_custom_filterting$length_range
+  )
+}
+
+# compute P-site offset
+json_psite_additional <- fromJSON(opt$params_psite_additional)
+psite_offset <- psite(
+  reads_list,
+  start = json_psite_additional$use_start,
+  flanking = json_psite_additional$flanking,
+  extremity = json_psite_additional$psite_extrimity,
+  plot = TRUE,
+  cl = json_psite_additional$cl,
+  plot_format = "pdf",
+  plot_dir = "plots"
+)
+# bare top-level expression: echoes the offset table to stdout
+psite_offset
+
+reads_psite_list <- psite_info(reads_list, psite_offset)
+# bare top-level expression: echoes the P-site tables to stdout
+reads_psite_list
+# write a separate P-site offset info table for each sample
+for (sample_name in names(reads_psite_list)) {
+  psite_file <- paste(sample_name, "psite_info.tsv", sep = "_")
+  write.table(
+    reads_psite_list[[sample_name]],
+    file = psite_file,
+    sep = "\t", row.names = FALSE, quote = FALSE
+  )
+  print(psite_file)
+}
+
+# write R object to a file
+if (!is.null(opt$psite_info_rdata)) {
+  save(reads_psite_list, annotation_dt, file = opt$psite_info_rdata)
+}
+
+json_coverage_additional <- fromJSON(opt$params_coverage_additional)
+# codon coverage
+codon_coverage_out <- codon_coverage(
+  reads_psite_list,
+  annotation_dt,
+  psite = json_coverage_additional$psites_per_region,
+  min_overlap = json_coverage_additional$min_overlap
+)
+write.table(codon_coverage_out, file = opt$codon_coverage_info, sep = "\t", row.names = FALSE, quote = FALSE)
+
+# CDS coverage
+cds_coverage_out <- cds_coverage(
+  reads_psite_list,
+  annotation_dt,
+  start_nts = json_coverage_additional$start_nts,
+  stop_nts = json_coverage_additional$stop_nts
+)
+write.table(cds_coverage_out, file = opt$cds_coverage_info, sep = "\t", row.names = FALSE, quote = FALSE)
b
diff -r 000000000000 -r 8e903cb3f919 ribowaltz_plot.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ribowaltz_plot.R Thu Sep 22 20:30:20 2022 +0000
[
@@ -0,0 +1,194 @@
+# riboWaltz plotting wrapper: loads the RData file given via --input_rdata and
+# renders every requested plot type into its own PDF in the working directory.
+
+# setup R error handling to go to stderr
+options(show.error.messages = FALSE, error = function() {
+  cat(geterrmessage(), file = stderr())
+  q("no", 1, FALSE)
+})
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+library("getopt")
+# the option name is "stringsAsFactors"; the previous spelling
+# "stringAsFactors" only created an option that nothing reads.
+options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
+args <- commandArgs(trailingOnly = TRUE)
+
+# get options, using the spec as defined by the enclosed list.
+# we read the options from the default: commandArgs(TRUE).
+spec <- matrix(c(
+  "quiet", "q", 0, "logical",
+  "help", "h", 0, "logical",
+  "input_rdata", "i", 1, "character",
+  "params_rlength_distr", "r", 0, "character",
+  "params_rends_heat", "e", 0, "character",
+  "region_psite_plot", "R", 0, "logical",
+  "params_trint_periodicity", "t", 0, "character",
+  "params_metaplots", "m", 0, "character",
+  "params_codon_usage_psite", "u", 0, "character"
+), byrow = TRUE, ncol = 4)
+opt <- getopt(spec)
+
+# if help was asked for print a friendly message
+# and exit with a non-zero error code
+if (!is.null(opt$help)) {
+  cat(getopt(spec, usage = TRUE))
+  q(status = 1)
+}
+
+# TRUE unless --quiet was given (not consulted further down in this script)
+verbose <- is.null(opt$quiet)
+
+# without an input file nothing can be plotted; say so explicitly
+if (is.null(opt$input_rdata)) {
+  stop("missing required option: --input_rdata", call. = FALSE)
+}
+
+library("riboWaltz")
+library("jsonlite")
+
+# load into a private environment so that objects stored in the file cannot
+# overwrite variables of this script (such as opt); only the two objects used
+# by the plots are copied out.
+rdata_env <- new.env()
+load(opt$input_rdata, envir = rdata_env)
+expected_objs <- c("reads_psite_list", "annotation_dt")
+found <- vapply(
+  expected_objs, exists, logical(1),
+  envir = rdata_env, inherits = FALSE
+)
+if (!all(found)) {
+  stop(
+    "input RData lacks object(s): ",
+    paste(expected_objs[!found], collapse = ", "),
+    call. = FALSE
+  )
+}
+reads_psite_list <- rdata_env$reads_psite_list
+annotation_dt <- rdata_env$annotation_dt
+
+# sample names and count are reused for plot calls and PDF page sizes
+sample_names <- names(reads_psite_list)
+n_samples <- length(reads_psite_list)
+
+# read length distribution
+if (!is.null(opt$params_rlength_distr)) {
+  pdf("read_lengths.pdf")
+  json_rlength_distr <- fromJSON(opt$params_rlength_distr)
+  length_dist <- rlength_distr(
+    reads_psite_list,
+    sample = sample_names,
+    cl = json_rlength_distr$cl,
+    multisamples = json_rlength_distr$multisamples,
+    plot_style = json_rlength_distr$plot_style
+  )
+  print(length_dist)
+  dev.off()
+}
+
+# read extremity heatmaps, one plot printed per sample
+if (!is.null(opt$params_rends_heat)) {
+  pdf("read_ends_heatmap.pdf", height = 5 * n_samples, width = 15)
+  json_rends_heat <- fromJSON(opt$params_rends_heat)
+  for (sample_name in sample_names) {
+    ends_heatmap <- rends_heat(
+      reads_psite_list,
+      annotation_dt,
+      sample = sample_name,
+      cl = json_rends_heat$cl,
+      utr5l = json_rends_heat$utr5l,
+      cdsl = json_rends_heat$cdsl,
+      utr3l = json_rends_heat$utr3l
+    )
+    print(ends_heatmap[["plot"]])
+  }
+  dev.off()
+}
+
+# P-sites per transcript region
+if (!is.null(opt$region_psite_plot)) {
+  pdf("psites_per_region.pdf", height = 12, width = 7 * n_samples)
+  psite_region <- region_psite(reads_psite_list, annotation_dt, sample = sample_names)
+  print(psite_region[["plot"]])
+  dev.off()
+}
+
+# trinucleotide periodicity
+if (!is.null(opt$params_trint_periodicity)) {
+  pdf("trinucleotide_periodicity.pdf", height = 6 * n_samples, width = 10)
+  json_trint_periodicity <- fromJSON(opt$params_trint_periodicity)
+  frames_stratified <- frame_psite_length(
+    reads_psite_list,
+    sample = sample_names,
+    cl = json_trint_periodicity$cl,
+    region = json_trint_periodicity$region,
+    length_range = json_trint_periodicity$length_range
+  )
+  # explicit print(): a bare expression inside a braced block is not
+  # auto-printed, so this plot never reached the PDF before. The PDF gains a
+  # page, so size-based expectations for this output may need refreshing.
+  print(frames_stratified[["plot"]])
+  # NOTE(review): this repeats frame_psite_length() without cl; riboWaltz also
+  # offers frame_psite() for the non-stratified view - confirm the intent.
+  frames <- frame_psite_length(
+    reads_psite_list,
+    sample = sample_names,
+    region = json_trint_periodicity$region,
+    length_range = json_trint_periodicity$length_range
+  )
+  print(frames[["plot"]])
+  dev.off()
+}
+
+# metaprofiles followed by the comparison metaheatmap
+if (!is.null(opt$params_metaplots)) {
+  pdf("metaplots.pdf", height = 5 * n_samples, width = 24)
+  json_metaplots <- fromJSON(opt$params_metaplots)
+  metaprofile <- metaprofile_psite(
+    reads_psite_list,
+    annotation_dt,
+    sample = sample_names,
+    multisamples = json_metaplots$multisamples,
+    plot_style = json_metaplots$plot_style,
+    length_range = json_metaplots$length_range,
+    frequency = json_metaplots$frequency,
+    utr5l = json_metaplots$utr5l,
+    cdsl = json_metaplots$cdsl,
+    utr3l = json_metaplots$utr3l,
+    plot_title = "sample.transcript.length_range"
+  )
+  print(metaprofile)
+  # named list with one single-sample entry per sample
+  sample_list <- setNames(as.list(sample_names), sample_names)
+  metaheatmap <- metaheatmap_psite(
+    reads_psite_list,
+    annotation_dt,
+    sample = sample_list,
+    length_range = json_metaplots$length_range,
+    utr5l = json_metaplots$utr5l,
+    cdsl = json_metaplots$cdsl,
+    utr3l = json_metaplots$utr3l,
+    plot_title = "Comparison metaheatmap"
+  )
+  print(metaheatmap[["plot"]])
+  dev.off()
+}
+
+# codon usage barplots, one plot printed per sample
+if (!is.null(opt$params_codon_usage_psite)) {
+  pdf("codon_usage.pdf", height = 6, width = 16)
+  json_codon_usage_psite <- fromJSON(opt$params_codon_usage_psite)
+  for (sample_name in sample_names) {
+    cu_barplot <- codon_usage_psite(
+      reads_psite_list,
+      annotation_dt,
+      sample = sample_name,
+      fastapath = json_codon_usage_psite$fastapath,
+      fasta_genome = FALSE,
+      frequency_normalization = json_codon_usage_psite$frequency
+    )
+    print(cu_barplot[["plot"]])
+  }
+  dev.off()
+}
b
diff -r 000000000000 -r 8e903cb3f919 ribowaltz_plot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ribowaltz_plot.xml Thu Sep 22 20:30:20 2022 +0000
[
b'@@ -0,0 +1,253 @@\n+<tool id="ribowaltz_plot" name="riboWaltz-plot" version="@VERSION@" profile="@PROFILE@">\n+    <description>visual inspection of ribosome profiling data</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro=\'requirements\'/>\n+    <expand macro=\'edam_ontology\' />\n+    <expand macro=\'xrefs\'/>\n+    <command detect_errors="exit_code"><![CDATA[\n+        Rscript \'${__tool_directory__}/ribowaltz_plot.R\' -i \'$input_rdata\'\n+        #import json\n+        #if $rlength_distr.plot == \'yes\':\n+            #set params_rlength_distr = []\n+            #silent $params_rlength_distr.append(\n+                {"cl": int($rlength_distr.plot_options.cl), "multisamples": str($rlength_distr.plot_options.multisamples),\n+                "plot_style": str($rlength_distr.plot_options.plot_style)})\n+            --params_rlength_distr \'#echo json.dumps($params_rlength_distr)#\'\n+        #end if\n+        #if $rends_heat.plot == \'yes\':\n+            #set params_rends_heat = []\n+            #silent $params_rends_heat.append(\n+                {"cl": int($rends_heat.plot_options.cl), "utr5l": int($rends_heat.plot_options.utr5l),\n+                "cdsl": int($rends_heat.plot_options.cdsl), "utr3l": int($rends_heat.plot_options.utr3l)})\n+            --params_rends_heat \'#echo json.dumps($params_rends_heat)#\'\n+        #end if\n+        #if $region_psite:\n+            --region_psite_plot\n+        #end if\n+        #if $trint_periodicity.plot == \'yes\':\n+            #set length_range = \'all\' \n+            #if $trint_periodicity.plot_options.length_range.filter == \'range\':\n+                #$length_range = str($trint_periodicity.plot_options.length_range.length_range_min) + \':\' + str($trint_periodicity.plot_options.length_range.length_range_max)\n+            #end if\n+            #set params_trint_periodicity = []\n+            #silent $params_trint_periodicity.append(\n+                
{"cl": int($trint_periodicity.plot_options.cl), "region": str($trint_periodicity.plot_options.region), \n+                "length_range": $length_range})\n+            --params_trint_periodicity \'#echo json.dumps($params_trint_periodicity)#\'\n+        #end if\n+        #if $metaplots.plot == \'yes\':\n+            #set length_range = \'all\' \n+            #if $metaplots.plot_options.length_range.filter == \'range\':\n+                #$length_range = str($metaplots.plot_options.length_range.length_range_min) + \':\' +  str($metaplots.plot_options.length_range.length_range_max)\n+            #end if\n+            #set params_metaplots = []\n+            #silent $params_metaplots.append(\n+                {"multisamples": str($metaplots.plot_options.multisamples), "plot_style": str($metaplots.plot_options.plot_style),\n+                "length_range": $length_range, "frequency": bool($metaplots.plot_options.frequency),\n+                "utr5l": int($metaplots.plot_options.utr5l), "cdsl": int($metaplots.plot_options.cdsl),\n+                "utr3l": int($metaplots.plot_options.utr3l)})\n+            --params_metaplots \'#echo json.dumps($params_metaplots)#\'\n+        #end if\n+        #if $codon_usage_psite.plot == \'yes\':\n+            #set params_codon_usage_psite = []\n+            #silent $params_codon_usage_psite.append(\n+                {"fastapath": str($codon_usage_psite.plot_options.fastapath), "frequency": bool($codon_usage_psite.plot_options.frequency),\n+                "label_scatter": bool($codon_usage_psite.plot_options.label_scatter),\n+                "label_number": int($codon_usage_psite.plot_options.label_number)})\n+            --params_codon_usage_psite \'#echo json.dumps($params_codon_usage_psite)#\'\n+        #end if\n+    ]]></command>\n+    <inputs>\n+        <param name="input_rdata" type="data" format="rdata" label="RDATA file generated by riboWaltz tool"/>\n+        <conditional name="rlength_distr">\n+            <param name="plot" 
type="select" label="Include read length distribution plots?">\n+                <option value="no">no</option>\n+ '..b'tart and stop codon"/>\n+                    <param name="utr3l" type="integer" value="25" min="0" label="3\' UTR region flanking the stop codon"/>\n+                </section>\n+            </when>\n+            <when value="no"/>\n+        </conditional>\n+        <conditional name="codon_usage_psite">\n+            <param name="plot" type="select" label="Inlcude plots with codon usage?">\n+                <option value="no">no</option>\n+                <option value="yes">yes</option>\n+            </param>\n+            <when value="yes">\n+                <section name="plot_options" title="Plot options">\n+                    <param name="fastapath" type="data" format="fasta" />\n+                    <param name="frequency" type="boolean" truevalue="1" falsevalue="0" checked="true"\n+                        label="normalize the 64 codon usage indexes for the corresponding codon frequencies in coding sequences?"/>\n+                    <param name="label_scatter" type="boolean" truevalue="1" falsevalue="0" checked="false"\n+                        label="Label the dots in the scatter plot?"\n+                        help=" This parameter is considered only if there exatcly two input samples"/>\n+                    <param name="label_number" type="integer" value="64" min="1" max="64" label="how many dots in the scatter plot should be labeled?"/>\n+                </section>\n+            </when>\n+            <when value="no"/>\n+        </conditional>\n+    </inputs>\n+    <outputs>\n+        <collection name="out_plots" type="list" label="riboWaltz plots on ${on_string}">\n+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\\.pdf" format="pdf" directory="." 
visible="false"/>\n+        </collection>\n+    </outputs>\n+    <tests>\n+        <test expect_num_outputs="1">\n+            <param name="input_rdata" value="rep1.rdata"/>\n+            <param name="region_psite" value="1"/>\n+            <conditional name="rlength_distr">\n+                <param name="plot" value="yes"/>\n+            </conditional>\n+            <conditional name="rends_heat">\n+                <param name="plot" value="yes"/>\n+            </conditional>\n+            <conditional name="trint_periodicity">\n+                <param name="plot" value="yes"/>\n+            </conditional>\n+            <conditional name="metaplots">\n+                <param name="plot" value="yes"/>\n+            </conditional>\n+            <output_collection name="out_plots" type="list">\n+                <element name="metaplots">\n+                    <assert_contents>\n+                        <has_size value="9180" delta="100"/>\n+                    </assert_contents>\n+                </element>\n+                <element name="psites_per_region">\n+                    <assert_contents>\n+                        <has_size value="5187" delta="100"/>\n+                    </assert_contents>\n+                </element>\n+                <element name="read_ends_heatmap">\n+                    <assert_contents>\n+                        <has_size value="26327" delta="500"/>\n+                    </assert_contents>\n+                </element>\n+                <element name="read_lengths">\n+                    <assert_contents>\n+                        <has_size value="4877" delta="100"/>\n+                    </assert_contents>\n+                </element>\n+                <element name="trinucleotide_periodicity">\n+                    <assert_contents>\n+                        <has_size value="7730" delta="100"/>\n+                    </assert_contents>\n+                </element>\n+            </output_collection>\n+        </test>\n+    </tests>\n+ 
   <help><![CDATA[\n+Visual inspection of ribosome profiling data. More information can be found here: https://github.com/LabTranslationalArchitectomics/riboWaltz\n+\n+**Inputs**\n+\n+RDATA file generated by riboWaltz tool.\n+\n+**Outputs**\n+\n+Generates various plots to visualize P-site offsets, codon usage etc.\n+\n+    ]]></help>\n+    <expand macro="citations" />\n+</tool>\n'
b
diff -r 000000000000 -r 8e903cb3f919 test-data/rep1.bam
b
Binary file test-data/rep1.bam has changed
b
diff -r 000000000000 -r 8e903cb3f919 test-data/rep1.rdata
b
Binary file test-data/rep1.rdata has changed
b
diff -r 000000000000 -r 8e903cb3f919 test-data/rep1_annot.gtf.gz
b
Binary file test-data/rep1_annot.gtf.gz has changed