Repository 'plant_tribes_ks_distribution'
hg clone https://toolshed.g2.bx.psu.edu/repos/greg/plant_tribes_ks_distribution

Changeset 0:c5846258c458 (2017-06-08)
Next changeset 1:56f42cc1dd58 (2017-06-28)
Commit message:
Uploaded
added:
.shed.yml
components.tabular
kaks_input1.tabular
ks_distribution.R
ks_distribution.xml
macros.xml
output.pdf
b
diff -r 000000000000 -r c5846258c458 .shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml Thu Jun 08 12:55:49 2017 -0400
b
@@ -0,0 +1,11 @@
+name: plant_tribes_ks_distribution
+owner: greg
+description: |
+  Contains a tool that plots the distribution of synonymous substitution (Ks) rates and fits significant component(s).
+homepage_url: https://github.com/dePamphilis/PlantTribes
+long_description: |
+  Contains a tool that plots the distribution of synonymous substitution (Ks) rates and fits significant component(s).
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/ks_distribution
+type: unrestricted
+categories:
+- Phylogenetics
b
diff -r 000000000000 -r c5846258c458 components.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components.tabular Thu Jun 08 12:55:49 2017 -0400
b
@@ -0,0 +1,2 @@
+species n number_comp lnL AIC BIC mean variance porportion
+species1 3 1 -3.4750 6.95 6.95 3.1183 5.7732 1.00
b
diff -r 000000000000 -r c5846258c458 kaks_input1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kaks_input1.tabular Thu Jun 08 12:55:49 2017 -0400
b
@@ -0,0 +1,4 @@
+SEQ1 SEQ2 Ka Ks Ka\Ks
+contig_241; contig_241 1.5312 7.1619 0.2138
+contig_300 contig_300; 0.8653 3.7872 0.2285
+contig_586 contig_586; 1.7791 1.1181 1.5912
b
diff -r 000000000000 -r c5846258c458 ks_distribution.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ks_distribution.R Thu Jun 08 12:55:49 2017 -0400
[
@@ -0,0 +1,163 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("optparse"))
+
+# Command-line interface. Each option stores a single value under the name
+# given by `dest`.
+option_list <- list(
+    make_option(
+        c("-c", "--components_input"),
+        action="store",
+        dest="components_input",
+        help="Ks significant components input dataset"
+    ),
+    make_option(
+        c("-k", "--kaks_input"),
+        action="store",
+        dest="kaks_input",
+        help="KaKs analysis input dataset"
+    ),
+    make_option(
+        c("-o", "--output"),
+        action="store",
+        dest="output",
+        help="Output dataset"
+    )
+)
+
+# With positional_arguments=TRUE the parsed result is a list; only its
+# `options` element (the named flags above) is used by this script.
+parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
+args <- parse_args(parser, positional_arguments=TRUE)
+opt <- args$options
+
+
+get_num_components <- function(components_data) {
+    # Return the largest value found in the third column (number_comp) of the
+    # components table, ignoring missing values.
+    max(components_data[, 3], na.rm=TRUE)
+}
+
+get_pi_mu_var <- function(components_data, num_components) {
+    # Extract the parameters of the mixture model that has `num_components`
+    # components from the components table.
+    #
+    # Args:
+    #   components_data: table whose column 7 holds the component mean,
+    #     column 8 the variance and column 9 the mixing proportion.
+    #   num_components: positive whole number of components in the model.
+    #
+    # Returns:
+    #   One flat vector c(pi, mu, var), i.e. `num_components` proportions,
+    #   then `num_components` means, then `num_components` variances.
+    #
+    # The table is expected to list the 1-component model in row 1, the
+    # 2-component model in rows 2-3, the 3-component model in rows 4-6 and so
+    # on, so the model with n components occupies the n rows that start at
+    # row n * (n - 1) / 2 + 1.  This works for any n, not only 1 through 6.
+    if (length(num_components) != 1 || !is.finite(num_components) ||
+        num_components < 1) {
+        stop("num_components must be a single positive whole number",
+             call.=FALSE)
+    }
+    first_row <- num_components * (num_components - 1) / 2 + 1
+    rows <- seq(first_row, length.out=num_components)
+    pi <- components_data[rows, 9]
+    mu <- components_data[rows, 7]
+    var <- components_data[rows, 8]
+    c(pi, mu, var)
+}
+
+plot_ks <- function(kaks_input, output, pi, mu, var) {
+    # Draw a histogram of the Ks values in `kaks_input` and overlay one fitted
+    # curve per mixture component, writing the chart to a PDF at `output`.
+    #
+    # Args:
+    #   kaks_input: path to a whitespace-delimited table with a header row
+    #     and a `Ks` column.
+    #   output: path of the PDF file to create.
+    #   pi, mu, var: per-component proportions, means and variances; they are
+    #     passed unchanged to calculate_fitted_density().
+
+    # Start PDF device driver to save charts to output, and make sure the
+    # device is closed (flushing the file) however this function exits.
+    pdf(file=output, bg="white")
+    on.exit(dev.off(), add=TRUE)
+    kaks <- read.table(file=kaks_input, header=TRUE)
+    max_ks <- max(kaks$Ks, na.rm=TRUE)
+    # Histogram breaks every 0.05, extended one bin past max_ks.
+    bin_width <- 0.05
+    max_bin_range <- as.integer(max_ks / bin_width)
+    bin <- bin_width * seq(0, max_bin_range + 1)
+    # The comparison is strict, so rows holding the maximum Ks value are left
+    # out of the histogram.
+    kaks <- kaks[kaks$Ks < max_ks, ]
+    h.kst <- hist(kaks$Ks, breaks=bin, plot=FALSE)
+    nc <- h.kst$counts
+    ntot <- sum(nc)
+    # Set margin for plot bottom, left, top, right.
+    par(mai=c(0.5, 0.5, 0, 0))
+    # Plot dimension in inches.
+    par(pin=c(3.0, 3.0))
+    g <- calculate_fitted_density(pi, mu, var, max_ks)
+    # Scale factor applied to every fitted curve before it is drawn on the
+    # count axis.
+    h <- ntot * 1.5 / sum(g)
+    # x positions of the fitted curves: 100 evenly spaced points up to max_ks.
+    vx <- seq(1, 100) * (max_ks / 100)
+    ymax <- max(nc)
+    # width 0.04 plus a gap of 0.25 * 0.04 gives 0.05 per bar, the bin width.
+    barplot(nc, space=0.25, offset=0, width=0.04, xlim=c(0, max_ks), ylim=c(0, ymax), col="lightpink1", border="lightpink3")
+    # Add x-axis.
+    axis(1)
+    color <- c('red', 'yellow','green','black','blue', 'darkorange' )
+    for (i in seq_along(mu)) {
+        # Reuse the palette from the start when there are more components
+        # than colours, so that no curve is drawn with a missing colour.
+        lines(vx, g[, i] * h, lwd=2, col=color[(i - 1) %% length(color) + 1])
+    }
+}
+
+calculate_fitted_density <- function(pi, mu, var, max_ks) {
+    # Evaluate each weighted lognormal mixture component on a fixed grid.
+    #
+    # Args:
+    #   pi, mu, var: equal-length vectors with one entry per component
+    #     (weight, mean, variance).
+    #   max_ks: upper end of the evaluation grid.
+    #
+    # Returns:
+    #   A 100 x length(pi) matrix; column j holds pi[j] times the lognormal
+    #   density of component j at the grid points max_ks * (1:100) / 100.
+    #   Entries that evaluate to NaN are replaced by 0.  With zero components
+    #   the result is a 100 x 0 matrix.
+    comp <- length(pi)
+    # Parameters handed to dlnorm(): sdlog = sqrt(var / mu^2), meanlog = log(mu).
+    sdlog <- sqrt(var / mu^2)
+    meanlog <- log(mu)
+    # Calculate lognormal density.
+    vx <- seq(1, 100) * (max_ks / 100)
+    fx <- matrix(0, 100, comp)
+    for (j in seq_len(comp)) {
+        # dlnorm() is vectorized over vx, so a whole column is filled at once.
+        fx[, j] <- pi[j] * dlnorm(vx, meanlog=meanlog[j], sdlog=sdlog[j])
+    }
+    fx[is.nan(fx)] <- 0
+    fx
+}
+
+# Fail early with a clear message when a required option is missing.
+if (is.null(opt$components_input) || is.null(opt$kaks_input) || is.null(opt$output)) {
+    stop("--components_input, --kaks_input and --output are all required", call.=FALSE)
+}
+
+# Read in the components data.
+components_data <- read.delim(opt$components_input, header=TRUE)
+# Get the number of components.
+num_components <- get_num_components(components_data)
+
+# Set pi, mu, var.  get_pi_mu_var() returns one flat vector laid out as
+# c(pi, mu, var) with num_components entries each, so it is split into three
+# equal consecutive parts.  This works for any number of components.
+items <- get_pi_mu_var(components_data, num_components)
+component_index <- seq_len(num_components)
+pi <- items[component_index]
+mu <- items[num_components + component_index]
+var <- items[2 * num_components + component_index]
+
+# Plot the output.
+plot_ks(opt$kaks_input, opt$output, pi, mu, var)
b
diff -r 000000000000 -r c5846258c458 ks_distribution.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ks_distribution.xml Thu Jun 08 12:55:49 2017 -0400
[
@@ -0,0 +1,105 @@
+<tool id="ks_distribution" name="KsDistribution" version="1.0.0">
+    <description>plots the distribution of synonymous substitution (Ks) rates and fits significant component(s)</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements_ks_distribution" />
+    <command detect_errors="exit_code"><![CDATA[
+Rscript $__tool_directory__/ks_distribution.R
+-k '$input'
+-c '$components'
+-o '$output'
+    ]]></command>
+    <inputs>
+        <param name="input" format="tabular" type="data" label="KaKsAnalysis tabular file" />
+        <param name="components" format="tabular" type="data" label="Significant components" />
+    </inputs>
+    <outputs>
+        <data name="output" format="pdf"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="kaks_input1.tabular" ftype="tabular" />
+            <param name="components" value="components.tabular" ftype="tabular" />
+            <output name="output" file="output.pdf" ftype="pdf" compare="contains" />
+        </test>
+    </tests>
+    <help>
+**What it does**

+This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary analyses of
+genome-scale gene families and transcriptomes.  This tool uses the analysis results produced by the KaKsAnalysis tool to plot the
+distribution of synonymous substitution (Ks) rates and fit the estimated significant normal mixtures component(s) onto the distribution.
+
+-----
+
+**Options**
+
+ * **Synonymous substitution rates** - estimated synonymous substitution (Ks) rates output file produced by the KaKsAnalysis tool selected from your history.
+ * **Synonymous components** - estimated significant component(s) output file produced by the KaKsAnalysis tool selected from your history.
+    </help>
+    <citations>
+        <expand macro="citation1" />
+        <citation type="bibtex">
+            @article{Wall2008,
+            journal = {Nucleic Acids Research},
+            author = {2. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS},
+            title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
+            year = {2008},
+            volume = {36},
+            number = {suppl 1},
+            pages = {D970-D976},}
+        </citation>
+        <citation type="bibtex">
+            @article{Altschul1990,
+            journal = {Journal of molecular biology},
+            author = {3. Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ},
+            title = {Basic local alignment search tool},
+            year = {1990},
+            volume = {215},
+            number = {3},
+            pages = {403-410},}
+        </citation>
+        <citation type="bibtex">
+            @article{Katoh2013,
+            journal = {Molecular biology and evolution},
+            author = {4. Katoh K, Standley DM},
+            title = {MAFFT multiple sequence alignment software version 7: improvements in performance and usability},
+            year = {2013},
+            volume = {30},
+            number = {4},
+            pages = {772-780},}
+        </citation>
+        <citation type="bibtex">
+            @article{Yang2007,
+            journal = {Molecular biology and evolution},
+            author = {5. Yang Z},
+            title = {PAML 4: phylogenetic analysis by maximum likelihood},
+            year = {2007},
+            volume = {24},
+            number = {8},
+            pages = {1586-1591},}
+        </citation>
+        <citation type="bibtex">
+            @article{Cui2006,
+            journal = {Genome Research},
+            author = {6. Cui L, Wall PK, Leebens-Mack JH, Lindsay BG, Soltis DE, Doyle JJ, Soltis PS, Carlson JE, Arumuganathan K, Barakat A, Albert VA},
+            title = {Widespread genome duplications throughout the history of flowering plants},
+            year = {2006},
+            volume = {16},
+            number = {6},
+            pages = {738-749},}
+        </citation>
+        <citation type="bibtex">
+            @article{McLachlan1999,
+            journal = {Journal of Statistical Software},
+            author = {7. McLachlan GJ, Peel D, Basford KE, Adams P},
+            title = {The EMMIX software for the fitting of mixtures of normal and t-components},
+            year = {1999},
+            volume = {4},
+            number = {2},
+            pages = {1-14},}
+        </citation>
+    </citations>
+</tool>
+
b
diff -r 000000000000 -r c5846258c458 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Jun 08 12:55:49 2017 -0400
b
@@ -0,0 +1,130 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <token name="@WRAPPER_VERSION@">1.0</token>
+    <xml name="requirements_assembly_post_processor">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_assembly_post_processor</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_aligner">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_aligner</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_classifier">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_classifier</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_integrator">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_integrator</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_kaks_analysis">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_kaks_analysis</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_ks_distribution">
+        <requirements>
+            <requirement type="package" version="1.3.0">r-optparse</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_phylogeny_builder">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_phylogeny_builder</requirement>
+        </requirements>
+    </xml>
+    <xml name="param_codon_alignments">
+        <param name="codon_alignments" type="select" label="Codon alignments">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+        </param>
+    </xml>
+    <xml name="param_method">
+        <param name="method" type="select" label="Protein clustering method">
+            <option value="gfam" selected="true">GFam</option>
+            <option value="orthofinder">OrthoFinder</option>
+            <option value="orthomcl">OrthoMCL</option>
+        </param>
+    </xml>
+    <xml name="param_options_type">
+        <param name="options_type" type="select" label="Options Configuration">
+            <option value="basic" selected="true">Basic</option>
+            <option value="advanced">Advanced</option>
+        </param>
+    </xml>
+    <xml name="param_orthogroup_fna">
+        <param name="orthogroup_fna" type="select" label="Orthogroups coding sequences">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+        </param>
+    </xml>
+    <xml name="param_scaffold">
+        <param name="scaffold" type="select" label="Gene family scaffold">
+            <options from_data_table="plant_tribes_scaffolds" />
+            <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
+        </param>
+    </xml>
+    <xml name="param_sequence_type">
+        <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference (dna)">
+            <option value="protein" selected="true">Amino acid based</option>
+            <option value="dna">Nucleotide based</option>
+        </param>
+    </xml>
+    <xml name="cond_alignment_method">
+        <conditional name="alignment_method_cond">
+            <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method">
+                <option value="mafft" selected="true">MAFFT</option>
+                <option value="pasta">PASTA</option>
+            </param>
+            <when value="mafft" />
+            <when value="pasta">
+                <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="cond_remove_gappy_sequences">
+        <conditional name="remove_gappy_sequences_cond">
+            <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no" />
+            <when value="yes">
+                <conditional name="trim_type_cond">
+                    <param name="trim_type" type="select" label="Trimming method">
+                        <option value="gap_trimming" selected="true">Gap score based trimming</option>
+                        <option value="automated_trimming">Automated heuristic trimming</option>
+                    </param>
+                    <when value="gap_trimming">
+                        <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" />
+                    </when>
+                    <when value="automated_trimming" />
+                </conditional>
+                <conditional name="remove_sequences_with_gaps_cond">
+                    <param name="remove_sequences_with_gaps" type="select" label="Remove sequences">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" />
+                        <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" />
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="citation1">
+        <citation type="bibtex">
+            @misc{None,
+            journal = {None},
+            author = {1. Wafula EK},
+            title = {Manuscript in preparation},
+            year = {None},
+            url = {https://github.com/dePamphilis/PlantTribes},}
+        </citation>
+    </xml>
+</macros>
b
diff -r 000000000000 -r c5846258c458 output.pdf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/output.pdf Thu Jun 08 12:55:49 2017 -0400
[
@@ -0,0 +1,52 @@
+1 0 obj
+<<
+/Title (R Graphics Output)
+/Creator (R)
+>>
+endobj
+2 0 obj
+<< /Type /Catalog /Pages 3 0 R >>
+endobj
+7 0 obj
+<< /Type /Page /Parent 3 0 R /Contents 8 0 R /Resources 4 0 R >>
+endobj
+8 0 obj
+<<
+>>
+stream
+endobj
+3 0 obj
+<< /Type /Pages /Kids [ 7 0 R ] /Count 1 /MediaBox [0 0 504 504] >>
+endobj
+4 0 obj
+<<
+/ProcSet [/PDF /Text]
+/Font <</F2 10 0 R >>
+/ExtGState << >>
+/ColorSpace << /sRGB 5 0 R >>
+>>
+endobj
+5 0 obj
+[/ICCBased 6 0 R]
+endobj
+6 0 obj
+<< /Alternate /DeviceRGB /N 3 /Length 2596 /Filter /FlateDecode >>
+stream
+9 0 obj
+<<
+/Type /Encoding /BaseEncoding /WinAnsiEncoding
+/Differences [ 45/minus 96/quoteleft
+144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
+/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space]
+>>
+endobj
+10 0 obj
+<< /Type /Font /Subtype /Type1 /Name /F2 /BaseFont /Helvetica
+/Encoding 9 0 R >>
+endobj
+xref
+0 11
+trailer
+<< /Size 11 /Info 1 0 R /Root 2 0 R >>
+startxref
+%%EOF