changeset 25:b00fef2b1c2c draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit eeb46055822c6805c209af0c450ae941100960bd
author artbio
date Wed, 06 Jul 2022 11:43:09 +0000
parents ca6c19ee7da0
children af5c65ad5317
files mutational_patterns.R mutational_patterns.xml test-data/cosmic_output1.pdf test-data/cosmic_output_v3.pdf test-data/denovo_1.RData test-data/denovo_output1.pdf test-data/sigmatrix.tab test-data/spectrum_output1.pdf test-data/user_output.pdf
diffstat 9 files changed, 118 insertions(+), 109 deletions(-) [+]
line wrap: on
line diff
--- a/mutational_patterns.R	Tue Jul 05 21:41:43 2022 +0000
+++ b/mutational_patterns.R	Wed Jul 06 11:43:09 2022 +0000
@@ -70,6 +70,12 @@
     type = "integer",
     help = "Number of new signatures to be captured"
   ),
+    make_option(
+    "--cosmic_id_threshold",
+    default = 0.85,
+    type = "double",
+    help = "minimum cosine similarity to rename a new signature according to cosmic v3.2"
+  ),
   make_option(
     "--output_spectrum",
     default = NA,
@@ -205,12 +211,14 @@
     nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun)
     # Assign signature COSMICv3.2 names
     cosmic_signatures <- get_known_signatures()
-    nmf_res <- rename_nmf_signatures(nmf_res, cosmic_signatures, cutoff = 0.85)
+    nmf_res <- rename_nmf_signatures(nmf_res, cosmic_signatures, cutoff = opt$cosmic_id_threshold)
     sim_matrix <- cos_sim_matrix(cosmic_signatures, nmf_res$signatures)
     plot_cosine_sim <- plot_cosine_heatmap(sim_matrix)
     grid.arrange(plot_cosine_sim)
     # Plot the 96-profile of the signatures:
     p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE)
+    grid.arrange(p5)
+    # write matrix of de novo signatures for user
     new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq")
     new_sig_matrix <- format(new_sig_matrix, scientific = TRUE)
     newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE),
@@ -218,7 +226,6 @@
                      gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE))
     new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]])
     write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t")
-    grid.arrange(p5)
     # Visualize the contribution of the signatures in a barplot
     pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE)
     # Visualize the contribution of the signatures in absolute number of mutations
@@ -231,18 +238,15 @@
     # The samples can be hierarchically clustered based on their euclidean dis- tance. The signatures
     # can be plotted in a user-specified order.
     # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order:
-    pch1 <- plot_contribution_heatmap(nmf_res$contribution,
-                                      sig_order = paste0("NewSig_", 1:opt$newsignum))
+    pch1 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = TRUE)
     # Plot signature contribution as a heatmap without sample clustering:
     pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = FALSE)
     #Combine the plots into one figure:
     grid.arrange(pch1, pch2, ncol = 2, widths = c(2, 1.6))
 
     # Compare the reconstructed mutational profile with the original mutational profile:
-    plot_compare_profiles(pseudo_mut_mat[, 1],
-                          nmf_res$reconstructed[, 1],
-                          profile_names = c("Original", "Reconstructed"),
-                          condensed = TRUE)
+    pch3 <- plot_original_vs_reconstructed(pseudo_mut_mat, nmf_res$reconstructed, y_intercept = 0.95)
+    grid.arrange(pch3)
     dev.off()
     }
 
--- a/mutational_patterns.xml	Tue Jul 05 21:41:43 2022 +0000
+++ b/mutational_patterns.xml	Wed Jul 06 11:43:09 2022 +0000
@@ -1,4 +1,4 @@
-<tool id="mutational_patterns" name="Analyse Mutational Patterns/Signatures" version="3.4.0+galaxy0">
+<tool id="mutational_patterns" name="Analyse Mutational Patterns/Signatures" version="3.4.0+galaxy1">
     <description>from genomic variations in vcf files</description>
     <requirements>
         <requirement type="package" version="3.4.0=r41hdfd78af_0">bioconductor-mutationalpatterns</requirement>
@@ -46,6 +46,7 @@
         --newsignum '$set_denovo.newsignum'
         --output_denovo '$denovo'
         --sigmatrix '$sigmatrix'
+        --cosmic_id_threshold '$set_denovo.cosmic_id_threshold'
     #end if
 
     #if $set_preset.choices == 'yes':
@@ -127,6 +128,9 @@
                 <param name="newsignum" type="integer" value="4" min="2" max="30"
                        label="Number of de novo signatures to capture"
                        help="High values extend the computational time. Note also that you cannot extract more signature than the number of available samples in the study"/>
+                <param name="cosmic_id_threshold" type="float" value="0.85" min="0" max="1"
+                       label="Minimum cosine similarity to rename a signature"
+                       help="If a signature has a cosine similarity higher than this value with a cosmic v3.2 signature, it will be renamed accordingly, with a -like suffix"/>
             </when>
             <when value="no" />
         </conditional>
@@ -220,9 +224,10 @@
             <conditional name="set_preset">
                 <param name="choices" value="no"/>
             </conditional>
-            <param name="nrun" value="10" />
+            <param name="nrun" value="30" />
             <param name="rank" value="4" />
-            <param name="newsignum" value="4" />
+            <param name="newsignum" value="2" />
+            <param name="cosmic_id_threshold" value="0.84" />
             <param name="rdata_out" value="true" />
             <output name="denovo" file="denovo_output1.pdf" compare="sim_size"/>
             <output name="sigmatrix" file="sigmatrix.tab" compare="sim_size"/>
@@ -367,7 +372,7 @@
 
 * a collection of VCF files with somatic mutations calls from analysis of samples.
 * a tabular table describing the correspondance of sample names to levels of a factor
-(tissues, ages, sexes, etc.)
+  (tissues, ages, sexes, etc.)
 
 **Outputs**
 
Binary file test-data/cosmic_output1.pdf has changed
Binary file test-data/cosmic_output_v3.pdf has changed
Binary file test-data/denovo_1.RData has changed
Binary file test-data/denovo_output1.pdf has changed
--- a/test-data/sigmatrix.tab	Tue Jul 05 21:41:43 2022 +0000
+++ b/test-data/sigmatrix.tab	Wed Jul 06 11:43:09 2022 +0000
@@ -1,97 +1,97 @@
-Type	SBSA	SBSB	SBS5-like	SBSC
-A[C>A]A	2.537216e-02	2.002096e-02	1.710480e-02	2.141184e-02
-A[C>A]C	9.450873e-03	1.458332e-02	1.433290e-02	8.146530e-15
-A[C>A]G	2.154737e-06	2.245761e-03	7.951470e-03	3.165761e-03
-A[C>A]T	1.157395e-02	5.815074e-03	1.240409e-02	4.643111e-03
-C[C>A]A	1.088500e-02	1.062103e-02	1.055922e-02	6.408859e-03
-C[C>A]C	9.642499e-03	8.802069e-03	1.580375e-02	2.817272e-03
-C[C>A]G	3.260193e-03	3.291545e-03	2.316976e-03	1.206510e-03
-C[C>A]T	1.342709e-02	1.891337e-03	7.936790e-03	7.279236e-03
-G[C>A]A	1.008899e-02	1.222901e-02	3.142566e-09	9.140614e-03
-G[C>A]C	6.174019e-03	4.836613e-03	9.098106e-03	4.149261e-03
-G[C>A]G	3.986726e-03	3.691483e-03	4.513223e-04	2.546877e-03
-G[C>A]T	8.260710e-03	7.958102e-03	6.964008e-03	1.757568e-06
-T[C>A]A	1.249317e-02	3.585683e-03	8.718414e-03	1.266466e-02
-T[C>A]C	1.291698e-02	8.850082e-03	1.670388e-04	7.329271e-03
-T[C>A]G	5.004928e-04	1.156145e-03	2.337253e-03	7.830488e-04
-T[C>A]T	1.337425e-02	1.214008e-02	1.530841e-02	8.418327e-03
-A[C>G]A	1.237441e-02	1.981081e-02	1.653601e-02	1.304232e-02
-A[C>G]C	7.834505e-16	3.248220e-10	1.572297e-02	6.551608e-03
-A[C>G]G	2.523774e-03	3.253406e-03	5.114731e-03	7.858234e-04
-A[C>G]T	1.096100e-02	6.928247e-03	1.066230e-02	7.532280e-03
-C[C>G]A	9.424413e-03	3.782759e-03	7.078604e-03	5.700867e-03
-C[C>G]C	8.549212e-03	3.674810e-20	1.851021e-03	7.246756e-03
-C[C>G]G	7.748336e-03	3.487494e-04	2.371674e-03	9.595624e-04
-C[C>G]T	5.480175e-03	9.922058e-03	3.885279e-03	4.326614e-03
-G[C>G]A	1.157248e-02	1.370895e-03	7.806339e-03	3.108266e-20
-G[C>G]C	7.420157e-03	2.171773e-03	1.284785e-04	4.770442e-03
-G[C>G]G	3.372856e-03	1.135636e-03	3.554018e-03	4.457136e-04
-G[C>G]T	8.449691e-03	2.887116e-03	8.619063e-03	4.100463e-03
-T[C>G]A	1.070613e-02	4.177331e-03	7.325655e-03	7.313308e-03
-T[C>G]C	6.976389e-03	3.829361e-03	7.894157e-03	6.971152e-03
-T[C>G]G	1.997729e-03	1.042906e-04	5.183138e-03	3.550364e-03
-T[C>G]T	1.906704e-02	5.403760e-03	2.917595e-02	1.685698e-02
-A[C>T]A	3.131895e-02	3.559289e-02	4.320756e-02	3.595643e-02
-A[C>T]C	1.413245e-02	1.602975e-02	4.547827e-03	1.754285e-02
-A[C>T]G	3.979119e-05	7.124023e-02	2.719950e-02	3.367261e-02
-A[C>T]T	1.859032e-02	1.694007e-02	1.791330e-02	1.441652e-02
-C[C>T]A	9.334568e-03	1.548055e-02	1.339818e-02	1.398155e-02
-C[C>T]C	1.346106e-02	1.657747e-02	9.838139e-03	1.233688e-02
-C[C>T]G	1.184973e-02	4.118642e-02	9.104291e-03	2.848006e-02
-C[C>T]T	1.867309e-02	2.194293e-02	2.160789e-02	3.289596e-02
-G[C>T]A	1.220901e-02	1.830932e-02	8.420540e-03	1.082556e-02
-G[C>T]C	1.165663e-02	1.366817e-02	9.906539e-03	2.030184e-02
-G[C>T]G	2.837739e-02	3.492253e-02	9.763772e-03	3.371439e-02
-G[C>T]T	1.423362e-02	1.172648e-02	1.151020e-02	2.449816e-02
-T[C>T]A	6.181812e-03	1.026000e-02	1.598478e-02	1.784120e-02
-T[C>T]C	1.426343e-02	1.765650e-02	1.686071e-02	1.961689e-02
-T[C>T]G	6.697405e-04	2.981773e-02	8.807199e-03	1.357187e-02
-T[C>T]T	3.425831e-02	3.190586e-02	3.306195e-02	2.416517e-02
-A[T>A]A	1.350407e-02	1.783317e-02	1.433952e-02	1.204297e-02
-A[T>A]C	5.208908e-03	2.426998e-03	4.661406e-03	7.889899e-03
-A[T>A]G	6.203645e-03	7.899487e-03	4.210160e-03	1.020936e-02
-A[T>A]T	1.832951e-02	9.760497e-03	1.283916e-02	1.679321e-02
-C[T>A]A	5.152894e-03	2.995165e-03	7.202406e-03	6.415967e-03
-C[T>A]C	1.288517e-02	1.197751e-02	2.199823e-03	6.184057e-03
-C[T>A]G	1.017668e-02	3.858590e-03	1.797496e-03	6.521194e-03
-C[T>A]T	1.311494e-02	5.783272e-03	4.684082e-03	1.384174e-02
-G[T>A]A	6.264117e-03	1.063500e-03	4.890578e-03	2.764218e-03
-G[T>A]C	7.583102e-04	1.569458e-03	5.251988e-03	3.297294e-03
-G[T>A]G	7.218613e-03	1.024681e-02	6.403638e-03	1.217812e-02
-G[T>A]T	1.447924e-02	8.632744e-03	5.062681e-03	4.542537e-03
-T[T>A]A	6.737527e-03	1.490078e-02	1.449887e-02	8.361527e-03
-T[T>A]C	6.075934e-03	6.914890e-03	3.547468e-03	2.400262e-03
-T[T>A]G	9.247980e-03	2.446205e-03	6.669669e-04	3.742054e-03
-T[T>A]T	3.338055e-02	2.065066e-02	1.611251e-02	1.680514e-02
-A[T>C]A	3.717380e-02	3.265699e-02	4.982621e-02	2.922387e-02
-A[T>C]C	4.082749e-03	8.947616e-03	1.042822e-02	9.205745e-03
-A[T>C]G	5.725035e-03	2.233932e-02	1.897642e-02	1.192167e-02
-A[T>C]T	9.216111e-03	1.218855e-02	1.674300e-02	1.362895e-02
-C[T>C]A	9.092771e-03	6.894784e-03	9.263013e-03	6.566154e-03
-C[T>C]C	1.818673e-02	9.733987e-03	3.899203e-03	7.127605e-03
-C[T>C]G	9.199519e-03	7.413690e-03	1.642174e-02	1.224971e-02
-C[T>C]T	1.327566e-02	9.959621e-03	1.871874e-02	2.250458e-02
-G[T>C]A	1.126406e-02	7.644979e-03	5.362476e-03	1.191308e-02
-G[T>C]C	5.382469e-03	4.828479e-03	1.970185e-12	9.597621e-03
-G[T>C]G	1.123791e-02	1.724741e-02	1.516244e-02	1.372539e-02
-G[T>C]T	1.282307e-02	4.586347e-03	1.411896e-02	1.039154e-02
-T[T>C]A	1.092086e-02	5.588158e-03	8.761926e-03	1.007685e-02
-T[T>C]C	1.803799e-02	2.013810e-02	8.203282e-03	1.529979e-02
-T[T>C]G	8.490634e-03	4.340314e-03	1.235122e-02	4.467692e-03
-T[T>C]T	3.206972e-02	1.668243e-02	3.818623e-02	4.544754e-02
-A[T>G]A	4.607840e-03	1.111060e-02	4.088577e-03	1.675423e-02
-A[T>G]C	2.785895e-20	3.406825e-03	4.944014e-04	2.164564e-03
-A[T>G]G	9.026035e-03	3.004937e-03	5.635912e-03	2.524956e-03
-A[T>G]T	4.203199e-03	5.247793e-03	1.255466e-03	8.623252e-03
-C[T>G]A	2.785895e-20	3.674810e-20	3.467823e-03	6.471005e-03
-C[T>G]C	6.581920e-03	6.843904e-04	1.594754e-03	9.266438e-04
-C[T>G]G	1.978907e-03	5.066625e-03	7.694784e-03	1.800258e-03
-C[T>G]T	1.094034e-03	2.834946e-03	8.971833e-03	3.108266e-20
-G[T>G]A	2.124062e-03	2.575322e-03	3.809196e-03	5.453246e-06
-G[T>G]C	3.641890e-03	3.851018e-03	2.472880e-06	1.886510e-03
-G[T>G]G	8.768503e-03	7.669052e-03	1.102237e-02	9.725885e-03
-G[T>G]T	1.076981e-02	6.207912e-03	6.393167e-03	1.271972e-02
-T[T>G]A	8.496984e-03	2.798437e-03	1.940681e-03	4.645137e-03
-T[T>G]C	2.128895e-03	5.382631e-03	6.346618e-03	5.118158e-03
-T[T>G]G	1.196188e-02	6.471983e-03	1.892812e-02	9.668990e-03
-T[T>G]T	2.241590e-02	2.343561e-02	3.806567e-02	1.368935e-02
+Type	SBS5-like	SBSA
+A[C>A]A	2.124308e-02	2.022610e-02
+A[C>A]C	1.152124e-02	9.471896e-03
+A[C>A]G	3.820981e-03	2.804656e-03
+A[C>A]T	1.134424e-02	5.362864e-03
+C[C>A]A	1.054951e-02	9.070399e-03
+C[C>A]C	1.202944e-02	6.837280e-03
+C[C>A]G	2.762178e-03	2.501478e-03
+C[C>A]T	1.015697e-02	3.518667e-03
+G[C>A]A	5.519337e-03	1.100345e-02
+G[C>A]C	7.306249e-03	4.676835e-03
+G[C>A]G	2.322192e-03	3.185639e-03
+G[C>A]T	7.355381e-03	4.996044e-03
+T[C>A]A	1.034505e-02	6.678461e-03
+T[C>A]C	6.797692e-03	7.873886e-03
+T[C>A]G	1.360369e-03	1.086867e-03
+T[C>A]T	1.396581e-02	1.083312e-02
+A[C>G]A	1.467152e-02	1.752997e-02
+A[C>G]C	7.058313e-03	3.083366e-03
+A[C>G]G	3.636692e-03	2.450720e-03
+A[C>G]T	1.047135e-02	7.097885e-03
+C[C>G]A	7.937276e-03	4.338985e-03
+C[C>G]C	5.253641e-03	2.123655e-03
+C[C>G]G	4.762395e-03	2.428946e-04
+C[C>G]T	4.952736e-03	7.896379e-03
+G[C>G]A	8.916999e-03	6.099084e-04
+G[C>G]C	3.809622e-03	2.817481e-03
+G[C>G]G	3.211134e-03	8.704721e-04
+G[C>G]T	8.042910e-03	3.282992e-03
+T[C>G]A	8.727119e-03	5.133207e-03
+T[C>G]C	7.206149e-03	4.977209e-03
+T[C>G]G	3.274678e-03	1.548092e-03
+T[C>G]T	2.262552e-02	9.859960e-03
+A[C>T]A	3.702558e-02	3.612675e-02
+A[C>T]C	1.002069e-02	1.641188e-02
+A[C>T]G	1.789618e-02	5.847645e-02
+A[C>T]T	1.803376e-02	1.598155e-02
+C[C>T]A	1.167880e-02	1.510162e-02
+C[C>T]C	1.195924e-02	1.497230e-02
+C[C>T]G	1.280125e-02	3.687739e-02
+C[C>T]T	2.072727e-02	2.605414e-02
+G[C>T]A	1.077552e-02	1.555955e-02
+G[C>T]C	1.131579e-02	1.607356e-02
+G[C>T]G	2.049063e-02	3.418976e-02
+G[C>T]T	1.327638e-02	1.629518e-02
+T[C>T]A	1.125650e-02	1.333035e-02
+T[C>T]C	1.582151e-02	1.847956e-02
+T[C>T]G	6.567233e-03	2.424562e-02
+T[C>T]T	3.319931e-02	2.901933e-02
+A[T>A]A	1.405615e-02	1.578937e-02
+A[T>A]C	4.917247e-03	4.376933e-03
+A[T>A]G	5.550123e-03	8.720574e-03
+A[T>A]T	1.536717e-02	1.206088e-02
+C[T>A]A	5.986857e-03	4.293221e-03
+C[T>A]C	7.778087e-03	9.577001e-03
+C[T>A]G	5.983379e-03	4.484491e-03
+C[T>A]T	8.990920e-03	8.394852e-03
+G[T>A]A	5.240678e-03	1.576264e-03
+G[T>A]C	2.892533e-03	2.354285e-03
+G[T>A]G	7.209332e-03	1.096859e-02
+G[T>A]T	9.614464e-03	6.754049e-03
+T[T>A]A	1.072587e-02	1.280547e-02
+T[T>A]C	4.847977e-03	5.204523e-03
+T[T>A]G	4.900357e-03	2.522573e-03
+T[T>A]T	2.439672e-02	1.853853e-02
+A[T>C]A	4.219870e-02	3.178417e-02
+A[T>C]C	7.399058e-03	9.255617e-03
+A[T>C]G	1.285954e-02	1.901398e-02
+A[T>C]T	1.290585e-02	1.296671e-02
+C[T>C]A	8.946937e-03	6.758427e-03
+C[T>C]C	1.097622e-02	8.213891e-03
+C[T>C]G	1.241063e-02	9.404346e-03
+C[T>C]T	1.587318e-02	1.467435e-02
+G[T>C]A	8.455188e-03	8.999783e-03
+G[T>C]C	2.931931e-03	6.618690e-03
+G[T>C]G	1.342277e-02	1.612988e-02
+G[T>C]T	1.283681e-02	6.674995e-03
+T[T>C]A	9.639435e-03	7.100539e-03
+T[T>C]C	1.361753e-02	1.815807e-02
+T[T>C]G	9.787002e-03	4.498450e-03
+T[T>C]T	3.444768e-02	2.723502e-02
+A[T>G]A	5.198391e-03	1.323929e-02
+A[T>G]C	4.405440e-04	3.062221e-03
+A[T>G]G	6.960889e-03	2.631345e-03
+A[T>G]T	3.082631e-03	6.448524e-03
+C[T>G]A	1.937101e-03	2.309071e-03
+C[T>G]C	3.886385e-03	4.826853e-04
+C[T>G]G	4.677149e-03	4.086015e-03
+C[T>G]T	4.583659e-03	2.130806e-03
+G[T>G]A	2.808671e-03	1.699607e-03
+G[T>G]C	1.950620e-03	3.043178e-03
+G[T>G]G	9.744602e-03	8.480532e-03
+G[T>G]T	8.646393e-03	8.409194e-03
+T[T>G]A	5.149241e-03	3.178257e-03
+T[T>G]C	4.318309e-03	5.427921e-03
+T[T>G]G	1.461769e-02	7.848913e-03
+T[T>G]T	2.902799e-02	2.042809e-02
Binary file test-data/spectrum_output1.pdf has changed
Binary file test-data/user_output.pdf has changed