Mercurial > repos > artbio > mutational_patterns
changeset 24:ca6c19ee7da0 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit bad92e3210a78b5ebf47d6950f4dba10c1cbf07d
author | artbio |
---|---|
date | Tue, 05 Jul 2022 21:41:43 +0000 |
parents | 83f8c93c34b4 |
children | b00fef2b1c2c |
files | mutational_patterns.R mutational_patterns.xml test-data/cosmic_output1.pdf test-data/cosmic_output_v3.pdf test-data/denovo_1.RData test-data/denovo_output1.pdf test-data/sig_contrib_table.tsv test-data/sig_contrib_table_v3.tsv test-data/sigmatrix.tab test-data/spectrum_output1.pdf test-data/user_output.pdf |
diffstat | 11 files changed, 63 insertions(+), 59 deletions(-) [+] |
line wrap: on
line diff
--- a/mutational_patterns.R Wed Oct 27 00:46:47 2021 +0000 +++ b/mutational_patterns.R Tue Jul 05 21:41:43 2022 +0000 @@ -1,7 +1,8 @@ # load packages that are provided in the conda env -options(show.error.messages = F, +options(show.error.messages = FALSE, error = function() { - cat(geterrmessage(), file = stderr()); q("no", 1, F) + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) } ) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") @@ -202,16 +203,19 @@ # (For larger datasets it is wise to perform more iterations by changing the nrun parameter # to achieve stability and avoid local minima) nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun) - # Assign signature names - colnames(nmf_res$signatures) <- paste0("SBS", 1:opt$newsignum) - rownames(nmf_res$contribution) <- paste0("SBS", 1:opt$newsignum) + # Assign signature COSMICv3.2 names + cosmic_signatures <- get_known_signatures() + nmf_res <- rename_nmf_signatures(nmf_res, cosmic_signatures, cutoff = 0.85) + sim_matrix <- cos_sim_matrix(cosmic_signatures, nmf_res$signatures) + plot_cosine_sim <- plot_cosine_heatmap(sim_matrix) + grid.arrange(plot_cosine_sim) # Plot the 96-profile of the signatures: p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq") new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) - newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = T), + newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE), "[", new_sig_matrix$substitution, "]", - gsub("^.\\.", "", new_sig_matrix$context, perl = T)) + gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE)) new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]]) write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") grid.arrange(p5) @@ -329,8 +333,8 @@ fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures) # Plot contribution barplots - pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "absolute") - pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "relative") + pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "absolute") + pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "relative") if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs pc3_data <- pc3$data pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + @@ -397,7 +401,7 @@ level = rep("nolabels", length(fit_res_contrib[, 1])), fit_res_contrib, sum = rowSums(fit_res_contrib)) - worklist <- worklist[order(worklist[, "sum"], decreasing = T), ] + worklist <- worklist[order(worklist[, "sum"], decreasing = TRUE), ] worklist <- worklist[1:opt$signum, ] worklist <- worklist[, -length(worklist[1, ])] worklist <- melt(worklist) @@ -405,10 +409,10 @@ } else { worklist <- list() for (i in levels(factor(levels_table$level))) { - fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] -> worklist[[i]] + worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] sum <- rowSums(as.data.frame(worklist[[i]])) worklist[[i]] <- cbind(worklist[[i]], sum) - worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = T), ] + worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ] worklist[[i]] <- worklist[[i]][1:opt$signum, ] worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))] } @@ -424,7 +428,7 @@ p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + geom_bar(width = 1, stat = "identity") + geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) + - coord_polar("y", start = 0) + facet_wrap(.~sample) + + coord_polar("y", start = 0) + facet_wrap(. ~ sample) + labs(x = "", y = "Samples", fill = tag) + scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), values = signature_colors[levels(worklist$signature)], @@ -452,7 +456,7 @@ output_table <- data.frame(sample = rownames(output_table), output_table) colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) } - write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = F, row.names = F) + write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = FALSE, row.names = FALSE) } # calculate all pairwise cosine similarities
--- a/mutational_patterns.xml Wed Oct 27 00:46:47 2021 +0000 +++ b/mutational_patterns.xml Tue Jul 05 21:41:43 2022 +0000 @@ -1,15 +1,15 @@ -<tool id="mutational_patterns" name="Analyse Mutational Patterns/Signatures" version="3.2.0+galaxy8"> +<tool id="mutational_patterns" name="Analyse Mutational Patterns/Signatures" version="3.4.0+galaxy0"> <description>from genomic variations in vcf files</description> <requirements> - <requirement type="package" version="3.2.0=r41hdfd78af_0">bioconductor-mutationalpatterns</requirement> - <requirement type="package" version="1.1.1l=h7f98852_0">openssl</requirement> - <requirement type="package" version="3.3.5=r41hc72bb7e_0">r-ggplot2</requirement> - <requirement type="package" version="1.6.6=r41hc72bb7e_1">r-optparse</requirement> - <requirement type="package" version="0.2.20=r41h03ef668_1002">r-rjson</requirement> + <requirement type="package" version="3.4.0=r41hdfd78af_0">bioconductor-mutationalpatterns</requirement> + <!--requirement type="package" version="1.1.1l=h7f98852_0">openssl</requirement> --> + <requirement type="package" version="3.3.6=r41hc72bb7e_0">r-ggplot2</requirement> + <requirement type="package" version="1.7.1=r41hc72bb7e_0">r-optparse</requirement> + <requirement type="package" version="0.2.21=r41h7525677_1">r-rjson</requirement> <requirement type="package" version="0.21.0=r41h03ef668_1004">r-nmf</requirement> <requirement type="package" version="2.3=r41hc72bb7e_1003">r-gridextra</requirement> - <requirement type="package" version="1.4.3=r41hdfd78af_3">bioconductor-bsgenome.hsapiens.ucsc.hg19</requirement> - <requirement type="package" version="1.4.3=r41hdfd78af_3">bioconductor-bsgenome.hsapiens.ucsc.hg38</requirement> + <requirement type="package" version="1.4.3=r41hdfd78af_4">bioconductor-bsgenome.hsapiens.ucsc.hg19</requirement> + <requirement type="package" version="1.4.4=r41hdfd78af_0">bioconductor-bsgenome.hsapiens.ucsc.hg38</requirement> <!-- install more bioconda genomes bioconductor-bsgenome.mmusculus.ucsc.mm9 @@ -200,6 +200,34 @@ </outputs> <tests> + <!-- de novo signatures --> + <test> + <param name="vcfs"> + <collection type="list"> + <element name="6" value="F.vcf"/> + <element name="7" value="G.vcf"/> + <element name="8" value="H.vcf"/> + <element name="9" value="I.vcf"/> + </collection> + </param> + <param name="genome" value="BSgenome.Hsapiens.UCSC.hg38"/> + <conditional name="set_spectrum"> + <param name="choices" value="no"/> + </conditional> + <conditional name="set_denovo"> + <param name="choices" value="yes"/> + </conditional> + <conditional name="set_preset"> + <param name="choices" value="no"/> + </conditional> + <param name="nrun" value="10" /> + <param name="rank" value="4" /> + <param name="newsignum" value="4" /> + <param name="rdata_out" value="true" /> + <output name="denovo" file="denovo_output1.pdf" compare="sim_size"/> + <output name="sigmatrix" file="sigmatrix.tab" compare="sim_size"/> + <output name="rdata" file="denovo_1.RData" compare="sim_size" delta="400000"/> <!-- delta="170000" --> + </test> <!-- user defined (v3.2 restricted 30 minus 27 renamed to test nomenclature effect) --> <test> <param name="vcfs"> @@ -315,34 +343,6 @@ </conditional> <output name="spectrum" file="spectrum_output1.pdf" compare="sim_size"/> </test> - <!-- de novo signatures --> - <test> - <param name="vcfs"> - <collection type="list"> - <element name="6" value="F.vcf"/> - <element name="7" value="G.vcf"/> - <element name="8" value="H.vcf"/> - <element name="9" value="I.vcf"/> - </collection> - </param> - <param name="genome" value="BSgenome.Hsapiens.UCSC.hg38"/> - <conditional name="set_spectrum"> - <param name="choices" value="no"/> - </conditional> - <conditional name="set_denovo"> - <param name="choices" value="yes"/> - </conditional> - <conditional name="set_preset"> - <param name="choices" value="no"/> - </conditional> - <param name="nrun" value="10" /> - <param name="rank" value="4" /> - <param name="newsignum" value="4" /> - <param name="rdata_out" value="true" /> - <output name="denovo" file="denovo_output1.pdf" compare="sim_size"/> - <output name="sigmatrix" file="sigmatrix.tab" compare="sim_size"/> - <output name="rdata" file="denovo_1.RData" compare="sim_size" delta="400000"/> <!-- delta="170000" --> - </test> </tests> <help>
--- a/test-data/sig_contrib_table.tsv Wed Oct 27 00:46:47 2021 +0000 +++ b/test-data/sig_contrib_table.tsv Tue Jul 05 21:41:43 2022 +0000 @@ -1,5 +1,5 @@ sample SBS1 SBS2 SBS3 SBS4 SBS5 SBS6 SBS7 SBS8 SBS9 SBS10 SBS11 SBS12 SBS13 SBS14 SBS15 SBS16 SBS17 SBS18 SBS19 SBS20 SBS21 SBS22 SBS23 SBS24 SBS25 SBS26 SBS27 SBS28 SBS29 SBS30 -ovary-I.vcf 0.107676737465178 0.0107477401085281 0.263118370815298 0 0.0975192598491968 0.00654300960428024 0 0.121325654310372 0.0675812234604709 0 0.0475946682228466 0.0483240788134274 0.00116766365611527 0 0.0195773794486755 0.128367970358363 0 0 0 0 0 0 0 0 0.0564468988265929 0 0.000297382010539655 0.0237119630501146 0 0 -ovary-H.vcf 0.105483533021363 0.0160722050763363 0.220294683323486 0 0.270646531625838 0 0 0.0632630764001629 0.0651960109780565 0 0.0265849338024135 0 0.00937728828228106 0 0 0.169077458531394 0 0 0 0 0 0 0 0 0.0141372418969508 0 0.0064233897483352 0.0334436473133834 0 0 -lung-G.vcf 0.210828794798222 0.0160042182355152 0.174216713183146 0 0.248848735805233 0 0 0.0079146795135654 0.122852241624819 0 0.0316731744534412 0.0400882001887818 0 0 0 0.0415901687752403 0 0 0.0349534584507835 0 0 0 0 0 0.0686079325155911 0 0.0013880107407027 0.00103367171495887 0 0 -lung-F.vcf 0.272200051777229 0.00523782090972898 0.176429226414213 0 0.107322684968036 0 0 0.0922420552015618 0.0176321903825532 0 0.0308049189266778 0.00421945577891649 0 0 0 0.195343895329217 0 0 0 0 0 0 0 0 0.026099693090191 0 0.0050497865751646 0.0410158172026456 0 0.0264024034438657 +6 0.272200051777229 0.00523782090972898 0.176429226414213 0 0.107322684968036 0 0 0.0922420552015618 0.0176321903825532 0 0.0308049189266778 0.00421945577891649 0 0 0 0.195343895329217 0 0 0 0 0 0 0 0 0.026099693090191 0 0.0050497865751646 0.0410158172026456 0 0.0264024034438657 +7 0.210828794798222 0.0160042182355152 0.174216713183146 0 0.248848735805233 0 0 0.0079146795135654 0.122852241624819 0 0.0316731744534412 0.0400882001887818 0 0 0 0.0415901687752403 0 0 0.0349534584507835 0 0 0 0 0 0.0686079325155911 0 0.0013880107407027 0.00103367171495887 0 0 +8 0.105483533021363 0.0160722050763363 0.220294683323486 0 0.270646531625838 0 0 0.0632630764001629 0.0651960109780565 0 0.0265849338024135 0 0.00937728828228106 0 0 0.169077458531394 0 0 0 0 0 0 0 0 0.0141372418969508 0 0.0064233897483352 0.0334436473133834 0 0 +9 0.107676737465178 0.0107477401085281 0.263118370815298 0 0.0975192598491968 0.00654300960428024 0 0.121325654310372 0.0675812234604709 0 0.0475946682228466 0.0483240788134274 0.00116766365611527 0 0.0195773794486755 0.128367970358363 0 0 0 0 0 0 0 0 0.0564468988265929 0 0.000297382010539655 0.0237119630501146 0 0
--- a/test-data/sig_contrib_table_v3.tsv Wed Oct 27 00:46:47 2021 +0000 +++ b/test-data/sig_contrib_table_v3.tsv Tue Jul 05 21:41:43 2022 +0000 @@ -1,5 +1,5 @@ sample SBS1 SBS2 SBS3 SBS4 SBS5 SBS6 SBS7a SBS7b SBS7c SBS7d SBS8 SBS9 SBS10a SBS10b SBS10c SBS10d SBS11 SBS12 SBS13 SBS14 SBS15 SBS16 SBS17a SBS17b SBS18 SBS19 SBS20 SBS21 SBS22 SBS23 SBS24 SBS25 SBS26 SBS27 SBS28 SBS29 SBS30 SBS31 SBS32 SBS33 SBS34 SBS35 SBS36 SBS37 SBS38 SBS39 SBS40 SBS41 SBS42 SBS43 SBS44 SBS45 SBS46 SBS47 SBS48 SBS49 SBS50 SBS51 SBS52 SBS53 SBS54 SBS55 SBS56 SBS57 SBS58 SBS59 SBS60 SBS84 SBS85 SBS86 SBS87 SBS88 SBS89 SBS90 SBS91 SBS92 SBS93 SBS94 -ovary-I.vcf 0.0248743382983054 0 0 0 0 0.0382804313784858 0.000279867877099847 0 0.0523368831013261 0 0.0882193437709409 0 0.00105713908429325 0 0 0 0 0 0.00790452113929918 0 0.0228952649764221 0.035824437322864 0 0 0.0157694785042366 0.00112106011995225 0 0.00901729432205061 0 0 0.0067314503274015 0.0424173427597118 0.00897709595233141 0 0.0134288190900345 0 0.0315118740824124 0.0274274413781644 0.0459722043929115 0.00919119609371442 0.0135875438956959 0 0 0.0813737079036436 0 0.134441510514793 0 0 0 0.00968535679523162 0.0232988957406608 0 0 0 0 0 0.00901138983347702 0.0232219272273402 0 0 0.0140535953847184 0 0 0.0588372989742489 0.126837942673735 0.000414033475201199 0 0 0 0 0.0123975208041955 0 0 0 0 0.00960179280510004 0 0 -ovary-H.vcf 0.0526112623195139 0 0 0.00259659019141931 0 0 0.00441492436160073 0 0.028442412784458 0.00371282908990969 0.0771150268514295 0 0.00469080414948085 0 0 0 0 0 0.0131075005010316 0 0 0.0564087542136608 0 0.00228775187171679 0 0.0197458393231762 0 0.00177238312166065 0 0 0.00886883514891954 0 0.00387092295756819 0.0034150231168899 0.0174808677883371 0 0.0435652656329046 0.001821180738257 0.0264091265962629 0.0107100101651646 0.0174453430547186 0 0 0.0961708355601418 0 0.121116601538177 0 0 0 0 0.0571796516660221 0 0 0 0 0.000214057521674493 0.0111211317159625 0.00829256700318903 0 0.00164798457109997 0.0258483341879308 0.00644458809280953 0 0.0846638702721653 0.122478370005574 0.00222471578608652 0 0.000420172000753205 0 0.00163043517541805 0.000141322538953654 0 0.0574873294316273 0.00242537895433406 0 0 0 0 -lung-G.vcf 0.0823617355865553 0 0 0 0 0.0525713539199432 0.00861488625601096 0 0.0339595873379564 0 0.0456255244916971 0.0145585252714382 0 0.000421381081383047 0 0 0 0 0.00522670784278192 0 0 0.0293382321445568 0.00139649570607906 0 0.0194196611974925 0.0386127956083362 0 0.013789732383022 0 0 0.0100733150661604 0.0349706130241875 0 0 0 0 0.0434093655847984 0.00181825891236839 0.0303474622606777 0 0.0187029901709085 0 0 0.120376065787024 0 0.0599280433448042 0 0 0.00132966985611013 0 0 0 0 0.0039271065334873 0 0 0 0.0280674592931216 0 0 0.035137879879521 0 0 0.0812463129866582 0.0917684488653737 0 0 0.0124734516843239 0 0.00901316909994899 0.00681265646517624 0 0.0647011123580966 0 0 0 0 0 -lung-F.vcf 0.125106083697333 0 0.0894527459328343 0 0 0 0 0 0.0324668080771622 0 0.0672874649502956 0 0.00268401866175131 0.00917023093306937 0 0 0 0 0 0 0 0.0451906045983062 0 0 0.00910592930802089 0.0167722110675364 0 0.0038298515888168 0 0 0.009531234514015 0 0 0 0.0232363343868912 0 0.062330351090351 0.00493364473237903 0.0324520312030243 0.00486738928923164 0.0100557162196348 0 0 0.0888476521405422 0 0.0556313157931215 0 0 0 0 0.020753584501296 0 0 0.0183541664469706 0 0.000330058861573153 0.00181335415290947 0.0123934694096205 0 0.00196180738612452 0.0392499133494657 0 0 0.0247940538178981 0.120319900452309 0.00358101555748017 0 0.0122742608109216 0.000420655172516775 0 0.0145849808127487 0 0.0316331756761206 0.00458398540772828 0 0 0 0 +6 0.125106083697333 0 0.0894527459328343 0 0 0 0 0 0.0324668080771622 0 0.0672874649502956 0 0.00268401866175131 0.00917023093306937 0 0 0 0 0 0 0 0.0451906045983062 0 0 0.00910592930802089 0.0167722110675364 0 0.0038298515888168 0 0 0.009531234514015 0 0 0 0.0232363343868912 0 0.062330351090351 0.00493364473237903 0.0324520312030243 0.00486738928923164 0.0100557162196348 0 0 0.0888476521405422 0 0.0556313157931215 0 0 0 0 0.020753584501296 0 0 0.0183541664469706 0 0.000330058861573153 0.00181335415290947 0.0123934694096205 0 0.00196180738612452 0.0392499133494657 0 0 0.0247940538178981 0.120319900452309 0.00358101555748017 0 0.0122742608109216 0.000420655172516775 0 0.0145849808127487 0 0.0316331756761206 0.00458398540772828 0 0 0 0 +7 0.0823617355865553 0 0 0 0 0.0525713539199432 0.00861488625601096 0 0.0339595873379564 0 0.0456255244916971 0.0145585252714382 0 0.000421381081383047 0 0 0 0 0.00522670784278192 0 0 0.0293382321445568 0.00139649570607906 0 0.0194196611974925 0.0386127956083362 0 0.013789732383022 0 0 0.0100733150661604 0.0349706130241875 0 0 0 0 0.0434093655847984 0.00181825891236839 0.0303474622606777 0 0.0187029901709085 0 0 0.120376065787024 0 0.0599280433448042 0 0 0.00132966985611013 0 0 0 0 0.0039271065334873 0 0 0 0.0280674592931216 0 0 0.035137879879521 0 0 0.0812463129866582 0.0917684488653737 0 0 0.0124734516843239 0 0.00901316909994899 0.00681265646517624 0 0.0647011123580966 0 0 0 0 0 +8 0.0526112623195139 0 0 0.00259659019141931 0 0 0.00441492436160073 0 0.028442412784458 0.00371282908990969 0.0771150268514295 0 0.00469080414948085 0 0 0 0 0 0.0131075005010316 0 0 0.0564087542136608 0 0.00228775187171679 0 0.0197458393231762 0 0.00177238312166065 0 0 0.00886883514891954 0 0.00387092295756819 0.0034150231168899 0.0174808677883371 0 0.0435652656329046 0.001821180738257 0.0264091265962629 0.0107100101651646 0.0174453430547186 0 0 0.0961708355601418 0 0.121116601538177 0 0 0 0 0.0571796516660221 0 0 0 0 0.000214057521674493 0.0111211317159625 0.00829256700318903 0 0.00164798457109997 0.0258483341879308 0.00644458809280953 0 0.0846638702721653 0.122478370005574 0.00222471578608652 0 0.000420172000753205 0 0.00163043517541805 0.000141322538953654 0 0.0574873294316273 0.00242537895433406 0 0 0 0 +9 0.0248743382983054 0 0 0 0 0.0382804313784858 0.000279867877099847 0 0.0523368831013261 0 0.0882193437709409 0 0.00105713908429325 0 0 0 0 0 0.00790452113929918 0 0.0228952649764221 0.035824437322864 0 0 0.0157694785042366 0.00112106011995225 0 0.00901729432205061 0 0 0.0067314503274015 0.0424173427597118 0.00897709595233141 0 0.0134288190900345 0 0.0315118740824124 0.0274274413781644 0.0459722043929115 0.00919119609371442 0.0135875438956959 0 0 0.0813737079036436 0 0.134441510514793 0 0 0 0.00968535679523162 0.0232988957406608 0 0 0 0 0 0.00901138983347702 0.0232219272273402 0 0 0.0140535953847184 0 0 0.0588372989742489 0.126837942673735 0.000414033475201199 0 0 0 0 0.0123975208041955 0 0 0 0 0.00960179280510004 0 0
--- a/test-data/sigmatrix.tab Wed Oct 27 00:46:47 2021 +0000 +++ b/test-data/sigmatrix.tab Tue Jul 05 21:41:43 2022 +0000 @@ -1,4 +1,4 @@ -Type SBS1 SBS2 SBS3 SBS4 +Type SBSA SBSB SBS5-like SBSC A[C>A]A 2.537216e-02 2.002096e-02 1.710480e-02 2.141184e-02 A[C>A]C 9.450873e-03 1.458332e-02 1.433290e-02 8.146530e-15 A[C>A]G 2.154737e-06 2.245761e-03 7.951470e-03 3.165761e-03