# HG changeset patch # User iuc # Date 1606329433 0 # Node ID 5d60331757d3a3edb04cd2f02f364a2b371b275b # Parent 45af4a9748cff0804e0fabc757cd64a03aecc110 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/exomedepth commit 9eb6d07510ccf27d6499172d62c81661078ec57b" diff -r 45af4a9748cf -r 5d60331757d3 exomedepth.R --- a/exomedepth.R Fri Nov 08 13:25:44 2019 -0500 +++ b/exomedepth.R Wed Nov 25 18:37:13 2020 +0000 @@ -2,94 +2,107 @@ suppressMessages(library(ExomeDepth)) # Import parameters from xml wrapper (args_file) -args <- commandArgs(trailingOnly=TRUE) -param <- read.table(args[1],sep="=", as.is=TRUE) +args <- commandArgs(trailingOnly = TRUE) +param <- read.table(args[1], sep = "=", as.is = TRUE) # Set common parameters -target <- param[match("target",param[,1]),2] -trans_prob <- as.numeric(param[match("trans_prob",param[,1]),2]) -output <- param[match("output",param[,1]),2] -test_vs_ref <- as.logical(param[match("test_vs_ref",param[,1]),2]) +target <- param[match("target", param[, 1]), 2] +trans_prob <- as.numeric(param[match("trans_prob", param[, 1]), 2]) +output <- param[match("output", param[, 1]), 2] +test_vs_ref <- as.logical(param[match("test_vs_ref", param[, 1]), 2]) -# Create symbolic links for multiple bam and bai -bam <- param[param[,1]=="bam",2] -bam_bai <- param[param[,1]=="bam_bai",2] -bam_label <- param[param[,1]=="bam_label",2] -bam_label <- gsub(" ", "_", bam_label) +# Create symbolic links for multiple bam and bai +bam <- param[param[, 1] == "bam", 2] +bam_bai <- param[param[, 1] == "bam_bai", 2] +bam_label <- param[param[, 1] == "bam_label", 2] +bam_label <- gsub(" ", "_", bam_label) -for(i in 1:length(bam)){ - stopifnot(file.symlink(bam[i], paste(bam_label[i], "bam", sep="."))) - stopifnot(file.symlink(bam_bai[i], paste(bam_label[i], "bam.bai", sep="."))) +for (i in seq_len(length(bam))) { + stopifnot(file.symlink(bam[i], paste(bam_label[i], "bam", sep = "."))) + stopifnot(file.symlink(bam_bai[i], paste(bam_label[i], "bam.bai", sep = "."))) } # Generate read count data -BAMFiles <- paste(bam_label, "bam", sep=".") +bam_files <- paste(bam_label, "bam", sep = ".") sink("/dev/null") -ExomeCount <- suppressMessages(getBamCounts(bed.file=target, bam.files = BAMFiles)) +exome_count <- suppressMessages(getBamCounts(bed.file = target, bam.files = bam_files)) sink() # Convert counts in a data frame -ExomeCount.dafr <- as(ExomeCount[, colnames(ExomeCount)], 'data.frame') +exome_count_dafr <- as(exome_count[, colnames(exome_count)], "data.frame") # Prepare the main matrix of read count data -ExomeCount.mat <- as.matrix(ExomeCount.dafr[, grep(names(ExomeCount.dafr), pattern='.bam')]) +exome_count_mat <- as.matrix(exome_count_dafr[, grep(names(exome_count_dafr), pattern = ".bam")]) # Remove .bam from sample name -colnames(ExomeCount.mat) <- gsub(".bam", "", colnames(ExomeCount.mat)) +colnames(exome_count_mat) <- gsub(".bam", "", colnames(exome_count_mat)) # Set nsamples == 1 if mode is test vs reference, assuming test is sample 1 -nsamples <- ifelse(test_vs_ref, 1, ncol(ExomeCount.mat)) +nsamples <- ifelse(test_vs_ref, 1, ncol(exome_count_mat)) # Loop over samples -for (i in 1:nsamples){ +for (i in 1:nsamples) { + # Create the aggregate reference set for this sample + my_choice <- suppressWarnings(suppressMessages(select.reference.set( + test.counts = exome_count_mat[, i], + reference.counts = subset(exome_count_mat, select = -i), + bin.length = (exome_count_dafr$end - exome_count_dafr$start) / 1000, + n.bins.reduced = 10000 + ))) - # Create the aggregate reference set for this sample - my.choice <- suppressWarnings(suppressMessages( - select.reference.set(test.counts = ExomeCount.mat[,i], - reference.counts = subset(ExomeCount.mat, select=-i), - bin.length = (ExomeCount.dafr$end - ExomeCount.dafr$start)/1000, - n.bins.reduced = 10000))) + my_reference_selected <- apply( + X = exome_count_mat[, my_choice$reference.choice, drop = FALSE], + MAR = 1, + FUN = sum + ) - my.reference.selected <- apply(X = ExomeCount.mat[, my.choice$reference.choice, drop=FALSE], - MAR = 1, - FUN = sum) - - # Now create the ExomeDepth object for the CNVs call - all.exons<-suppressWarnings(suppressMessages(new('ExomeDepth', - test = ExomeCount.mat[,i], - reference = my.reference.selected, - formula = 'cbind(test,reference)~1'))) + # Now create the ExomeDepth object for the CNVs call + all.exons <- suppressWarnings(suppressMessages( + new("ExomeDepth", + test = exome_count_mat[, i], + reference = my_reference_selected, + formula = "cbind(test,reference)~1" + ))) + # Now call the CNVs + result <- try(all.exons <- suppressMessages(CallCNVs( + x = all.exons, + transition.probability = trans_prob, + chromosome = exome_count_dafr$space, + start = exome_count_dafr$start, + end = exome_count_dafr$end, + name = exome_count_dafr$names + )), silent = T) - # Now call the CNVs - result <- try(all.exons<-suppressMessages(CallCNVs(x=all.exons, - transition.probability = trans_prob, - chromosome = ExomeCount.dafr$space, - start = ExomeCount.dafr$start, - end = ExomeCount.dafr$end, - name = ExomeCount.dafr$names)), silent=T) + # Next if CNVs are not detected + if (class(result) == "try-error") { + next + } + + # Compute correlation between ref and test + my_cor <- cor(all.exons@reference, all.exons@test) - # Next if CNVs are not detected - if (class(result)=="try-error"){ - next - } + # Write results + my_results <- cbind( + all.exons@CNV.calls[, c(7, 5, 6, 3)], + sample = colnames(exome_count_mat)[i], + corr = my_cor, + all.exons@CNV.calls[, c(4, 9, 12)] + ) - # Compute correlation between ref and test - my.cor <- cor(all.exons@reference, all.exons@test) - n.call <- nrow(all.exons@CNV.calls) + # Re-order by chr and position + chr_order <- c(paste("chr", 1:22, sep = ""), "chrX", "chrY", "chrM") + my_results[, 1] <- factor(my_results[, 1], levels = chr_order) + my_results <- my_results[order(my_results[, 1], my_results[, 2], my_results[, 3]), ] - # Write results - my.results <- cbind(all.exons@CNV.calls[,c(7,5,6,3)], - sample=colnames(ExomeCount.mat)[i], - corr=my.cor, - all.exons@CNV.calls[,c(4,9,12)]) - - # Re-order by chr and position - chrOrder<-c(paste("chr",1:22,sep=""),"chrX","chrY","chrM") - my.results[,1] <- factor(my.results[,1], levels=chrOrder) - my.results <- my.results[order(my.results[,1], my.results[,2], my.results[,3]),] - - write.table(sep='\t', quote=FALSE, file = output, - x = my.results, - row.names = FALSE, col.names = FALSE, dec=".", append=TRUE) + write.table( + sep = "\t", + quote = FALSE, + file = output, + x = my_results, + row.names = FALSE, + col.names = FALSE, + dec = ".", + append = TRUE + ) } diff -r 45af4a9748cf -r 5d60331757d3 exomedepth.xml --- a/exomedepth.xml Fri Nov 08 13:25:44 2019 -0500 +++ b/exomedepth.xml Wed Nov 25 18:37:13 2020 +0000 @@ -103,7 +103,7 @@ ExomeDepth uses read depth data to call CNVs from exome sequencing experiments. A key idea is that the test exome should be compared to a matched aggregate reference set. This aggregate reference set should combine -exomes from the same batch and it should also be optimized for each exome. It will certainly differ from one exome +exomes from the same batch and it should also be optimized for each exome. It will certainly differ from one exome to the next. Importantly, ExomeDepth assumes that the CNV of interest is absent from the aggregate reference set. Hence @@ -111,8 +111,8 @@ common CNVs, if the call is also present in the aggregate reference. ExomeDepth is really suited to detect rare CNV calls (typically for rare Mendelian disorder analysis). -The ideas used in this package are of course not specific to exome sequencing and could be applied to other -targeted sequencing datasets, as long as they contain a sufficiently large number of exons to estimate the parameters +The ideas used in this package are of course not specific to exome sequencing and could be applied to other +targeted sequencing datasets, as long as they contain a sufficiently large number of exons to estimate the parameters (at least 20 genes, say, but probably more would be useful). Also note that PCR based enrichment studies are often not well suited for this type of read depth analysis. The reason is that as the number of cycles is often set to a high number in order to equalize the representation of each amplicon, which can discard the CNV information.