Mercurial > repos > artbio > small_rna_maps
changeset 6:a3be3601bcb3 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 9f9c64aff0d225881bedb97bd5035ccbca945d9d
author | artbio |
---|---|
date | Mon, 09 Oct 2017 11:07:09 -0400 |
parents | 12c14642e6ac |
children | a96e6a7df2b7 |
files | small_rna_maps.r small_rna_maps.xml test-data/coverage_normed.pdf test-data/size-count_normed.pdf |
diffstat | 4 files changed, 72 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/small_rna_maps.r Sun Oct 08 17:56:13 2017 -0400 +++ b/small_rna_maps.r Mon Oct 09 11:07:09 2017 -0400 @@ -12,6 +12,7 @@ option_list <- list( make_option(c("-f", "--first_dataframe"), type="character", help="path to first dataframe"), make_option(c("-e", "--extra_dataframe"), type="character", help="path to additional dataframe"), + make_option(c("-n", "--normalization"), type="character", help="space-separated normalization/size factors"), make_option("--first_plot_method", type = "character", help="How additional data should be plotted"), make_option("--extra_plot_method", type = "character", help="How additional data should be plotted"), make_option("--output_pdf", type = "character", help="path to the pdf file with plots") @@ -27,6 +28,21 @@ Table <- within(Table, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1)) } n_samples=length(unique(Table$Dataset)) +samples = unique(Table$Dataset) +if (args$normalization != "") { + norm_factors = as.numeric(unlist(strsplit(args$normalization, " "))) +} else { + norm_factors = rep(1, n_samples) +} +if (args$first_plot_method == "Counts" | args$first_plot_method == "Size" | args$first_plot_method == "Coverage") { + i = 1 + for (sample in samples) { + print(norm_factors[i]) + Table[, length(Table)][Table$Dataset==sample] <- Table[, length(Table)][Table$Dataset==sample]*norm_factors[i] + i = i + 1 + } + print(tail(Table)) +} genes=unique(levels(Table$Chromosome)) per_gene_readmap=lapply(genes, function(x) subset(Table, Chromosome==x)) per_gene_limit=lapply(genes, function(x) c(1, unique(subset(Table, Chromosome==x)$Chrom_length)) ) @@ -36,9 +52,16 @@ ExtraTable=read.delim(args$extra_dataframe, header=T, row.names=NULL) if (args$extra_plot_method == "Counts" | args$extra_plot_method=='Size') { ExtraTable <- within(ExtraTable, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1)) + } + if (args$extra_plot_method == "Counts" | args$extra_plot_method == "Size" | args$extra_plot_method == "Coverage") { + i = 1 + for (sample 
in samples) { + ExtraTable[, length(ExtraTable)][ExtraTable$Dataset==sample] <- ExtraTable[, length(ExtraTable)][ExtraTable$Dataset==sample]*norm_factors[i] + i = i + 1 } + } per_gene_size=lapply(genes, function(x) subset(ExtraTable, Chromosome==x)) - } +} ## functions
--- a/small_rna_maps.xml Sun Oct 08 17:56:13 2017 -0400 +++ b/small_rna_maps.xml Mon Oct 09 11:07:09 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="small_rna_maps" name="small_rna_maps" version="2.0.0"> +<tool id="small_rna_maps" name="small_rna_maps" version="2.1.0"> <description></description> <requirements> <requirement type="package" version="1.11.2=py27_0">numpy</requirement> @@ -33,6 +33,7 @@ Rscript '$__tool_directory__'/small_rna_maps.r --first_dataframe '$output_tab' --extra_dataframe '$extra_output_tab' + --normalization '$normalization' #if str($plots_options.plots_options_selector ) == "two_plot": --first_plot_method '${plots_options.first_plot}' --extra_plot_method '${plots_options.extra_plot}' @@ -44,20 +45,22 @@ ]]></command> <inputs> <param name="inputs" type="data" format="bam" label="Select multiple alignments to parse" multiple="True"/> + <param name="normalization" type="text" label="Enter size/normalization factors as a space-separated list. Leave blank for no normalization (default)" + help="e.g. '1 0.75 1.23'. 
Values of the list map to the above selected files from bottom to top" /> <conditional name="plots_options"> <param name="plots_options_selector" type="select" label="Number of plots per chromosome"> <option value="one_plot">Just one plot per chromosome</option> <option value="two_plot" selected="True">Two plots per chromosome</option> </param> <when value="two_plot"> - <param name="first_plot" type="select" label="select the type of the first plot"> + <param name="first_plot" type="select" label="Select the type of the first plot"> <option value="Counts">Counts</option> <option value="Coverage">Coverage</option> <option value="Mean">Mean Sizes</option> <option value="Median">Median Sizes</option> <option value="Size">Size Distributions</option> </param> - <param name="extra_plot" type="select" label="select the type of the second plot"> + <param name="extra_plot" type="select" label="Select the type of the second plot"> <option value="Counts">Counts</option> <option value="Coverage">Coverage</option> <option value="Mean">Mean Sizes</option> @@ -92,6 +95,7 @@ <param name="plots_options_selector" value="two_plot" /> <param name="first_plot" value="Counts" /> <param name="extra_plot" value="Mean" /> + <param name="normalization" value="1 1" /> <output file="count.tab" name="output_tab" /> <output file="mean.tab" name="extra_output_tab" /> <output file="count-mean.pdf" name="output_pdf" /> @@ -101,6 +105,7 @@ <param name="plots_options_selector" value="two_plot" /> <param name="first_plot" value="Counts" /> <param name="extra_plot" value="Mean" /> + <param name="normalization" value="1 1" /> <output file="doubled_count.tab" name="output_tab" /> <output file="doubled_mean.tab" name="extra_output_tab" /> <output file="doubled_count-mean.pdf" name="output_pdf" /> @@ -110,6 +115,7 @@ <param name="plots_options_selector" value="two_plot" /> <param name="first_plot" value="Counts" /> <param name="extra_plot" value="Median" /> + <param name="normalization" value="1 1" /> 
<output file="count.tab" name="output_tab" /> <output file="median.tab" name="extra_output_tab" /> <output file="count-median.pdf" name="output_pdf" /> @@ -119,6 +125,7 @@ <param name="plots_options_selector" value="two_plot" /> <param name="first_plot" value="Counts" /> <param name="extra_plot" value="Coverage" /> + <param name="normalization" value="1 1" /> <output file="count.tab" name="output_tab" /> <output file="coverage.tab" name="extra_output_tab" /> <output file="count-coverage.pdf" name="output_pdf" /> @@ -128,6 +135,7 @@ <param name="plots_options_selector" value="two_plot" /> <param name="first_plot" value="Counts" /> <param name="extra_plot" value="Size" /> + <param name="normalization" value="1 1" /> <output file="count.tab" name="output_tab" /> <output file="size.tab" name="extra_output_tab" /> <output file="count-size.pdf" name="output_pdf" /> @@ -137,13 +145,25 @@ <param name="plots_options_selector" value="two_plot" /> <param name="first_plot" value="Size" /> <param name="extra_plot" value="Counts" /> + <param name="normalization" value="1 1" /> <output file="size.tab" name="output_tab" /> <output file="count.tab" name="extra_output_tab" /> <output file="size-count.pdf" name="output_pdf" /> </test> <test> <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/> + <param name="plots_options_selector" value="two_plot" /> + <param name="first_plot" value="Size" /> + <param name="extra_plot" value="Counts" /> + <param name="normalization" value="1 0.75" /> + <output file="size.tab" name="output_tab" /> + <output file="count.tab" name="extra_output_tab" /> + <output file="size-count_normed.pdf" name="output_pdf" /> + </test> + <test> + <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/> <param name="plots_options_selector" value="one_plot" /> + <param name="normalization" value="1 1" /> <param name="first_plot" value="Counts" /> <output file="count.tab" name="output_tab" /> <output file="count.pdf" name="output_pdf" /> @@ -152,6 
+172,7 @@ <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/> <param name="plots_options_selector" value="one_plot" /> <param name="first_plot" value="Size" /> + <param name="normalization" value="1 1" /> <output file="size.tab" name="output_tab" /> <output file="size.pdf" name="output_pdf" /> </test> @@ -159,9 +180,18 @@ <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/> <param name="plots_options_selector" value="one_plot" /> <param name="first_plot" value="Coverage" /> + <param name="normalization" value="1 1" /> <output file="coverage.tab" name="output_tab" /> <output file="coverage.pdf" name="output_pdf" /> </test> + <test> + <param name="inputs" value="input1.bam,input2.bam" ftype="bam"/> + <param name="plots_options_selector" value="one_plot" /> + <param name="first_plot" value="Coverage" /> + <param name="normalization" value="1 0.2" /> + <output file="coverage.tab" name="output_tab" /> + <output file="coverage_normed.pdf" name="output_pdf" /> + </test> </tests> @@ -169,9 +199,19 @@ **What it does** -Generate read count maps from alignment BAM files, using pysam and lattice. +Plots maps of (1) read counts, (2) mean sizes, (3) median sizes, (4) coverage depth or (5) +read size distribution along chromosome references. + +Mean sizes and median sizes are the mean and the median sizes, respectively, of all reads +whose 5' ends map to a given coordinate in a chromosome reference. +Coverage depths are computed from the input bam alignment files using the python pysam module. -In addition to the read counts (lower graphs), median size, mean size and coverage depth of reads(lower graphs) mapping at a given position are plotted. +The variables mentioned above (1-5) can be plotted either separately or in all possible +pairwise combinations. + +For comparison purposes, values from bam alignment files can be normalized by a size factor +before plotting. If the normalization field is left blank, default normalization of 1 +is assumed. 
**Inputs** @@ -180,6 +220,9 @@ - single-read - sorted - mapping to the same reference + +Optionally, a space-separated list of normalization/size factors may be added before plotting. +This list maps to the selected bam alignments from bottom to top. **Output**