Repository 'msp_sr_readmap_and_size_histograms'
hg clone https://toolshed.g2.bx.psu.edu/repos/drosofff/msp_sr_readmap_and_size_histograms

Changeset 8:be0c6b6466cc (2016-09-19)
Previous changeset 7:c9e267cb84c0 (2016-09-18) Next changeset 9:92898cc3ea19 (2016-10-08)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit 97b40d7a593cef6c3303f7baba781a84d242e454
modified:
readmap.py
readmap.xml
smRtools.py
test-data/Readmap_dataframe.tab
test-data/Size_distribution_dataframe.tab
added:
plot_size_readmap.r
removed:
smRtools.pyc
test-data/Readmaps.pdf
test-data/Size_distribution.pdf
test-data/Size_distribution_and_Readmaps.pdf
tool_dependencies.xml
b
diff -r c9e267cb84c0 -r be0c6b6466cc plot_size_readmap.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plot_size_readmap.r Mon Sep 19 06:16:21 2016 -0400
[
@@ -0,0 +1,145 @@
+## Send R error messages to stderr and quit with exit status 1
+options(show.error.messages = FALSE,
+        error = function() { cat(geterrmessage(), file = stderr()); q(save = "no", status = 1, runLast = FALSE) })
+library(RColorBrewer)
+library(lattice)
+library(latticeExtra)
+library(grid)
+library(gridExtra)
+library(optparse)
+
+# Parse arguments; --yrange defaults to 0 (no fixed y limits) so the script does not fail when it is omitted
+option_list <- list(
+    make_option(c("-r", "--readmap_tab"), type="character", help="Path to file with tabular readmap"),
+    make_option(c("-s", "--size_distribution_tab"), type="character", help="Path to file with tabular size distribution"),
+    make_option("--readmap_pdf", type="character", help="Path to file with readmap plot"),
+    make_option("--size_distribution_pdf", type="character", help="Path to file with size distribution plot"),
+    make_option("--combi_pdf", type="character", help="Path to file with size distribution and readmap plot"),
+    make_option("--title", type="character", help="Title for readmaps and size distribution"),
+    make_option("--xlabel", type="character", help="xlabel for readmaps and size distribution"),
+    make_option("--ylabel", type="character", help="ylabel for readmaps and size distribution"),
+    make_option("--yrange", type="integer", default=0L, help="Y-axis range"),
+    make_option("--rows_per_page", type="integer", help="rows_per_page")
+    )
+
+parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
+args <- parse_args(parser)
+
+## data frames implementation
+# stringsAsFactors=TRUE keeps `gene` a factor on R >= 4.0, where levels() of a character column would be NULL
+rm <- read.delim(args$readmap_tab, header=TRUE, row.names=NULL, stringsAsFactors=TRUE)
+n_samples <- length(unique(rm$sample))
+genes <- levels(rm$gene)  # one entry per distinct gene
+per_gene_readmap <- lapply(genes, function(x) subset(rm, gene==x))
+n_genes <- length(per_gene_readmap)
+# size distribution table, split per gene in the same gene order as the readmap table
+size <- read.delim(args$size_distribution_tab, header=TRUE, row.names=NULL, stringsAsFactors=TRUE)
+per_gene_size <- lapply(genes, function(x) subset(size, gene==x))
+
+## end of data frames implementation
+
+## functions
+
+# Readmap: vertical lines (type "h") of `count` at each `coord`, one panel per sample x gene, coloured by polarity; `...` goes to xyplot().
+plot_readmap <- function(df, ...) {
+  # genes are ordered by decreasing total absolute count
+  readmap <- xyplot(count ~ coord | factor(sample, levels = unique(sample)) + reorder(gene, count, function(x) -sum(abs(x))),
+                    data = df, type = "h", origin = 0, horizontal = FALSE,
+                    groups = polarity, col = c("red", "blue"),
+                    scales = list(relation = "free",
+                                  x = list(rot = 0, cex = 0.7, axs = "i", tck = 0.5),
+                                  y = list(tick.number = 4, rot = 90, cex = 0.7)),
+                    xlab = NULL, main = NULL, ylab = NULL,
+                    as.table = TRUE, par.strip.text = list(cex = 0.7),
+                    ...)
+  combineLimits(readmap)  # combineLimits() comes from latticeExtra
+}
+
+# Stacked bar chart of `count` per read `size`, one panel per sample x gene, coloured by polarity; `...` goes to barchart().
+plot_size_distribution <- function(df, ...) {
+  # prepanel function: y limits symmetric around zero for each panel
+  symmetric_ylim <- function(x, y, ...) {
+    bound <- max(abs(y))
+    list(ylim = c(-bound, bound))
+  }
+  size_chart <- barchart(count ~ as.factor(size) | factor(sample, levels = unique(sample)) + gene,
+                         data = df, origin = 0, horizontal = FALSE,
+                         groups = polarity, stack = TRUE,
+                         col = c("red", "blue"), cex = 0.75,
+                         scales = list(y = list(tick.number = 4, rot = 90, relation = "free", cex = 0.7),
+                                       x = list(cex = 0.7)),
+                         prepanel = symmetric_ylim,
+                         xlab = NULL, ylab = NULL, main = NULL,
+                         as.table = TRUE, newpage = TRUE, par.strip.text = list(cex = 0.7),
+                         ...)
+  combineLimits(size_chart)
+}
+
+## end of functions
+
+## lattice par.settings (layout padding and strip colours) for each kind of plot
+
+par.settings.readmap <- list(layout.heights = list(top.padding = 0, bottom.padding = -2.5), strip.background = list(col = c("lightblue", "lightgreen")))
+par.settings.size <- list(layout.heights = list(top.padding = -1, bottom.padding = -2.5), strip.background = list(col = c("lightblue", "lightgreen")))
+par.settings.combination.readmap <- list(layout.heights = list(top.padding = 0, bottom.padding = -3), strip.background = list(col = c("lightblue", "lightgreen")))
+par.settings.combination.size <- list(layout.heights = list(top.padding = -2, bottom.padding = -0.5), strip.background = list(col = c("lightblue", "lightgreen")))
+
+## end of lattice par.settings
+
+## GRAPHS
+# Page geometry in inches: up to 7 genes share a single page sized to the gene count, more genes are paginated.
+if (n_genes > 7) {page_height_simple = 11.69; page_height_combi=11.69; rows_per_page=args$rows_per_page} else {
+                 rows_per_page= n_genes; page_height_simple = 2.5*n_genes; page_height_combi=page_height_simple*2 }
+if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 8.2677*n_samples/3} # to test
+
+# Readmap PDF: one page per batch of rows_per_page genes.
+pdf(file=args$readmap_pdf, paper="special", height=page_height_simple, width=page_width)
+for (i in seq(1,n_genes,rows_per_page)) {
+  start <- i
+  end <- min(i+rows_per_page-1, n_genes)
+  # a yrange of 0 passes no ylim; any other value fixes the y axis to [-yrange, yrange]
+  if (args$yrange == 0) { readmap_plot.list <- lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.readmap)) } else {
+  readmap_plot.list <- lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, ylim=c(-args$yrange, args$yrange), par.settings=par.settings.readmap)) }
+  args_list <- c(readmap_plot.list, list(nrow=rows_per_page, ncol=1,
+                                         top=textGrob("Read Maps (nucleotide coordinates)", gp=gpar(cex=1), just="top"),
+                                         left=textGrob(args$ylabel, gp=gpar(cex=1), vjust=1, rot=90)
+                                         )
+                 )
+  do.call(grid.arrange, args_list)
+}
+devname <- dev.off()
+# Size distribution PDF: same pagination as the readmap PDF.
+pdf(file=args$size_distribution_pdf, paper="special", height=page_height_simple, width=page_width)
+for (i in seq(1,n_genes,rows_per_page)) {
+  start <- i
+  end <- min(i+rows_per_page-1, n_genes)
+  plot.list <- lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, par.settings=par.settings.size) )
+  args_list <- c(plot.list, list(nrow=rows_per_page, ncol=1,
+                                 top=textGrob("Size distributions (in nucleotides)", gp=gpar(cex=1), just="top"),
+                                 left=textGrob(args$ylabel, gp=gpar(cex=1), vjust=1, rot=90)
+                                 )
+                 )
+  do.call(grid.arrange, args_list)
+}
+devname <- dev.off()
+
+# Combined PDF: each gene takes two rows, its readmap followed by its size distribution.
+pdf(file=args$combi_pdf, paper="special", height=page_height_combi, width=page_width)
+if (rows_per_page %% 2 != 0) { rows_per_page = rows_per_page + 1} # even row count, since every gene uses two rows
+for (i in seq(1,n_genes,rows_per_page/2)) {
+  start <- i
+  end <- min(i+rows_per_page/2-1, n_genes)
+  # NOTE(review): par.settings.combination.readmap is defined but par.settings.readmap is used here - confirm intended
+  if (args$yrange == 0) { readmap_plot.list <- lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.readmap)) } else {
+  readmap_plot.list <- lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, ylim=c(-args$yrange, args$yrange), par.settings=par.settings.readmap)) }
+  size_plot.list <- lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, strip=FALSE, par.settings=par.settings.combination.size))
+  plot.list <- rbind(readmap_plot.list, size_plot.list ) # 2 x n list matrix; c() below flattens it column-wise, interleaving readmap and size plots
+  args_list <- c(plot.list, list(nrow=rows_per_page+1, ncol=1,
+                                 top=textGrob(args$title, gp=gpar(cex=1), just="top"),
+                                 left=textGrob(args$ylabel, gp=gpar(cex=1), vjust=1, rot=90),
+                                 sub=textGrob(args$xlabel, gp=gpar(cex=1), just="bottom")
+                                 )
+                 )
+  do.call(grid.arrange, args_list)
+}
+devname <- dev.off()
\ No newline at end of file
b
diff -r c9e267cb84c0 -r be0c6b6466cc readmap.py
--- a/readmap.py Sun Sep 18 12:55:27 2016 -0400
+++ b/readmap.py Mon Sep 19 06:16:21 2016 -0400
[
@@ -23,7 +23,6 @@
   the_parser.add_argument('--gff', type=str, help="GFF containing regions of interest")
   the_parser.add_argument('--minquery', type=int, help="Minimum readsize")
   the_parser.add_argument('--maxquery', type=int, help="Maximum readsize")
-  the_parser.add_argument('--rcode', type=str, help="R script")
   args = the_parser.parse_args()
   return args
 
@@ -38,7 +37,6 @@
 size_distribution_file=args.output_size_distribution
 minquery=args.minquery
 maxquery=args.maxquery
-Rcode = args.rcode
 filePath=args.input
 fileExt=args.ext
 fileLabel=args.label
@@ -54,16 +52,19 @@
                         biosample=fileLabel[i], size_inf=minquery, size_sup=maxquery, norm=norm)
   return MasterListOfGenomes
 
-def dataframe_sanityzer (listofdatalines):
-  Dict = defaultdict(float) 
+def remove_null_entries(listofdatalines):
+  """
+  This function removes genes that have no reads aligned.
+  """
+  Dict = defaultdict(float)
   for line in listofdatalines:
     fields= line.split("\t")
-    Dict[fields[0]] += float (fields[2])
+    Dict[fields[0]] += abs(float(fields[2]))
   filtered_list = []
   for line in listofdatalines:
     fields= line.split("\t")
     if Dict[fields[0]] != 0:
-      filtered_list.append(line) 
+      filtered_list.append(line)
   return filtered_list
 
 
@@ -110,9 +111,8 @@
         plottable = dict[gene].readplot()
         plottable = handle_start_stop_coordinates(plottable, readDict)
         for line in plottable:
-          #print >>readmap, "%s\t%s" % (line, sample)
           listoflines.append ("%s\t%s" % (line, sample))
-    listoflines = dataframe_sanityzer(listoflines)
+    listoflines = remove_null_entries(listoflines)
     for line in listoflines:
       print >>readmap, line
 
@@ -126,19 +126,15 @@
       else:
         dict=readDict[sample].instanceDict
       for gene in dict.keys():
-        histogram = dict[gene].size_histogram(minquery=args.minquery, maxquery=args.maxquery)
+        histogram = dict[gene].size_histogram(minquery=minquery, maxquery=maxquery)
         for polarity in histogram.keys():
           if polarity=='both':
             continue
-          #for size in xrange(args.minquery, args.maxquery):
-          #  if not size in histogram[polarity].keys():
-          #    histogram[size]=0
           for size, count in histogram[polarity].iteritems():
-            #print >>size_distrib, "%s\t%s\t%s\t%s\t%s" % (gene, size, count, polarity, sample) # test, changed the order accordingly
             listoflines.append ("%s\t%s\t%s\t%s\t%s" % (gene, size, count, polarity, sample) )
-    listoflines = dataframe_sanityzer(listoflines)
+    listoflines = remove_null_entries(listoflines)
     for line in listoflines:
-      print >>size_distrib, line  
+      print >>size_distrib, line
 
 def gff_item_subinstances(readDict, gff3):
   GFFinstanceDict=OrderedDict()
@@ -154,10 +150,6 @@
       item_downstream_coordinate = int(gff_fields[4])
       item_polarity = gff_fields[6]
       for sample in readDict.keys():
-## this is not required anymore but test
-#        if not GFFinstanceDict.has_key(sample):
-#          GFFinstanceDict[sample]={}
-####
         subinstance=extractsubinstance(item_upstream_coordinate, item_downstream_coordinate, readDict[sample].instanceDict[chrom])
         if item_polarity == '-':
           subinstance.readDict={key*-1:value for key, value in subinstance.readDict.iteritems()}
@@ -172,8 +164,4 @@
 
 write_readplot_dataframe(MasterListOfGenomes, readmap_file)
 write_size_distribution_dataframe(MasterListOfGenomes, size_distribution_file)
-
-R_command="Rscript "+ Rcode
-process = subprocess.Popen(R_command.split())
-process.wait()
 
b
diff -r c9e267cb84c0 -r be0c6b6466cc readmap.xml
--- a/readmap.xml Sun Sep 18 12:55:27 2016 -0400
+++ b/readmap.xml Mon Sep 19 06:16:21 2016 -0400
[
b'@@ -1,239 +1,109 @@\n-<tool id="Readmap" name="Generate readmap and histograms from alignment files" version="1.1.5">\n-  <description>from sRbowtie aligment</description>\n-  <requirements>\n-        <requirement type="package" version="0.12.7">bowtie</requirement>\n-        <requirement type="package" version="0.7.7">pysam</requirement>\n-        <requirement type="package" version="3.1.2">R</requirement>\n-        <requirement type="package" version="2.14">biocbasics</requirement>\n-        <requirement type="package" version="1.9">numpy</requirement>\n-  </requirements>\n-<command interpreter="python">\n-        readmap.py \n-\t          #if $refGenomeSource.genomeSource == "history":\n-         \t    --reference_fasta  ## sys.argv[2]\n-                    $refGenomeSource.ownFile ## index source\n-          \t  #else:\n-                    #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ \'bowtie_indexes\' ].get_fields() )[0][-1]\n-\t\t    --reference_bowtie_index\n-                    $reference\n-          \t  #end if\n-\t\t  --rcode\n-\t\t  $plotCode\n-\t\t  --output_readmap\n-\t\t  $readmap_dataframe\n-\t\t  --output_size_distribution\n-\t\t  $size_distribution_dataframe\n-\t\t  --minquery\n-\t\t  $minquery\n-\t\t  --maxquery\n-\t\t  $maxquery\n-\t\t  --input\n-\t\t  #for $i in $refGenomeSource.series\n-    \t\t    $i.input \n-\t\t  #end for\n-\t\t  --ext\n-\t\t  #for $i in $refGenomeSource.series\n-    \t\t    $i.input.ext \n-\t\t  #end for\n-\t\t  --label\n-\t\t  #for $i in $refGenomeSource.series\n-    \t\t    "$i.input.name" \n-\t\t  #end for\n-\t\t  --normalization_factor\n-\t\t  #for $i in $refGenomeSource.series\n-    \t\t    $i.norm\n-\t\t  #end for\n-\t\t  #if $gff:\n-\t\t    --gff\n-                    $gff\n-                  #end if\n-\n+<tool id="Readmap" name="Generate readmap and histograms from alignment files" version="1.2.0">\n+    <description>from sRbowtie 
aligment</description>\n+    <requirements>\n+        <requirement type="package" version="1.0.0">bowtie</requirement>\n+        <requirement type="package" version="0.9.0">pysam</requirement>\n+        <requirement type="package" version="1.9.3">numpy</requirement>\n+        <requirement type="package" version="1.3.0">r-optparse</requirement>\n+        <requirement type="package" version="0.6_26">r-latticeextra</requirement>\n+        <requirement type="package" version="2.0.0">r-gridextra</requirement>\n+    </requirements>\n+    <command><![CDATA[\n+        python2 $__tool_directory__/readmap.py\n+        #if $refGenomeSource.genomeSource == "history":\n+            --reference_fasta\n+            $refGenomeSource.ownFile ## index source\n+        #else:\n+            #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ \'bowtie_indexes\' ].get_fields() )[0][-1]\n+            --reference_bowtie_index\n+            $reference\n+        #end if\n+            --output_readmap\n+        "$readmap_dataframe"\n+        --output_size_distribution\n+        "$size_distribution_dataframe"\n+        --minquery $minquery\n+        --maxquery $maxquery\n+        --input\n+        #for $i in $refGenomeSource.series\n+            $i.input\n+        #end for\n+        --ext\n+        #for $i in $refGenomeSource.series\n+            $i.input.ext\n+        #end for\n+        --label\n+        #for $i in $refGenomeSource.series\n+            "$i.input.name"\n+        #end for\n+        --normalization_factor\n+        #for $i in $refGenomeSource.series\n+            $i.norm\n+        #end for\n+        #if $gff:\n+            --gff\n+            $gff\n+        #end if\n+        ; Rscript $__tool_directory__/plot_size_readmap.r\n+        --readmap_tab "$readmap_dataframe"\n+        --size_distribution_tab "$size_distribution_dataframe"\n+        --readmap_pdf "$readmap_PDF"\n+        --size_distribution_pdf 
"$size_PDF"\n+        --combi_pdf "$combi_PDF"\n+        --title "$title"\n+        --xlabel "$xlabel"\n+        --ylabel "$ylabel"\n+        --yrange "$yrange"\n+        --rows_p'..b'\n+    </outputs>\n+    <help>\n \n **What it does**\n \n-Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a "Readmap", \n-where by default for each "chromosome" the position of the read is recorded on the x-axis, and the y-axis indicates \n+Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a "Readmap",\n+where by default for each "chromosome" the position of the read is recorded on the x-axis, and the y-axis indicates\n the number of reads per position. Reads that map in sense are on the top, reads that map antisense are on the bottom.\n \n \n@@ -248,42 +118,39 @@\n Query sequence::\n For a SAM file as the following:\n \n-  5\t16\t2L_79\t24393\t255\t17M\t*\t0\t0\tCCTTCATCTTTTTTTTT\tIIIIIIIIIIIIIIIII\tXA:i:0\tMD:Z:17\tNM:i:0\n+5\t16\t2L_79\t24393\t255\t17M\t*\t0\t0\tCCTTCATCTTTTTTTTT\tIIIIIIIIIIIIIIIII\tXA:i:0\tMD:Z:17\tNM:i:0\n \n-  11\t0\t2R_1\t12675\t255\t21M\t*\t0\t0\tAAAAAAAACGCGTCCTTGTGC\tIIIIIIIIIIIIIIIIIIIII\tXA:i:0\tMD:Z:21\tNM:i:0\n+11\t0\t2R_1\t12675\t255\t21M\t*\t0\t0\tAAAAAAAACGCGTCCTTGTGC\tIIIIIIIIIIIIIIIIIIIII\tXA:i:0\tMD:Z:21\tNM:i:0\n \n-  2\t16\t2L_5\t669\t255\t23M\t*\t0\t0\tTGTTGCTGCATTTCTTTTTTTTT\tIIIIIIIIIIIIIIIIIIIIIII\tXA:i:0\tMD:Z:23\tNM:i:0\n+2\t16\t2L_5\t669\t255\t23M\t*\t0\t0\tTGTTGCTGCATTTCTTTTTTTTT\tIIIIIIIIIIIIIIIIIIIIIII\tXA:i:0\tMD:Z:23\tNM:i:0\n \n produce a plot like this:\n \n ----\n \n-.. image:: static/images/readmap.png \n-    :height: 800 \n-    :width: 500\n+.. 
image:: static/images/readmap.png\n+:height: 800\n+:width: 500\n \n-</help>\n-  <tests>\n-  <test>\n-      <param name="genomeSource" value="history" />\n-      <param name="ownFile" value ="transposons.fasta" ftype="fasta" />\n-      <param name="series_0|input" value="sample1.srbowtie_out" ftype="tabular"/>\n-      <param name="series_0|norm" value="1" />\n-      <param name="series_1|input" value="sample2.srbowtie_out" ftype="tabular"/>\n-      <param name="series_1|norm" value="1" />\n-      <param name="series_2|input" value="sample3.srbowtie_out" ftype="tabular"/>\n-      <param name="series_2|norm" value="1" />\n-      <param name="minquery" value="20" />\n-      <param name="maxquery" value="30" />\n-      <param name="title" value="Readmaps and size distributions" />\n-      <param name="xlabel" value="Coordinates/read size" />\n-      <param name="ylabel" value="Number of reads" />\n-      <param name="rows_per_page" value="8" />\n-      <output name="readmap_dataframe" ftype="tabular" file="Readmap_dataframe.tab" />\n-      <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab" />\n-      <output name="readmap_PDF" ftype="pdf" file="Readmaps.pdf" />\n-      <output name="size_PDF" ftype="pdf" file="Size_distribution.pdf" />\n-      <output name="combi_PDF" ftype="pdf" file="Size_distribution_and_Readmaps.pdf" />\n-  </test>\n-  </tests>\n+    </help>\n+    <tests>\n+        <test>\n+            <param name="genomeSource" value="history" />\n+            <param name="ownFile" value ="transposons.fasta" ftype="fasta" />\n+            <param name="series_0|input" value="sample1.srbowtie_out" ftype="tabular"/>\n+            <param name="series_0|norm" value="1" />\n+            <param name="series_1|input" value="sample2.srbowtie_out" ftype="tabular"/>\n+            <param name="series_1|norm" value="1" />\n+            <param name="series_2|input" value="sample3.srbowtie_out" ftype="tabular"/>\n+            <param 
name="series_2|norm" value="1" />\n+            <param name="minquery" value="20" />\n+            <param name="maxquery" value="30" />\n+            <param name="title" value="Readmaps and size distributions" />\n+            <param name="xlabel" value="Coordinates/read size" />\n+            <param name="ylabel" value="Number of reads" />\n+            <param name="rows_per_page" value="8" />\n+            <output name="readmap_dataframe" ftype="tabular" file="Readmap_dataframe.tab" />\n+            <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab" />\n+        </test>\n+    </tests>\n </tool>\n'
b
diff -r c9e267cb84c0 -r be0c6b6466cc smRtools.py
--- a/smRtools.py Sun Sep 18 12:55:27 2016 -0400
+++ b/smRtools.py Mon Sep 19 06:16:21 2016 -0400
[
@@ -142,26 +142,6 @@
           self.alignedReads += 1
       F.close()
       return self.instanceDict
-#    elif self.alignmentFileFormat == "sam":
-#      F = open (self.alignmentFile, "r")
-#      dict = {"0":"+", "16":"-"}
-#      for line in F:
-#        if line[0]=='@':
-#            continue
-#        fields = line.split()
-#        if fields[2] == "*": continue
-#        polarity = dict[fields[1]]
-#        gene = fields[2]
-#        offset = int(fields[3])
-#        size = len (fields[9])
-#        if self.size_inf:
-#          if (size>=self.size_inf and size<= self.size_sup):
-#            self.instanceDict[gene].addread (polarity, offset, size)
-#            self.alignedReads += 1
-#       else:
-#          self.instanceDict[gene].addread (polarity, offset, size)
-#          self.alignedReads += 1
-#      F.close()
     elif self.alignmentFileFormat == "bam" or self.alignmentFileFormat == "sam":
       import pysam
       samfile = pysam.Samfile(self.alignmentFile)
@@ -184,22 +164,6 @@
           self.alignedReads += 1
       return self.instanceDict
 
-#  def size_histogram (self):
-#    size_dict={}
-#    size_dict['F']= defaultdict (int)
-#    size_dict['R']= defaultdict (int)
-#    size_dict['both'] = defaultdict (int)
-#    for item in self.instanceDict:
-#      buffer_dict_F = self.instanceDict[item].size_histogram()['F']
-#      buffer_dict_R = self.instanceDict[item].size_histogram()['R']
-#      for size in buffer_dict_F:
-#        size_dict['F'][size] += buffer_dict_F[size]
-#      for size in buffer_dict_R:
-#        size_dict['R'][size] -= buffer_dict_R[size]
-#    allSizeKeys = list (set (size_dict['F'].keys() + size_dict['R'].keys() ) )
-#    for size in allSizeKeys:
-#      size_dict['both'][size] = size_dict['F'][size] + size_dict['R'][size]
-#    return size_dict
   def size_histogram (self): # in HandleSmRNAwindows
     '''refactored on 7-9-2014 to debug size_histogram tool'''
     size_dict={}
@@ -361,24 +325,7 @@
     for offset in range (min(dicsize.keys()), max(dicsize.keys())+1):
       dicsize[size] = dicsize.get(size, 0) # to fill offsets with null values
     return dicsize
-    
-#  def size_histogram(self):
-#    norm=self.norm
-#    hist_dict={}
-#    hist_dict['F']={}
-#    hist_dict['R']={}
-#    for offset in self.readDict:
-#      for size in self.readDict[offset]:
-#        if offset < 0:
-#          hist_dict['R'][size] = hist_dict['R'].get(size, 0) - 1*norm
-#        else:
-#          hist_dict['F'][size] = hist_dict['F'].get(size, 0) + 1*norm
-#   ## patch to avoid missing graphs when parsed by R-lattice. 27-08-2014. Test and validate !    
-#    if not (hist_dict['F']) and (not hist_dict['R']):
-#      hist_dict['F'][21] = 0
-#      hist_dict['R'][21] = 0
-#   ##
-#    return hist_dict
+
 
   def size_histogram(self, minquery=None, maxquery=None): # in SmRNAwindow
     '''refactored on 7-9-2014 to debug size_histogram tool'''
@@ -480,7 +427,6 @@
       return ". | %s" % (freqDic["Trev"] / reverse_sum * 100)
     else:
       return "%s | %s" % (freqDic["Tfor"] / forward_sum * 100, freqDic["Trev"] / reverse_sum * 100)
-
     
   def readplot (self):
     norm=self.norm
b
diff -r c9e267cb84c0 -r be0c6b6466cc smRtools.pyc
b
Binary file smRtools.pyc has changed
b
diff -r c9e267cb84c0 -r be0c6b6466cc test-data/Readmap_dataframe.tab
--- a/test-data/Readmap_dataframe.tab Sun Sep 18 12:55:27 2016 -0400
+++ b/test-data/Readmap_dataframe.tab Mon Sep 19 06:16:21 2016 -0400
b
b'@@ -1,4 +1,5 @@\n gene\tcoord\tcount\tpolarity\tsample\n+FBti0020401\t0\t0\tF\tsample1.srbowtie_out\n FBti0020401\t78\t-1.0\tR\tsample1.srbowtie_out\n FBti0020401\t102\t-1.0\tR\tsample1.srbowtie_out\n FBti0020401\t271\t-1.0\tR\tsample1.srbowtie_out\n@@ -47,12 +48,20 @@\n FBti0020401\t6184\t-2.0\tR\tsample1.srbowtie_out\n FBti0020401\t6209\t1.0\tF\tsample1.srbowtie_out\n FBti0020401\t6327\t-2.0\tR\tsample1.srbowtie_out\n+FBti0020401\t6348\t0\tF\tsample1.srbowtie_out\n+FBti0020406\t0\t0\tF\tsample1.srbowtie_out\n FBti0020406\t174\t1.0\tF\tsample1.srbowtie_out\n FBti0020406\t516\t-1.0\tR\tsample1.srbowtie_out\n FBti0020406\t542\t-1.0\tR\tsample1.srbowtie_out\n FBti0020406\t595\t-1.0\tR\tsample1.srbowtie_out\n+FBti0020406\t812\t0\tF\tsample1.srbowtie_out\n+FBti0019511\t0\t0\tF\tsample1.srbowtie_out\n FBti0019511\t1\t0\tF\tsample1.srbowtie_out\n+FBti0019511\t1402\t0\tF\tsample1.srbowtie_out\n+FBti0019512\t0\t0\tF\tsample1.srbowtie_out\n FBti0019512\t1\t0\tF\tsample1.srbowtie_out\n+FBti0019512\t221\t0\tF\tsample1.srbowtie_out\n+FBti0019473\t0\t0\tF\tsample1.srbowtie_out\n FBti0019473\t62\t-1.0\tR\tsample1.srbowtie_out\n FBti0019473\t199\t-1.0\tR\tsample1.srbowtie_out\n FBti0019473\t203\t1.0\tF\tsample1.srbowtie_out\n@@ -109,6 +118,8 @@\n FBti0019473\t4860\t-1.0\tR\tsample1.srbowtie_out\n FBti0019473\t4939\t1.0\tF\tsample1.srbowtie_out\n FBti0019473\t4948\t-2.0\tR\tsample1.srbowtie_out\n+FBti0019473\t5368\t0\tF\tsample1.srbowtie_out\n+FBti0019518\t0\t0\tF\tsample1.srbowtie_out\n FBti0019518\t130\t1.0\tF\tsample1.srbowtie_out\n FBti0019518\t182\t1.0\tF\tsample1.srbowtie_out\n FBti0019518\t217\t-1.0\tR\tsample1.srbowtie_out\n@@ -117,6 +128,8 @@\n FBti0019518\t589\t-1.0\tR\tsample1.srbowtie_out\n FBti0019518\t617\t3.0\tF\tsample1.srbowtie_out\n FBti0019518\t817\t-1.0\tR\tsample1.srbowtie_out\n+FBti0019518\t1012\t0\tF\tsample1.srbowtie_out\n+FBti0019519\t0\t0\tF\tsample1.srbowtie_out\n FBti0019519\t1202\t2.0\tF\tsample1.srbowtie_out\n 
FBti0019519\t1325\t2.0\tF\tsample1.srbowtie_out\n FBti0019519\t1379\t1.0\tF\tsample1.srbowtie_out\n@@ -132,7 +145,11 @@\n FBti0019519\t1985\t2.0\tF\tsample1.srbowtie_out\n FBti0019519\t2059\t1.0\tF\tsample1.srbowtie_out\n FBti0019519\t2247\t1.0\tF\tsample1.srbowtie_out\n+FBti0019519\t3897\t0\tF\tsample1.srbowtie_out\n+FBti0019514\t0\t0\tF\tsample1.srbowtie_out\n FBti0019514\t337\t1.0\tF\tsample1.srbowtie_out\n+FBti0019514\t466\t0\tF\tsample1.srbowtie_out\n+FBti0019515\t0\t0\tF\tsample1.srbowtie_out\n FBti0019515\t531\t-1.0\tR\tsample1.srbowtie_out\n FBti0019515\t1099\t-1.0\tR\tsample1.srbowtie_out\n FBti0019515\t1113\t1.0\tF\tsample1.srbowtie_out\n@@ -154,11 +171,15 @@\n FBti0019515\t2475\t-8.0\tR\tsample1.srbowtie_out\n FBti0019515\t2484\t-1.0\tR\tsample1.srbowtie_out\n FBti0019515\t2520\t1.0\tF\tsample1.srbowtie_out\n+FBti0019515\t2592\t0\tF\tsample1.srbowtie_out\n+FBti0019516\t0\t0\tF\tsample1.srbowtie_out\n FBti0019516\t15\t1.0\tF\tsample1.srbowtie_out\n FBti0019516\t264\t1.0\tF\tsample1.srbowtie_out\n FBti0019516\t737\t5.0\tF\tsample1.srbowtie_out\n FBti0019516\t799\t1.0\tF\tsample1.srbowtie_out\n FBti0019516\t941\t-1.0\tR\tsample1.srbowtie_out\n+FBti0019516\t1132\t0\tF\tsample1.srbowtie_out\n+FBti0019517\t0\t0\tF\tsample1.srbowtie_out\n FBti0019517\t5\t1.0\tF\tsample1.srbowtie_out\n FBti0019517\t47\t-1.0\tR\tsample1.srbowtie_out\n FBti0019517\t138\t-1.0\tR\tsample1.srbowtie_out\n@@ -184,6 +205,8 @@\n FBti0019517\t515\t-1.0\tR\tsample1.srbowtie_out\n FBti0019517\t581\t1.0\tF\tsample1.srbowtie_out\n FBti0019517\t590\t-1.0\tR\tsample1.srbowtie_out\n+FBti0019517\t740\t0\tF\tsample1.srbowtie_out\n+FBti0020404\t0\t0\tF\tsample1.srbowtie_out\n FBti0020404\t40\t1.0\tF\tsample1.srbowtie_out\n FBti0020404\t56\t1.0\tF\tsample1.srbowtie_out\n FBti0020404\t210\t3.0\tF\tsample1.srbowtie_out\n@@ -195,10 +218,14 @@\n FBti0020404\t835\t-3.0\tR\tsample1.srbowtie_out\n FBti0020404\t1309\t-2.0\tR\tsample1.srbowtie_out\n 
FBti0020404\t1383\t-1.0\tR\tsample1.srbowtie_out\n+FBti0020404\t1470\t0\tF\tsample1.srbowtie_out\n+FBti0020405\t0\t0\tF\tsample1.srbowtie_out\n FBti0020405\t127\t-1.0\tR\tsample1.srbowtie_out\n FBti0020405\t404\t1.0\tF\tsample1.srbowtie_out\n FBti0020405\t586\t2.0\tF\tsample1.srbowtie_out\n FBti0020405\t674\t1.0\tF\tsample1.srbowtie_out\n+FBti0020405\t745\t0\tF\tsample1.srbowtie_out\n+FBti0019499\t0\t0\tF\tsample1.srbowtie_out\n FBti0019499\t18\t1.0\tF\tsample1.srbowtie_out\n FBti0019499\t271\t1.0\tF\tsample1.srbowtie_out\n FBti0019499\t369\t-1.'..b'\tsample3.srbowtie_out\n FBti0019502\t86\t1.0\tF\tsample3.srbowtie_out\n@@ -3035,8 +3265,12 @@\n FBti0019502\t870\t1.0\tF\tsample3.srbowtie_out\n FBti0019502\t882\t-2.0\tR\tsample3.srbowtie_out\n FBti0019502\t1062\t-1.0\tR\tsample3.srbowtie_out\n+FBti0019502\t1076\t0\tF\tsample3.srbowtie_out\n+FBti0019501\t0\t0\tF\tsample3.srbowtie_out\n FBti0019501\t599\t-1.0\tR\tsample3.srbowtie_out\n FBti0019501\t726\t-1.0\tR\tsample3.srbowtie_out\n+FBti0019501\t966\t0\tF\tsample3.srbowtie_out\n+FBti0019500\t0\t0\tF\tsample3.srbowtie_out\n FBti0019500\t24\t-1.0\tR\tsample3.srbowtie_out\n FBti0019500\t56\t1.0\tF\tsample3.srbowtie_out\n FBti0019500\t58\t1.0\tF\tsample3.srbowtie_out\n@@ -3051,6 +3285,8 @@\n FBti0019500\t935\t-1.0\tR\tsample3.srbowtie_out\n FBti0019500\t1034\t-1.0\tR\tsample3.srbowtie_out\n FBti0019500\t1097\t-1.0\tR\tsample3.srbowtie_out\n+FBti0019500\t1111\t0\tF\tsample3.srbowtie_out\n+FBti0020402\t0\t0\tF\tsample3.srbowtie_out\n FBti0020402\t44\t-1.0\tR\tsample3.srbowtie_out\n FBti0020402\t301\t1.0\tF\tsample3.srbowtie_out\n FBti0020402\t349\t-1.0\tR\tsample3.srbowtie_out\n@@ -3093,6 +3329,8 @@\n FBti0020402\t6361\t1.0\tF\tsample3.srbowtie_out\n FBti0020402\t6367\t-1.0\tR\tsample3.srbowtie_out\n FBti0020402\t6395\t1.0\tF\tsample3.srbowtie_out\n+FBti0020402\t6438\t0\tF\tsample3.srbowtie_out\n+FBti0020410\t0\t0\tF\tsample3.srbowtie_out\n FBti0020410\t103\t-1.0\tR\tsample3.srbowtie_out\n 
FBti0020410\t123\t1.0\tF\tsample3.srbowtie_out\n FBti0020410\t197\t1.0\tF\tsample3.srbowtie_out\n@@ -3148,6 +3386,8 @@\n FBti0020410\t6479\t-1.0\tR\tsample3.srbowtie_out\n FBti0020410\t6629\t-1.0\tR\tsample3.srbowtie_out\n FBti0020410\t6647\t1.0\tF\tsample3.srbowtie_out\n+FBti0020410\t6752\t0\tF\tsample3.srbowtie_out\n+FBti0020403\t0\t0\tF\tsample3.srbowtie_out\n FBti0020403\t40\t1.0\tF\tsample3.srbowtie_out\n FBti0020403\t60\t1.0\tF\tsample3.srbowtie_out\n FBti0020403\t161\t2.0\tF\tsample3.srbowtie_out\n@@ -3158,6 +3398,8 @@\n FBti0020403\t869\t-1.0\tR\tsample3.srbowtie_out\n FBti0020403\t908\t-1.0\tR\tsample3.srbowtie_out\n FBti0020403\t1014\t-1.0\tR\tsample3.srbowtie_out\n+FBti0020403\t1101\t0\tF\tsample3.srbowtie_out\n+FBti0019486\t0\t0\tF\tsample3.srbowtie_out\n FBti0019486\t299\t-1.0\tR\tsample3.srbowtie_out\n FBti0019486\t468\t-1.0\tR\tsample3.srbowtie_out\n FBti0019486\t529\t-1.0\tR\tsample3.srbowtie_out\n@@ -3166,7 +3408,11 @@\n FBti0019486\t715\t1.0\tF\tsample3.srbowtie_out\n FBti0019486\t784\t-1.0\tR\tsample3.srbowtie_out\n FBti0019486\t1008\t-1.0\tR\tsample3.srbowtie_out\n+FBti0019486\t1205\t0\tF\tsample3.srbowtie_out\n+FBti0019489\t0\t0\tF\tsample3.srbowtie_out\n FBti0019489\t1\t0\tF\tsample3.srbowtie_out\n+FBti0019489\t369\t0\tF\tsample3.srbowtie_out\n+FBti0019484\t0\t0\tF\tsample3.srbowtie_out\n FBti0019484\t101\t1.0\tF\tsample3.srbowtie_out\n FBti0019484\t138\t-1.0\tR\tsample3.srbowtie_out\n FBti0019484\t247\t-1.0\tR\tsample3.srbowtie_out\n@@ -3176,11 +3422,17 @@\n FBti0019484\t703\t5.0\tF\tsample3.srbowtie_out\n FBti0019484\t903\t-2.0\tR\tsample3.srbowtie_out\n FBti0019484\t952\t2.0\tF\tsample3.srbowtie_out\n+FBti0019484\t1084\t0\tF\tsample3.srbowtie_out\n+FBti0019485\t0\t0\tF\tsample3.srbowtie_out\n FBti0019485\t220\t1.0\tF\tsample3.srbowtie_out\n FBti0019485\t711\t-1.0\tR\tsample3.srbowtie_out\n FBti0019485\t796\t-1.0\tR\tsample3.srbowtie_out\n+FBti0019485\t1075\t0\tF\tsample3.srbowtie_out\n+FBti0019482\t0\t0\tF\tsample3.srbowtie_out\n 
FBti0019482\t112\t-1.0\tR\tsample3.srbowtie_out\n FBti0019482\t340\t-1.0\tR\tsample3.srbowtie_out\n+FBti0019482\t597\t0\tF\tsample3.srbowtie_out\n+FBti0020400\t0\t0\tF\tsample3.srbowtie_out\n FBti0020400\t15\t2.0\tF\tsample3.srbowtie_out\n FBti0020400\t40\t1.0\tF\tsample3.srbowtie_out\n FBti0020400\t84\t1.0\tF\tsample3.srbowtie_out\n@@ -3430,3 +3682,14 @@\n FBti0020400\t9275\t1.0\tF\tsample3.srbowtie_out\n FBti0020400\t9306\t-1.0\tR\tsample3.srbowtie_out\n FBti0020400\t9334\t-1.0\tR\tsample3.srbowtie_out\n+FBti0020400\t9398\t0\tF\tsample3.srbowtie_out\n+FBti0019480\t0\t0\tF\tsample3.srbowtie_out\n+FBti0019480\t23\t1.0\tF\tsample3.srbowtie_out\n+FBti0019480\t384\t1.0\tF\tsample3.srbowtie_out\n+FBti0019480\t493\t5.0\tF\tsample3.srbowtie_out\n+FBti0019480\t501\t-1.0\tR\tsample3.srbowtie_out\n+FBti0019480\t502\t-7.0\tR\tsample3.srbowtie_out\n+FBti0019480\t503\t-1.0\tR\tsample3.srbowtie_out\n+FBti0019480\t594\t1.0\tF\tsample3.srbowtie_out\n+FBti0019480\t619\t1.0\tF\tsample3.srbowtie_out\n+FBti0019480\t669\t0\tF\tsample3.srbowtie_out\n'
b
diff -r c9e267cb84c0 -r be0c6b6466cc test-data/Readmaps.pdf
b
Binary file test-data/Readmaps.pdf has changed
b
diff -r c9e267cb84c0 -r be0c6b6466cc test-data/Size_distribution.pdf
b
Binary file test-data/Size_distribution.pdf has changed
b
diff -r c9e267cb84c0 -r be0c6b6466cc test-data/Size_distribution_and_Readmaps.pdf
b
Binary file test-data/Size_distribution_and_Readmaps.pdf has changed
b
diff -r c9e267cb84c0 -r be0c6b6466cc test-data/Size_distribution_dataframe.tab
--- a/test-data/Size_distribution_dataframe.tab Sun Sep 18 12:55:27 2016 -0400
+++ b/test-data/Size_distribution_dataframe.tab Mon Sep 19 06:16:21 2016 -0400
b
@@ -879,6 +879,28 @@
 FBti0020400 28 16.0 F sample1.srbowtie_out
 FBti0020400 29 0.0 F sample1.srbowtie_out
 FBti0020400 30 0 F sample1.srbowtie_out
+FBti0019480 20 0 R sample1.srbowtie_out
+FBti0019480 21 0 R sample1.srbowtie_out
+FBti0019480 22 -1.0 R sample1.srbowtie_out
+FBti0019480 23 0 R sample1.srbowtie_out
+FBti0019480 24 -2.0 R sample1.srbowtie_out
+FBti0019480 25 -1.0 R sample1.srbowtie_out
+FBti0019480 26 0.0 R sample1.srbowtie_out
+FBti0019480 27 0 R sample1.srbowtie_out
+FBti0019480 28 0 R sample1.srbowtie_out
+FBti0019480 29 0 R sample1.srbowtie_out
+FBti0019480 30 0 R sample1.srbowtie_out
+FBti0019480 20 0 F sample1.srbowtie_out
+FBti0019480 21 0 F sample1.srbowtie_out
+FBti0019480 22 0.0 F sample1.srbowtie_out
+FBti0019480 23 0 F sample1.srbowtie_out
+FBti0019480 24 0.0 F sample1.srbowtie_out
+FBti0019480 25 1.0 F sample1.srbowtie_out
+FBti0019480 26 2.0 F sample1.srbowtie_out
+FBti0019480 27 0 F sample1.srbowtie_out
+FBti0019480 28 0 F sample1.srbowtie_out
+FBti0019480 29 0 F sample1.srbowtie_out
+FBti0019480 30 0 F sample1.srbowtie_out
 FBti0020401 20 -2.0 R sample2.srbowtie_out
 FBti0020401 21 0 R sample2.srbowtie_out
 FBti0020401 22 0.0 R sample2.srbowtie_out
@@ -1759,6 +1781,28 @@
 FBti0020400 28 15.0 F sample2.srbowtie_out
 FBti0020400 29 1.0 F sample2.srbowtie_out
 FBti0020400 30 0 F sample2.srbowtie_out
+FBti0019480 20 0.0 R sample2.srbowtie_out
+FBti0019480 21 0 R sample2.srbowtie_out
+FBti0019480 22 0 R sample2.srbowtie_out
+FBti0019480 23 0 R sample2.srbowtie_out
+FBti0019480 24 -2.0 R sample2.srbowtie_out
+FBti0019480 25 -1.0 R sample2.srbowtie_out
+FBti0019480 26 0.0 R sample2.srbowtie_out
+FBti0019480 27 0.0 R sample2.srbowtie_out
+FBti0019480 28 0 R sample2.srbowtie_out
+FBti0019480 29 0 R sample2.srbowtie_out
+FBti0019480 30 0 R sample2.srbowtie_out
+FBti0019480 20 1.0 F sample2.srbowtie_out
+FBti0019480 21 0 F sample2.srbowtie_out
+FBti0019480 22 0 F sample2.srbowtie_out
+FBti0019480 23 0 F sample2.srbowtie_out
+FBti0019480 24 1.0 F sample2.srbowtie_out
+FBti0019480 25 0.0 F sample2.srbowtie_out
+FBti0019480 26 1.0 F sample2.srbowtie_out
+FBti0019480 27 1.0 F sample2.srbowtie_out
+FBti0019480 28 0 F sample2.srbowtie_out
+FBti0019480 29 0 F sample2.srbowtie_out
+FBti0019480 30 0 F sample2.srbowtie_out
 FBti0020401 20 -1.0 R sample3.srbowtie_out
 FBti0020401 21 0.0 R sample3.srbowtie_out
 FBti0020401 22 -1.0 R sample3.srbowtie_out
@@ -2639,3 +2683,25 @@
 FBti0020400 28 12.0 F sample3.srbowtie_out
 FBti0020400 29 0 F sample3.srbowtie_out
 FBti0020400 30 0 F sample3.srbowtie_out
+FBti0019480 20 0 R sample3.srbowtie_out
+FBti0019480 21 -1.0 R sample3.srbowtie_out
+FBti0019480 22 0.0 R sample3.srbowtie_out
+FBti0019480 23 -2.0 R sample3.srbowtie_out
+FBti0019480 24 -5.0 R sample3.srbowtie_out
+FBti0019480 25 -1.0 R sample3.srbowtie_out
+FBti0019480 26 0.0 R sample3.srbowtie_out
+FBti0019480 27 0.0 R sample3.srbowtie_out
+FBti0019480 28 0 R sample3.srbowtie_out
+FBti0019480 29 0 R sample3.srbowtie_out
+FBti0019480 30 0 R sample3.srbowtie_out
+FBti0019480 20 0 F sample3.srbowtie_out
+FBti0019480 21 0.0 F sample3.srbowtie_out
+FBti0019480 22 1.0 F sample3.srbowtie_out
+FBti0019480 23 0.0 F sample3.srbowtie_out
+FBti0019480 24 3.0 F sample3.srbowtie_out
+FBti0019480 25 2.0 F sample3.srbowtie_out
+FBti0019480 26 2.0 F sample3.srbowtie_out
+FBti0019480 27 1.0 F sample3.srbowtie_out
+FBti0019480 28 0 F sample3.srbowtie_out
+FBti0019480 29 0 F sample3.srbowtie_out
+FBti0019480 30 0 F sample3.srbowtie_out
b
diff -r c9e267cb84c0 -r be0c6b6466cc tool_dependencies.xml
--- a/tool_dependencies.xml Sun Sep 18 12:55:27 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,18 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="bowtie" version="0.12.7">
-      <repository changeset_revision="9f9f38617a98" name="package_bowtie_0_12_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="pysam" version="0.7.7">
-      <repository changeset_revision="0a5141bdf9d0" name="package_pysam_0_7_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="numpy" version="1.9">
-        <repository changeset_revision="83d12e13dbbd" name="package_numpy_1_9" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="R" version="3.1.2">
-        <repository changeset_revision="4d2fd1413b56" name="package_r_3_1_2" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="biocbasics" version="2.14">
-        <repository changeset_revision="f0ef1a7b157e" name="package_biocbasics_2_14" owner="mvdbeek" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>