# HG changeset patch
# User iuc
# Date 1450880742 18000
# Node ID a072f0f30ea302ea061217a931f4ea2dc92c4c58
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repmatch_gff3 commit 0e04a4c237677c1f5be1950babcf8591097996a9
diff -r 000000000000 -r a072f0f30ea3 repmatch_gff3.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repmatch_gff3.py Wed Dec 23 09:25:42 2015 -0500
@@ -0,0 +1,49 @@
+# repmatch.py
+#
+# Replicate matching - matches paired peaks from two or more replicates
+#
+# Input: one or more gff files (matched_peak output from cwpair2), each a list of paired peaks from a replicate
+#
+# Output: list of matched groups and list of unmatched peaks
+# Files: statistics_table.tabular (file to replicate ID), matched_paired_peaks.tabular, detail.tabular, unmatched_peaks.tabular
+
+import argparse
+import repmatch_gff3_util
+
+if __name__ == '__main__':
+    # Command line entry point: parse the options and hand them to
+    # repmatch_gff3_util.process_files().
+    parser = argparse.ArgumentParser()
+    # Each --input takes two values, the dataset path and its Galaxy hid. The option is
+    # required because the loop below iterates over args.inputs, which argparse leaves
+    # as None when the option is never given.
+    parser.add_argument('--input', dest='inputs', action='append', nargs=2, required=True, help="Input datasets")
+    parser.add_argument('--method', dest='method', default='closest', help='Method of finding match')
+    parser.add_argument('--distance', dest='distance', type=int, default=50, help='Maximum distance between peaks in different replicates to allow merging')
+    parser.add_argument('--step', dest='step', type=int, default=0, help='Step size of distance for each iteration')
+    parser.add_argument('--replicates', dest='replicates', type=int, default=2, help='Minimum number of replicates that must be matched for merging to occur')
+    parser.add_argument('--low_limit', dest='low_limit', type=int, default=-1000, help='Lower limit for c-w distance filter')
+    parser.add_argument('--up_limit', dest='up_limit', type=int, default=1000, help='Upper limit for c-w distance filter')
+    parser.add_argument('--output_files', dest='output_files', default='all', help='Restrict output dataset collections.')
+    parser.add_argument('--output_matched_peaks', dest='output_matched_peaks', help='Matched groups in gff format')
+    parser.add_argument('--output_unmatched_peaks', dest='output_unmatched_peaks', default=None, help='Unmatched paired peaks in tabular format')
+    parser.add_argument('--output_detail', dest='output_detail', default=None, help='Details in tabular format')
+    parser.add_argument('--output_statistics_table', dest='output_statistics_table', default=None, help='Keys in tabular format')
+    parser.add_argument('--output_statistics_histogram', dest='output_statistics_histogram', default=None, help='Histogram')
+
+    args = parser.parse_args()
+
+    # Split the (path, hid) pairs into the two parallel lists process_files() expects.
+    dataset_paths = [dataset_path for (dataset_path, hid) in args.inputs]
+    hids = [hid for (dataset_path, hid) in args.inputs]
+    # Note the argument order: up_limit is passed before low_limit, matching the
+    # process_files() signature.
+    repmatch_gff3_util.process_files(dataset_paths,
+                                     hids,
+                                     args.method,
+                                     args.distance,
+                                     args.step,
+                                     args.replicates,
+                                     args.up_limit,
+                                     args.low_limit,
+                                     args.output_files,
+                                     args.output_matched_peaks,
+                                     args.output_unmatched_peaks,
+                                     args.output_detail,
+                                     args.output_statistics_table,
+                                     args.output_statistics_histogram)
diff -r 000000000000 -r a072f0f30ea3 repmatch_gff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repmatch_gff3.xml Wed Dec 23 09:25:42 2015 -0500
@@ -0,0 +1,161 @@
+
+
+ Match paired peaks from two or more replicates
+
+ repmatch_gff3_macros.xml
+
+
+
+ python $__tool_directory__/repmatch_gff3.py
+ #for $i in $input:
+ --input "${i}" "${i.hid}"
+ #end for
+ --method $method
+ --distance $distance
+ --replicates $replicates
+ --output_files $output_files_cond.output_files
+ --output_matched_peaks "$output_matched_peaks"
+ #if str($output_files_cond.output_files) in ["all", "matched_peaks_unmatched_peaks"]:
+ --output_unmatched_peaks "$output_unmatched_peaks"
+ #end if
+ #if str($output_files_cond.output_files) =="all":
+ --output_detail "$output_detail"
+ --output_statistics_table "$output_statistics_table"
+ --output_statistics_histogram "$output_statistics_histogram"
+ #end if
+ #if str($advanced_options_cond.advanced_options) == "on":
+ --step $advanced_options_cond.step
+ --low_limit $advanced_options_cond.low_limit
+ --up_limit $advanced_options_cond.up_limit
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ output_files_cond["output_files"] == "all"
+
+
+ output_files_cond["output_files"] == "all"
+
+
+ output_files_cond["output_files"] == "all"
+
+
+ output_files_cond["output_files"] in ["all", "matched_peaks_unmatched_peaks"]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+RepMatch accepts two or more input datasets, and starts by defining peak-pair midpoints in the first dataset. It then
+discovers all peak-pair midpoints in the second dataset that are within the distance, defined by the tool's **Maximum
+distance between peaks in different replicates to allow merging** parameter, from the peak-pair midpoint coordinate in
+the first dataset. When encountering multiple candidates to match (one-to-many), RepMatch uses the method defined by
+the tool's **Method of finding match** parameter so that there is at most only a one-to-one match across the two datasets.
+This method provides the following options:
+
+ * **closest** - matches only the closest one in bp distance.
+ * **largest** - matches the one that contains the largest number of reads.
+ * **all** - both methods are run separately.
+
+RepMatch matching is an iterative process, as it attempts to find the centroid coordinate amongst all replicates. As such,
+the centroid is the point of reference for "distance" and "closest". This process can be sped up by increasing the tool's
+**Step size** parameter.
+
+The minimum number of replicates that can be matched for a match to occur is defined by the tool's **Minimum number of
+replicates that must be matched for merging to occur** parameter. Additional filters can be applied using the tool's
+**Advanced options**, including a lower and upper limit for the C-W distance.
+
+.. image:: $PATH_TO_IMAGES/repmatch.png
+
+-----
+
+**Options**
+
+ * **Distance** - Maximum distance for discovering all peak-pair midpoints in a second dataset relative to the peak-pair midpoints in the first dataset
+ * **Method** - Method to use when encountering multiple candidates to match so that there is at most only a one-to-one match across the two datasets.
+ * **Step Size** - Distance for each iteration.
+ * **Replicates** - Minimum number of replicates that can be matched for a match to occur. This value must be at least 2.
+ * **Lower Limit** - Lower limit for the Crick-Watson distance filter.
+ * **Upper Limit** - Upper limit for the Crick-Watson distance filter.
+
+-----
+
+**Output Data Files**
+
+ * **Data MP** - gff file consisting of only peak pairs
+
+ - Columns are **chr**, **script**, **blank**, **peak start**, **peak end**, **blank**, **normalized tag counts**, **blank** and **info**.
+ - Peak start and end are separated by one coordinate.
+ - Normalized tag is the occupancy averaged across replicates.
+ - Attributes include C-W distance, sum total of tag counts, number of replicates merged.
+
+ * **Data D** - tabular file consisting of the list of all matched replicates.
+ * **Data UP** - tabular file consisting of all unmatched peak-pairs.
+
+**Output Statistics Files**
+
+ * **Statistics Table** - tabular file providing the description key of **Data D**.
+ * **Statistics Histogram** - graph of the number of matched locations having the indicated replicate counts.
+
+**Comments on Replicates**
+
+Three types of replicates may be considered. Biological replicates represent independently collected biological samples.
+At least two biological replicates must be performed for each experiment from which a conclusion is being drawn, and the
+conclusion must be evident in both biological replicates when analyzed separately. Technical replicates represent a re-run
+of the assay on the same biological material. This is usually done when one replicate fails to produce quality data, and is
+used to replace that earlier replicate. Sequencing replicates represent additional sequencing of the same successful library
+in order to obtain more reads should the analysis require it. The reads from individual sequencing replicates are usually
+merged without need for separate analysis.
+
+
+
+
diff -r 000000000000 -r a072f0f30ea3 repmatch_gff3_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repmatch_gff3_macros.xml Wed Dec 23 09:25:42 2015 -0500
@@ -0,0 +1,29 @@
+
+
+ 1.0
+
+
+ anaconda
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @unpublished{None,
+ author = {None},
+ title = {None},
+ year = {None},
+ eprint = {None},
+ url = {http://www.huck.psu.edu/content/research/independent-centers-excellence/center-for-eukaryotic-gene-regulation}
+ }
+
+
+
diff -r 000000000000 -r a072f0f30ea3 repmatch_gff3_util.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repmatch_gff3_util.py Wed Dec 23 09:25:42 2015 -0500
@@ -0,0 +1,462 @@
+import bisect
+import csv
+import os
+import shutil
+import sys
+import tempfile
+import matplotlib
+matplotlib.use('Agg')
+from matplotlib import pyplot
+
+# Graph settings
+# Axis labels applied by frequency_histogram().
+Y_LABEL = 'Counts'
+X_LABEL = 'Number of matched replicates'
+# Marker edge width applied to every x/y tick line in frequency_histogram().
+TICK_WIDTH = 3
+# Amount to shift the graph to make labels fit, [left, right, top, bottom]
+ADJUST = [0.180, 0.9, 0.9, 0.1]
+# Length of tick marks, use TICK_WIDTH for width
+# These rc calls run at import time and change pyplot's defaults for the whole process.
+pyplot.rc('xtick.major', size=10.00)
+pyplot.rc('ytick.major', size=10.00)
+pyplot.rc('lines', linewidth=4.00)
+pyplot.rc('axes', linewidth=3.00)
+pyplot.rc('font', family='Bitstream Vera Sans', size=32.0)
+
+# One matplotlib colour code per plotted series; frequency_histogram() indexes this by series number.
+COLORS = 'krb'
+
+
+class Replicate(object):
+    """
+    One replicate: the paired peaks read from a single tab-delimited GFF
+    dataset, grouped by chromosome name in self.chromosomes.
+    """
+
+    def __init__(self, id, dataset_path):
+        self.id = id
+        self.dataset_path = dataset_path
+        # Assigned later by perform_process(); read by Peak.normalized_value().
+        self.median = None
+        # Close the dataset as soon as it has been parsed instead of leaking the handle.
+        with open(dataset_path, 'rt') as fh:
+            self.parse(csv.reader(fh, delimiter='\t'))
+
+    def parse(self, reader):
+        """
+        Build self.chromosomes (name -> Chromosome) from an iterable of rows.
+
+        Every data row must have exactly nine fields and a cw_distance
+        attribute in the last one; otherwise ValueError / KeyError propagates.
+        """
+        self.chromosomes = {}
+        for line in reader:
+            # csv yields an empty list for a blank line; skip those as well as
+            # rows whose first field starts with '#' or a double quote.
+            if not line or line[0].startswith("#") or line[0].startswith('"'):
+                continue
+            cname, _source, _type, mid, _midplus, value, _strand, _phase, attrs = line
+            distance = int(parse_gff_attrs(attrs)['cw_distance'])
+            mid = int(mid)
+            value = float(value)
+            if cname not in self.chromosomes:
+                self.chromosomes[cname] = Chromosome(cname)
+            self.chromosomes[cname].add_peak(Peak(cname, mid, value, distance, self))
+        # Peaks are appended unsorted; sort once per chromosome and rebuild the bisect keys.
+        for chrom in self.chromosomes.values():
+            chrom.sort_by_index()
+
+    def filter(self, up_limit, low_limit):
+        """Apply Chromosome.filter(up_limit, low_limit) to every chromosome."""
+        for chrom in self.chromosomes.values():
+            chrom.filter(up_limit, low_limit)
+
+    def size(self):
+        """Return the number of peaks currently held across all chromosomes."""
+        return sum(len(c.peaks) for c in self.chromosomes.values())
+
+
+class Chromosome(object):
+    """
+    The peaks of one chromosome plus self.keys, a parallel list of their
+    midpoints that is used for bisect lookups.
+    """
+
+    def __init__(self, name):
+        self.name = name
+        self.peaks = []
+        # Defined from the start so lookups do not fail before sort_by_index() runs.
+        self.keys = []
+
+    def add_peak(self, peak):
+        # Only self.peaks is updated here; sort_by_index() rebuilds self.keys.
+        self.peaks.append(peak)
+
+    def sort_by_index(self):
+        """Sort the peaks by midpoint and rebuild the parallel key list."""
+        self.peaks.sort(key=lambda peak: peak.midpoint)
+        self.keys = make_keys(self.peaks)
+
+    def remove_peak(self, peak):
+        """
+        Remove peak from this chromosome; do nothing when no stored peak has
+        the same midpoint.
+        """
+        start = bisect.bisect_left(self.keys, peak.midpoint)
+        end = bisect.bisect_right(self.keys, peak.midpoint)
+        if start == end:
+            # No peak with this midpoint is stored.
+            return
+        # Several peaks may share a midpoint: remove the exact object when it is
+        # present, otherwise fall back to the first peak with an equal midpoint.
+        target = start
+        for i in range(start, end):
+            if self.peaks[i] is peak:
+                target = i
+                break
+        del self.keys[target]
+        del self.peaks[target]
+
+    def filter(self, up_limit, low_limit):
+        """Keep only peaks whose distance lies within [low_limit, up_limit]."""
+        self.peaks = [p for p in self.peaks if low_limit <= p.distance <= up_limit]
+        self.keys = make_keys(self.peaks)
+
+
+class Peak(object):
+    """
+    A single paired peak: chromosome name, midpoint, value, distance and
+    the replicate object it belongs to.
+    """
+
+    def __init__(self, chrom, midpoint, value, distance, replicate):
+        self.replicate = replicate
+        self.chrom = chrom
+        self.midpoint = midpoint
+        self.distance = distance
+        self.value = value
+
+    def normalized_value(self, med):
+        # Scale the value by med, then divide by the median stored on the replicate.
+        scaled = self.value * med
+        return scaled / self.replicate.median
+
+
+class PeakGroup(object):
+    """
+    A set of matched peaks, at most one per replicate, stored in self.peaks
+    as replicate id -> peak.
+    """
+
+    def __init__(self):
+        self.peaks = {}
+
+    def add_peak(self, repid, peak):
+        # A later peak for the same replicate id replaces the earlier one.
+        self.peaks[repid] = peak
+
+    @property
+    def chrom(self):
+        # list() keeps this working where dict.values() is a view that cannot be indexed.
+        return list(self.peaks.values())[0].chrom
+
+    @property
+    def midpoint(self):
+        return median([peak.midpoint for peak in self.peaks.values()])
+
+    @property
+    def num_replicates(self):
+        return len(self.peaks)
+
+    @property
+    def median_distance(self):
+        return median([peak.distance for peak in self.peaks.values()])
+
+    @property
+    def value_sum(self):
+        return sum(peak.value for peak in self.peaks.values())
+
+    def normalized_value(self, med):
+        """Return the median of the member peaks' normalized_value(med)."""
+        return median([peak.normalized_value(med) for peak in self.peaks.values()])
+
+    @property
+    def peakpeak_distance(self):
+        # Distance between the first two stored peaks; raises IndexError when
+        # the group holds fewer than two peaks.
+        members = list(self.peaks.values())
+        return abs(members[0].midpoint - members[1].midpoint)
+
+
+class FrequencyDistribution(object):
+    """
+    Counts how many times each value has been added; self.dist maps
+    value -> count.
+    """
+
+    def __init__(self, d=None):
+        # A falsy argument (None or an empty dict) starts a fresh, empty distribution.
+        self.dist = d if d else {}
+
+    def add(self, x):
+        previous = self.dist.get(x, 0)
+        self.dist[x] = previous + 1
+
+    def graph_series(self):
+        # Two parallel lists: the distinct values and their counts, in dict order.
+        x = list(self.dist.keys())
+        y = [self.dist[key] for key in x]
+        return x, y
+
+    def mode(self):
+        # The value with the highest count; the first one in dict order wins a tie.
+        best_key, best_count = max(self.dist.items(), key=lambda pair: pair[1])
+        return best_key
+
+    def size(self):
+        # Total number of values added, i.e. the sum of all counts.
+        total = 0
+        for count in self.dist.values():
+            total += count
+        return total
+
+
+def stop_err(msg):
+    """Write msg to standard error and terminate with exit status 1."""
+    err_stream = sys.stderr
+    err_stream.write(msg)
+    sys.exit(1)
+
+
+def median(data):
+    """
+    Return the median of data, or 0 when data is empty.
+
+    For an even number of items the two middle values are added and
+    divided by 2 with the plain "/" operator.
+    """
+    if not data:
+        return 0
+    ordered = sorted(data)
+    half = len(data) // 2
+    if len(data) % 2 != 0:
+        return ordered[half]
+    return (ordered[half] + ordered[half - 1]) / 2
+
+
+def make_keys(peaks):
+    """Return the midpoints of peaks as a list, in the same order."""
+    keys = []
+    for peak in peaks:
+        keys.append(peak.midpoint)
+    return keys
+
+
+def get_window(chromosome, target_peaks, distance):
+    """
+    Returns a window of all peaks from a replicate within a certain distance of
+    a peak from another replicate.
+
+    chromosome must provide the parallel lists keys and peaks, and a name.
+    target_peaks may be any non-empty iterable of peaks (a list or a dict
+    values view); an empty one raises IndexError. The return value is the
+    tuple (list of peaks inside the window, chromosome.name).
+    """
+    # Materialise first: callers pass dict.values(), which is not indexable everywhere.
+    target_peaks = list(target_peaks)
+    lower = target_peaks[0].midpoint
+    upper = target_peaks[0].midpoint
+    # Widen the window to +- distance around every target peak.
+    for peak in target_peaks:
+        lower = min(lower, peak.midpoint - distance)
+        upper = max(upper, peak.midpoint + distance)
+    # Both bounds are inclusive: bisect_left / bisect_right keep keys equal to them.
+    start_index = bisect.bisect_left(chromosome.keys, lower)
+    end_index = bisect.bisect_right(chromosome.keys, upper)
+    return (chromosome.peaks[start_index: end_index], chromosome.name)
+
+
+def match_largest(window, peak, chrum):
+    """
+    Return the peak in window with the highest value, or None when window is
+    empty or peak.chrom differs from chrum.
+    """
+    # The emptiness test comes first, so peak.chrom is not read for an empty window.
+    if not window or peak.chrom != chrum:
+        return None
+
+    def by_value(candidate):
+        return candidate.value
+
+    return max(window, key=by_value)
+
+
+def match_closest(window, peak, chrum):
+    """
+    Return the peak in window whose midpoint is nearest to peak.midpoint, or
+    None when window is empty or peak.chrom differs from chrum.
+    """
+    # The emptiness test comes first, so peak.chrom is not read for an empty window.
+    if not window or peak.chrom != chrum:
+        return None
+    reference = peak.midpoint
+
+    def gap(candidate):
+        return abs(candidate.midpoint - reference)
+
+    return min(window, key=gap)
+
+
+def frequency_histogram(freqs, dataset_path, labels=None, title=''):
+    """
+    Save a bar chart of one or more frequency distributions to dataset_path.
+
+    freqs is a sequence of objects providing graph_series(), which returns the
+    pair (x values, counts). The series are drawn side by side, and the x axis
+    runs from the highest key on the left to the lowest on the right.
+    labels and title are accepted for compatibility but are not used.
+    """
+    series = [freq.graph_series() for freq in freqs]
+    all_x = [x for xvals, yvals in series for x in xvals]
+    fig = pyplot.figure(figsize=(10, 10))
+    if all_x:
+        lo = min(all_x)
+        hi = max(all_x)
+        bar_width = 0.8 / len(series)
+        for i, (xvals, yvals) in enumerate(series):
+            # Go from high to low: mirror every key around the shared range so each
+            # count stays paired with its own key, whatever order the keys arrive in.
+            positions = [(lo + hi - x) - 0.4 + bar_width * i for x in xvals]
+            # Cycle through COLORS so more series than colours cannot raise IndexError.
+            pyplot.bar(positions, yvals, width=bar_width, color=COLORS[i % len(COLORS)])
+        ticks = list(range(lo, hi + 1))
+        # The tick at position p is labelled with the key mirrored onto p.
+        pyplot.xticks(ticks, [str(tick) for tick in reversed(ticks)])
+    pyplot.xlabel(X_LABEL)
+    pyplot.ylabel(Y_LABEL)
+    pyplot.subplots_adjust(left=ADJUST[0], right=ADJUST[1], top=ADJUST[2], bottom=ADJUST[3])
+    ax = pyplot.gca()
+    for l in ax.get_xticklines() + ax.get_yticklines():
+        l.set_markeredgewidth(TICK_WIDTH)
+    pyplot.savefig(dataset_path)
+    # Release the figure; otherwise every call leaves one more open figure behind.
+    pyplot.close(fig)
+
+
+# Maps a matching method name to the function that picks one peak from a window;
+# perform_process() looks the function up by name and process_files() uses the keys
+# to expand the 'all' method.
+METHODS = {'closest': match_closest, 'largest': match_largest}
+
+
+def gff_attrs(d):
+    """
+    Format a dict as a GFF attribute string of key=value pairs joined by ';'.
+    A falsy argument (None or an empty dict) gives '.'.
+    """
+    if d:
+        pairs = ['%s=%s' % (key, val) for key, val in d.items()]
+        return ';'.join(pairs)
+    return '.'
+
+
+def parse_gff_attrs(s):
+    """
+    Parse a GFF attribute string ("key=value;key=value") into a dict of
+    strings. The placeholder '.' gives an empty dict.
+
+    Empty items, such as the one produced by a trailing ';', are ignored, and
+    only the first '=' of an item separates the key from the value. An item
+    without any '=' still raises ValueError.
+    """
+    d = {}
+    if s == '.':
+        return d
+    for item in s.split(';'):
+        if not item:
+            continue
+        key, val = item.split('=', 1)
+        d[key] = val
+    return d
+
+
+def gff_row(cname, start, end, score, source, type='.', strand='.', phase='.', attrs=None):
+    """
+    Return one GFF record as a 9-tuple in column order: cname, source, type,
+    start, end, score, strand, phase, attributes.
+
+    attrs is an optional dict; it is formatted with gff_attrs(), so None or an
+    empty dict gives '.'. The default is None rather than a shared mutable dict.
+    """
+    return (cname, source, type, start, end, score, strand, phase, gff_attrs(attrs))
+
+
+def get_temporary_plot_path():
+    """
+    Create a new temporary directory (prefix 'tmp-repmatch-') holding an
+    empty file with a '.pdf' suffix, and return the path of that file.
+    Neither the file nor the directory is removed here.
+    """
+    plot_dir = tempfile.mkdtemp(prefix='tmp-repmatch-')
+    handle, plot_path = tempfile.mkstemp(suffix='.pdf', dir=plot_dir)
+    # Only the path is needed; close the descriptor mkstemp opened.
+    os.close(handle)
+    return plot_path
+
+
+def process_files(dataset_paths, galaxy_hids, method, distance, step, replicates, up_limit, low_limit, output_files,
+                  output_matched_peaks, output_unmatched_peaks, output_detail, output_statistics_table, output_statistics_histogram):
+    """
+    Run perform_process() once per matching method.
+
+    method is a key of METHODS, or 'all' to run every method in METHODS. All
+    runs write to the same output paths, so with 'all' the files of the last
+    method remain. When output_files and method are both 'all', a combined
+    histogram with one series per method is written to
+    output_statistics_histogram afterwards.
+
+    Nothing is done, and no output is written, when fewer than two datasets
+    are given.
+    """
+    output_statistics_histogram_file = output_files in ["all"] and method in ["all"]
+    if len(dataset_paths) < 2:
+        return
+    if method == 'all':
+        match_methods = list(METHODS.keys())
+    else:
+        match_methods = [method]
+    # Keep the result of every run so the combined histogram can show all
+    # methods, not only the last one.
+    statistics = []
+    for match_method in match_methods:
+        statistics.append(perform_process(dataset_paths,
+                                          galaxy_hids,
+                                          match_method,
+                                          distance,
+                                          step,
+                                          replicates,
+                                          up_limit,
+                                          low_limit,
+                                          output_files,
+                                          output_matched_peaks,
+                                          output_unmatched_peaks,
+                                          output_detail,
+                                          output_statistics_table,
+                                          output_statistics_histogram))
+    if output_statistics_histogram_file:
+        tmp_statistics_histogram_path = get_temporary_plot_path()
+        frequency_histogram([stat['distribution'] for stat in statistics],
+                            tmp_statistics_histogram_path,
+                            match_methods)
+        shutil.move(tmp_statistics_histogram_path, output_statistics_histogram)
+
+
+def perform_process(dataset_paths, galaxy_hids, method, distance, step, num_required, up_limit, low_limit, output_files,
+                    output_matched_peaks, output_unmatched_peaks, output_detail, output_statistics_table, output_statistics_histogram):
+    """
+    Match peaks across the replicates with one matching method and write the
+    requested outputs.
+
+    dataset_paths and galaxy_hids are parallel lists; each pair becomes one
+    Replicate. method is a key of METHODS. A group is kept only when it holds
+    peaks from at least num_required replicates. The matched peaks file is
+    always written; the detail, statistics table, unmatched peaks and
+    histogram outputs depend on output_files and on their path being given.
+
+    Returns {'distribution': FrequencyDistribution} counting matched groups by
+    number of replicates, with every unmatched peak counted under 1.
+    A dataset that cannot be parsed ends the program through stop_err().
+    """
+    output_detail_file = output_files in ["all"] and output_detail is not None
+    output_statistics_table_file = output_files in ["all"] and output_statistics_table is not None
+    output_unmatched_peaks_file = output_files in ["all", "matched_peaks_unmatched_peaks"] and output_unmatched_peaks is not None
+    output_statistics_histogram_file = output_files in ["all"] and output_statistics_histogram is not None
+    replicates = []
+    for i, dataset_path in enumerate(dataset_paths):
+        try:
+            galaxy_hid = galaxy_hids[i]
+            r = Replicate(galaxy_hid, dataset_path)
+            replicates.append(r)
+        except Exception as e:
+            stop_err('Unable to parse file "%s", exception: %s' % (dataset_path, str(e)))
+
+    # Handles opened by td_writer(), closed once all rows have been written.
+    open_files = []
+
+    def td_writer(file_path):
+        # Returns a tab-delimited writer for a certain output
+        fh = open(file_path, 'wt')
+        open_files.append(fh)
+        return csv.writer(fh, delimiter='\t')
+
+    labels = ('chrom',
+              'median midpoint',
+              'median midpoint+1',
+              'median normalized reads',
+              'replicates',
+              'median c-w distance',
+              'reads sum')
+    # The detail header repeats one set of per-peak columns for every replicate.
+    for replicate in replicates:
+        labels += ('chrom',
+                   'median midpoint',
+                   'median midpoint+1',
+                   'c-w sum',
+                   'c-w distance',
+                   'replicate id')
+    matched_peaks_output = td_writer(output_matched_peaks)
+    if output_statistics_table_file:
+        statistics_table_output = td_writer(output_statistics_table)
+        statistics_table_output.writerow(('data', 'median read count'))
+    if output_detail_file:
+        detail_output = td_writer(output_detail)
+        detail_output.writerow(labels)
+    if output_unmatched_peaks_file:
+        unmatched_peaks_output = td_writer(output_unmatched_peaks)
+        unmatched_peaks_output.writerow(('chrom', 'midpoint', 'midpoint+1', 'c-w sum', 'c-w distance', 'replicate id'))
+    # Perform filtering
+    if up_limit < 1000 or low_limit > -1000:
+        for replicate in replicates:
+            replicate.filter(up_limit, low_limit)
+    # Actually merge the peaks
+    peak_groups = []
+    unmatched_peaks = []
+    freq = FrequencyDistribution()
+
+    def do_match(reps, distance):
+        # Copy list because we will mutate it, but keep replicate references.
+        reps = reps[:]
+        while len(reps) > 1:
+            # Iterate over each replicate as "main"
+            main = reps[0]
+            reps.remove(main)
+            for chromosome in main.chromosomes.values():
+                # Sort main replicate by value
+                peaks_by_value = sorted(chromosome.peaks, key=lambda peak: -peak.value)
+
+                def search_for_matches(group, anchor):
+                    # Here we use multiple passes, expanding the window to be
+                    # +- distance from any previously matched peak.
+                    # anchor supplies the chrom and midpoint the matching method compares
+                    # against. It is passed explicitly; reading a loop variable of the
+                    # enclosing scope gave a stale or unbound value when enlarging groups.
+                    while True:
+                        new_match = False
+                        for replicate in reps:
+                            if replicate.id in group.peaks:
+                                # Stop if match already found for this replicate
+                                continue
+                            try:
+                                # Lines changed to remove a major bug by Rohit Reja.
+                                window, chrum = get_window(replicate.chromosomes[chromosome.name],
+                                                           list(group.peaks.values()),
+                                                           distance)
+                                match = METHODS[method](window, anchor, chrum)
+                            except KeyError:
+                                # This replicate has no peaks on the chromosome.
+                                continue
+                            if match:
+                                group.add_peak(replicate.id, match)
+                                new_match = True
+                        if not new_match:
+                            break
+                # Attempt to enlarge existing peak groups. The group itself is the
+                # anchor: it exposes chrom and midpoint (the median of its members).
+                for group in peak_groups:
+                    old_peaks = list(group.peaks.values())
+                    search_for_matches(group, group)
+                    for added in list(group.peaks.values()):
+                        if added not in old_peaks:
+                            added.replicate.chromosomes[chromosome.name].remove_peak(added)
+                # Attempt to find new peaks groups. For each peak in the
+                # main replicate, search for matches in the other replicates
+                for peak in peaks_by_value:
+                    matches = PeakGroup()
+                    matches.add_peak(main.id, peak)
+                    search_for_matches(matches, peak)
+                    # Were enough replicates matched?
+                    if matches.num_replicates >= num_required:
+                        # Matched peaks leave their chromosomes so they cannot be matched again.
+                        for matched in list(matches.peaks.values()):
+                            matched.replicate.chromosomes[chromosome.name].remove_peak(matched)
+                        peak_groups.append(matches)
+    # Zero or less = no stepping
+    if step <= 0:
+        do_match(replicates, distance)
+    else:
+        # NOTE(review): range() stops before `distance`, so the full distance itself is
+        # never tried when stepping - confirm this is intended.
+        for d in range(0, distance, step):
+            do_match(replicates, d)
+    for group in peak_groups:
+        freq.add(group.num_replicates)
+    # Collect together the remaining unmatched_peaks
+    for replicate in replicates:
+        for chromosome in replicate.chromosomes.values():
+            for peak in chromosome.peaks:
+                freq.add(1)
+                unmatched_peaks.append(peak)
+    # Median value over all matched peaks, and per replicate; both are used to
+    # normalize the matched values below.
+    med = median([peak.value for group in peak_groups for peak in group.peaks.values()])
+    for replicate in replicates:
+        replicate.median = median([peak.value for group in peak_groups for peak in group.peaks.values() if peak.replicate == replicate])
+        # The statistics table writer only exists when that output was requested.
+        if output_statistics_table_file:
+            statistics_table_output.writerow((replicate.id, replicate.median))
+    for group in peak_groups:
+        # Output matched_peaks (matched pairs).
+        matched_peaks_output.writerow(gff_row(cname=group.chrom,
+                                              start=group.midpoint,
+                                              end=group.midpoint+1,
+                                              source='repmatch',
+                                              score=group.normalized_value(med),
+                                              attrs={'median_distance': group.median_distance,
+                                                     'replicates': group.num_replicates,
+                                                     'value_sum': group.value_sum}))
+        if output_detail_file:
+            matched_peaks = (group.chrom,
+                             group.midpoint,
+                             group.midpoint+1,
+                             group.normalized_value(med),
+                             group.num_replicates,
+                             group.median_distance,
+                             group.value_sum)
+            for peak in group.peaks.values():
+                matched_peaks += (peak.chrom, peak.midpoint, peak.midpoint+1, peak.value, peak.distance, peak.replicate.id)
+            detail_output.writerow(matched_peaks)
+    if output_unmatched_peaks_file:
+        for unmatched_peak in unmatched_peaks:
+            unmatched_peaks_output.writerow((unmatched_peak.chrom,
+                                             unmatched_peak.midpoint,
+                                             unmatched_peak.midpoint+1,
+                                             unmatched_peak.value,
+                                             unmatched_peak.distance,
+                                             unmatched_peak.replicate.id))
+    # Flush and close every tabular output before returning.
+    for fh in open_files:
+        fh.close()
+    if output_statistics_histogram_file:
+        tmp_statistics_histogram_path = get_temporary_plot_path()
+        frequency_histogram([freq], tmp_statistics_histogram_path)
+        shutil.move(tmp_statistics_histogram_path, output_statistics_histogram)
+    return {'distribution': freq}
diff -r 000000000000 -r a072f0f30ea3 static/images/repmatch.png
Binary file static/images/repmatch.png has changed
diff -r 000000000000 -r a072f0f30ea3 test-data/closest_matched_pairs_input1.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/closest_matched_pairs_input1.gff Wed Dec 23 09:25:42 2015 -0500
@@ -0,0 +1,66 @@
+chr1 cwpair . 59 60 2881.0 . . cw_distance=2
+chr1 cwpair . 123 124 4204.0 . . cw_distance=52
+chr1 cwpair . 156 157 2177.0 . . cw_distance=59
+chr1 cwpair . 218 219 4022.0 . . cw_distance=14
+chr1 cwpair . 265 266 2474.0 . . cw_distance=48
+chr1 cwpair . 268 269 4088.0 . . cw_distance=6
+chr1 cwpair . 325 326 1171.0 . . cw_distance=16
+chr1 cwpair . 370 371 899.0 . . cw_distance=25
+chr1 cwpair . 388 389 359.0 . . cw_distance=20
+chr1 cwpair . 452 453 504.0 . . cw_distance=8
+chr1 cwpair . 500 501 569.0 . . cw_distance=-44
+chr1 cwpair . 668 669 319.0 . . cw_distance=-48
+chr1 cwpair . 6218 6219 2125.0 . . cw_distance=91
+chr1 cwpair . 6454 6455 1249.0 . . cw_distance=63
+chr1 cwpair . 6714 6715 433.0 . . cw_distance=-4
+chr1 cwpair . 19213 19214 778.0 . . cw_distance=-25
+chr1 cwpair . 22580 22581 863.0 . . cw_distance=-2
+chr1 cwpair . 25305 25306 1183.0 . . cw_distance=99
+chr1 cwpair . 31670 31671 490.0 . . cw_distance=66
+chr1 cwpair . 32483 32484 478.0 . . cw_distance=48
+chr1 cwpair . 39076 39077 1350.0 . . cw_distance=-29
+chr1 cwpair . 39237 39238 362.0 . . cw_distance=61
+chr1 cwpair . 45670 45671 493.0 . . cw_distance=-35
+chr1 cwpair . 55548 55549 956.0 . . cw_distance=86
+chr1 cwpair . 59228 59229 565.0 . . cw_distance=56
+chr1 cwpair . 65160 65161 618.0 . . cw_distance=-4
+chr1 cwpair . 70792 70793 2146.0 . . cw_distance=12
+chr1 cwpair . 72731 72732 710.0 . . cw_distance=100
+chr1 cwpair . 72805 72806 869.0 . . cw_distance=29
+chr1 cwpair . 86982 86983 2013.0 . . cw_distance=37
+chr1 cwpair . 87044 87045 1191.0 . . cw_distance=30
+chr1 cwpair . 87109 87110 2259.0 . . cw_distance=3
+chr1 cwpair . 87162 87163 5531.0 . . cw_distance=11
+chr1 cwpair . 87194 87195 3643.0 . . cw_distance=27
+chr1 cwpair . 92421 92422 1388.0 . . cw_distance=0
+chr1 cwpair . 92567 92568 789.0 . . cw_distance=28
+chr1 cwpair . 92645 92646 2397.0 . . cw_distance=8
+chr1 cwpair . 95955 95956 689.0 . . cw_distance=51
+chr1 cwpair . 96919 96920 12.0 . . cw_distance=3
+chr1 cwpair . 98551 98552 122.0 . . cw_distance=27
+chr1 cwpair . 101399 101400 2361.0 . . cw_distance=-44
+chr1 cwpair . 106047 106048 572.0 . . cw_distance=7
+chr1 cwpair . 108611 108612 573.0 . . cw_distance=-45
+chr1 cwpair . 113782 113783 716.0 . . cw_distance=-20
+chr1 cwpair . 116649 116650 773.0 . . cw_distance=-41
+chr1 cwpair . 124306 124307 761.0 . . cw_distance=-43
+chr1 cwpair . 134230 134231 659.0 . . cw_distance=100
+chr1 cwpair . 136369 136370 365.0 . . cw_distance=-14
+chr1 cwpair . 138876 138877 711.0 . . cw_distance=-4
+chr1 cwpair . 139230 139231 1179.0 . . cw_distance=15
+chr1 cwpair . 151365 151366 595.0 . . cw_distance=-28
+chr1 cwpair . 155079 155080 1573.0 . . cw_distance=83
+chr1 cwpair . 169095 169096 1887.0 . . cw_distance=-43
+chr1 cwpair . 170134 170135 657.0 . . cw_distance=10
+chr1 cwpair . 173276 173277 546.0 . . cw_distance=8
+chr1 cwpair . 180331 180332 97.0 . . cw_distance=82
+chr1 cwpair . 185109 185110 1371.0 . . cw_distance=46
+chr1 cwpair . 197535 197536 5.0 . . cw_distance=73
+chr1 cwpair . 199413 199414 810.0 . . cw_distance=-30
+chr1 cwpair . 203863 203864 1476.0 . . cw_distance=-37
+chr1 cwpair . 228672 228673 626.0 . . cw_distance=58
+chr1 cwpair . 229759 229760 4531.0 . . cw_distance=16
+chr1 cwpair . 229762 229763 699.0 . . cw_distance=63
+chr1 cwpair . 230125 230126 44.0 . . cw_distance=10
+chr1 cwpair . 230157 230158 15.0 . . cw_distance=5
+chr1 cwpair . 230178 230179 56.0 . . cw_distance=10
diff -r 000000000000 -r a072f0f30ea3 test-data/detail_out1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/detail_out1.tabular Wed Dec 23 09:25:42 2015 -0500
@@ -0,0 +1,65 @@
+chrom median midpoint median midpoint+1 median normalized reads replicates median c-w distance reads sum chrom median midpoint median midpoint+1 c-w sum c-w distance replicate id chrom median midpoint median midpoint+1 c-w sum c-w distance replicate id
+chr1 87168 87169 4488.704113924051 2 -1 9006.0 chr1 87162 87163 5531.0 11 1 chr1 87174 87175 3475.0 -13 2
+chr1 229759 229760 4512.3598101265825 2 16 9062.0 chr1 229759 229760 4531.0 16 1 chr1 229759 229760 4531.0 16 2
+chr1 123 124 4186.70506329114 2 52 8408.0 chr1 123 124 4204.0 52 1 chr1 123 124 4204.0 52 2
+chr1 262 263 3246.0278481012656 2 18 6512.0 chr1 268 269 4088.0 6 1 chr1 256 257 2424.0 30 2
+chr1 231 232 4699.198417721519 2 -13 9443.0 chr1 218 219 4022.0 14 1 chr1 245 246 5421.0 -40 2
+chr1 87188 87189 4647.554746835443 2 39 9342.0 chr1 87194 87195 3643.0 27 1 chr1 87182 87183 5699.0 51 2
+chr1 59 60 2869.1477848101267 2 2 5762.0 chr1 59 60 2881.0 2 1 chr1 59 60 2881.0 2 2
+chr1 257 258 2595.2319620253165 2 63 5213.0 chr1 265 266 2474.0 48 1 chr1 250 251 2739.0 78 2
+chr1 92651 92652 1420.1610759493672 2 20 2844.0 chr1 92645 92646 2397.0 8 1 chr1 92657 92658 447.0 33 2
+chr1 101399 101400 2351.2870253164556 2 -44 4722.0 chr1 101399 101400 2361.0 -44 1 chr1 101399 101400 2361.0 -44 2
+chr1 87109 87110 2249.7066455696204 2 3 4518.0 chr1 87109 87110 2259.0 3 1 chr1 87109 87110 2259.0 3 2
+chr1 156 157 2168.043987341772 2 59 4354.0 chr1 156 157 2177.0 59 1 chr1 156 157 2177.0 59 2
+chr1 70792 70793 2137.171518987342 2 12 4292.0 chr1 70792 70793 2146.0 12 1 chr1 70792 70793 2146.0 12 2
+chr1 6218 6219 2116.257911392405 2 91 4250.0 chr1 6218 6219 2125.0 91 1 chr1 6218 6219 2125.0 91 2
+chr1 86996 86997 2181.75 2 66 4383.0 chr1 86982 86983 2013.0 37 1 chr1 87011 87012 2370.0 95 2
+chr1 169095 169096 1879.2370253164559 2 -43 3774.0 chr1 169095 169096 1887.0 -43 1 chr1 169095 169096 1887.0 -43 2
+chr1 155079 155080 1566.5287974683545 2 83 3146.0 chr1 155079 155080 1573.0 83 1 chr1 155079 155080 1573.0 83 2
+chr1 203863 203864 1469.9278481012657 2 -37 2952.0 chr1 203863 203864 1476.0 -37 1 chr1 203863 203864 1476.0 -37 2
+chr1 92421 92422 1382.2898734177215 2 0 2776.0 chr1 92421 92422 1388.0 0 1 chr1 92421 92422 1388.0 0 2
+chr1 185109 185110 1365.3598101265823 2 46 2742.0 chr1 185109 185110 1371.0 46 1 chr1 185109 185110 1371.0 46 2
+chr1 39076 39077 1344.4462025316457 2 -29 2700.0 chr1 39076 39077 1350.0 -29 1 chr1 39076 39077 1350.0 -29 2
+chr1 6454 6455 1243.8617088607593 2 63 2498.0 chr1 6454 6455 1249.0 63 1 chr1 6454 6455 1249.0 63 2
+chr1 87029 87030 1009.0689873417721 2 1 2025.0 chr1 87044 87045 1191.0 30 1 chr1 87015 87016 834.0 -28 2
+chr1 25305 25306 1178.1332278481013 2 99 2366.0 chr1 25305 25306 1183.0 99 1 chr1 25305 25306 1183.0 99 2
+chr1 139230 139231 1174.1496835443038 2 15 2358.0 chr1 139230 139231 1179.0 15 1 chr1 139230 139231 1179.0 15 2
+chr1 335 336 1173.125 2 -5 2356.0 chr1 325 326 1171.0 16 1 chr1 345 346 1185.0 -25 2
+chr1 55548 55549 952.067088607595 2 86 1912.0 chr1 55548 55549 956.0 86 1 chr1 55548 55549 956.0 86 2
+chr1 360 361 888.3591772151899 2 45 1784.0 chr1 370 371 899.0 25 1 chr1 350 351 885.0 66 2
+chr1 72795 72796 961.6268987341772 2 9 1932.0 chr1 72805 72806 869.0 29 1 chr1 72786 72787 1063.0 -10 2
+chr1 22580 22581 859.4496835443038 2 -2 1726.0 chr1 22580 22581 863.0 -2 1 chr1 22580 22581 863.0 -2 2
+chr1 199413 199414 806.6677215189873 2 -30 1620.0 chr1 199413 199414 810.0 -30 1 chr1 199413 199414 810.0 -30 2
+chr1 92584 92585 1800.832911392405 2 62 3625.0 chr1 92567 92568 789.0 28 1 chr1 92601 92602 2836.0 96 2
+chr1 19213 19214 774.7993670886076 2 -25 1556.0 chr1 19213 19214 778.0 -25 1 chr1 19213 19214 778.0 -25 2
+chr1 116649 116650 769.8199367088607 2 -41 1546.0 chr1 116649 116650 773.0 -41 1 chr1 116649 116650 773.0 -41 2
+chr1 124306 124307 757.8693037974683 2 -43 1522.0 chr1 124306 124307 761.0 -43 1 chr1 124306 124307 761.0 -43 2
+chr1 113782 113783 713.0544303797469 2 -20 1432.0 chr1 113782 113783 716.0 -20 1 chr1 113782 113783 716.0 -20 2
+chr1 138876 138877 708.075 2 -4 1422.0 chr1 138876 138877 711.0 -4 1 chr1 138876 138877 711.0 -4 2
+chr1 229762 229763 696.1243670886076 2 63 1398.0 chr1 229762 229763 699.0 63 1 chr1 229762 229763 699.0 63 2
+chr1 95955 95956 686.1655063291139 2 51 1378.0 chr1 95955 95956 689.0 51 1 chr1 95955 95956 689.0 51 2
+chr1 134230 134231 656.2889240506329 2 100 1318.0 chr1 134230 134231 659.0 100 1 chr1 134230 134231 659.0 100 2
+chr1 170134 170135 654.2971518987342 2 10 1314.0 chr1 170134 170135 657.0 10 1 chr1 170134 170135 657.0 10 2
+chr1 228672 228673 623.4246835443038 2 58 1252.0 chr1 228672 228673 626.0 58 1 chr1 228672 228673 626.0 58 2
+chr1 65160 65161 615.4575949367088 2 -4 1236.0 chr1 65160 65161 618.0 -4 1 chr1 65160 65161 618.0 -4 2
+chr1 151365 151366 592.5522151898734 2 -28 1190.0 chr1 151365 151366 595.0 -28 1 chr1 151365 151366 595.0 -28 2
+chr1 108611 108612 570.6427215189874 2 -45 1146.0 chr1 108611 108612 573.0 -45 1 chr1 108611 108612 573.0 -45 2
+chr1 106047 106048 569.646835443038 2 7 1144.0 chr1 106047 106048 572.0 7 1 chr1 106047 106048 572.0 7 2
+chr1 481 482 682.2006329113924 2 -7 1371.0 chr1 500 501 569.0 -44 1 chr1 463 464 802.0 30 2
+chr1 59228 59229 562.6756329113924 2 56 1130.0 chr1 59228 59229 565.0 56 1 chr1 59228 59229 565.0 56 2
+chr1 173276 173277 543.7537974683544 2 8 1092.0 chr1 173276 173277 546.0 8 1 chr1 173276 173277 546.0 8 2
+chr1 434 435 431.5107594936709 2 43 866.0 chr1 452 453 504.0 8 1 chr1 417 418 362.0 78 2
+chr1 45670 45671 490.971835443038 2 -35 986.0 chr1 45670 45671 493.0 -35 1 chr1 45670 45671 493.0 -35 2
+chr1 31670 31671 487.9841772151899 2 66 980.0 chr1 31670 31671 490.0 66 1 chr1 31670 31671 490.0 66 2
+chr1 32483 32484 476.0335443037975 2 48 956.0 chr1 32483 32484 478.0 48 1 chr1 32483 32484 478.0 48 2
+chr1 6714 6715 431.218670886076 2 -4 866.0 chr1 6714 6715 433.0 -4 1 chr1 6714 6715 433.0 -4 2
+chr1 136369 136370 363.498417721519 2 -14 730.0 chr1 136369 136370 365.0 -14 1 chr1 136369 136370 365.0 -14 2
+chr1 39237 39238 360.5107594936709 2 61 724.0 chr1 39237 39238 362.0 61 1 chr1 39237 39238 362.0 61 2
+chr1 668 669 317.6876582278481 2 -48 638.0 chr1 668 669 319.0 -48 1 chr1 668 669 319.0 -48 2
+chr1 98551 98552 121.49810126582278 2 27 244.0 chr1 98551 98552 122.0 27 1 chr1 98551 98552 122.0 27 2
+chr1 180331 180332 96.60094936708862 2 82 194.0 chr1 180331 180332 97.0 82 1 chr1 180331 180332 97.0 82 2
+chr1 230172 230173 42.87658227848101 2 -2 86.0 chr1 230178 230179 56.0 10 1 chr1 230166 230167 30.0 -13 2
+chr1 230133 230134 26.95886075949367 2 -8 54.0 chr1 230125 230126 44.0 10 1 chr1 230142 230143 10.0 -25 2
+chr1 230154 230155 44.69145569620253 2 34 90.0 chr1 230157 230158 15.0 5 1 chr1 230151 230152 75.0 63 2
+chr1 96919 96920 11.950632911392404 2 3 24.0 chr1 96919 96920 12.0 3 1 chr1 96919 96920 12.0 3 2
+chr1 197535 197536 4.9794303797468356 2 73 10.0 chr1 197535 197536 5.0 73 1 chr1 197535 197536 5.0 73 2
diff -r 000000000000 -r a072f0f30ea3 test-data/largest_matched_pairs_input1.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/largest_matched_pairs_input1.gff Wed Dec 23 09:25:42 2015 -0500
@@ -0,0 +1,64 @@
+chr1 cwpair . 59 60 2881.0 . . cw_distance=2
+chr1 cwpair . 123 124 4204.0 . . cw_distance=52
+chr1 cwpair . 156 157 2177.0 . . cw_distance=59
+chr1 cwpair . 245 246 5421.0 . . cw_distance=-40
+chr1 cwpair . 250 251 2739.0 . . cw_distance=78
+chr1 cwpair . 256 257 2424.0 . . cw_distance=30
+chr1 cwpair . 345 346 1185.0 . . cw_distance=-25
+chr1 cwpair . 350 351 885.0 . . cw_distance=66
+chr1 cwpair . 417 418 362.0 . . cw_distance=78
+chr1 cwpair . 463 464 802.0 . . cw_distance=30
+chr1 cwpair . 668 669 319.0 . . cw_distance=-48
+chr1 cwpair . 6218 6219 2125.0 . . cw_distance=91
+chr1 cwpair . 6454 6455 1249.0 . . cw_distance=63
+chr1 cwpair . 6714 6715 433.0 . . cw_distance=-4
+chr1 cwpair . 19213 19214 778.0 . . cw_distance=-25
+chr1 cwpair . 22580 22581 863.0 . . cw_distance=-2
+chr1 cwpair . 25305 25306 1183.0 . . cw_distance=99
+chr1 cwpair . 31670 31671 490.0 . . cw_distance=66
+chr1 cwpair . 32483 32484 478.0 . . cw_distance=48
+chr1 cwpair . 39076 39077 1350.0 . . cw_distance=-29
+chr1 cwpair . 39237 39238 362.0 . . cw_distance=61
+chr1 cwpair . 45670 45671 493.0 . . cw_distance=-35
+chr1 cwpair . 55548 55549 956.0 . . cw_distance=86
+chr1 cwpair . 59228 59229 565.0 . . cw_distance=56
+chr1 cwpair . 65160 65161 618.0 . . cw_distance=-4
+chr1 cwpair . 70792 70793 2146.0 . . cw_distance=12
+chr1 cwpair . 72786 72787 1063.0 . . cw_distance=-10
+chr1 cwpair . 87011 87012 2370.0 . . cw_distance=95
+chr1 cwpair . 87015 87016 834.0 . . cw_distance=-28
+chr1 cwpair . 87109 87110 2259.0 . . cw_distance=3
+chr1 cwpair . 87174 87175 3475.0 . . cw_distance=-13
+chr1 cwpair . 87182 87183 5699.0 . . cw_distance=51
+chr1 cwpair . 92421 92422 1388.0 . . cw_distance=0
+chr1 cwpair . 92601 92602 2836.0 . . cw_distance=96
+chr1 cwpair . 92657 92658 447.0 . . cw_distance=33
+chr1 cwpair . 95955 95956 689.0 . . cw_distance=51
+chr1 cwpair . 96919 96920 12.0 . . cw_distance=3
+chr1 cwpair . 98551 98552 122.0 . . cw_distance=27
+chr1 cwpair . 101399 101400 2361.0 . . cw_distance=-44
+chr1 cwpair . 106047 106048 572.0 . . cw_distance=7
+chr1 cwpair . 108611 108612 573.0 . . cw_distance=-45
+chr1 cwpair . 113782 113783 716.0 . . cw_distance=-20
+chr1 cwpair . 116649 116650 773.0 . . cw_distance=-41
+chr1 cwpair . 124306 124307 761.0 . . cw_distance=-43
+chr1 cwpair . 134230 134231 659.0 . . cw_distance=100
+chr1 cwpair . 136369 136370 365.0 . . cw_distance=-14
+chr1 cwpair . 138876 138877 711.0 . . cw_distance=-4
+chr1 cwpair . 139230 139231 1179.0 . . cw_distance=15
+chr1 cwpair . 151365 151366 595.0 . . cw_distance=-28
+chr1 cwpair . 155079 155080 1573.0 . . cw_distance=83
+chr1 cwpair . 169095 169096 1887.0 . . cw_distance=-43
+chr1 cwpair . 170134 170135 657.0 . . cw_distance=10
+chr1 cwpair . 173276 173277 546.0 . . cw_distance=8
+chr1 cwpair . 180331 180332 97.0 . . cw_distance=82
+chr1 cwpair . 185109 185110 1371.0 . . cw_distance=46
+chr1 cwpair . 197535 197536 5.0 . . cw_distance=73
+chr1 cwpair . 199413 199414 810.0 . . cw_distance=-30
+chr1 cwpair . 203863 203864 1476.0 . . cw_distance=-37
+chr1 cwpair . 228672 228673 626.0 . . cw_distance=58
+chr1 cwpair . 229759 229760 4531.0 . . cw_distance=16
+chr1 cwpair . 229762 229763 699.0 . . cw_distance=63
+chr1 cwpair . 230142 230143 10.0 . . cw_distance=-25
+chr1 cwpair . 230151 230152 75.0 . . cw_distance=63
+chr1 cwpair . 230166 230167 30.0 . . cw_distance=-13
diff -r 000000000000 -r a072f0f30ea3 test-data/matched_peaks_out1.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/matched_peaks_out1.gff Wed Dec 23 09:25:42 2015 -0500
@@ -0,0 +1,64 @@
+chr1 repmatch . 87168 87169 4488.704113924051 . . median_distance=-1;value_sum=9006.0;replicates=2
+chr1 repmatch . 229759 229760 4512.3598101265825 . . median_distance=16;value_sum=9062.0;replicates=2
+chr1 repmatch . 123 124 4186.70506329114 . . median_distance=52;value_sum=8408.0;replicates=2
+chr1 repmatch . 262 263 3246.0278481012656 . . median_distance=18;value_sum=6512.0;replicates=2
+chr1 repmatch . 231 232 4699.198417721519 . . median_distance=-13;value_sum=9443.0;replicates=2
+chr1 repmatch . 87188 87189 4647.554746835443 . . median_distance=39;value_sum=9342.0;replicates=2
+chr1 repmatch . 59 60 2869.1477848101267 . . median_distance=2;value_sum=5762.0;replicates=2
+chr1 repmatch . 257 258 2595.2319620253165 . . median_distance=63;value_sum=5213.0;replicates=2
+chr1 repmatch . 92651 92652 1420.1610759493672 . . median_distance=20;value_sum=2844.0;replicates=2
+chr1 repmatch . 101399 101400 2351.2870253164556 . . median_distance=-44;value_sum=4722.0;replicates=2
+chr1 repmatch . 87109 87110 2249.7066455696204 . . median_distance=3;value_sum=4518.0;replicates=2
+chr1 repmatch . 156 157 2168.043987341772 . . median_distance=59;value_sum=4354.0;replicates=2
+chr1 repmatch . 70792 70793 2137.171518987342 . . median_distance=12;value_sum=4292.0;replicates=2
+chr1 repmatch . 6218 6219 2116.257911392405 . . median_distance=91;value_sum=4250.0;replicates=2
+chr1 repmatch . 86996 86997 2181.75 . . median_distance=66;value_sum=4383.0;replicates=2
+chr1 repmatch . 169095 169096 1879.2370253164559 . . median_distance=-43;value_sum=3774.0;replicates=2
+chr1 repmatch . 155079 155080 1566.5287974683545 . . median_distance=83;value_sum=3146.0;replicates=2
+chr1 repmatch . 203863 203864 1469.9278481012657 . . median_distance=-37;value_sum=2952.0;replicates=2
+chr1 repmatch . 92421 92422 1382.2898734177215 . . median_distance=0;value_sum=2776.0;replicates=2
+chr1 repmatch . 185109 185110 1365.3598101265823 . . median_distance=46;value_sum=2742.0;replicates=2
+chr1 repmatch . 39076 39077 1344.4462025316457 . . median_distance=-29;value_sum=2700.0;replicates=2
+chr1 repmatch . 6454 6455 1243.8617088607593 . . median_distance=63;value_sum=2498.0;replicates=2
+chr1 repmatch . 87029 87030 1009.0689873417721 . . median_distance=1;value_sum=2025.0;replicates=2
+chr1 repmatch . 25305 25306 1178.1332278481013 . . median_distance=99;value_sum=2366.0;replicates=2
+chr1 repmatch . 139230 139231 1174.1496835443038 . . median_distance=15;value_sum=2358.0;replicates=2
+chr1 repmatch . 335 336 1173.125 . . median_distance=-5;value_sum=2356.0;replicates=2
+chr1 repmatch . 55548 55549 952.067088607595 . . median_distance=86;value_sum=1912.0;replicates=2
+chr1 repmatch . 360 361 888.3591772151899 . . median_distance=45;value_sum=1784.0;replicates=2
+chr1 repmatch . 72795 72796 961.6268987341772 . . median_distance=9;value_sum=1932.0;replicates=2
+chr1 repmatch . 22580 22581 859.4496835443038 . . median_distance=-2;value_sum=1726.0;replicates=2
+chr1 repmatch . 199413 199414 806.6677215189873 . . median_distance=-30;value_sum=1620.0;replicates=2
+chr1 repmatch . 92584 92585 1800.832911392405 . . median_distance=62;value_sum=3625.0;replicates=2
+chr1 repmatch . 19213 19214 774.7993670886076 . . median_distance=-25;value_sum=1556.0;replicates=2
+chr1 repmatch . 116649 116650 769.8199367088607 . . median_distance=-41;value_sum=1546.0;replicates=2
+chr1 repmatch . 124306 124307 757.8693037974683 . . median_distance=-43;value_sum=1522.0;replicates=2
+chr1 repmatch . 113782 113783 713.0544303797469 . . median_distance=-20;value_sum=1432.0;replicates=2
+chr1 repmatch . 138876 138877 708.075 . . median_distance=-4;value_sum=1422.0;replicates=2
+chr1 repmatch . 229762 229763 696.1243670886076 . . median_distance=63;value_sum=1398.0;replicates=2
+chr1 repmatch . 95955 95956 686.1655063291139 . . median_distance=51;value_sum=1378.0;replicates=2
+chr1 repmatch . 134230 134231 656.2889240506329 . . median_distance=100;value_sum=1318.0;replicates=2
+chr1 repmatch . 170134 170135 654.2971518987342 . . median_distance=10;value_sum=1314.0;replicates=2
+chr1 repmatch . 228672 228673 623.4246835443038 . . median_distance=58;value_sum=1252.0;replicates=2
+chr1 repmatch . 65160 65161 615.4575949367088 . . median_distance=-4;value_sum=1236.0;replicates=2
+chr1 repmatch . 151365 151366 592.5522151898734 . . median_distance=-28;value_sum=1190.0;replicates=2
+chr1 repmatch . 108611 108612 570.6427215189874 . . median_distance=-45;value_sum=1146.0;replicates=2
+chr1 repmatch . 106047 106048 569.646835443038 . . median_distance=7;value_sum=1144.0;replicates=2
+chr1 repmatch . 481 482 682.2006329113924 . . median_distance=-7;value_sum=1371.0;replicates=2
+chr1 repmatch . 59228 59229 562.6756329113924 . . median_distance=56;value_sum=1130.0;replicates=2
+chr1 repmatch . 173276 173277 543.7537974683544 . . median_distance=8;value_sum=1092.0;replicates=2
+chr1 repmatch . 434 435 431.5107594936709 . . median_distance=43;value_sum=866.0;replicates=2
+chr1 repmatch . 45670 45671 490.971835443038 . . median_distance=-35;value_sum=986.0;replicates=2
+chr1 repmatch . 31670 31671 487.9841772151899 . . median_distance=66;value_sum=980.0;replicates=2
+chr1 repmatch . 32483 32484 476.0335443037975 . . median_distance=48;value_sum=956.0;replicates=2
+chr1 repmatch . 6714 6715 431.218670886076 . . median_distance=-4;value_sum=866.0;replicates=2
+chr1 repmatch . 136369 136370 363.498417721519 . . median_distance=-14;value_sum=730.0;replicates=2
+chr1 repmatch . 39237 39238 360.5107594936709 . . median_distance=61;value_sum=724.0;replicates=2
+chr1 repmatch . 668 669 317.6876582278481 . . median_distance=-48;value_sum=638.0;replicates=2
+chr1 repmatch . 98551 98552 121.49810126582278 . . median_distance=27;value_sum=244.0;replicates=2
+chr1 repmatch . 180331 180332 96.60094936708862 . . median_distance=82;value_sum=194.0;replicates=2
+chr1 repmatch . 230172 230173 42.87658227848101 . . median_distance=-2;value_sum=86.0;replicates=2
+chr1 repmatch . 230133 230134 26.95886075949367 . . median_distance=-8;value_sum=54.0;replicates=2
+chr1 repmatch . 230154 230155 44.69145569620253 . . median_distance=34;value_sum=90.0;replicates=2
+chr1 repmatch . 96919 96920 11.950632911392404 . . median_distance=3;value_sum=24.0;replicates=2
+chr1 repmatch . 197535 197536 4.9794303797468356 . . median_distance=73;value_sum=10.0;replicates=2
diff -r 000000000000 -r a072f0f30ea3 test-data/statistics_histogram_out1.pdf
Binary file test-data/statistics_histogram_out1.pdf has changed
diff -r 000000000000 -r a072f0f30ea3 test-data/statistics_table_out1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/statistics_table_out1.tabular Wed Dec 23 09:25:42 2015 -0500
@@ -0,0 +1,3 @@
+data median read count
+1 783.5
+2 790.0
diff -r 000000000000 -r a072f0f30ea3 test-data/unmatched_peaks_out1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unmatched_peaks_out1.tabular Wed Dec 23 09:25:42 2015 -0500
@@ -0,0 +1,3 @@
+chrom midpoint midpoint+1 c-w sum c-w distance replicate id
+chr1 388 389 359.0 20 1
+chr1 72731 72732 710.0 100 1
diff -r 000000000000 -r a072f0f30ea3 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Dec 23 09:25:42 2015 -0500
@@ -0,0 +1,6 @@
+
+
+
+
+
+