changeset 0:4aa1ee5d8510 draft

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 10:52:29 -0400
parents
children 372b2f5668f3
files microsats_mutability.py microsats_mutability.xml test-data/ortho_ms.tab test-data/ortho_ms_mut.tab tool_dependencies.xml
diffstat 5 files changed, 854 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/microsats_mutability.py	Tue Apr 01 10:52:29 2014 -0400
@@ -0,0 +1,494 @@
+#!/usr/bin/env python
+#Guruprasad Ananda
+"""
+This tool computes microsatellite mutability for the orthologous microsatellites produced by the 'Extract Orthologous Microsatellites from pair-wise alignments' tool.
+"""
+import fileinput
+import string
+import sys
+import tempfile
+from galaxy.tools.util.galaxyops import *
+from bx.intervals.io import *
+from bx.intervals.operations import quicksect
+
+def stop_err(msg):
+    """
+    Write msg to stderr and terminate the script with a non-zero exit status.
+
+    Defined ahead of the argument parsing below because that code calls it.
+    """
+    sys.stderr.write(msg)
+    sys.exit(1)
+
+
+#Positional command-line arguments; sys.argv[1] (the input dataset) is read in main().
+fout = open(sys.argv[2],'w')
+p_group = int(sys.argv[3])        #primary "group-by" feature
+p_bin_size = int(sys.argv[4])
+s_group = int(sys.argv[5])        #sub-group by feature
+s_bin_size = int(sys.argv[6])
+mono_threshold = 9        #weighting threshold used for mononucleotide repeats
+non_mono_threshold = 4    #weighting threshold used for every other motif type
+#Each feature has one column per species; the second species' column lies 7 columns further right.
+p_group_cols = [p_group, p_group+7]
+s_group_cols = [s_group, s_group+7]
+num_generations = int(sys.argv[7])
+region = sys.argv[8]
+int_file = sys.argv[9]
+if int_file != "None": #User has specified an interval file
+    try:
+        fint = open(int_file, 'r')
+        dbkey_i = sys.argv[10]
+        chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[11] )
+    except Exception:
+        #Covers an unreadable file as well as missing or malformed column arguments.
+        stop_err("Unable to open input Interval file")
+
+
+def reverse_complement(text):
+    """
+    Return the reverse complement of the DNA string text.
+
+    A, C, G and T (either case) are complemented; every other character is
+    kept as is. An empty string yields an empty string.
+    """
+    try:
+        dna_comp = string.maketrans( "ACGTacgt", "TGCAtgca" )
+    except AttributeError:
+        #string.maketrans was removed in Python 3; str.maketrans builds the equivalent table there.
+        dna_comp = str.maketrans( "ACGTacgt", "TGCAtgca" )
+    return text.translate(dna_comp)[::-1]
+
+
+def get_unique_elems(elems):
+    """Return the distinct items of elems, in order of first appearance (items must be hashable)."""
+    already_seen = set()
+    uniques = []
+    for item in elems:
+        if item in already_seen:
+            continue
+        already_seen.add(item)
+        uniques.append(item)
+    return uniques
+
+
+def get_binned_lists(uniqlist, binsize):
+    """
+    Group integer values into consecutive, fixed-width bins.
+
+    uniqlist: values convertible with int(); the list itself is not modified.
+    binsize:  width of each bin; values below 1 are treated as 1.
+
+    Returns a list of non-empty lists in ascending order. Bin k holds the
+    values v with smallest + k*binsize <= v < smallest + (k+1)*binsize, so
+    values keep falling on the same grid even when whole bins are empty.
+    An empty input yields an empty list.
+    """
+    values = sorted(int(elem) for elem in uniqlist)
+    if not values:
+        return []
+    width = max(int(binsize), 1)
+    smallest = values[0]
+    binnedlist = []
+    current_bin = None    #grid index of the bin being filled
+    for value in values:
+        bin_no = (value - smallest) // width
+        if bin_no != current_bin:
+            binnedlist.append([])
+            current_bin = bin_no
+        binnedlist[-1].append(value)
+    return binnedlist
+
+
+def fetch_weight(H, C, t):
+    """
+    Return the weight of one locus in the mutability estimate.
+
+    The weight is 2.0 when H-(C-H) falls below the threshold t and 1.0
+    otherwise. mutabilityEstimator passes the two repeat numbers of the
+    locus as H and C.
+    """
+    return 2.0 if (H - (C - H)) < t else 1.0
+
+
+def mutabilityEstimator(repeats1, repeats2, thresholds):
+    """
+    Accumulate the two terms of the weighted mutability estimate.
+
+    repeats1, repeats2: repeat numbers of the same loci, index by index.
+    thresholds:         per-locus threshold handed to fetch_weight.
+
+    Returns [numerator, denominator]: the weighted sum of squared
+    repeat-number differences and the sum of the weights.
+    """
+    numerator = 0.0
+    denominator = 0.0
+    for idx in range(len(repeats1)):
+        first = repeats1[idx]
+        second = repeats2[idx]
+        weight = fetch_weight(first, second, thresholds[idx])
+        difference = first - second
+        numerator += difference * difference * weight
+        denominator += weight
+    return [numerator, denominator]
+
+
+def _group_matches(field, group):
+    """Tell whether the raw column value field belongs to group: a bin (list of repeat numbers) or a single value."""
+    if isinstance(group, list):
+        return int(field) in group
+    return field == group
+
+
+def _unique_groups(rows, col, by_repeat_number, bin_size):
+    """List the distinct groups found in column col of rows; repeat numbers are binned by bin_size."""
+    values = [row.split('\t')[col] for row in rows]
+    if by_repeat_number:
+        return get_binned_lists( get_unique_elems([int(value) for value in values]), bin_size )
+    return get_unique_elems(values)
+
+
+def _group_stats(rows, species, p_items, s_items):
+    """
+    Compute the mutability terms and the count of every group of one species.
+
+    species is 0 for the first species of the file and 1 for the second; it
+    selects which of the paired group-by columns is matched. Returns two
+    dictionaries keyed by "group" or "group<TAB>sub-group": the
+    [numerator, denominator] pair from mutabilityEstimator and the count.
+    """
+    has_sub = s_group_cols[0] != -1
+    p_col = p_group_cols[species]
+    s_col = s_group_cols[species]
+    mut = {}
+    count = {}
+    for pitem in p_items:
+        if has_sub:
+            sub_items = s_items
+        else:
+            sub_items = [None]
+        for sitem in sub_items:
+            if isinstance(sitem, str):
+                sitem = sitem.strip()
+            #Start every group afresh so that thresholds stay aligned with the repeat lists.
+            repeats1 = []
+            repeats2 = []
+            thresholds = []
+            for row in rows:
+                belems = row.split('\t')
+                if not _group_matches(belems[p_col], pitem):
+                    continue
+                if has_sub and not _group_matches(belems[s_col], sitem):
+                    continue
+                repeats1.append(int(belems[6]))
+                repeats2.append(int(belems[13]))
+                if belems[4] == 'mononucleotide':
+                    thresholds.append(mono_threshold)
+                else:
+                    thresholds.append(non_mono_threshold)
+            if not repeats1:
+                continue
+            if has_sub:
+                key = "%s\t%s" % ( pitem, sitem )
+            else:
+                key = str(pitem)
+            #The species whose columns were matched always comes first in the estimator.
+            if species == 0:
+                focal, other = repeats1, repeats2
+            else:
+                focal, other = repeats2, repeats1
+            mut[key] = mutabilityEstimator( focal, other, thresholds )
+            if region == 'align':
+                count[key] = min( sum(repeats1), sum(repeats2) )
+            else:
+                if winspecies == 1:
+                    picked = focal
+                else:
+                    picked = other
+                #NOTE(review): the second species counts rows when sub-grouping and sums
+                #repeat numbers otherwise; kept as found, confirm which count is intended.
+                if species == 1 and has_sub:
+                    count[key] = len(picked)
+                else:
+                    count[key] = sum(picked)
+    return mut, count
+
+
+def _emit(blk, seq1, seq2, key, mut, count):
+    """Write one result row to fout in the layout of the current region mode."""
+    mut = "%.2e" % (mut/num_generations)
+    if region == 'align':
+        fout.write("%s\t%s\t%s\t%s\t%s\t%s\n" % ( blk, seq1, seq2, key.strip(), mut, count ))
+    elif region == 'win':
+        fout.write("%s\t%s\t%s\t%s\n" % ( blk, key.strip(), mut, count ))
+        fout.flush()
+
+
+def output_writer(blk, blk_lines):
+    """
+    Estimate and write the mutability of every group found in one block.
+
+    blk:       block number ('align' mode) or interval line ('win' mode);
+               written as the first column of each output row.
+    blk_lines: tab-separated 15-column microsatellite rows of that block.
+               Blank lines are ignored and line terminators are removed.
+
+    Rows are grouped by the primary (and optional secondary) feature once per
+    species. Groups present for both species are pooled; when grouping by
+    repeat unit, rotated and reverse-complemented units are pooled as well.
+    Each mutability is divided by num_generations before being written.
+    Nothing is written for an empty block.
+    """
+    rows = [bline.rstrip('\r\n') for bline in blk_lines if bline and bline.strip()]
+    if not rows:
+        return
+    #As before, the sequence names reported in 'align' mode are those of the block's last row.
+    last_items = rows[-1].split('\t')
+    seq1 = last_items[1]
+    seq2 = last_items[8]
+
+    p_by_number = p_group_cols[0] == 6    #i.e. the option chosen is group-by repeat number.
+    uniq_elems_1 = _unique_groups( rows, p_group_cols[0], p_by_number, p_bin_size )
+    uniq_elems_2 = _unique_groups( rows, p_group_cols[1], p_by_number, p_bin_size )
+    uniq_s_elems_1 = None
+    uniq_s_elems_2 = None
+    if s_group_cols[0] != -1:    #sub-group is not None
+        s_by_number = s_group_cols[0] == 6
+        uniq_s_elems_1 = _unique_groups( rows, s_group_cols[0], s_by_number, s_bin_size )
+        uniq_s_elems_2 = _unique_groups( rows, s_group_cols[1], s_by_number, s_bin_size )
+
+    mut1, count1 = _group_stats( rows, 0, uniq_elems_1, uniq_s_elems_1 )
+    mut2, count2 = _group_stats( rows, 1, uniq_elems_2, uniq_s_elems_2 )
+
+    for key in list(mut1.keys()):
+        if key in mut2:
+            mut = (mut1[key][0]+mut2[key][0])/(mut1[key][1]+mut2[key][1])
+            count = count1[key]
+            del mut2[key]
+        else:
+            unit_found = False
+            mutn = mut1[key][0]
+            mutd = mut1[key][1]
+            count = 0
+            if p_group_cols[0] == 7 or s_group_cols[0] == 7: #if it is Repeat Unit (AG, GCT etc.) check for reverse-complements too
+                if p_group_cols[0] == 7:
+                    this, other = 0, 1
+                else:
+                    this, other = 1, 0
+                groups1 = key.split('\t')
+                for key2 in list(mut2.keys()):
+                    groups2 = key2.split('\t')
+                    #The non-unit component must be identical; it is absent (empty slice) without sub-grouping.
+                    if groups1[other:other+1] != groups2[other:other+1]:
+                        continue
+                    #Doubling the unit makes every rotation of it a substring (e.g. GT in TGTG).
+                    doubled = groups2[this]*2
+                    if groups1[this] in doubled or reverse_complement(groups1[this]) in doubled:
+                        mutn += mut2[key2][0]
+                        mutd += mut2[key2][1]
+                        count += int(count2[key2])
+                        unit_found = True
+                        del mut2[key2]
+            if unit_found:
+                mut = mutn/mutd
+            else:
+                mut = mut1[key][0]/mut1[key][1]
+                count = count1[key]
+        _emit(blk, seq1, seq2, key, mut, count)
+
+    #catch any remaining repeats, for instance if the orthologous position contained different repeat units
+    for remaining_key in list(mut2.keys()):
+        mut = mut2[remaining_key][0]/mut2[remaining_key][1]
+        _emit(blk, seq1, seq2, remaining_key, mut, count2[remaining_key])
+
+
+def counter(node, start, end, report_func):
+    """
+    Report every interval of the subtree under node that lies inside a window.
+
+    An interval is reported, through report_func(node), when
+    start <= node.start < end and start < node.end <= end.
+
+    The pruning assumes what the original traversal already relied on: left
+    descendants do not start after their parent, right descendants do not
+    start before it, and node.maxend is the largest end within a subtree.
+    A node that starts inside the window but extends past its end is not
+    reported, yet both of its subtrees are still searched.
+    """
+    starts_before_end = node.start < end
+    if start <= node.start and starts_before_end and start < node.end <= end:
+        report_func(node)
+    #Later-starting intervals can only fit while this node starts before the window's end.
+    if starts_before_end and node.right:
+        counter(node.right, start, end, report_func)
+    #Earlier-starting intervals can only fit while this node does not start before the window.
+    if node.start >= start and node.left:
+        if starts_before_end or node.left.maxend > start:
+            counter(node.left, start, end, report_func)
+
+
+def main():
+    """
+    Read the orthologous-microsatellite table named by sys.argv[1] and write mutabilities.
+
+    The first data line found within the first 31 lines must have 15
+    tab-separated columns, otherwise the script stops with an error.
+
+    'win' mode:   the microsatellites of the species whose name contains the
+                  interval file's build are indexed, and output_writer is
+                  called once per interval with the rows lying inside it.
+    'align' mode: a header is written and output_writer is called once per
+                  run of consecutive rows sharing a block number (column 1).
+    """
+    global winspecies, speciesind
+    infile = sys.argv[1]
+
+    elems = []    #stays empty when no data line is found, which fails the column check below
+    sniff = open(infile, 'r')
+    try:
+        for i, line in enumerate( sniff ):
+            line = line.rstrip('\r\n')
+            if len( line )>0 and not line.startswith( '#' ):
+                elems = line.split( '\t' )
+                break
+            if i == 30:
+                break # Hopefully we'll never get here...
+    finally:
+        sniff.close()
+
+    if len( elems ) != 15:
+        stop_err( "This tool only works on tabular data output by 'Extract Orthologous Microsatellites from pair-wise alignments' tool. The data in your input dataset is either missing or not formatted properly." )
+
+    skipped = 0
+    if region == 'win':
+        #Pick the species (column 1 or column 8) that belongs to the interval file's build.
+        if dbkey_i in elems[1]:
+            winspecies = 1
+            speciesind = 1
+        elif dbkey_i in elems[8]:
+            winspecies = 2
+            speciesind = 8
+        else:
+            stop_err("The species build corresponding to your interval file is not present in the Microsatellite file.")
+
+        msats = NiceReaderWrapper( fileinput.FileInput( infile ),
+                                chrom_col = speciesind,
+                                start_col = speciesind+1,
+                                end_col = speciesind+2,
+                                strand_col = -1,
+                                fix_strand = True)
+        msatTree = quicksect.IntervalTree()
+        for item in msats:
+            if type( item ) is GenomicInterval:
+                msatTree.insert( item, msats.linenum, item.fields )
+
+        for iline in fint:
+            try:
+                iline = iline.rstrip('\r\n')
+                if not iline:
+                    continue
+                ielems = iline.split('\t')
+                ichr = ielems[chr_col_i]
+                istart = int(ielems[start_col_i])
+                iend = int(ielems[end_col_i])
+                isrc = "%s.%s" % ( dbkey_i, ichr )
+                if isrc not in msatTree.chroms:
+                    continue
+                result = []
+                root = msatTree.chroms[isrc]    #root node for the chrom
+                counter(root, istart, iend, result.append)
+                if not result:
+                    continue
+                #Rows are passed directly; a temporary-file round trip only added a newline to the last column.
+                output_writer(iline, ["\t".join( node.other ) for node in result])
+            except Exception:
+                #Best effort: an interval that cannot be processed is counted and skipped.
+                skipped += 1
+        if skipped:
+            sys.stdout.write("Skipped %d intervals as invalid.\n" % (skipped))
+    elif region == 'align':
+        if s_group_cols[0] != -1:
+            fout.write("#Window\tSpecies_1\tSpecies_2\tGroupby_Feature\tSubGroupby_Feature\tMutability\tCount\n")
+        else:
+            #Six columns, matching the rows output_writer writes when there is no sub-group.
+            fout.write("#Window\tSpecies_1\tSpecies_2\tGroupby_Feature\tMutability\tCount\n")
+        prev_bnum = -1
+        block_lines = []
+        fin = open(infile, 'r')
+        try:
+            for line in fin:
+                line = line.strip("\r\n")
+                if not line:
+                    continue
+                elems = line.split('\t')
+                if len(elems) != 15:
+                    continue
+                try:
+                    new_bnum = int(elems[0])    #headers and other non-numeric rows are ignored
+                except ValueError:
+                    continue
+                if new_bnum != prev_bnum:
+                    if block_lines:
+                        output_writer(prev_bnum, block_lines)
+                    block_lines = []
+                block_lines.append(line)
+                prev_bnum = new_bnum
+            if block_lines:
+                output_writer(prev_bnum, block_lines)
+        except Exception as ea:
+            #As before, the first failure ends the run and is reported on stderr.
+            sys.stderr.write("%s\n" % (ea,))
+            skipped += 1
+        finally:
+            fin.close()
+        if skipped:
+            sys.stdout.write("Skipped %d lines as invalid.\n" % (skipped))
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/microsats_mutability.xml	Tue Apr 01 10:52:29 2014 -0400
@@ -0,0 +1,125 @@
+<tool id="microsats_mutability1" name="Estimate microsatellite mutability" version="1.1.0">
+  <description>by specified attributes</description>
+  <requirements>
+    <requirement type="package" version="0.7.1">bx-python</requirement>
+    <requirement type="package" version="1.0.0">galaxy-ops</requirement>
+  </requirements>
+  <command interpreter="python">
+  	microsats_mutability.py 
+  	$input1 
+  	$out_file1 
+  	${pri_condition.primary_group} 
+  	#if $pri_condition.primary_group == "6":
+      ${pri_condition.binsize} ${pri_condition.subgroup} -1 
+    #else:
+      0 ${pri_condition.sub_condition.subgroup} 
+      #if $pri_condition.sub_condition.subgroup == "6":
+       ${pri_condition.sub_condition.s_binsize}
+      #else:
+       -1
+      #end if
+    #end if
+  	$gens
+    ${region.type}
+    #if $region.type == "win":
+      ${region.input2} $input2.dbkey $input2.metadata.chromCol,$input2.metadata.startCol,$input2.metadata.endCol,$input2.metadata.strandCol
+    #else:
+      "None"
+    #end if
+  </command>
+  <inputs>
+    <page>
+      <param name="input1" type="data" format="tabular" label="Select dataset containing Orthologous microsatellites"/>
+      <conditional name="region">
+	      <param name="type" type="select" label="Estimate rates corresponding to" multiple="false">
+	         <option value="align">Alignment block</option>
+	         <option value="win">Intervals in your history</option>
+	     </param>
+	     <when value="win">
+	      	<param format="interval" name="input2" type="data" label="Choose intervals">
+	      		<validator type="unspecified_build" />
+	    	</param>
+	      </when>
+	      <when value="align" />
+      </conditional>
+      <param name="gens" size="10" type="integer" value="1" label="Number of generations between the two species in input file"/>
+      <conditional name="pri_condition">
+	      <param name="primary_group" type="select" label="Group by" multiple="false">
+	         <option value="4">Motif type (mono/di/tri etc.)</option>
+	         <option value="7">Repeat Unit (AG, GCT etc.)</option>
+	         <option value="6">Repeat Number </option>
+	      </param>
+	      <when value="6">
+	      	<param name="binsize" size="10" type="integer" value="1" label="Bin-size" help="Bin-size denotes the number of repeat numbers to be considered as a group. Bin-size of 5 will group every 5 consecutive repeat numbers into a group."/>
+	      	<param name="subgroup" type="select" label="Sub-group by" multiple="false">
+		      <option value="-1">None</option>
+			  <option value="4">Motif type (mono/di/tri etc.)</option>
+			  <option value="7">Repeat Unit (AG, GCT etc.)</option>
+			</param>
+	      </when>
+	      <when value="7">
+	        <conditional name="sub_condition">
+	    	   <param name="subgroup" type="select" label="Sub-group by" multiple="false">
+		    	 <option value="-1">None</option>
+				 <option value="4">Motif type (mono/di/tri etc.)</option>
+				 <option value="6">Repeat Number </option>
+			   </param>
+			   <when value="-1"></when>
+		       <when value="4"></when>
+		       <when value="6">
+		      	  <param name="s_binsize" size="10" type="integer" value="1" label="Bin size" help="Bin-size denotes the number of repeat numbers to be considered as a group. Bin-size of 5 will group every 5 consecutive repeat numbers into a group."/>
+		       </when>
+			</conditional>
+	      </when>
+	      <when value="4">
+			<conditional name="sub_condition">
+	    	   <param name="subgroup" type="select" label="Sub-group by" multiple="false">
+		    	 <option value="-1">None</option>
+				 <option value="7">Repeat Unit (AG, GCT etc.)</option>
+				 <option value="6">Repeat Number </option>
+			   </param>
+			   <when value="-1"></when>
+		       <when value="7"></when>
+			   <when value="6">
+		      	  <param name="s_binsize" size="10" type="integer" value="1" label="Bin size" help="Bin-size denotes the number of repeat numbers to be considered as a group. Bin-size of 5 will group every 5 consecutive repeat numbers into a group."/>
+		       </when>
+			</conditional>
+	      </when>
+      </conditional>
+    </page>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="out_file1" />
+  </outputs>
+  <!-- 
+  <tests>
+    <test>
+      <param name="input1" value="ortho_ms.tab"/>
+      <param name="type" value="align"/>
+      <param name="gens" value="1"/>
+      <param name="primary_group" value="4"/>
+      <param name="sub_condition|subgroup" value="7"/>
+      <output name="out_file1" file="ortho_ms_mut.tab"/>
+    </test>
+  </tests>
+   -->
+<help>
+.. class:: infomark
+
+**What it does**
+
+This tool computes microsatellite mutability for the orthologous microsatellites produced by the 'Extract Orthologous Microsatellites from pair-wise alignments' tool.
+
+Mutability is computed according to the method described in the following paper:
+
+*Webster et al., Microsatellite evolution inferred from human-chimpanzee genomic  sequence alignments, Proc Natl Acad Sci 2002 June 25; 99(13): 8748-8753*
+
+-----
+
+.. class:: warningmark
+
+**Note**
+
+The user-selected group-by and sub-group-by features, the computed mutability, and the count of repeats used to compute that mutability are added as columns to the output.
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ortho_ms.tab	Tue Apr 01 10:52:29 2014 -0400
@@ -0,0 +1,151 @@
+#Block	Seq1_Name	Seq1_Start	Seq1_End	Seq1_Type	Seq1_Length	Seq1_RepeatNumber	Seq1_Unit	Seq2_Name	Seq2_Start	Seq2_End	Seq2_Type	Seq2_Length	Seq2_RepeatNumber	Seq2_Unit
+5	hg18.chr1	6483	6496	trinucleotide	13	4	GCT	panTro2.chr15	100042575	100042588	trinucleotide	13	4	GCT
+5	hg18.chr1	7111	7119	dinucleotide	8	4	CT	panTro2.chr15	100043212	100043220	dinucleotide	8	4	CT
+5	hg18.chr1	6483	6496	trinucleotide	13	4	GCT	panTro2.chr15	100042575	100042588	trinucleotide	13	4	GCT
+5	hg18.chr1	7111	7119	dinucleotide	8	4	CT	panTro2.chr15	100043212	100043220	dinucleotide	8	4	CT
+9	hg18.chr1	11258	11267	dinucleotide	9	4	CT	panTro2.chr9_random	4677431	4677440	dinucleotide	9	4	CT
+10	hg18.chr1	11881	11889	dinucleotide	8	4	TC	panTro2.chr15	100031465	100031473	dinucleotide	8	4	TC
+10	hg18.chr1	18275	18284	mononucleotide	9	9	A	panTro2.chr15	100037854	100037863	mononucleotide	9	9	A
+10	hg18.chr1	11881	11889	dinucleotide	8	4	TC	panTro2.chr15	100031465	100031473	dinucleotide	8	4	TC
+10	hg18.chr1	16317	16329	dinucleotide	12	6	GT	panTro2.chr15	100035910	100035921	dinucleotide	11	5	TG
+10	hg18.chr1	18275	18284	mononucleotide	9	9	A	panTro2.chr15	100037854	100037863	mononucleotide	9	9	A
+10	hg18.chr1	18452	18467	mononucleotide	15	15	T	panTro2.chr15	100038029	100038042	mononucleotide	13	13	T
+12	hg18.chr1	20736	20756	dinucleotide	20	10	TC	panTro2.chr15_random	1091184	1091198	dinucleotide	14	7	TC
+13	hg18.chr1	20799	20812	dinucleotide	13	6	TC	panTro2.chrUn	1510811	1510831	dinucleotide	20	10	TC
+13	hg18.chr1	21563	21572	dinucleotide	9	4	AG	panTro2.chrUn	1511572	1511581	dinucleotide	9	4	AG
+13	hg18.chr1	21671	21681	dinucleotide	10	5	AC	panTro2.chrUn	1511678	1511688	dinucleotide	10	5	AC
+14	hg18.chr1	23313	23328	mononucleotide	15	15	A	panTro2.chrUn	1508926	1508942	mononucleotide	16	16	A
+16	hg18.chr1	26215	26228	mononucleotide	13	13	A	panTro2.chrUn	135175	135189	mononucleotide	14	14	A
+16	hg18.chr1	26657	26667	dinucleotide	10	5	TG	panTro2.chrUn	135618	135628	dinucleotide	10	5	TG
+16	hg18.chr1	27104	27114	mononucleotide	10	10	T	panTro2.chrUn	136064	136073	mononucleotide	9	9	T
+16	hg18.chr1	27291	27299	dinucleotide	8	4	CT	panTro2.chrUn	136251	136259	dinucleotide	8	4	CT
+16	hg18.chr1	30483	30495	trinucleotide	12	4	TTC	panTro2.chrUn	139435	139447	trinucleotide	12	4	TTC
+16	hg18.chr1	30503	30522	mononucleotide	19	19	T	panTro2.chrUn	139457	139482	mononucleotide	25	25	T
+16	hg18.chr1	26657	26667	dinucleotide	10	5	TG	panTro2.chrUn	135618	135628	dinucleotide	10	5	TG
+16	hg18.chr1	27104	27114	mononucleotide	10	10	T	panTro2.chrUn	136064	136073	mononucleotide	9	9	T
+16	hg18.chr1	27291	27299	dinucleotide	8	4	CT	panTro2.chrUn	136251	136259	dinucleotide	8	4	CT
+16	hg18.chr1	30483	30495	trinucleotide	12	4	TTC	panTro2.chrUn	139435	139447	trinucleotide	12	4	TTC
+17	hg18.chr1	33660	33676	mononucleotide	16	16	A	panTro2.chrUn	9698149	9698162	mononucleotide	13	13	A
+17	hg18.chr1	33660	33676	mononucleotide	16	16	A	panTro2.chrUn	9698149	9698162	mononucleotide	13	13	A
+19	hg18.chr1	35586	35595	dinucleotide	9	4	AT	panTro2.chrUn	9700318	9700327	dinucleotide	9	4	AT
+19	hg18.chr1	36427	36435	dinucleotide	8	4	GT	panTro2.chrUn	9701160	9701168	dinucleotide	8	4	GT
+19	hg18.chr1	37182	37192	mononucleotide	10	10	A	panTro2.chrUn	9701917	9701928	mononucleotide	11	11	A
+19	hg18.chr1	37620	37628	dinucleotide	8	4	TA	panTro2.chrUn	9702357	9702365	dinucleotide	8	4	TA
+19	hg18.chr1	41199	41207	dinucleotide	8	4	CT	panTro2.chrUn	9705949	9705957	dinucleotide	8	4	CT
+19	hg18.chr1	41728	41741	mononucleotide	13	13	A	panTro2.chrUn	9706479	9706495	mononucleotide	16	16	A
+19	hg18.chr1	35586	35595	dinucleotide	9	4	AT	panTro2.chrUn	9700318	9700327	dinucleotide	9	4	AT
+19	hg18.chr1	36427	36435	dinucleotide	8	4	GT	panTro2.chrUn	9701160	9701168	dinucleotide	8	4	GT
+19	hg18.chr1	37182	37192	mononucleotide	10	10	A	panTro2.chrUn	9701917	9701928	mononucleotide	11	11	A
+19	hg18.chr1	37620	37628	dinucleotide	8	4	TA	panTro2.chrUn	9702357	9702365	dinucleotide	8	4	TA
+19	hg18.chr1	41199	41207	dinucleotide	8	4	CT	panTro2.chrUn	9705949	9705957	dinucleotide	8	4	CT
+19	hg18.chr1	41728	41741	mononucleotide	13	13	A	panTro2.chrUn	9706479	9706495	mononucleotide	16	16	A
+21	hg18.chr1	47418	47426	dinucleotide	8	4	TA	panTro2.chrUn	9713027	9713035	dinucleotide	8	4	TA
+21	hg18.chr1	47950	47959	dinucleotide	9	4	TC	panTro2.chrUn	9713559	9713568	dinucleotide	9	4	TC
+21	hg18.chr1	47418	47426	dinucleotide	8	4	TA	panTro2.chrUn	9713027	9713035	dinucleotide	8	4	TA
+21	hg18.chr1	47950	47959	dinucleotide	9	4	TC	panTro2.chrUn	9713559	9713568	dinucleotide	9	4	TC
+25	hg18.chr1	56744	56752	dinucleotide	8	4	GA	panTro2.chrUn	9723923	9723931	dinucleotide	8	4	GA
+25	hg18.chr1	60215	60224	mononucleotide	9	9	A	panTro2.chrUn	9727396	9727405	mononucleotide	9	9	A
+25	hg18.chr1	56744	56752	dinucleotide	8	4	GA	panTro2.chrUn	9723923	9723931	dinucleotide	8	4	GA
+25	hg18.chr1	60215	60224	mononucleotide	9	9	A	panTro2.chrUn	9727396	9727405	mononucleotide	9	9	A
+25	hg18.chr1	61039	61050	mononucleotide	11	11	A	panTro2.chrUn	9728220	9728230	mononucleotide	10	10	A
+25	hg18.chr1	61710	61725	mononucleotide	15	15	T	panTro2.chrUn	9728890	9728903	mononucleotide	13	13	T
+26	hg18.chr1	67953	67961	dinucleotide	8	4	AT	panTro2.chrUn	9735669	9735677	dinucleotide	8	4	AT
+26	hg18.chr1	68606	68614	dinucleotide	8	4	AT	panTro2.chrUn	9736324	9736332	dinucleotide	8	4	AT
+26	hg18.chr1	67038	67059	mononucleotide	21	21	A	panTro2.chrUn	9734760	9734777	mononucleotide	17	17	A
+26	hg18.chr1	67953	67961	dinucleotide	8	4	AT	panTro2.chrUn	9735669	9735677	dinucleotide	8	4	AT
+26	hg18.chr1	68606	68614	dinucleotide	8	4	AT	panTro2.chrUn	9736324	9736332	dinucleotide	8	4	AT
+27	hg18.chr1	72077	72088	dinucleotide	11	5	AC	panTro2.chrUn	9742946	9742957	dinucleotide	11	5	AC
+32	hg18.chr1	81043	81052	dinucleotide	9	4	GA	panTro2.chrUn	1797450	1797459	dinucleotide	9	4	GA
+39	hg18.chr1	87721	87729	dinucleotide	8	4	AG	panTro2.chr1_random	7074333	7074341	dinucleotide	8	4	AG
+39	hg18.chr1	88697	88707	dinucleotide	10	5	TG	panTro2.chr1_random	7075309	7075319	dinucleotide	10	5	TG
+39	hg18.chr1	87721	87729	dinucleotide	8	4	AG	panTro2.chr1_random	7074333	7074341	dinucleotide	8	4	AG
+39	hg18.chr1	88697	88707	dinucleotide	10	5	TG	panTro2.chr1_random	7075309	7075319	dinucleotide	10	5	TG
+40	hg18.chr1	91199	91212	mononucleotide	13	13	A	panTro2.chr1_random	7089889	7089900	mononucleotide	11	11	A
+42	hg18.chr1	93838	93847	dinucleotide	9	4	CT	panTro2.chr1_random	7090757	7090768	dinucleotide	11	5	CT
+42	hg18.chr1	93838	93847	dinucleotide	9	4	CT	panTro2.chr1_random	7090757	7090768	dinucleotide	11	5	CT
+43	hg18.chr1	97391	97399	dinucleotide	8	4	AG	panTro2.chr1	244578	244586	dinucleotide	8	4	AG
+44	hg18.chr1	98409	98425	mononucleotide	16	16	A	panTro2.chr1_random	7095323	7095342	mononucleotide	19	19	A
+44	hg18.chr1	98409	98425	mononucleotide	16	16	A	panTro2.chr1_random	7095323	7095342	mononucleotide	19	19	A
+46	hg18.chr1	101851	101859	dinucleotide	8	4	CA	panTro2.chr1_random	7109861	7109869	dinucleotide	8	4	CA
+47	hg18.chr1	103743	103753	dinucleotide	10	5	CT	panTro2.chr1	250633	250643	dinucleotide	10	5	CT
+47	hg18.chr1	104174	104182	dinucleotide	8	4	GA	panTro2.chr1	251064	251072	dinucleotide	8	4	GA
+47	hg18.chr1	107676	107685	dinucleotide	9	4	AG	panTro2.chr1	254594	254603	dinucleotide	9	4	AG
+47	hg18.chr1	107735	107743	dinucleotide	8	4	AG	panTro2.chr1	254653	254661	dinucleotide	8	4	AG
+47	hg18.chr1	108222	108231	dinucleotide	9	4	AT	panTro2.chr1	255140	255148	dinucleotide	8	4	AT
+47	hg18.chr1	108253	108261	dinucleotide	8	4	AT	panTro2.chr1	255171	255179	dinucleotide	8	4	AT
+47	hg18.chr1	103743	103753	dinucleotide	10	5	CT	panTro2.chr1	250633	250643	dinucleotide	10	5	CT
+47	hg18.chr1	104174	104182	dinucleotide	8	4	GA	panTro2.chr1	251064	251072	dinucleotide	8	4	GA
+47	hg18.chr1	107676	107685	dinucleotide	9	4	AG	panTro2.chr1	254594	254603	dinucleotide	9	4	AG
+47	hg18.chr1	107735	107743	dinucleotide	8	4	AG	panTro2.chr1	254653	254661	dinucleotide	8	4	AG
+47	hg18.chr1	108222	108231	dinucleotide	9	4	AT	panTro2.chr1	255140	255148	dinucleotide	8	4	AT
+47	hg18.chr1	108253	108261	dinucleotide	8	4	AT	panTro2.chr1	255171	255179	dinucleotide	8	4	AT
+48	hg18.chr1	110072	110081	dinucleotide	9	4	TG	panTro2.chr1	258778	258787	dinucleotide	9	4	TG
+48	hg18.chr1	110072	110081	dinucleotide	9	4	TG	panTro2.chr1	258778	258787	dinucleotide	9	4	TG
+49	hg18.chr1	111133	111148	mononucleotide	15	15	T	panTro2.chr1	260455	260473	mononucleotide	18	18	T
+49	hg18.chr1	111503	111515	trinucleotide	12	4	TAA	panTro2.chr1	260828	260846	trinucleotide	18	6	TAA
+51	hg18.chr1	113280	113290	dinucleotide	10	5	AT	panTro2.chr1_random	8247726	8247736	dinucleotide	10	5	AT
+55	hg18.chr1	114709	114728	mononucleotide	19	19	A	panTro2.chr1	262958	262973	mononucleotide	15	15	A
+55	hg18.chr1	115520	115529	mononucleotide	9	9	A	panTro2.chr1	263765	263775	mononucleotide	10	10	A
+55	hg18.chr1	114709	114728	mononucleotide	19	19	A	panTro2.chr1	262958	262973	mononucleotide	15	15	A
+56	hg18.chr1	118460	118475	mononucleotide	15	15	T	panTro2.chr1	267602	267617	mononucleotide	15	15	T
+56	hg18.chr1	119541	119556	mononucleotide	15	15	T	panTro2.chr1	268682	268695	mononucleotide	13	13	T
+56	hg18.chr1	119775	119783	dinucleotide	8	4	GA	panTro2.chr1	268914	268922	dinucleotide	8	4	GA
+56	hg18.chr1	123929	123958	mononucleotide	29	29	T	panTro2.chr1	273095	273130	mononucleotide	35	35	T
+72	hg18.chr1	134751	134763	mononucleotide	12	12	A	panTro2.chr1_random	7189181	7189197	mononucleotide	16	16	A
+72	hg18.chr1	134994	135006	trinucleotide	12	4	GTG	panTro2.chr1_random	7189427	7189439	trinucleotide	12	4	GTG
+72	hg18.chr1	135805	135813	dinucleotide	8	4	GT	panTro2.chr1_random	7190238	7190246	dinucleotide	8	4	GT
+72	hg18.chr1	136433	136442	dinucleotide	9	4	TC	panTro2.chr1_random	7190866	7190875	dinucleotide	9	4	TC
+72	hg18.chr1	137771	137781	mononucleotide	10	10	A	panTro2.chr1_random	7192213	7192224	mononucleotide	11	11	A
+72	hg18.chr1	138639	138652	trinucleotide	13	4	AAT	panTro2.chr1_random	7193082	7193095	trinucleotide	13	4	AAT
+72	hg18.chr1	140196	140205	dinucleotide	9	4	AG	panTro2.chr1_random	7194642	7194651	dinucleotide	9	4	AG
+72	hg18.chr1	141348	141361	mononucleotide	13	13	T	panTro2.chr1_random	7195790	7195814	mononucleotide	24	24	T
+72	hg18.chr1	134994	135006	trinucleotide	12	4	GTG	panTro2.chr1_random	7189427	7189439	trinucleotide	12	4	GTG
+72	hg18.chr1	135805	135813	dinucleotide	8	4	GT	panTro2.chr1_random	7190238	7190246	dinucleotide	8	4	GT
+72	hg18.chr1	136433	136442	dinucleotide	9	4	TC	panTro2.chr1_random	7190866	7190875	dinucleotide	9	4	TC
+72	hg18.chr1	138639	138652	trinucleotide	13	4	AAT	panTro2.chr1_random	7193082	7193095	trinucleotide	13	4	AAT
+72	hg18.chr1	140196	140205	dinucleotide	9	4	AG	panTro2.chr1_random	7194642	7194651	dinucleotide	9	4	AG
+72	hg18.chr1	142689	142699	mononucleotide	10	10	T	panTro2.chr1_random	7197149	7197158	mononucleotide	9	9	T
+73	hg18.chr1	145718	145733	mononucleotide	15	15	A	panTro2.chr1_random	7202121	7202135	mononucleotide	14	14	A
+74	hg18.chr1	146143	146155	mononucleotide	12	12	T	panTro2.chr1_random	7208622	7208632	mononucleotide	10	10	T
+74	hg18.chr1	146971	146985	mononucleotide	14	14	A	panTro2.chr1_random	7209449	7209463	mononucleotide	14	14	A
+75	hg18.chr1	150335	150351	mononucleotide	16	16	T	panTro2.chr1_random	7213029	7213047	mononucleotide	18	18	T
+75	hg18.chr1	151431	151440	mononucleotide	9	9	A	panTro2.chr1_random	7214127	7214137	mononucleotide	10	10	A
+75	hg18.chr1	153993	154017	pentanucleotide	24	4	AAAAC	panTro2.chr1_random	7216686	7216711	pentanucleotide	25	5	AAAAC
+75	hg18.chr1	155151	155174	mononucleotide	23	23	A	panTro2.chr1_random	7217843	7217867	mononucleotide	24	24	A
+75	hg18.chr1	156454	156463	mononucleotide	9	9	A	panTro2.chr1_random	7219148	7219157	mononucleotide	9	9	A
+75	hg18.chr1	156998	157017	tetranucleotide	19	4	TTTA	panTro2.chr1_random	7219691	7219710	tetranucleotide	19	4	TTTA
+75	hg18.chr1	151431	151440	mononucleotide	9	9	A	panTro2.chr1_random	7214127	7214137	mononucleotide	10	10	A
+75	hg18.chr1	153993	154017	pentanucleotide	24	4	AAAAC	panTro2.chr1_random	7216686	7216711	pentanucleotide	25	5	AAAAC
+75	hg18.chr1	154537	154559	mononucleotide	22	22	A	panTro2.chr1_random	7217231	7217251	mononucleotide	20	20	A
+75	hg18.chr1	156454	156463	mononucleotide	9	9	A	panTro2.chr1_random	7219148	7219157	mononucleotide	9	9	A
+75	hg18.chr1	156998	157017	tetranucleotide	19	4	TTTA	panTro2.chr1_random	7219691	7219710	tetranucleotide	19	4	TTTA
+76	hg18.chr1	159723	159736	mononucleotide	13	13	T	panTro2.chr1	224074251	224074269	mononucleotide	18	18	T
+76	hg18.chr1	160798	160818	pentanucleotide	20	4	GTTTT	panTro2.chr1	224075335	224075355	pentanucleotide	20	4	GTTTT
+76	hg18.chr1	159723	159736	mononucleotide	13	13	T	panTro2.chr1	224074251	224074269	mononucleotide	18	18	T
+76	hg18.chr1	160798	160818	pentanucleotide	20	4	GTTTT	panTro2.chr1	224075335	224075355	pentanucleotide	20	4	GTTTT
+77	hg18.chr1	163076	163084	dinucleotide	8	4	AT	panTro2.chr1_random	7293459	7293467	dinucleotide	8	4	AT
+77	hg18.chr1	165179	165187	dinucleotide	8	4	GC	panTro2.chr1_random	7295617	7295625	dinucleotide	8	4	GC
+77	hg18.chr1	165310	165322	mononucleotide	12	12	A	panTro2.chr1_random	7295748	7295770	mononucleotide	22	22	A
+77	hg18.chr1	163076	163084	dinucleotide	8	4	AT	panTro2.chr1_random	7293459	7293467	dinucleotide	8	4	AT
+77	hg18.chr1	165179	165187	dinucleotide	8	4	GC	panTro2.chr1_random	7295617	7295625	dinucleotide	8	4	GC
+77	hg18.chr1	165310	165322	mononucleotide	12	12	A	panTro2.chr1_random	7295748	7295770	mononucleotide	22	22	A
+83	hg18.chr1	219668	219689	tetranucleotide	21	5	TAAA	panTro2.chr3	77587413	77587435	tetranucleotide	22	5	TAAA
+83	hg18.chr1	219668	219689	tetranucleotide	21	5	TAAA	panTro2.chr3	77587413	77587435	tetranucleotide	22	5	TAAA
+87	hg18.chr1	222298	222309	mononucleotide	11	11	T	panTro2.chrUn	1781936	1781946	mononucleotide	10	10	T
+87	hg18.chr1	222298	222309	mononucleotide	11	11	T	panTro2.chrUn	1781936	1781946	mononucleotide	10	10	T
+89	hg18.chr1	224906	224915	mononucleotide	9	9	T	panTro2.chrUn	1780175	1780184	mononucleotide	9	9	T
+89	hg18.chr1	224906	224915	mononucleotide	9	9	T	panTro2.chrUn	1780175	1780184	mononucleotide	9	9	T
+93	hg18.chr1	227371	227380	dinucleotide	9	4	GA	panTro2.chr1_random	7325572	7325581	dinucleotide	9	4	GA
+93	hg18.chr1	227392	227408	mononucleotide	16	16	A	panTro2.chr1_random	7325593	7325616	mononucleotide	23	23	A
+93	hg18.chr1	227371	227380	dinucleotide	9	4	GA	panTro2.chr1_random	7325572	7325581	dinucleotide	9	4	GA
+96	hg18.chr1	234056	234064	dinucleotide	8	4	AG	panTro2.chr1_random	8589985	8589993	dinucleotide	8	4	AG
+97	hg18.chr1	235032	235042	dinucleotide	10	5	TG	panTro2.chr1_random	7331012	7331022	dinucleotide	10	5	TG
+97	hg18.chr1	237516	237529	mononucleotide	13	13	A	panTro2.chr1_random	7333512	7333527	mononucleotide	15	15	A
+99	hg18.chr1	240155	240164	dinucleotide	9	4	CT	panTro2.chr1_random	7334380	7334389	dinucleotide	9	4	CT
+99	hg18.chr1	243706	243714	dinucleotide	8	4	AG	panTro2.chr1_random	7337953	7337961	dinucleotide	8	4	AG
+99	hg18.chr1	244724	244746	mononucleotide	22	22	A	panTro2.chr1_random	7338970	7338987	mononucleotide	17	17	A
+99	hg18.chr1	240155	240164	dinucleotide	9	4	CT	panTro2.chr1_random	7334380	7334389	dinucleotide	9	4	CT
+99	hg18.chr1	243706	243714	dinucleotide	8	4	AG	panTro2.chr1_random	7337953	7337961	dinucleotide	8	4	AG
+99	hg18.chr1	244724	244746	mononucleotide	22	22	A	panTro2.chr1_random	7338970	7338987	mononucleotide	17	17	A
+100	hg18.chr1	248168	248176	dinucleotide	8	4	CA	panTro2.chr1_random	7109861	7109869	dinucleotide	8	4	CA
+100	hg18.chr1	248168	248176	dinucleotide	8	4	CA	panTro2.chr1_random	7109861	7109869	dinucleotide	8	4	CA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ortho_ms_mut.tab	Tue Apr 01 10:52:29 2014 -0400
@@ -0,0 +1,75 @@
+#Window	Species_1	Window_Start	Window_End	Species_2	Groupby_Feature	SubGroupby_Feature	Mutability	Count
+5	hg18.chr1	6483	6496	panTro2.chr15	100042575	100042588	trinucleotide	GCT	0.00e+00	4
+10	hg18.chr1	18452	18467	panTro2.chr15	100038029	100038042	mononucleotide	T	4.00e+00	13
+10	hg18.chr1	18452	18467	panTro2.chr15	100038029	100038042	dinucleotide	GT	1.00e+00	5
+12	hg18.chr1	20736	20756	panTro2.chr15_random	1091184	1091198	dinucleotide	TC	9.00e+00	7
+13	hg18.chr1	20799	20812	panTro2.chrUn	1510811	1510831	dinucleotide	TC	1.10e+01	24
+14	hg18.chr1	23384	23405	panTro2.chrUn	1508999	1509025	mononucleotide	A	1.00e+00	15
+14	hg18.chr1	23384	23405	panTro2.chrUn	1508999	1509025	mononucleotide	T	1.30e+01	36
+15	hg18.chr1	25352	25371	panTro2.chr15_random	1087905	1087924	tetranucleotide	AAAT	0.00e+00	4
+16	hg18.chr1	30503	30522	panTro2.chrUn	139457	139482	trinucleotide	TTC	0.00e+00	4
+16	hg18.chr1	30503	30522	panTro2.chrUn	139457	139482	mononucleotide	A	1.00e+00	13
+16	hg18.chr1	30503	30522	panTro2.chrUn	139457	139482	mononucleotide	T	1.85e+01	32
+17	hg18.chr1	34037	34047	panTro2.chrUn	9698527	9698537	mononucleotide	A	4.50e+00	23
+19	hg18.chr1	41728	41741	panTro2.chrUn	9706479	9706495	mononucleotide	A	5.00e+00	23
+19	hg18.chr1	41728	41741	panTro2.chrUn	9706479	9706495	dinucleotide	GT	0.00e+00	16
+20	hg18.chr1	44654	44681	panTro2.chrUn	9709915	9709942	tetranucleotide	TTTC	4.50e+00	21
+22	hg18.chr1	52103	52120	panTro2.chrUn	9718011	9718024	mononucleotide	T	1.00e+00	18
+22	hg18.chr1	52103	52120	panTro2.chrUn	9718011	9718024	dinucleotide	AC	4.00e+00	6
+25	hg18.chr1	63706	63720	panTro2.chrUn	9730896	9730914	mononucleotide	T	1.60e+01	14
+25	hg18.chr1	63706	63720	panTro2.chrUn	9730896	9730914	dinucleotide	TA	0.00e+00	6
+25	hg18.chr1	63706	63720	panTro2.chrUn	9730896	9730914	tetranucleotide	ATAC	9.00e+00	5
+26	hg18.chr1	67038	67059	panTro2.chrUn	9734760	9734777	mononucleotide	A	1.60e+01	17
+27	hg18.chr1	72077	72088	panTro2.chrUn	9742946	9742957	dinucleotide	AC	0.00e+00	5
+28	hg18.chr1	73838	73906	panTro2.chr15	99975357	99975380	tetranucleotide	AAAG	1.44e+02	10
+32	hg18.chr1	81064	81077	panTro2.chrUn	1797471	1797489	mononucleotide	A	2.50e+01	13
+35	hg18.chr1	82527	82541	panTro2.chr1_random	7070707	7070721	mononucleotide	A	0.00e+00	14
+40	hg18.chr1	91199	91212	panTro2.chr1_random	7089889	7089900	mononucleotide	A	4.00e+00	11
+41	hg18.chr1	91538	91554	panTro2.chr1	223998154	223998167	mononucleotide	A	9.00e+00	13
+42	hg18.chr1	95472	95491	panTro2.chr1_random	7092383	7092404	dinucleotide	AT	1.85e+01	22
+42	hg18.chr1	95472	95491	panTro2.chr1_random	7092383	7092404	dinucleotide	AC	3.60e+01	12
+43	hg18.chr1	96802	96815	panTro2.chr1	243988	244001	dinucleotide	TC	0.00e+00	6
+44	hg18.chr1	98409	98425	panTro2.chr1_random	7095323	7095342	mononucleotide	A	9.00e+00	16
+45	hg18.chr1	101230	101250	panTro2.chr1	248453	248467	mononucleotide	A	1.85e+01	25
+45	hg18.chr1	101230	101250	panTro2.chr1	248453	248467	dinucleotide	GT	4.00e+00	17
+49	hg18.chr1	112974	112995	panTro2.chr1	262299	262320	trinucleotide	TAA	4.00e+00	4
+49	hg18.chr1	112974	112995	panTro2.chr1	262299	262320	dinucleotide	TA	5.00e-01	15
+49	hg18.chr1	112974	112995	panTro2.chr1	262299	262320	mononucleotide	T	9.00e+00	15
+49	hg18.chr1	112974	112995	panTro2.chr1	262299	262320	dinucleotide	CA	1.00e+00	5
+55	hg18.chr1	114709	114728	panTro2.chr1	262958	262973	mononucleotide	A	1.60e+01	30
+56	hg18.chr1	120154	120164	panTro2.chr1	269287	269300	mononucleotide	A	8.89e+00	109
+56	hg18.chr1	120154	120164	panTro2.chr1	269287	269300	mononucleotide	T	8.80e+00	89
+72	hg18.chr1	141348	141361	panTro2.chr1_random	7195790	7195814	mononucleotide	T	3.65e+01	65
+72	hg18.chr1	141348	141361	panTro2.chr1_random	7195790	7195814	trinucleotide	GTG	0.00e+00	8
+72	hg18.chr1	141348	141361	panTro2.chr1_random	7195790	7195814	trinucleotide	AAC	0.00e+00	21
+72	hg18.chr1	141348	141361	panTro2.chr1_random	7195790	7195814	mononucleotide	A	8.33e+00	52
+72	hg18.chr1	141348	141361	panTro2.chr1_random	7195790	7195814	trinucleotide	TTA	0.00e+00	4
+72	hg18.chr1	141348	141361	panTro2.chr1_random	7195790	7195814	trinucleotide	AAT	0.00e+00	25
+73	hg18.chr1	145718	145733	panTro2.chr1_random	7202121	7202135	mononucleotide	A	1.00e+00	14
+74	hg18.chr1	146971	146985	panTro2.chr1_random	7209449	7209463	mononucleotide	A	2.00e+00	24
+74	hg18.chr1	146971	146985	panTro2.chr1_random	7209449	7209463	mononucleotide	T	4.00e+00	10
+75	hg18.chr1	156998	157017	panTro2.chr1_random	7219691	7219710	mononucleotide	T	4.00e+00	16
+75	hg18.chr1	156998	157017	panTro2.chr1_random	7219691	7219710	tetranucleotide	GGAG	0.00e+00	16
+75	hg18.chr1	156998	157017	panTro2.chr1_random	7219691	7219710	tetranucleotide	TTTA	0.00e+00	24
+75	hg18.chr1	156998	157017	panTro2.chr1_random	7219691	7219710	pentanucleotide	AAAAC	1.00e+00	8
+75	hg18.chr1	156998	157017	panTro2.chr1_random	7219691	7219710	mononucleotide	A	3.00e+00	61
+75	hg18.chr1	156998	157017	panTro2.chr1_random	7219691	7219710	dinucleotide	AC	0.00e+00	34
+76	hg18.chr1	160798	160818	panTro2.chr1	224075335	224075355	mononucleotide	T	2.50e+01	13
+76	hg18.chr1	160798	160818	panTro2.chr1	224075335	224075355	pentanucleotide	GTTTT	0.00e+00	4
+77	hg18.chr1	165310	165322	panTro2.chr1_random	7295748	7295770	mononucleotide	A	1.00e+02	24
+77	hg18.chr1	165310	165322	panTro2.chr1_random	7295748	7295770	dinucleotide	CA	0.00e+00	10
+78	hg18.chr1	166066	166095	panTro2.chr1	224080667	224080691	pentanucleotide	AAAAC	1.00e+00	4
+83	hg18.chr1	219668	219689	panTro2.chr3	77587413	77587435	tetranucleotide	TAAA	0.00e+00	5
+87	hg18.chr1	222298	222309	panTro2.chrUn	1781936	1781946	mononucleotide	T	1.00e+00	20
+93	hg18.chr1	228854	228869	panTro2.chr1_random	7327066	7327078	mononucleotide	A	2.90e+01	31
+94	hg18.chr1	231193	231209	panTro2.chr1	223990552	223990572	mononucleotide	T	1.60e+01	16
+97	hg18.chr1	237516	237529	panTro2.chr1_random	7333512	7333527	mononucleotide	A	1.00e+01	28
+97	hg18.chr1	237516	237529	panTro2.chr1_random	7333512	7333527	mononucleotide	T	1.60e+01	15
+97	hg18.chr1	237516	237529	panTro2.chr1_random	7333512	7333527	tetranucleotide	TTTA	9.00e+00	7
+98	hg18.chr1	237855	237870	panTro2.chr1	223998154	223998167	mononucleotide	A	4.00e+00	13
+99	hg18.chr1	245760	245793	panTro2.chr1_random	7340006	7340026	dinucleotide	AT	2.50e+01	52
+99	hg18.chr1	245760	245793	panTro2.chr1_random	7340006	7340026	dinucleotide	TC	1.67e+01	64
+99	hg18.chr1	245760	245793	panTro2.chr1_random	7340006	7340026	mononucleotide	A	2.50e+01	34
+99	hg18.chr1	245760	245793	panTro2.chr1_random	7340006	7340026	dinucleotide	GT	2.31e+01	114
+99	hg18.chr1	245760	245793	panTro2.chr1_random	7340006	7340026	dinucleotide	AC	4.90e+01	34
+100	hg18.chr1	247545	247565	panTro2.chr1_random	7109243	7109259	mononucleotide	A	8.00e+00	26
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue Apr 01 10:52:29 2014 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="bx-python" version="0.7.1">
+      <repository changeset_revision="41eb9d9f667d" name="package_bx_python_0_7" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="galaxy-ops" version="1.0.0">
+      <repository changeset_revision="4e39032e4ec6" name="package_galaxy_ops_1_0_0" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>