Mercurial > repos > arkarachai-fungtammasan > microsatellite_ngs

Binary file test-data/.DS_Store has changed
--- a/test-data/GenotypeTRcorrection.py	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,250 +0,0 @@
-### import libraries ###
-import sys
-import collections, math
-import heapq
-from galaxy import eggs
-
-
-
-
-
-### basic function ###
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-def averagelist(a,b,expectedlevelofminor):
-    product=[]
-    for i in range(len(a)):
-        product.append((1-expectedlevelofminor)*a[i]+expectedlevelofminor*b[i])
-
-    return product
-
-def complement_base(read):
-    collect=''
-    for i in read:
-        if i.upper()=='A':
-            collect+='T'
-        elif i.upper()=='T':
-            collect+='A'
-        elif i.upper()=='C':
-            collect+='G'
-        elif i.upper()=='G':
-            collect+='C'
-    return collect
-def makeallpossible(read):
-    collect=[]
-    for i in range(len(read)):
-        tmp= read[i:]+read[:i]
-        collect.append(tmp)
-        collect.append(complement_base(tmp))
-    return collect
-
-def motifsimplify(base):
-    '''str--> str
-    '''
-    motiflength=len(base)
-    temp=list(set(ALLMOTIF[motiflength]).intersection(set(makeallpossible(base))))
-
-    return temp[0]
-
-def majorallele(seq):
-    binseq=list(set(seq))
-    binseq.sort(reverse=True)   # highly mutate mode
-    #binseq.sort()              # majority mode
-    storeform=''
-    storevalue=0
-    for i in binseq:
-        if seq.count(i)>storevalue:
-            storeform=i
-            storevalue=seq.count(i)
-
-    return int(storeform)
-
-### decide global parameter ###
-COORDINATECOLUMN=1
-ALLELECOLUMN=2
-MOTIFCOLUMN=3
-  ##(0.01-0.5)
-MINIMUMMUTABLE=1.2*(1.0/(10**8))  #http://www.ncbi.nlm.nih.gov/pubmed/22914163 Kong et al 2012
-
-
-## Fixed global variable
-inputname=sys.argv[1]
-errorprofile=sys.argv[2]
-Genotypingcorrected=sys.argv[3]
-EXPECTEDLEVELOFMINOR=float(sys.argv[4])
-if EXPECTEDLEVELOFMINOR >0.5:
-	try:
-		expected_contribution_of_minor_allele=int('expected_contribution_of_minor_allele')
-	except Exception, eee:
-		print eee
-		stop_err("Expected contribution of minor allele must be at least 0 and not more than 0.5")
-ALLREPEATTYPE=[1,2,3,4]
-ALLREPEATTYPENAME=['mono','di','tri','tetra']
-monomotif=['A','C']
-dimotif=['AC','AG','AT','CG']
-trimotif=['AAC','AAG','AAT','ACC','ACG','ACT','AGC','AGG','ATC','CCG']
-tetramotif=['AAAC','AAAG','AAAT','AACC','AACG','AACT','AAGC','AAGG','AAGT','AATC','AATG','AATT',\
-'ACAG','ACAT','ACCC','ACCG','ACCT','ACGC','ACGG','ACGT','ACTC','ACTG','AGAT','AGCC','AGCG','AGCT',\
-'AGGC','AGGG','ATCC','ATCG','ATGC','CCCG','CCGG','AGTC']
-ALLMOTIF={1:monomotif,2:dimotif,3:trimotif,4:tetramotif}
-monorange=range(5,60)
-dirange=range(6,60)
-trirange=range(9,60)
-tetrarange=range(12,80)
-ALLRANGE={1:monorange,2:dirange,3:trirange,4:tetrarange}
-
-#########################################
-######## Prob calculation sector ########
-#########################################
-def multinomial_prob(majorallele,STRlength,motif,probdatabase):
-    '''int,int,str,dict-->int
-    ### get prob for each STRlength to be generated from major allele
-    '''
-    #print (majorallele,STRlength,motif)
-    prob=probdatabase[len(motif)][motif][majorallele][STRlength]
-    return prob
-
-################################################
-######## error model database sector ###########
-################################################
-
-## structure generator
-errormodeldatabase={1:{},2:{},3:{},4:{}}
-sumbymajoralleledatabase={1:{},2:{},3:{},4:{}}
-for repeattype in ALLREPEATTYPE:
-    for motif in ALLMOTIF[repeattype]:
-        errormodeldatabase[repeattype][motif]={}
-        sumbymajoralleledatabase[repeattype][motif]={}
-        for motifsize1 in ALLRANGE[repeattype]:
-            errormodeldatabase[repeattype][motif][motifsize1]={}
-            sumbymajoralleledatabase[repeattype][motif][motifsize1]=0
-            for motifsize2 in ALLRANGE[repeattype]:
-                errormodeldatabase[repeattype][motif][motifsize1][motifsize2]=MINIMUMMUTABLE
-
-#print errormodeldatabase
-## read database
-
-
-## get read count for each major allele
-fd=open(errorprofile)
-lines=fd.readlines()
-for line in lines:
-    temp=line.strip().split('\t')
-    t_major=int(temp[0])
-    t_count=int(temp[2])
-    motif=temp[3]
-    sumbymajoralleledatabase[len(motif)][motif][t_major]+=t_count
-fd.close()
-##print sumbymajoralleledatabase
-
-## get probability
-fd=open(errorprofile)
-lines=fd.readlines()
-for line in lines:
-    temp=line.strip().split('\t')
-    t_major=int(temp[0])
-    t_read=int(temp[1])
-    t_count=int(temp[2])
-    motif=temp[3]
-    if sumbymajoralleledatabase[len(motif)][motif][t_major]>0:
-        errormodeldatabase[len(motif)][motif][t_major][t_read]=t_count/(sumbymajoralleledatabase[len(motif)][motif][t_major]*1.0)
-        #errormodeldatabase[repeattype][motif][t_major][t_read]=math.log(t_count/(sumbymajorallele[t_major]*1.0))
-
-    #else:
-    #    errormodeldatabase[repeattype][motif][t_major][t_read]=0
-fd.close()
-
-#########################################
-######## input reading sector ###########
-#########################################
-fdout=open(Genotypingcorrected,'w')
-
-fd = open(inputname)
-
-lines=fd.xreadlines()
-for line in lines:
-    i_read=[]
-    i2_read=[]
-    temp=line.strip().split('\t')
-    i_coordinate=temp[COORDINATECOLUMN-1]
-    i_motif=motifsimplify(temp[MOTIFCOLUMN-1])
-    i_read=temp[ALLELECOLUMN-1].split(',')
-    i_read=map(int,i_read)
-    coverage=len(i_read)
-
-### Evaluate 1 major allele ###
-    i_all_allele=list(set(i_read))
-    i_major_allele=majorallele(i_read)
-    f_majorallele=i_read.count(i_major_allele)
-### Evaluate 2 major allele ###
-    if len(i_all_allele)>1:
-        i2_read=filter(lambda a: a != i_major_allele, i_read)
-        i_major2_allele=majorallele(i2_read)
-        f_majorallele2=i_read.count(i_major2_allele)
-        ### Evaluate 3 major allele ###
-        if len(i_all_allele)>2:
-            i3_read=filter(lambda a: a != i_major2_allele, i2_read)
-            i_major3_allele=majorallele(i3_read)
-            f_majorallele3=i_read.count(i_major3_allele)
-        ### No 3 major allele ###
-        elif len(i_all_allele)==2:
-            i_major3_allele=i_major2_allele
-    ### No 2 major allele ###
-    elif len(i_all_allele)==1:
-        #i_major2_allele=majorallele(i_read)
-        i_major2_allele=i_major_allele+len(i_motif)
-        i_major3_allele=i_major2_allele
-        #print line.strip()+'\t'+'\t'.join(['homo','only',str(i_major_allele),str(i_major_allele),'NA'])
-        #continue
-    else:
-        print("no allele is reading")
-        sys.exit()
-
-## scope filter
-
-#########################################
-######## prob calculation option ########
-#########################################
-    homozygous_collector=0
-    heterozygous_collector=0
-
-
-    alist=[multinomial_prob(i_major_allele,x,i_motif,errormodeldatabase)for x in i_read]
-    blist=[multinomial_prob(i_major2_allele,x,i_motif,errormodeldatabase)for x in i_read]
-    clist=[multinomial_prob(i_major3_allele,x,i_motif,errormodeldatabase)for x in i_read]
-
-    ablist=averagelist(alist,blist,EXPECTEDLEVELOFMINOR)
-    bclist=averagelist(blist,clist,EXPECTEDLEVELOFMINOR)
-    aclist=averagelist(alist,clist,EXPECTEDLEVELOFMINOR)
-
-    #print alist,blist,clist
-    majora=sum([math.log(i,10) for i in alist])
-    majorb=sum([math.log(i,10) for i in blist])
-    majorc=sum([math.log(i,10) for i in clist])
-    homozygous_collector=max(majora,majorb,majorc)
-
-    homomajor1=max([(majora,i_major_allele),(majorb,i_major2_allele),(majorc,i_major3_allele)])[1]
-    homomajordict={i_major_allele:majora,i_major2_allele:majorb,i_major3_allele:majorc}
-
-    majorab=sum([math.log(i,10) for i in ablist])
-    majorbc=sum([math.log(i,10) for i in bclist])
-    majorac=sum([math.log(i,10) for i in aclist])
-    heterozygous_collector=max(majorab,majorbc,majorac)
-    bothheteromajor=max([(majorab,(i_major_allele,i_major2_allele)),(majorbc,(i_major2_allele,i_major3_allele)),(majorac,(i_major_allele,i_major3_allele))])[1]
-    ##heteromajor1=max(bothheteromajor)
-    ##heteromajor2=min(bothheteromajor)
-    pre_heteromajor1=bothheteromajor[0]
-    pre_heteromajor2=bothheteromajor[1]
-    heteromajor1=max((homomajordict[pre_heteromajor1],pre_heteromajor1),(homomajordict[pre_heteromajor2],pre_heteromajor2))[1]
-    heteromajor2=min((homomajordict[pre_heteromajor1],pre_heteromajor1),(homomajordict[pre_heteromajor2],pre_heteromajor2))[1]
-
-    logratio_homo=homozygous_collector-heterozygous_collector
-
-    if logratio_homo>0:
-        fdout.writelines(line.strip()+'\t'+'\t'.join(['homo',str(logratio_homo),str(homomajor1),str(heteromajor1),str(heteromajor2)])+'\n')
-    elif logratio_homo<0:
-        fdout.writelines(line.strip()+'\t'+'\t'.join(['hetero',str(logratio_homo),str(homomajor1),str(heteromajor1),str(heteromajor2)])+'\n')
-fd.close()
-fdout.close()
--- a/test-data/GenotypingSTR.xml	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-<tool id="GenotypeSTR" name="Correct genotype for microsatellite errors" version="2.0.0">
-  <description> during sequencing and library prep </description>
-  <command interpreter="python2.7">GenotypeTRcorrection.py  $microsat_raw $microsat_error_profile $microsat_corrected  $expectedminorallele </command>
-
-  <inputs>
-    <param name="microsat_raw" type="data" label="Select microsatellite length profile that need to refine genotyping" />
-    <param name="microsat_error_profile" type="data" label="Select microsatellite error profile that correspond to this dataset" />
-	<param name="expectedminorallele" type="float" value="0.5" label="Expected contribution of minor allele when present (0.5 for genotyping)" />
-
-  </inputs>
-  <outputs>
-    <data name="microsat_corrected" format="tabular" />
-  </outputs>
-  <tests>
-    <!-- Test data with valid values -->
-    <test>
-      <param name="microsat_raw" value="sampleTRprofile_C.txt"/>
-      <param name="microsat_error_profile" value="PCRinclude.allrate.bymajorallele"/>
-      <param name="expectedminorallele" value="0.5"/>
-      <output name="microsat_corrected" file="sampleTRgenotypingcorrection"/>
-    </test>
-
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**What it does**
-
-- This tool will correct for microsatellite sequencing and library preparation errors using error rates estimated from hemizygous male X chromosome or any rates provided by user. The read profile for each locus will be processed independently.
-- First, this tool will find the three most common read lengths in the input read length profile. If the read profile has only one TR length, a length one motif longer than the observed length will be used as the second most common read length.
-- Second, it will calculate the probability of the three homozygous forms and use the form which gives the highest probability. The same goes for the heterozygous forms.
-- Third, this tool will calculate the base-10 logarithm of (the probability of homozygous/the probability of heterozygous). If this value is more than 0, it will predict this locus to be homozygous. If this value is less than 0, it will predict this locus to be heterozygous. If this value is 0, the read profile at this locus will be discarded.
-
-**Citation**
-
-When you use this tool, please cite **Fungtammasan A, Ananda G, Hile SE, Su MS, Sun C, Harris R, Medvedev P, Eckert K, Makova KD. 2015. Accurate Typing of Short Tandem Repeats from Genome-wide Sequencing Data and its Applications, Genome Research**
-
-**Input**
-
-- The input files need to contain at least three columns.
-- Column 1 = location of microsatellite locus.
-- Column 2 = length profile (length of microsatellite in each read that mapped to this location in comma separated format).
-- Column 3 = motif of microsatellite in this locus. The input file can contain more than three columns.
-
-**Output**
-
-The output will contain the original three (or more) columns of the input. In addition, it will have the following columns.
-
-- Additional column 1 = homozygous/heterozygous label.
-- Additional column 2 = base-10 logarithm of (the probability of homozygous/the probability of heterozygous)
-- Additional column 3 = Allele for most probable homozygous form.
-- Additional column 4 = Allele 1 for most probable heterozygous form.
-- Additional column 5 = Allele 2 for most probable heterozygous form.
-
-**Example**
-
-- Suppose that we sequence one locus of microsatellite with NGS. This locus has **A** motif and the following length (bp) profile. ::
-
-	chr1_100_106	5, 6, 6, 6, 6, 7, 7, 8, 8	A
-
-- We want to figure out if this locus is homozygous or heterozygous and the corresponding allele(s). Therefore, we use this tool to refine the genotype.
-- This tool will calculate the probability of homozygous A6A6, A7A7, and A8A8 to generate the observed length profile. Among these, A7A7 has the highest probability. Therefore, we use this form as the representative for homozygous.
-- Then, this tool will calculate the probability of heterozygous A6A7, A7A8, and A6A8 to generate the observed length profile. Among these, A6A8 has the highest probability. Therefore, we use this form as the representative for heterozygous.
-- A6A8 has a higher probability than A7A7. Therefore, the program will report that this locus is a heterozygous locus. ::
-
-	chr1	5,6,6,6,6,7,7,8,8	A	hetero	-14.8744881854	7	6	8
-
-
-</help>
-</tool>
--- a/test-data/PEsortedSAM2readprofile.py	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-from galaxy import eggs
-import pkg_resources
-pkg_resources.require( "bx-python" )
-import bx.seq.twobit
-
-##output columns: read_name chr prefix_start    prefix_end  TR_start    TR_end  suffix_start    suffix_end  TR_length   TR_sequence
-
-samf = open(sys.argv[1],'r') #assumes sam file is sorted by readname
-seq_path = sys.argv[2] #Path to the reference genome in 2bit format
-
-##maxTRlength=int(sys.argv[4])
-##maxoriginalreadlength=int(sys.argv[5])
-maxTRlength=int(sys.argv[3])
-maxoriginalreadlength=int(sys.argv[4])
-outfile=sys.argv[5]
-fout = open(outfile,'w')
-
-twobitfile = bx.seq.twobit.TwoBitFile( file( seq_path ) )
-
-skipped=0
-while True:
-    read = samf.readline().strip()
-    if not(read): #EOF reached
-        break
-    if read[0] == "@":
-        #print read
-        continue
-    mate = samf.readline().strip()
-    if not(mate): #EOF reached
-        break
-    read_elems = read.split()
-    mate_elems = mate.split()
-    read_name = read_elems[0].strip()
-    mate_name = mate_elems[0].strip()
-    while True:
-        if read_name == mate_name:
-            break
-        elif read_name != mate_name:
-            #print >>sys.stderr, "Input SAM file doesn't seem to be sorted by readname. Please sort and retry."
-            #break
-            skipped += 1
-            read = mate
-            read_elems = mate_elems
-            mate = samf.readline().strip()
-            read_name = read_elems[0].strip()
-            mate_name = mate_elems[0].strip()
-            if not(mate): #EOF reached
-                break
-            mate_elems = mate.split()
-    #extract XT:A tag
-    #for e in  read_elems:
-    #    if e.startswith('XT:A'):
-    #        read_xt = e
-    #for e in  mate_elems:
-    #    if e.startswith('XT:A'):
-    #        mate_xt = e
-    #if 'XT:A:U' not in read_elems or 'XT:A:U' not in mate_elems:   #both read and it's mate need to be mapped uniquely
-    #    continue
-    read_chr = read_elems[2]
-    read_start = int(read_elems[3])
-    read_cigar = read_elems[5]
-    if len(read_cigar.split('M')) != 2:     #we want perfect matches only..cigar= <someInt>M
-        continue
-    read_len = int(read_cigar.split('M')[0])
-    mate_chr = mate_elems[2]
-    mate_start = int(mate_elems[3])
-    mate_cigar = mate_elems[5]
-    if len(mate_cigar.split('M')) != 2:     #we want perfect matches only..cigar= <someInt>M
-        continue
-    mate_len = int(mate_cigar.split('M')[0])
-    if read_chr != mate_chr:            # check that they were mapped to the same chromosome
-        continue
-    if abs(read_start - mate_start) > (maxoriginalreadlength+maxTRlength):
-        continue
-    if read_start < mate_start:
-        pre_s = read_start-1
-        pre_e = read_start-1+read_len
-        tr_s = read_start-1+read_len
-        tr_e = mate_start-1
-        suf_s = mate_start-1
-        suf_e = mate_start-1+mate_len
-    else:
-        pre_s = mate_start-1
-        pre_e = mate_start-1+mate_len
-        tr_s = mate_start-1+mate_len
-        tr_e = read_start-1
-        suf_s = read_start-1
-        suf_e = read_start-1+read_len
-    tr_len = abs(tr_e - tr_s)
-    if tr_len > maxTRlength:
-        continue
-    if pre_e >= suf_s:  #overlapping prefix and suffix
-        continue
-    tr_ref_seq = twobitfile[read_chr][tr_s:tr_e]
-    ##print >>fout, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" %(read_name,read_chr,pre_s,pre_e,tr_s,tr_e,suf_s,suf_e,tr_len,tr_ref_seq)
-    fout.writelines('\t'.join(map(str,[read_name,read_chr,pre_s,pre_e,tr_s,tr_e,suf_s,suf_e,tr_len,tr_ref_seq]))+'\n')
-
-print  "Skipped %d unpaired reads" %(skipped)
--- a/test-data/PEsortedSAM2readprofile.xml	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-<tool id="PEsortedSAM2readprofile" name="Combine mapped flanked bases" version="1.0.0">
-  <description> from SAM file sorted by readname  </description>
-  <command interpreter="python2.7">PEsortedSAM2readprofile.py  $flankedbasesSAM $twobitref $maxTRlength $maxoriginalreadlength $output </command>
-
-  <inputs>
-    <param name="flankedbasesSAM" type="data" format="sam" label="Select sorted SAM file (by readname) of flanked bases" />
-    <param name="twobitref" type="data" label="Select twobit file reference genome" />
-	<param name="maxTRlength" type="integer" value="100" label="Maximum expected microsatellite length (bp)" />
-	<param name="maxoriginalreadlength" type="integer" value="101" label="Maximum original read length" />
-
-  </inputs>
-  <outputs>
-    <data name="output" format="tabular" />
-  </outputs>
-  <tests>
-    <!-- Test data with valid values -->
-    <test>
-      <param name="flankedbasesSAM" value="samplesortedPESAM_C.sam"/>
-      <param name="twobitref" value="shifted.2bit"/>
-      <param name="maxTRlength" value="100"/>
-      <param name="maxoriginalreadlength" value="250"/>
-      <output name="output" file="samplePESAM_2_profile_C.txt"/>
-    </test>
-
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**What it does**
-
-- This tool will take a SAM file sorted by read name, remove unpaired reads, and report the microsatellite sequences in the reference genome that correspond to the space between paired-end reads. The start and stop coordinates of the left and right flanking regions of each microsatellite, and of the microsatellite itself as inferred from the paired-end reads, will also be reported.
-- These reference microsatellites can be used to filter out reads whose microsatellites do not agree with the microsatellites in the reference at the location where the reads mapped.
-
-**Citation**
-
-When you use this tool, please cite **Fungtammasan A, Ananda G, Hile SE, Su MS, Sun C, Harris R, Medvedev P, Eckert K, Makova KD. 2015. Accurate Typing of Short Tandem Repeats from Genome-wide Sequencing Data and its Applications, Genome Research**
-
-**Input**
-
-- Sorted SAM files by read name
-
-**Output**
-
-The output will combine each pair of paired input lines into one line. The output format is as follows.
-
-- Column 1 = read name
-- Column 2 = chromosome
-- Column 3 = left flanking region start
-- Column 4 = left flanking region stop
-- Column 5 = microsatellite start
-- Column 6 = microsatellite stop
-- Column 7 = right flanking region start
-- Column 8 = right flanking region stop
-- Column 9 = microsatellite length in reference
-- Column 10= microsatellite sequence in reference
-
-
-
-</help>
-</tool>
--- a/test-data/README.md	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-# STR-FM
Binary file test-data/STR-FM/.DS_Store has changed
--- a/test-data/STR-FM/.git/COMMIT_EDITMSG	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-initial commit
-
-# Please enter the commit message for your changes. Lines starting
-# with '#' will be ignored, and an empty message aborts the commit.
-#
-# Committer: Arkarachai Fungtammasan <arkarachaifungtammasan@Arkarachai-Fungtammasans-MacBook-Pro.local>
-#
-# On branch master
-# Changes to be committed:
-#   (use "git reset HEAD <file>..." to unstage)
-#
-#	new file:   GenotypeTRcorrection.py
-#	new file:   GenotypingSTR.xml
-#	new file:   PEsortedSAM2readprofile.py
-#	new file:   PEsortedSAM2readprofile.xml
-#	new file:   changespacetounderscore_readname.py
-#	new file:   combinedprobforallelecombination.py
-#	new file:   combineprobforallelecombination.xml
-#	new file:   fetchflank.xml
-#	new file:   heteroprob.py
-#	new file:   microsatcompat.py
-#	new file:   microsatcompat.xml
-#	new file:   microsatellite.py
-#	new file:   microsatellite.xml
-#	new file:   microsatpurity.py
-#	new file:   microsatpurity.xml
-#	new file:   pair_fetch_DNA_ff.py
-#	new file:   probvalueforhetero.xml
-#	new file:   profilegenerator.py
-#	new file:   profilegenerator.xml
-#	new file:   readdepth2sequencingdepth.xml
-#	new file:   sequencingdepthconversion_G.py
-#	new file:   space2underscore_readname.xml
-#	new file:   test-data/.DS_Store
-#	new file:   test-data/C_sample_fastq
-#	new file:   test-data/C_sample_snoope
-#	new file:   test-data/PCRinclude.allrate.bymajorallele
-#	new file:   test-data/combineprob_out.txt
-#	new file:   test-data/microsatcompat_in.txt
-#	new file:   test-data/microsatcompat_out.txt
-#	new file:   test-data/microsatellite_flanking_L.fastq
-#	new file:   test-data/microsatellite_flanking_R.fastq
-#	new file:   test-data/microsatpurity_in.txt
-#	new file:   test-data/microsatpurity_out.txt
-#	new file:   test-data/nice1tab.py
-#	new file:   test-data/probvalueforhetero_in.txt
-#	new file:   test-data/probvalueforhetero_out.txt
-#	new file:   test-data/profilegenerator_in.txt
-#	new file:   test-data/profilegenerator_out.txt
-#	new file:   test-data/readdepth2seqdepth.out
-#	new file:   test-data/samplePESAM_2_profile_C.txt
-#	new file:   test-data/sampleTRgenotypingcorrection
-#	new file:   test-data/sampleTRprofile_C.txt
-#	new file:   test-data/samplefq.snoope
-#	new file:   test-data/samplefq.snoope.new
-#	new file:   test-data/sampleprofilegenerator_in
-#	new file:   test-data/sampleprofilegenerator_out
-#	new file:   test-data/samplesortedPESAM_C.sam
-#	new file:   test-data/shifted.2bit
-#
--- a/test-data/STR-FM/.git/FETCH_HEAD	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-cebc3ab80ab25aa2af4ae265bd89387c2225a708		branch 'master' of https://github.com/Arkarachai/STR-FM
--- a/test-data/STR-FM/.git/HEAD	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-ref: refs/heads/master
--- a/test-data/STR-FM/.git/ORIG_HEAD	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-d1b92cb33cf7d2942655e776f5499c5bbff18bde
--- a/test-data/STR-FM/.git/config	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-[core]
-	repositoryformatversion = 0
-	filemode = true
-	bare = false
-	logallrefupdates = true
-	ignorecase = true
-	precomposeunicode = false
-[remote "origin"]
-	url = https://github.com/Arkarachai/STR-FM.git
-	fetch = +refs/heads/*:refs/remotes/origin/*
-[branch "master"]
-	remote = origin
-	merge = refs/heads/master
--- a/test-data/STR-FM/.git/description	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-Unnamed repository; edit this file 'description' to name the repository.
--- a/test-data/STR-FM/.git/hooks/applypatch-msg.sample	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-#!/bin/sh
-#
-# An example hook script to check the commit log message taken by
-# applypatch from an e-mail message.
-#
-# The hook should exit with non-zero status after issuing an
-# appropriate message if it wants to stop the commit.  The hook is
-# allowed to edit the commit message file.
-#
-# To enable this hook, rename this file to "applypatch-msg".
-
-. git-sh-setup
-test -x "$GIT_DIR/hooks/commit-msg" &&
-	exec "$GIT_DIR/hooks/commit-msg" ${1+"$@"}
-:
--- a/test-data/STR-FM/.git/hooks/commit-msg.sample	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-#!/bin/sh
-#
-# An example hook script to check the commit log message.
-# Called by "git commit" with one argument, the name of the file
-# that has the commit message.  The hook should exit with non-zero
-# status after issuing an appropriate message if it wants to stop the
-# commit.  The hook is allowed to edit the commit message file.
-#
-# To enable this hook, rename this file to "commit-msg".
-
-# Uncomment the below to add a Signed-off-by line to the message.
-# Doing this in a hook is a bad idea in general, but the prepare-commit-msg
-# hook is more suited to it.
-#
-# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p')
-# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1"
-
-# This example catches duplicate Signed-off-by lines.
-
-test "" = "$(grep '^Signed-off-by: ' "$1" |
-	 sort | uniq -c | sed -e '/^[ 	]*1[ 	]/d')" || {
-	echo >&2 Duplicate Signed-off-by lines.
-	exit 1
-}
--- a/test-data/STR-FM/.git/hooks/post-update.sample	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-#!/bin/sh
-#
-# An example hook script to prepare a packed repository for use over
-# dumb transports.
-#
-# To enable this hook, rename this file to "post-update".
-
-exec git update-server-info
--- a/test-data/STR-FM/.git/hooks/pre-applypatch.sample	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-#!/bin/sh
-#
-# An example hook script to verify what is about to be committed
-# by applypatch from an e-mail message.
-#
-# The hook should exit with non-zero status after issuing an
-# appropriate message if it wants to stop the commit.
-#
-# To enable this hook, rename this file to "pre-applypatch".
-
-. git-sh-setup
-test -x "$GIT_DIR/hooks/pre-commit" &&
-	exec "$GIT_DIR/hooks/pre-commit" ${1+"$@"}
-:
--- a/test-data/STR-FM/.git/hooks/pre-commit.sample	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-#!/bin/sh
-#
-# An example hook script to verify what is about to be committed.
-# Called by "git commit" with no arguments.  The hook should
-# exit with non-zero status after issuing an appropriate message if
-# it wants to stop the commit.
-#
-# To enable this hook, rename this file to "pre-commit".
-
-if git rev-parse --verify HEAD >/dev/null 2>&1
-then
-	against=HEAD
-else
-	# Initial commit: diff against an empty tree object
-	against=4b825dc642cb6eb9a060e54bf8d69288fbee4904
-fi
-
-# If you want to allow non-ascii filenames set this variable to true.
-allownonascii=$(git config hooks.allownonascii)
-
-# Redirect output to stderr.
-exec 1>&2
-
-# Cross platform projects tend to avoid non-ascii filenames; prevent
-# them from being added to the repository. We exploit the fact that the
-# printable range starts at the space character and ends with tilde.
-if [ "$allownonascii" != "true" ] &&
-	# Note that the use of brackets around a tr range is ok here, (it's
-	# even required, for portability to Solaris 10's /usr/bin/tr), since
-	# the square bracket bytes happen to fall in the designated range.
-	test $(git diff --cached --name-only --diff-filter=A -z $against |
-	  LC_ALL=C tr -d '[ -~]\0' | wc -c) != 0
-then
-	echo "Error: Attempt to add a non-ascii file name."
-	echo
-	echo "This can cause problems if you want to work"
-	echo "with people on other platforms."
-	echo
-	echo "To be portable it is advisable to rename the file ..."
-	echo
-	echo "If you know what you are doing you can disable this"
-	echo "check using:"
-	echo
-	echo "  git config hooks.allownonascii true"
-	echo
-	exit 1
-fi
-
-# If there are whitespace errors, print the offending file names and fail.
-exec git diff-index --check --cached $against --
--- a/test-data/STR-FM/.git/hooks/pre-push.sample	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-#!/bin/sh
-
-# An example hook script to verify what is about to be pushed.  Called by "git
-# push" after it has checked the remote status, but before anything has been
-# pushed.  If this script exits with a non-zero status nothing will be pushed.
-#
-# This hook is called with the following parameters:
-#
-# $1 -- Name of the remote to which the push is being done
-# $2 -- URL to which the push is being done
-#
-# If pushing without using a named remote those arguments will be equal.
-#
-# Information about the commits which are being pushed is supplied as lines to
-# the standard input in the form:
-#
-#   <local ref> <local sha1> <remote ref> <remote sha1>
-#
-# This sample shows how to prevent push of commits where the log message starts
-# with "WIP" (work in progress).
-
-remote="$1"
-url="$2"
-
-z40=0000000000000000000000000000000000000000
-
-IFS=' '
-while read local_ref local_sha remote_ref remote_sha
-do
-	if [ "$local_sha" = $z40 ]
-	then
-		# Handle delete
-	else
-		if [ "$remote_sha" = $z40 ]
-		then
-			# New branch, examine all commits
-			range="$local_sha"
-		else
-			# Update to existing branch, examine new commits
-			range="$remote_sha..$local_sha"
-		fi
-
-		# Check for WIP commit
-		commit=`git rev-list -n 1 --grep '^WIP' "$range"`
-		if [ -n "$commit" ]
-		then
-			echo "Found WIP commit in $local_ref, not pushing"
-			exit 1
-		fi
-	fi
-done
-
-exit 0
--- a/test-data/STR-FM/.git/hooks/pre-rebase.sample	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,169 +0,0 @@
-#!/bin/sh
-#
-# Copyright (c) 2006, 2008 Junio C Hamano
-#
-# The "pre-rebase" hook is run just before "git rebase" starts doing
-# its job, and can prevent the command from running by exiting with
-# non-zero status.
-#
-# The hook is called with the following parameters:
-#
-# $1 -- the upstream the series was forked from.
-# $2 -- the branch being rebased (or empty when rebasing the current branch).
-#
-# This sample shows how to prevent topic branches that are already
-# merged to 'next' branch from getting rebased, because allowing it
-# would result in rebasing already published history.
-
-publish=next
-basebranch="$1"
-if test "$#" = 2
-then
-	topic="refs/heads/$2"
-else
-	topic=`git symbolic-ref HEAD` ||
-	exit 0 ;# we do not interrupt rebasing detached HEAD
-fi
-
-case "$topic" in
-refs/heads/??/*)
-	;;
-*)
-	exit 0 ;# we do not interrupt others.
-	;;
-esac
-
-# Now we are dealing with a topic branch being rebased
-# on top of master.  Is it OK to rebase it?
-
-# Does the topic really exist?
-git show-ref -q "$topic" || {
-	echo >&2 "No such branch $topic"
-	exit 1
-}
-
-# Is topic fully merged to master?
-not_in_master=`git rev-list --pretty=oneline ^master "$topic"`
-if test -z "$not_in_master"
-then
-	echo >&2 "$topic is fully merged to master; better remove it."
-	exit 1 ;# we could allow it, but there is no point.
-fi
-
-# Is topic ever merged to next?  If so you should not be rebasing it.
-only_next_1=`git rev-list ^master "^$topic" ${publish} | sort`
-only_next_2=`git rev-list ^master           ${publish} | sort`
-if test "$only_next_1" = "$only_next_2"
-then
-	not_in_topic=`git rev-list "^$topic" master`
-	if test -z "$not_in_topic"
-	then
-		echo >&2 "$topic is already up-to-date with master"
-		exit 1 ;# we could allow it, but there is no point.
-	else
-		exit 0
-	fi
-else
-	not_in_next=`git rev-list --pretty=oneline ^${publish} "$topic"`
-	/usr/bin/perl -e '
-		my $topic = $ARGV[0];
-		my $msg = "* $topic has commits already merged to public branch:\n";
-		my (%not_in_next) = map {
-			/^([0-9a-f]+) /;
-			($1 => 1);
-		} split(/\n/, $ARGV[1]);
-		for my $elem (map {
-				/^([0-9a-f]+) (.*)$/;
-				[$1 => $2];
-			} split(/\n/, $ARGV[2])) {
-			if (!exists $not_in_next{$elem->[0]}) {
-				if ($msg) {
-					print STDERR $msg;
-					undef $msg;
-				}
-				print STDERR " $elem->[1]\n";
-			}
-		}
-	' "$topic" "$not_in_next" "$not_in_master"
-	exit 1
-fi
-
-exit 0
-
-################################################################
-
-This sample hook safeguards topic branches that have been
-published from being rewound.
-
-The workflow assumed here is:
-
- * Once a topic branch forks from "master", "master" is never
-   merged into it again (either directly or indirectly).
-
- * Once a topic branch is fully cooked and merged into "master",
-   it is deleted.  If you need to build on top of it to correct
-   earlier mistakes, a new topic branch is created by forking at
-   the tip of the "master".  This is not strictly necessary, but
-   it makes it easier to keep your history simple.
-
- * Whenever you need to test or publish your changes to topic
-   branches, merge them into "next" branch.
-
-The script, being an example, hardcodes the publish branch name
-to be "next", but it is trivial to make it configurable via
-$GIT_DIR/config mechanism.
-
-With this workflow, you would want to know:
-
-(1) ... if a topic branch has ever been merged to "next".  Young
-    topic branches can have stupid mistakes you would rather
-    clean up before publishing, and things that have not been
-    merged into other branches can be easily rebased without
-    affecting other people.  But once it is published, you would
-    not want to rewind it.
-
-(2) ... if a topic branch has been fully merged to "master".
-    Then you can delete it.  More importantly, you should not
-    build on top of it -- other people may already want to
-    change things related to the topic as patches against your
-    "master", so if you need further changes, it is better to
-    fork the topic (perhaps with the same name) afresh from the
-    tip of "master".
-
-Let's look at this example:
-
-		   o---o---o---o---o---o---o---o---o---o "next"
-		  /       /           /           /
-		 /   a---a---b A     /           /
-		/   /               /           /
-	       /   /   c---c---c---c B         /
-	      /   /   /             \         /
-	     /   /   /   b---b C     \       /
-	    /   /   /   /             \     /
-    ---o---o---o---o---o---o---o---o---o---o---o "master"
-
-
-A, B and C are topic branches.
-
- * A has one fix since it was merged up to "next".
-
- * B has finished.  It has been fully merged up to "master" and "next",
-   and is ready to be deleted.
-
- * C has not merged to "next" at all.
-
-We would want to allow C to be rebased, refuse A, and encourage
-B to be deleted.
-
-To compute (1):
-
-	git rev-list ^master ^topic next
-	git rev-list ^master        next
-
-	if these match, topic has not merged in next at all.
-
-To compute (2):
-
-	git rev-list master..topic
-
-	if this is empty, it is fully merged to "master".
--- a/test-data/STR-FM/.git/hooks/prepare-commit-msg.sample	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-#!/bin/sh
-#
-# An example hook script to prepare the commit log message.
-# Called by "git commit" with the name of the file that has the
-# commit message, followed by the description of the commit
-# message's source.  The hook's purpose is to edit the commit
-# message file.  If the hook fails with a non-zero status,
-# the commit is aborted.
-#
-# To enable this hook, rename this file to "prepare-commit-msg".
-
-# This hook includes three examples.  The first comments out the
-# "Conflicts:" part of a merge commit.
-#
-# The second includes the output of "git diff --name-status -r"
-# into the message, just before the "git status" output.  It is
-# commented because it doesn't cope with --amend or with squashed
-# commits.
-#
-# The third example adds a Signed-off-by line to the message, that can
-# still be edited.  This is rarely a good idea.
-
-case "$2,$3" in
-  merge,)
-    /usr/bin/perl -i.bak -ne 's/^/# /, s/^# #/#/ if /^Conflicts/ .. /#/; print' "$1" ;;
-
-# ,|template,)
-#   /usr/bin/perl -i.bak -pe '
-#      print "\n" . `git diff --cached --name-status -r`
-#	 if /^#/ && $first++ == 0' "$1" ;;
-
-  *) ;;
-esac
-
-# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p')
-# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1"
--- a/test-data/STR-FM/.git/hooks/update.sample	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,128 +0,0 @@
-#!/bin/sh
-#
-# An example hook script to blocks unannotated tags from entering.
-# Called by "git receive-pack" with arguments: refname sha1-old sha1-new
-#
-# To enable this hook, rename this file to "update".
-#
-# Config
-# ------
-# hooks.allowunannotated
-#   This boolean sets whether unannotated tags will be allowed into the
-#   repository.  By default they won't be.
-# hooks.allowdeletetag
-#   This boolean sets whether deleting tags will be allowed in the
-#   repository.  By default they won't be.
-# hooks.allowmodifytag
-#   This boolean sets whether a tag may be modified after creation. By default
-#   it won't be.
-# hooks.allowdeletebranch
-#   This boolean sets whether deleting branches will be allowed in the
-#   repository.  By default they won't be.
-# hooks.denycreatebranch
-#   This boolean sets whether remotely creating branches will be denied
-#   in the repository.  By default this is allowed.
-#
-
-# --- Command line
-refname="$1"
-oldrev="$2"
-newrev="$3"
-
-# --- Safety check
-if [ -z "$GIT_DIR" ]; then
-	echo "Don't run this script from the command line." >&2
-	echo " (if you want, you could supply GIT_DIR then run" >&2
-	echo "  $0 <ref> <oldrev> <newrev>)" >&2
-	exit 1
-fi
-
-if [ -z "$refname" -o -z "$oldrev" -o -z "$newrev" ]; then
-	echo "usage: $0 <ref> <oldrev> <newrev>" >&2
-	exit 1
-fi
-
-# --- Config
-allowunannotated=$(git config --bool hooks.allowunannotated)
-allowdeletebranch=$(git config --bool hooks.allowdeletebranch)
-denycreatebranch=$(git config --bool hooks.denycreatebranch)
-allowdeletetag=$(git config --bool hooks.allowdeletetag)
-allowmodifytag=$(git config --bool hooks.allowmodifytag)
-
-# check for no description
-projectdesc=$(sed -e '1q' "$GIT_DIR/description")
-case "$projectdesc" in
-"Unnamed repository"* | "")
-	echo "*** Project description file hasn't been set" >&2
-	exit 1
-	;;
-esac
-
-# --- Check types
-# if $newrev is 0000...0000, it's a commit to delete a ref.
-zero="0000000000000000000000000000000000000000"
-if [ "$newrev" = "$zero" ]; then
-	newrev_type=delete
-else
-	newrev_type=$(git cat-file -t $newrev)
-fi
-
-case "$refname","$newrev_type" in
-	refs/tags/*,commit)
-		# un-annotated tag
-		short_refname=${refname##refs/tags/}
-		if [ "$allowunannotated" != "true" ]; then
-			echo "*** The un-annotated tag, $short_refname, is not allowed in this repository" >&2
-			echo "*** Use 'git tag [ -a | -s ]' for tags you want to propagate." >&2
-			exit 1
-		fi
-		;;
-	refs/tags/*,delete)
-		# delete tag
-		if [ "$allowdeletetag" != "true" ]; then
-			echo "*** Deleting a tag is not allowed in this repository" >&2
-			exit 1
-		fi
-		;;
-	refs/tags/*,tag)
-		# annotated tag
-		if [ "$allowmodifytag" != "true" ] && git rev-parse $refname > /dev/null 2>&1
-		then
-			echo "*** Tag '$refname' already exists." >&2
-			echo "*** Modifying a tag is not allowed in this repository." >&2
-			exit 1
-		fi
-		;;
-	refs/heads/*,commit)
-		# branch
-		if [ "$oldrev" = "$zero" -a "$denycreatebranch" = "true" ]; then
-			echo "*** Creating a branch is not allowed in this repository" >&2
-			exit 1
-		fi
-		;;
-	refs/heads/*,delete)
-		# delete branch
-		if [ "$allowdeletebranch" != "true" ]; then
-			echo "*** Deleting a branch is not allowed in this repository" >&2
-			exit 1
-		fi
-		;;
-	refs/remotes/*,commit)
-		# tracking branch
-		;;
-	refs/remotes/*,delete)
-		# delete tracking branch
-		if [ "$allowdeletebranch" != "true" ]; then
-			echo "*** Deleting a tracking branch is not allowed in this repository" >&2
-			exit 1
-		fi
-		;;
-	*)
-		# Anything else (is there anything else?)
-		echo "*** Update hook: unknown type of update to ref $refname of type $newrev_type" >&2
-		exit 1
-		;;
-esac
-
-# --- Finished
-exit 0
Binary file test-data/STR-FM/.git/index has changed
--- a/test-data/STR-FM/.git/info/exclude	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-# git ls-files --others --exclude-from=.git/info/exclude
-# Lines that start with '#' are comments.
-# For a project mostly in C, the following would be a good set of
-# exclude patterns (uncomment them if you want to use them):
-# *.[oa]
-# *~
--- a/test-data/STR-FM/.git/logs/HEAD	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-0000000000000000000000000000000000000000 3b65f228ef1817a991dcd2a7f0bc35eeabf56cd9 Arkarachai Fungtammasan <arkarachaifungtammasan@Arkarachai-Fungtammasans-MacBook-Pro.local> 1427821659 -0400	clone: from https://github.com/Arkarachai/STR-FM.git
-3b65f228ef1817a991dcd2a7f0bc35eeabf56cd9 d1b92cb33cf7d2942655e776f5499c5bbff18bde Arkarachai Fungtammasan <arkarachaifungtammasan@Arkarachai-Fungtammasans-MacBook-Pro.local> 1427821928 -0400	commit: initial commit
-d1b92cb33cf7d2942655e776f5499c5bbff18bde cebc3ab80ab25aa2af4ae265bd89387c2225a708 Arkarachai Fungtammasan <arkarachaifungtammasan@Arkarachai-Fungtammasans-MacBook-Pro.local> 1427900437 -0400	pull: Fast-forward
--- a/test-data/STR-FM/.git/logs/refs/heads/master	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-0000000000000000000000000000000000000000 3b65f228ef1817a991dcd2a7f0bc35eeabf56cd9 Arkarachai Fungtammasan <arkarachaifungtammasan@Arkarachai-Fungtammasans-MacBook-Pro.local> 1427821659 -0400	clone: from https://github.com/Arkarachai/STR-FM.git
-3b65f228ef1817a991dcd2a7f0bc35eeabf56cd9 d1b92cb33cf7d2942655e776f5499c5bbff18bde Arkarachai Fungtammasan <arkarachaifungtammasan@Arkarachai-Fungtammasans-MacBook-Pro.local> 1427821928 -0400	commit: initial commit
-d1b92cb33cf7d2942655e776f5499c5bbff18bde cebc3ab80ab25aa2af4ae265bd89387c2225a708 Arkarachai Fungtammasan <arkarachaifungtammasan@Arkarachai-Fungtammasans-MacBook-Pro.local> 1427900437 -0400	pull: Fast-forward
--- a/test-data/STR-FM/.git/logs/refs/remotes/origin/HEAD	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-0000000000000000000000000000000000000000 3b65f228ef1817a991dcd2a7f0bc35eeabf56cd9 Arkarachai Fungtammasan <arkarachaifungtammasan@Arkarachai-Fungtammasans-MacBook-Pro.local> 1427821659 -0400	clone: from https://github.com/Arkarachai/STR-FM.git
--- a/test-data/STR-FM/.git/logs/refs/remotes/origin/master	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-3b65f228ef1817a991dcd2a7f0bc35eeabf56cd9 d1b92cb33cf7d2942655e776f5499c5bbff18bde Arkarachai Fungtammasan <arkarachaifungtammasan@Arkarachai-Fungtammasans-MacBook-Pro.local> 1427822061 -0400	update by push
-d1b92cb33cf7d2942655e776f5499c5bbff18bde cebc3ab80ab25aa2af4ae265bd89387c2225a708 Arkarachai Fungtammasan <arkarachaifungtammasan@Arkarachai-Fungtammasans-MacBook-Pro.local> 1427900437 -0400	pull: fast-forward
Binary file test-data/STR-FM/.git/objects/00/65e52bd133e27ebd75aa0d23697f7e1dc887cb has changed
Binary file test-data/STR-FM/.git/objects/00/9d1901c2a9f8ce435cda891df89b4e2d11e895 has changed
Binary file test-data/STR-FM/.git/objects/0c/532395e56679a7f181e9ff6e329bf88b55030d has changed
Binary file test-data/STR-FM/.git/objects/15/6730cc56c5c045e7f73a940d0da23aa5083add has changed
--- a/test-data/STR-FM/.git/objects/19/637cfe7d85e2a9b41e6272003dcca6e01d9b58	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-x���M�0�aΩ�
��cK+:���c6qG�|����ӟ{k�T��e�Z�4.%�Al�Z<fr�(o��`C�l��G=����i͑{�X�K0'��@)_s�C}�o�W���u<&��?|��e	��}6��[��Mi�"E�+X��Y����_Z��³�st����G<�����l�U�
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/1c/4b24809f0d53625b803af1d880f737aef0cb5f has changed
Binary file test-data/STR-FM/.git/objects/1f/006fe62353d7163b93e72d28b5c15df1ebf982 has changed
Binary file test-data/STR-FM/.git/objects/1f/235bb203a8f4961f62c75d77cd64b50848b1a3 has changed
Binary file test-data/STR-FM/.git/objects/21/e77deba49b6ba73853e753f679e832d70cbb73 has changed
Binary file test-data/STR-FM/.git/objects/23/36b40e39da86d2c5537e56e722e3bb2ef8dae1 has changed
Binary file test-data/STR-FM/.git/objects/26/987f13c24ba62274e026714f47e223412b5714 has changed
Binary file test-data/STR-FM/.git/objects/27/6728965898e68eea856d7384f3744b9417c070 has changed
Binary file test-data/STR-FM/.git/objects/2e/5efab77e486426b7703146615f5ac0c7ff363c has changed
Binary file test-data/STR-FM/.git/objects/38/071906fb65431e06428693afbfc71bade5ec99 has changed
Binary file test-data/STR-FM/.git/objects/3b/65f228ef1817a991dcd2a7f0bc35eeabf56cd9 has changed
Binary file test-data/STR-FM/.git/objects/3c/72d9651d5cd1ad49875fe7326a541e1916737f has changed
Binary file test-data/STR-FM/.git/objects/3e/350089364d34ede5dd5f5311551f26a11c1c39 has changed
--- a/test-data/STR-FM/.git/objects/44/5252a9df64ead9f59e0ebe4004b53091d614ad	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-x��KN�0@Q�]E6����g	!����I\ZA�*ϕX>e
L����ڦ=�1�#�Dc�Rh)�1�D�gN��a:xȮ&Uk)dɾZ	6��TO	Ҍ\�͞g�i�S�>�����e�ͼ���rk|��<�.��/k�������l��[���<���z���_�ޏ�*�me�;�%��ԛ��/TU�
\ No newline at end of file
--- a/test-data/STR-FM/.git/objects/45/945a50d69e1f140444ed42393e6b2b08429a30	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-x�MA�@��+�i0*]�Vv�n=C�$$�L4��`��4�43i��{IU�v�4�E�u�Ε�W��1)�'h�ye����Z)����a�<�0C��QJ�c"b���P<qܶ��|r���"�b��Q�ч�!���}ֵ��pi�޴��[�,;
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/45/c0d4d9bacbf7a9a39119ad31423d6e55540a99 has changed
--- a/test-data/STR-FM/.git/objects/4c/f8e3de5255184c12e076995a8bce030669a5f0	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-xM�1
-�0Fa�x
-��?M�J��A��G������L�t{�B��6*����1L�W���k$Y��Q�_�������
p(�(��7��g
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/4e/28ca0263259df475c9bed544b4e89709a1ad9d has changed
Binary file test-data/STR-FM/.git/objects/4f/bb7af3188879a4ac1012b6a61424b52854ccb1 has changed
Binary file test-data/STR-FM/.git/objects/50/97abc625c65fa11f10b487a7f66f8ac9367b0b has changed
Binary file test-data/STR-FM/.git/objects/52/5f703fc64ce9e2010ea74b42dcbe596aded3c0 has changed
Binary file test-data/STR-FM/.git/objects/52/ab2a0c6715c5c4536b514b47e0ff74d9eb2404 has changed
Binary file test-data/STR-FM/.git/objects/59/4efff9886e723fd607f8735f1cd2d46d8b84a1 has changed
Binary file test-data/STR-FM/.git/objects/5a/fbc673a9b7e0f3279225900b7516750c60a863 has changed
Binary file test-data/STR-FM/.git/objects/60/5088643574c402f0d98b5ad2831dfe4580bd41 has changed
--- a/test-data/STR-FM/.git/objects/62/9c7d519f699ff169c0574d47f18907fb537a40	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-x}RMO�0ݳ�H��(�{��n��e���L�Uɞ�����QE-9D��{�7�og��~��6�H�ޓ���ֿ!(c�SM@{E�W\::�h܃�c�I��=��μ��h~1\i�b�Fo�A����T�6�����18�l�r�}
ڕ�VDF�tc�I�l���ɐkټ�1��f�z���|�z��z���t��XY����y��Ud׃OI��Y����)�^�5読�f�Y��z{�Mֺ�s���^��njݕ�gz���
-^�!�<I��ZR���۰�v"g"�y��e��vb�1�8Z'��/����]��/�p�n:�Y!��93%t�2�������>Qԡ������CM�^�6ız����Bm�x�d.'2�y�y_��
-k�SW�9����ԓ;
-�7/�?e�&1�
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/64/004ca2ec103e185ebb71abc7ebad45ad8da295 has changed
Binary file test-data/STR-FM/.git/objects/66/0a528da974f41924da3279a17a654eacd8e19f has changed
Binary file test-data/STR-FM/.git/objects/67/eff41a50756d383233a2c06aa4876562dfe223 has changed
Binary file test-data/STR-FM/.git/objects/6c/5f01970c2bcf5cfa391c41f6e9fe79c9afa1e7 has changed
Binary file test-data/STR-FM/.git/objects/6d/9c8eedb886195285e7a3f805ff494e5fe5f374 has changed
Binary file test-data/STR-FM/.git/objects/6e/cb2682fd362b24ddbb479fce5682a4ef49234a has changed
--- a/test-data/STR-FM/.git/objects/71/4758ec266030e060e7bf7881c32167a3fbd5b5	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-xMO�
�0c-Wp@��b��+�0 !��@���7;ql�|�s�E���R�5kG*5`P%�xJ�]�4o��%p����X���y�8��HI���J5�1L��/��,?�A�Jw:Z#��a�?�Vd
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/73/cd9f1c6004a5d97926a236e0c42eebf4984dad has changed
Binary file test-data/STR-FM/.git/objects/77/324ab9d4e51c9fb4e3e88df6aae9f9b25206ed has changed
Binary file test-data/STR-FM/.git/objects/79/1e14959d2b0f0e2531fe801a34c06fe908bf32 has changed
Binary file test-data/STR-FM/.git/objects/7f/48062955b52948798594aac973f23c749979a8 has changed
Binary file test-data/STR-FM/.git/objects/82/ef3b9f212da4d35b867d6e2a152ac42f7e3ff0 has changed
--- a/test-data/STR-FM/.git/objects/84/c85bf7959dbcbae4ae3a4bb528f262adc1c352	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-x��
� �~�)�$������,?N�׷W�|����KG�q�2�W�NN^:��~�gV
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/86/d2c135b7d3998986c837d8b6f66973f2aa9f71 has changed
Binary file test-data/STR-FM/.git/objects/88/37d328adbc2637a71678090382aae604eef6ab has changed
Binary file test-data/STR-FM/.git/objects/8a/c88565d8e69ce00536510540ee9aa228d5d39d has changed
--- a/test-data/STR-FM/.git/objects/8a/df7c6fbfab0ca7297f7081c30ce8a0d91f8500	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-x��An� @Ѯs
-.0#c �RU��ޠphP�1�4�}s�n����ZU�sxё�!��z|�
-dΖ���	��RN������#��!,�G�؋$&W�%���'9t��|��V���ؾUZ��l���	�׮M���z{3�#��`.�������/M_�M4�V����U��v���S�
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/8c/668ef6ee5d9eacd62eed1d651c7c771d9207d5 has changed
--- a/test-data/STR-FM/.git/objects/8d/1196beb5d1e6185dfd5980832adc1b59923258	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-x��MN�0@a�=E.�(q�%�`�
X#�qi4��2����ؾŧG��&
-Bz�ɬ�%�K�)�
zJ��4��3�j�v9p�.��������
-�A�c�w9�%��I��lc��yǉ�aS��-�;>pW/���C�oې���F��*� �2x�����U�_��K��QQX�Fs<P.�@�j�M~���W%
\ No newline at end of file
--- a/test-data/STR-FM/.git/objects/8e/fad9a0398c76c27ed5a32d2823c2291a694461	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-xULA
-�0�_�M�&M�6Z�6M�s�{� ^���T)8����쎏��\h&3�$C����s�T%���t��x�+yVntnT)��.yy|��<r�Z�a���dh6%�q���N����T'J�`�H{�2����n�����/"�U����ֹ���`�mSW�͡Vk]KӜ�N���Iv
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/96/bef7c4ed4ab4d1f4d8d610f0e28fc2fe44d81f has changed
--- a/test-data/STR-FM/.git/objects/98/365f3b89ce6a5102ba19e722cc0fa3fadba996	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-xK;
-�0��Sh��ҥP�I�	���c	Iz��y���vX��Lb�?u5'����q������k؋����-��WI`j�~��X98�<�	�Q����lY�,O[⇰��<R,�
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/98/c5f2bb6708714fe4914c2cc8503a57e6231ee0 has changed
Binary file test-data/STR-FM/.git/objects/9e/bdf5b88ba4eb8b5516c300edbcb7fc81f1882d has changed
--- a/test-data/STR-FM/.git/objects/9f/02b33db90fbe53f6b1cb8e1624946b90c91336	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-xe�A� E��Sxhb��,�/��ח4v�����o�i���(Ȱ�"$��*���Y�T6���z�ܓ9&wT��WvT���$m��=���he�VS�2�p��0h
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/a3/952a3a70cde7d0f67dc810da0f0e4b15010d84 has changed
Binary file test-data/STR-FM/.git/objects/a4/88e9604e7379ec68377aad8da69d3198e8d6ea has changed
Binary file test-data/STR-FM/.git/objects/a5/03245a189f1705559c7a0542e0e60b505cb72d has changed
Binary file test-data/STR-FM/.git/objects/a5/15ea668b78dc84569274a8297e02a882b6b38b has changed
Binary file test-data/STR-FM/.git/objects/a7/be6e358d1016c259ee3a4b99f0acfd8c0096a8 has changed
Binary file test-data/STR-FM/.git/objects/ab/bd57d29285d2253408bf5dea4cffd6f9e8aca6 has changed
Binary file test-data/STR-FM/.git/objects/b0/5e976751d04a76c3b333ae9d152a71c4089b7f has changed
Binary file test-data/STR-FM/.git/objects/b1/2233971833efeaec764f1e797cf103a97166dd has changed
Binary file test-data/STR-FM/.git/objects/b3/c05bb3894d71a5c88e2c0eb062dc417cdb2cd3 has changed
Binary file test-data/STR-FM/.git/objects/b4/cf9ef6300314d493478c391b89b962cfd48f0c has changed
Binary file test-data/STR-FM/.git/objects/b7/a1e182da880e8ab9991819f5054b7c19fa718d has changed
Binary file test-data/STR-FM/.git/objects/ba/74660506d90c514ee1d32bf4eb5ecb0af14075 has changed
Binary file test-data/STR-FM/.git/objects/bb/60aa1c10e6b0b84b68039eff7a5f9e8ccb8dd8 has changed
Binary file test-data/STR-FM/.git/objects/c4/b1eccc8a4e7795d12c385b3b3df67a9d16751f has changed
Binary file test-data/STR-FM/.git/objects/c6/5837b5335b15de2371ea6a394b76851a1e416c has changed
--- a/test-data/STR-FM/.git/objects/c9/ba1d8a454cbc8fd3ebbf002e33ced5f51b363f	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-x]PAn� �5�����i{����Ī�N�0 X7���ۨ>�;��3�ɸ}����]@�#�t,O�����P�q�	WDݖ��
-�;3�]���3Hm�o�]E�#U0t�A�#}�U�m!�A�k�N�:^���k���
� �^fPD��BDo4rvĤ>Ӄ6��9e�_�Z����a+x���e%{�G|pj�qoV���|�F�|��o	��/���VM�o3�'.A#�_��ŧӖ/�Ŗm��鍋��X�����
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/ca/fed8793e168e785956ccbbe2a7852ae5d0ca77 has changed
Binary file test-data/STR-FM/.git/objects/cc/1e650dbc118a79699d69196d6a1862d0dd1275 has changed
Binary file test-data/STR-FM/.git/objects/ce/013d9a241a080e0eaeca400d1919baf1630cb1 has changed
--- a/test-data/STR-FM/.git/objects/ce/bc3ab80ab25aa2af4ae265bd89387c2225a708	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-x��KN�0Y�}F�O۱�l�h�1���s"����z���)ؐ�t����B�PAO��8y�X�:f��q���Cv�%�9���,~����˱`M.��XS�ħ�}�����e�
>��K�5~�/��H6��ڵ�v���^�x����#N�zU�o�>��*0�#���!?��O�5V&
\ No newline at end of file
--- a/test-data/STR-FM/.git/objects/cf/0fbf511a978a6292d3205bf82cd5f71f30dd70	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-x��AN�0@Q�=E.��q�:�F��p�&6�h�*�"�O���/�~�����_z1E�-f�ٍs�(O�lG��QI�*7ٻ	��Ҥ���	#)A��A�����0|��6�h��8-���s��\
-?y7���H��^x�n��7c=R��5��/��o��K�瑹�9Z�x;Ek[����l"}W�
\ No newline at end of file
--- a/test-data/STR-FM/.git/objects/d0/4a2cd3a9a020709cacd39e082f3159804e01b8	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-xe�A� D��S���tӳ� ���P@�~��U��d��M!NO=�	�R�������2Ǽ��;���)��V�$X��R�g�XO��!q���v�W��U
-c��m0�
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/d0/ea4935294ab6ea1fe192fa929c308422f2699b has changed
Binary file test-data/STR-FM/.git/objects/d1/07d24e434cae7bc804d9a96bf8de7b8a094547 has changed
Binary file test-data/STR-FM/.git/objects/d1/6c4dcdc10fe565a1753d6eca3e3043f659d746 has changed
--- a/test-data/STR-FM/.git/objects/d1/b92cb33cf7d2942655e776f5499c5bbff18bde	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-x��AN�0EY��@�=��c�B�;$�0����$�\�� ��'`���z�R��t;"=��j	8;��'!A�Du9\�&@���M��a�(̚����Y�CvIFR�i�%��Sm����XN\��m��m|�����w��?�����������Z��'��>�l���>������^��|�svE
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/d6/51c39b1765fef9046cb5efab6918cf52f4f7a8 has changed
Binary file test-data/STR-FM/.git/objects/d6/684d880aa770620623907bddf1e6e984aab45b has changed
Binary file test-data/STR-FM/.git/objects/dc/98cf2e45c7b7b0ad9077b3392c109e13b5db1c has changed
Binary file test-data/STR-FM/.git/objects/de/31e8f100b06c28c0aeda43b0b98af94a393779 has changed
Binary file test-data/STR-FM/.git/objects/e4/0412b7fab11a34ee539a0f62dd97c2506fd453 has changed
Binary file test-data/STR-FM/.git/objects/e4/80084541ff270bec24fdedbbfc92354734a380 has changed
Binary file test-data/STR-FM/.git/objects/e8/fc4566e4342552693ed4ab5f9907db2952ac72 has changed
--- a/test-data/STR-FM/.git/objects/ea/6162c79ad57153ed2a5961f77875f075a878d3	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-x��Kn�0��)x��ϔ��h7�A@Ktl4��z��Y�f�R-eS�c��&�`�1a�B��r�3��g��臃��
-�c�`�l�K2g
-̘�'ZHL�#�y�S����q����~W.�������~�Uo�K����'�~�O�C�������vdV���V���|�^���+<U9
\ No newline at end of file
Binary file test-data/STR-FM/.git/objects/f0/7fd90b9063f41cf2428844ca34fb9c2c0a956d has changed
Binary file test-data/STR-FM/.git/objects/f1/973b380e2d4bd1e8adbb352b53f06f576b3312 has changed
Binary file test-data/STR-FM/.git/objects/f3/2cfaaeb649eae94f69b3d2cd199e748bc2261b has changed
Binary file test-data/STR-FM/.git/objects/f3/c9246e5c4686a47e86f36f854691d5e99ef68d has changed
Binary file test-data/STR-FM/.git/objects/f4/6d272789e77b9cc9fc9b282e7153a890203416 has changed
Binary file test-data/STR-FM/.git/objects/f6/acfc018f5b2cd0fdf9b8b1e809af07cef5c98f has changed
Binary file test-data/STR-FM/.git/objects/f6/b8b5549bbef48e5d212b463b7c0d93617f21d9 has changed
Binary file test-data/STR-FM/.git/objects/f6/d325b93ac860b1a7228d7bb18af956b941c0a7 has changed
Binary file test-data/STR-FM/.git/objects/f6/db9f6752293744129977b97ff76beb250d5eeb has changed
Binary file test-data/STR-FM/.git/objects/fd/b47064525be57239d995de06c60f9301b759d4 has changed
--- a/test-data/STR-FM/.git/objects/ff/125a58870195e28b6927b4a492f1df3dd5c97d	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-x���m�0@{�Z E}(E�\�A`$:6ɮB�AW����+[k����C�D�y��P��%��1��>Q�,�n�yHWcstTf��� 竷���R8
-ؚ�!M|�
s_<�,����ߔ[�w�zx���˦����l��X��2��`z����K��^Y��Ze��}H/k������' X�
\ No newline at end of file
--- a/test-data/STR-FM/.git/packed-refs	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-# pack-refs with: peeled fully-peeled
-3b65f228ef1817a991dcd2a7f0bc35eeabf56cd9 refs/remotes/origin/master
--- a/test-data/STR-FM/.git/refs/heads/master	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-cebc3ab80ab25aa2af4ae265bd89387c2225a708
--- a/test-data/STR-FM/.git/refs/remotes/origin/HEAD	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-ref: refs/remotes/origin/master
--- a/test-data/STR-FM/.git/refs/remotes/origin/master	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-cebc3ab80ab25aa2af4ae265bd89387c2225a708
Binary file test-data/STR-FM/test-data/.DS_Store has changed
--- a/test-data/changespacetounderscore_readname.py	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-import sys
-fd=open(sys.argv[1])
-output=open(sys.argv[2],'w')
-columntochange=int(sys.argv[3])-1  # default is 6-1=5
-lines=fd.xreadlines()
-for line in lines:
-	temp=line.strip().split('\t')
-	temp=filter(None,temp)
-	temp2=temp[columntochange].replace(' ','_')
-	product=temp[:columntochange]
-	product.append(temp2)
-	product.extend(temp[columntochange+1:])
-	output.writelines('\t'.join(product)+'\n')
-fd.close()
-output.close()
\ No newline at end of file
--- a/test-data/combinedprobforallelecombination.py	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-import sys
-import collections
-import math
-SAMPLINGCOL=11
-ALLELE1COL=7
-ALLELE2COL=8
-SIGNCOL=4
-readprofileCOL=2
-motifCOL=3
-filaname=sys.argv[1]
-fd=open(filaname)
-lines=fd.readlines()
-binomialcombine=collections.defaultdict(list)
-for line in lines:
-    temp=line.strip().split('\t')
-    allelelist=[]
-    allelelist.append(int(temp[ALLELE1COL-1]))
-    allelelist.append(int(temp[ALLELE2COL-1]))
-    allelelist.sort()
-    #allelelist=map(str,allelelist)
-    alleleave=str(allelelist[0])+'_'+str(allelelist[1])
-    #alleleave=str(sum(allelelist)/2.0)
-    ##alleleave=str(allelelist[0])+'_'+str(allelelist[1])
-    totalcov=len(temp[readprofileCOL-1].split(','))
-    motif=temp[motifCOL-1]
-    samplingvalue=float(temp[SAMPLINGCOL-1])
-    SIGN=1
-    binomialcombine[(totalcov,alleleave,motif)].append(SIGN*samplingvalue)
-allkeys= binomialcombine.keys()
-allkeys.sort()
-##print allkeys
-print 'read_depth'+'\t'+'allele'+'\t'+'heterozygous_prob'+'\t'+'motif'
-for key in allkeys:
-    ##templist=[str(key[0]),key[1],str(sum(binomialcombine[key])),key[2],str(map(str,(binomialcombine[key])))]
-    templist=[str(key[0]),key[1],str(sum(binomialcombine[key])),key[2]]
-
-    print '\t'.join(templist)
-#print allkeys#,binomialcombine
-
-
-
--- a/test-data/combineprobforallelecombination.xml	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-<tool id="combineproballelecom" name="Combine probability to generate read profile " version="2.0.0">
-  <description>from the same allele combination</description>
-  <command interpreter="python2.7">combinedprobforallelecombination.py  $input > $output </command>
-
-  <inputs>
-    <param name="input" type="data" label="Select microsatellite length profile" />
-
-  </inputs>
-  <outputs>
-    <data name="output" format="tabular" />
-  </outputs>
-  <tests>
-    <!-- Test data with valid values -->
-    <test>
-      <param name="input" value="probvalueforhetero_out.txt"/>
-      <output name="output" file="combineprob_out.txt"/>
-    </test>
-
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**What it does**
-
-- This tool will combine the probabilities that an allele combination can generate any read profile in the input. This is the last step in calculating the probability of detecting heterozygosity for each allele combination and each depth.
-
-**Citation**
-
-When you use this tool, please cite **Fungtammasan A, Ananda G, Hile SE, Su MS, Sun C, Harris R, Medvedev P, Eckert K, Makova KD. 2015. Accurate Typing of Short Tandem Repeats from Genome-wide Sequencing Data and its Applications, Genome Research**
-
-**Input**
-
-The input format is the same as output from **Evaluate the probability of the allele combination to generate read profile** tool.
-
-- Column 1 = location of microsatellite locus.
-- Column 2 = length profile (length of microsatellite in each read that mapped to this location in comma separated format).
-- Column 3 = motif of microsatellite in this locus. The input file can contain more than three columns.
-- Column 4 = homozygous/heterozygous label.
-- Column 5 = log base 10 of (the probability of homozygous/the probability of heterozygous)
-- Column 6 = Allele for most probable homozygous form.
-- Column 7 = Allele 1 for most probable heterozygous form.
-- Column 8 = Allele 2 for most probable heterozygous form.
-- Column 9 = Probability of the allele combination to generate given read profile.
-- Column 10 = Number of possible rearrangement of given read profile.
-- Column 11 = Probability of the allele combination to generate read profile with any rearrangement (Product of column 9 and column 10)
-- Column 12 = Read depth
-
-Only columns 2, 3, 4, 7, 8, and 11 are used in the calculation.
-
-**Output**
-
-
-The output will contain the following header and columns
-
-- Line 1 header: read_depth	allele	heterozygous_prob	motif
-- Column 1 = read depth
-- Column 2 = allele combination
-- Column 3 = probability to detect heterozygous of that allele combination
-- Column 4 = motif
-
-
-
-
-</help>
-</tool>
--- a/test-data/fetchflank.xml	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-<tool id="fetchflank" name="Fetch flanking bases" version="1.0.0">
-  <description> of microsatellites and output as two fastq files in forward-forward orientation</description>
-  <command interpreter="python">pair_fetch_DNA_ff.py  $microsat_in_read $Leftflanking $Rightflanking $qualitycutoff $lengthofbasetocheckquality  </command>
-
-  <inputs>
-    <param name="microsat_in_read" type="data" label="Select data of microsatellites in reads" />
-    <param name="qualitycutoff" type="integer" value="20" label="Minimum quality score (Phred+33) for microsatellites and flanking regions" />
-    <param name="lengthofbasetocheckquality" type="integer" value="20" label="Length of flanking regions that require quality screening" />
-  </inputs>
-  <outputs>
-    <data format="fastq" name="Leftflanking" />
-    <data format="fastq" name="Rightflanking" />
-  </outputs>
-  <tests>
-    <!-- Test data with valid values -->
-    <test>
-      <param name="microsat_in_read" value="samplefq.snoope"/>
-      <param name="qualitycutoff" value="20"/>
-      <param name="lengthofbasetocheckquality" value="20"/>
-      <output name="Leftflanking" file="microsatellite_flanking_L.fastq"/>
-      <output name="Rightflanking" file="microsatellite_flanking_R.fastq"/>
-    </test>
-
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**What it does**
-
-This tool will fetch flanking regions around microsatellites, screen for quality score at microsatellites and adjacent flanking regions, and output two fastq files containing flanking regions in forward-forward direction.
-
-- This tool assumes that the quality score is Phred+33, such as Sanger fastq.
-- Reads that have either left or right flanking regions shorter than the length of flanking regions that require quality screening will be removed.
-
-**Citation**
-When you use this tool, please cite **Fungtammasan A, Ananda G, Hile SE, Su MS, Sun C, Harris R, Medvedev P, Eckert K, Makova KD. 2015. Accurate Typing of Short Tandem Repeats from Genome-wide Sequencing Data and its Applications, Genome Research**
-
-**Input**
-
-The input files need to be in the same format as output from **microsatellite detection program**. This format contains **length of repeat**, **length of left flanking region**, **length of right flanking region**, **repeat motif**, **hamming (editing) distance**, **read name**, **read sequence**, **read quality score**
-
-**Output**
-
-The output will be two fastq files. The first file contains left flanking regions. The second file contains right flanking regions.
-
-**Example**
-
-- Suppose we detected the microsatellites from short reads ::
-
-	6	40	54	G	0	SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1	TACCCTCCTGTCTTCCCAGACTGATTTCTGTTCCTGCCCTggggggTTCTTGACTCCTCTGAATGGGTACGGGAGTGTGGACCTCAGGGAGGCCCCCTTG	GGGGGGGGGGGGGGGGGFGGGGGGGGGFEGGGGGGGGGGG?FFDFGGGGGG?FFFGGGGGDEGGEFFBEFCEEBD@BACB*?=99(/=5'6=4:CCC*AA
-
-
-- We want to get fastq files of flanking regions around microsatellite with quality score at least 20 on Phred +33
-
-- Then the program will report these two fastq files ::
-
-	@SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1
-	TACCCTCCTGTCTTCCCAGACTGATTTCTGTTCCTGCCCT
-	+SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1
-	GGGGGGGGGGGGGGGGGFGGGGGGGGGFEGGGGGGGGGGG
-
-
-	@SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1
-	TTCTTGACTCCTCTGAATGGGTACGGGAGTGTGGACCTCAGGGAGGCCCCCTTG
-	+SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1
-	GGGGG?FFFGGGGGDEGGEFFBEFCEEBD@BACB*?=99(/=5'6=4:CCC*AA
-
-
-
-</help>
-</tool>
--- a/test-data/heteroprob.py	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,199 +0,0 @@
-### import libraries ###
-import sys
-import collections, math
-import heapq
-import itertools
-
-
-
-### basic function ###
-def permuterepeat(n,rlist):
-    f = math.factorial
-    nfac=f(n)
-    rfaclist=[f(i) for i in rlist]
-    for rfac in rfaclist:
-        nfac=nfac/rfac
-    return nfac
-
-def nCr(n,r):
-    f = math.factorial
-    return f(n) / f(r) / f(n-r)
-
-def averagelist(a,b,expectedlevelofminor):
-    product=[]
-    for i in range(len(a)):
-        product.append((1-expectedlevelofminor)*a[i]+expectedlevelofminor*b[i])
-
-    return product
-
-def complement_base(read):
-    collect=''
-    for i in read:
-        if i.upper()=='A':
-            collect+='T'
-        elif i.upper()=='T':
-            collect+='A'
-        elif i.upper()=='C':
-            collect+='G'
-        elif i.upper()=='G':
-            collect+='C'
-    return collect
-def makeallpossible(read):
-    collect=[]
-    for i in range(len(read)):
-        tmp= read[i:]+read[:i]
-        collect.append(tmp)
-        collect.append(complement_base(tmp))
-    return collect
-
-def motifsimplify(base):
-    '''str--> str
-    '''
-    motiflength=len(base)
-    temp=list(set(ALLMOTIF[motiflength]).intersection(set(makeallpossible(base))))
-
-    return temp[0]
-
-def majorallele(seq):
-    binseq=list(set(seq))
-    binseq.sort(reverse=True)   # highly mutate mode
-    #binseq.sort()              # majority mode
-    storeform=''
-    storevalue=0
-    for i in binseq:
-        if seq.count(i)>storevalue:
-            storeform=i
-            storevalue=seq.count(i)
-
-    return int(storeform)
-
-### decide global parameter ###
-COORDINATECOLUMN=1
-ALLELECOLUMN=2
-MOTIFCOLUMN=3
-inputname=sys.argv[1]
-errorprofile=sys.argv[2]
-EXPECTEDLEVELOFMINOR=float(sys.argv[3])
-if EXPECTEDLEVELOFMINOR >0.5:
-	try:
-		errorexpectcontribution=int('a')
-	except Exception, eee:
-		print eee
-		stop_err("Expected contribution of minor allele must be at least 0 and not more than 0.5")
-MINIMUMMUTABLE=0 ###1.2*(1.0/(10**8))  #http://www.ncbi.nlm.nih.gov/pubmed/22914163 Kong et al 2012
-
-
-## Fixed global variable
-ALLREPEATTYPE=[1,2,3,4]
-ALLREPEATTYPENAME=['mono','di','tri','tetra']
-monomotif=['A','C']
-dimotif=['AC','AG','AT','CG']
-trimotif=['AAC','AAG','AAT','ACC','ACG','ACT','AGC','AGG','ATC','CCG']
-tetramotif=['AAAC','AAAG','AAAT','AACC','AACG','AACT','AAGC','AAGG','AAGT','AATC','AATG','AATT',\
-'ACAG','ACAT','ACCC','ACCG','ACCT','ACGC','ACGG','ACGT','ACTC','ACTG','AGAT','AGCC','AGCG','AGCT',\
-'AGGC','AGGG','ATCC','ATCG','ATGC','CCCG','CCGG','AGTC']
-ALLMOTIF={1:monomotif,2:dimotif,3:trimotif,4:tetramotif}
-monorange=range(5,60)
-dirange=range(6,60)
-trirange=range(9,60)
-tetrarange=range(12,80)
-ALLRANGE={1:monorange,2:dirange,3:trirange,4:tetrarange}
-
-#########################################
-######## Prob calculation sector ########
-#########################################
-def multinomial_prob(majorallele,STRlength,motif,probdatabase):
-    '''int,int,str,dict-->int
-    ### get prob for each STRlength to be generated from major allele
-    '''
-    #print (majorallele,STRlength,motif)
-    prob=probdatabase[len(motif)][motif][majorallele][STRlength]
-    return prob
-
-################################################
-######## error model database sector ###########
-################################################
-
-## structure generator
-errormodeldatabase={1:{},2:{},3:{},4:{}}
-sumbymajoralleledatabase={1:{},2:{},3:{},4:{}}
-for repeattype in ALLREPEATTYPE:
-    for motif in ALLMOTIF[repeattype]:
-        errormodeldatabase[repeattype][motif]={}
-        sumbymajoralleledatabase[repeattype][motif]={}
-        for motifsize1 in ALLRANGE[repeattype]:
-            errormodeldatabase[repeattype][motif][motifsize1]={}
-            sumbymajoralleledatabase[repeattype][motif][motifsize1]=0
-            for motifsize2 in ALLRANGE[repeattype]:
-                errormodeldatabase[repeattype][motif][motifsize1][motifsize2]=MINIMUMMUTABLE
-#print errormodeldatabase
-## read database
-
-## get read count for each major allele
-fd=open(errorprofile)
-lines=fd.readlines()
-for line in lines:
-    temp=line.strip().split('\t')
-    t_major=int(temp[0])
-    t_count=int(temp[2])
-    motif=temp[3]
-    sumbymajoralleledatabase[len(motif)][motif][t_major]+=t_count
-fd.close()
-##print sumbymajoralleledatabase
-
-## get probability
-fd=open(errorprofile)
-lines=fd.readlines()
-for line in lines:
-    temp=line.strip().split('\t')
-    t_major=int(temp[0])
-    t_read=int(temp[1])
-    t_count=int(temp[2])
-    motif=temp[3]
-    if sumbymajoralleledatabase[len(motif)][motif][t_major]>0:
-        errormodeldatabase[len(motif)][motif][t_major][t_read]=t_count/(sumbymajoralleledatabase[len(motif)][motif][t_major]*1.0)
-        #errormodeldatabase[repeattype][motif][t_major][t_read]=math.log(t_count/(sumbymajorallele[t_major]*1.0))
-
-    #else:
-    #    errormodeldatabase[repeattype][motif][t_major][t_read]=0
-fd.close()
-#print errormodeldatabase
-#print math.log(100,10)
-#########################################
-######## input reading sector ###########
-#########################################
-
-
-
-fd = open(inputname)
-##fd=open('sampleinput_C.txt')
-lines=fd.xreadlines()
-for line in lines:
-    i_read=[]
-    i2_read=[]
-    temp=line.strip().split('\t')
-    i_coordinate=temp[COORDINATECOLUMN-1]
-    i_motif=motifsimplify(temp[MOTIFCOLUMN-1])
-    i_read=temp[ALLELECOLUMN-1].split(',')
-    i_read=map(int,i_read)
-    depth=len(i_read)
-    heteromajor1=int(temp[6])
-    heteromajor2=int(temp[7])
-
-### calculate the chance to detect combination (using error profile)
-    heterozygous_collector=0
-    alist=[multinomial_prob(heteromajor1,x,i_motif,errormodeldatabase)for x in i_read]
-    blist=[multinomial_prob(heteromajor2,x,i_motif,errormodeldatabase)for x in i_read]
-
-    ablist=averagelist(alist,blist,EXPECTEDLEVELOFMINOR)
-
-    if 0 in ablist:
-        continue
-    heterozygous_collector=reduce(lambda y, z: y*z,ablist )
-
-### prob of combination (using multinomial distribution)
-    frequency_distribution=[len(list(group)) for key, group in itertools.groupby(i_read)]
-    ## print frequency_distribution
-    expandbypermutation=permuterepeat(depth,frequency_distribution)
-
-    print line.strip()+'\t'+str(heterozygous_collector)+'\t'+str(expandbypermutation)+'\t'+str(expandbypermutation*heterozygous_collector)+'\t'+str(depth)
--- a/test-data/microsatcompat.py	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-import sys
-# remove all reads that have unmatched microsatellite motifs
-# check only one line at a time
-def complement_base(read):
-    collect=''
-    for i in read:
-        if i.upper()=='A':
-            collect+='T'
-        elif i.upper()=='T':
-            collect+='A'
-        elif i.upper()=='C':
-            collect+='G'
-        elif i.upper()=='G':
-            collect+='C'
-    return collect
-
-def makeallpossible(read):
-    collect=[]
-    for i in range(len(read)):
-        tmp= read[i:]+read[:i]
-        collect.append(tmp)
-        collect.append(complement_base(tmp))
-    return collect
-
-
-fd=open(sys.argv[1])
-lines=fd.xreadlines()
-firstcolumn=int(sys.argv[2])-1 #4
-secondcolumn=int(sys.argv[3])-1 # 10
-for line in lines:
-    temp=line.strip().split('\t')
-    temp=filter(None,temp)
-    micro1=temp[firstcolumn]
-    micro2=temp[secondcolumn]
-    if micro1 in makeallpossible(micro2):
-        print line.strip()
\ No newline at end of file
--- a/test-data/microsatcompat.xml	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-<tool id="microsatcompat" name="Check microsatellites motif compatibility" version="1.0.0">
-  <description> </description>
-  <command interpreter="python">microsatcompat.py $input $column1 $column2 > $output </command>
-
-  <inputs>
-    <param name="input" type="data" label="Select input" />
-    <param name="column1" type="integer" value="4" label="First column number" />
-    <param name="column2" type="integer" value="10" label="Second column number" />
-  </inputs>
-  <outputs>
-    <data format="tabular" name="output" />
-
-  </outputs>
-  <tests>
-    <!-- Test data with valid values -->
-    <test>
-      <param name="input" value="microsatcompat_in.txt"/>
-      <param name="column1" value="4"/>
-      <param name="column2" value="10"/>
-      <output name="output" file="microsatcompat_out.txt"/>
-    </test>
-
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**What it does**
-
-This tool is used to select only the input lines which have compatible microsatellite motifs between two columns. Compatible motifs are defined here as motifs that are complementary to each other or that have the same sequence when the starting point of the motif is changed. For example, **A** is the same as **T**. Also, **AGG** is the same as **GAG**.
-
-For TRFM pipeline (profiling microsatellites in short read data), this tool can be used to make sure that the microsatellites in the reads have the same motif as the microsatellites in the reference at the corresponding mapped location.
-
-**Citation**
-
-When you use this tool, please cite **Fungtammasan A, Ananda G, Hile SE, Su MS, Sun C, Harris R, Medvedev P, Eckert K, Makova KD. 2015. Accurate Typing of Short Tandem Repeats from Genome-wide Sequencing Data and its Applications, Genome Research**
-
-**Input**
-
-The input files can be any tab delimited file.
-
-If this tool is used in TRFM microsatellite profiling, it should contain:
-
-- Column 1 = microsatellite location in reference chromosome
-- Column 2 = microsatellite location in reference start
-- Column 3 = microsatellite location in reference stop
-- Column 4 = microsatellite location in reference motif
-- Column 5 = microsatellite location in reference length
-- Column 6 = microsatellite location in reference motif size
-- Column 7 = length of microsatellites (bp)
-- Column 8 = length of left flanking regions (bp)
-- Column 9 = length of right flanking regions (bp)
-- Column 10 = repeat motif (bp)
-- Column 11 = hamming distance
-- Column 12 = read name
-- Column 13 = read sequence with soft masking of microsatellites
-- Column 14 = read quality (the same Phred score scale as input)
-- Column 15 = read name (The same as column 12)
-- Column 16 = chromosome
-- Column 17 = left flanking region start
-- Column 18 = left flanking region stop
-- Column 19 = microsatellite start as inferred from paired-end
-- Column 20 = microsatellite stop as inferred from paired-end
-- Column 21 = right flanking region start
-- Column 22 = right flanking region stop
-- Column 23 = microsatellite length in reference
-- Column 24 = microsatellite sequence in reference
-
-**Output**
-
-The same as input format.
-
-
-</help>
-</tool>
--- a/test-data/microsatellite.py	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1271 +0,0 @@
-#!/usr/bin/env python
-"""
-Snoop thru a fasta file looking for microsatellite repeats of given periods
-Output format: length_of_repeat left_flank_length right_flank_length  repeat_motif  hamming_distance  read_name read_sequence read_quality  (additional columns)
-
-If --r option turned on, output format will have additional columns behind:
-read_name read_chr  pre_s pre_e tr_s  tr_e  suf_s suf_e tr_len  tr_ref_seq
-
-pre_s           where the read starts
-pre_e           the last position before the microsatellite
-tr_s            where the microsatellite starts
-tr_e            where the microsatellite ends
-suf_s           first base after the microsatellite
-tr_ref_seq      reference sequence corresponding to microsatellite
-
-* output positions are 0 based
-
-:Author: Chen Sun (cxs1031@cse.psu.edu); Bob Harris (rsharris@bx.psu.edu)
-
-modification log:
-
-09/27/2013
-replace function dense_intervals with function non_negative_intervals, which does not need to import such a file.
-
-10/18/2013
-modify function find_repeat_element to run faster when hamming_distance = 0, which means no mutation/indel is allowed
-
-02/25/2014
-add function that can deal with mapped reads
-with additional output
-
-02/28/2014
-modify the 0-based end point: in 0-based coordinates the interval is half-open [ ),
-so the 0-based end position should always be incremented by 1
-
-03/05/2014
-deal with multi-fasta
-"""
-from sys          import argv,stdin,stderr,exit
-from string       import maketrans
-from md5          import new as md5_new
-import re
-#from pyfracluster import dense_intervals
-
-def usage(s=None):
-    message = """
-usage: microsat_snoop [fasta_file] [options]
-  <fasta_file>                Name of file to read sequences from;  if absent,
-                              sequences are read from stdin
-  --fasta                     Input file is in fasta format
-                              (this is the default)
-  --fastq                     Input file is in fastq format
-                              (default is fasta unless filename is .fastq)
-  --fastq:noquals             Input file is in fastq format, but discard quals
-  --sam                       Input file is SAM file
-  --r                         Indicate additional output information, if indicated,
-                              --ref option is mendatory
-  --ref=<filepath>            Reference file (absolute) path
-  --period=<length>           (mandatory,cumulative) repeat length(s) to be
-                              searched for
-                              <length> is expected to be small, less than 10
-                              <length> can also be a comma-separated list, or
-                              a range <low>..<high>
-  --rate=<fraction>           control the candidate repeat interval detector;
-                              it will consider intervals with at least
-                              <fraction> of matches when shifted by the period;
-                              <fraction> is between 0 and 1 and can be either a
-                              real number or <n>/<d>
-                              (default is 6/7)
-  --minlength=<length>        minimum length of intervals reported, in bp
-                              (default is 20)
-  --progress=<count>          how often to report the sequence we're searching
-                              (default is no progress report)
-  --allowduplicates           process all input sequences
-                              (this is the default)
-  --noduplicates              ignore any input sequence that's the same as an
-                              earlier sequence
-  --nonearduplicates          ignore any input sequence that has the same first
-                              100 bp as an earlier sequence
-  --nonearduplicate=<length>  ignore any input sequence that has the same first
-                              <length> bp as an earlier sequence
-  --hamming=<count>           Don't report candidate repeat intervals that have
-                              more than <count> mismatches
-                              (default is to do no such filtering)
-  --prefix=<length>           Don't report candidate repeat intervals that
-                              start within <length> of the sequence start
-                              (default is to do no such filtering)
-  --suffix=<length>           Don't report candidate repeat intervals that
-                              end within <length> of the sequence end
-                              (default is to do no such filtering)
-  --subsample=<k>/<n>         Process only the <k>th sequence of every group of
-                              <n> sequences;  <k> ranges from 1 to <n>
-  --multipleruns              Consider all candidate intervals in a sequence
-                              (default is to consider only the longest)
-  --partialmotifs             Consider microatelites with a partial motif
-                              (default is to consider only whole motifs)
-  --splitbyvalidity           Preprocess sequences, splitting at Ns;  this
-                              prevents candidates from including Ns
-                              (default is not to split)
-  --noflankdisplay            Show entire sequence as flanking regions
-                              (this is the default)
-  --flankdisplay=<length>     Limit length of flanking regions shown
-  --readnamesuffix=<string>   Root of suffix to append to read names;  e.g. 1
-                              for forward, 2 for reverse;  this triggers other
-                              info to be included in the suffix
-                              (default is "1" for fastq;  no suffix for fasta)
-  --head=<number>             limit the number of sequences processed
-  --markend                   Write a marker line upon completion
-                              (default is not to write a marker)
-  --help=details              Describe the process, and quit"""
-
-    if (s == None): exit (message)
-    else:           exit ("%s\n%s" % (s,message))
-
-
-detailedDescription = """In broad terms, the process works as follows:
-
-(1) Identify intervals that are highly correlated with the interval shifted by
-    P (the repeat period).  These intervals are called "runs" or "candidates".
-    The level of correlation required is controlled by rateThreshold.
-    Depending on whether we want to look for more than one microsat, we either
-    find the longest such run (simple algorithm) or many runs (more complicated
-    algorithm). The following steps are then performed on each run.
-
-(2) Find the most likely repeat motif in the run.  This is done by counting
-    all kmers (of length P) and choosing the most frequent.  If that kmer is
-    itself covered by a sub-repeat we discard this run.  The idea is that we
-    can ignore a 6-mer like ACGACG because we will find it when we are looking
-    for 3-mers.
-
-(3) Once we identify the most likely repeat motif, we then modify the
-    interval, adjusting start and end to find the interval that has the fewest
-    mismatches vs. a sequence of the motif repeated (hamming distance).  Only
-    whole copies of the motif are considered.
-
-(4) At this point we have a valid microsat interval (in the eyes of the
-    program). It is subjected to some filtering stages (hamming distance or too
-    close to an end), and if it satisfies those conditions, it's reported to
-    the user."""
-
-def main():
-    global debug
-
-    #=== parse the command line ===
-
-    inputFilename         = None
-    referenceFileName     = None #add by Chen Sun on 02/25
-    inputFormat           = None
-    repeatPeriods         = []
-    rateThreshold         = 6 / 7.0
-    lengthThreshold       = 20
-    reportProgress        = None
-    discardDuplicates     = False
-    discardNearDuplicates = False
-    nearDuplicatePrefix   = 100
-    hammingThreshold      = 0
-    prefixThreshold       = None
-    suffixThreshold       = None
-    subsampleK            = None
-    subsampleN            = None
-    reportMultipleRuns    = False
-    allowPartialMotifs    = False
-    splitByValidity       = False
-    flankDisplayLimit     = None
-    readNameSuffix        = None
-    headLimit             = None
-    markEndOfFile         = False
-    additionalInfo        = False
-    debug                 = []
-
-    for arg in argv[1:]:
-        if (arg == "--fasta"):
-            inputFormat = "fasta"
-        elif (arg == "--fastq"):
-            inputFormat = "fastq"
-        elif (arg == "--fastq:noquals"):
-            inputFormat = "fastq:noquals"
-        elif (arg == "--sam"):
-            inputFormat = "sam"
-        elif (arg == "--r"):
-            additionalInfo = True
-        elif (arg.startswith("--ref=")):
-            referenceFileName = arg.split("=",1)[1]
-        elif (arg.startswith("--period=")):
-            val = arg.split("=",1)[1]
-            for period in val.split(","):
-                if (".." in period):
-                    (lowPeriod,highPeriod) = period.split("..",1)
-                    lowPeriod  = int(lowPeriod)
-                    highPeriod = int(highPeriod)
-                    for period in xrange(lowPeriod,highPeriod+1):
-                        repeatPeriods += [period]
-                else:
-                    repeatPeriods += [int(period)]
-        elif (arg.startswith("--rate=")):
-            val = arg.split("=",1)[1]
-            rateThreshold = float_or_fraction(val)
-            assert (0.0 < rateThreshold <= 1.0), "%s not a valid rate" % val
-        elif (arg.startswith("--minlength=")):
-            val = arg.split("=",1)[1]
-            lengthThreshold = int(val)
-            assert (lengthThreshold >= 0)
-        elif (arg.startswith("--progress=")):
-            val = arg.split("=",1)[1]
-            reportProgress = int(val)
-        elif (arg == "--allowduplicates"):
-            discardDuplicates     = False
-            discardNearDuplicates = False
-        elif (arg == "--noduplicates"):
-            discardDuplicates     = True
-            discardNearDuplicates = False
-        elif (arg == "--nonearduplicates"):
-            discardDuplicates     = False
-            discardNearDuplicates = True
-        elif (arg.startswith("--nonearduplicate=")):
-            val = arg.split("=",1)[1]
-            discardDuplicates     = False
-            discardNearDuplicates = True
-            nearDuplicatePrefix   = int(val)
-            assert (nearDuplicatePrefix > 0)
-        elif (arg.startswith("--hamming=")):
-            val = arg.split("=",1)[1]
-            hammingThreshold = int(val)
-            assert (hammingThreshold >= 0)
-        elif (arg.startswith("--prefix=")):
-            val = arg.split("=",1)[1]
-            prefixThreshold = int(val)
-            assert (prefixThreshold >= 0)
-        elif (arg.startswith("--suffix=")):
-            val = arg.split("=",1)[1]
-            suffixThreshold = int(val)
-            assert (suffixThreshold >= 0)
-        elif (arg.startswith("--subsample=")):
-            val = arg.split("=",1)[1]
-            (k,n) = val.split("/",2)
-            subsampleK = int(k)
-            subsampleN = int(n)
-            assert (0 < subsampleK <= subsampleN)
-        elif (arg == "--multipleruns"):
-            reportMultipleRuns = True
-        elif (arg == "--partialmotifs"):
-            allowPartialMotifs = True
-        elif (arg == "--splitbyvalidity"):
-            splitByValidity = True
-        elif (arg == "--noflankdisplay"):
-            flankDisplayLimit = None
-        elif (arg.startswith("--flankdisplay=")):
-            val = arg.split("=",1)[1]
-            flankDisplayLimit = int(val)
-            assert (flankDisplayLimit >= 0)
-        elif (arg.startswith("--readnamesuffix")):
-            readNameSuffix = arg.split("=",1)[1]
-        elif (arg.startswith("--head=")):
-            headLimit = int_with_unit(arg.split("=",1)[1])
-        elif (arg == "--markend"):
-            markEndOfFile = True
-        elif (arg == "--help=details"):
-            exit (detailedDescription)
-        elif (arg.startswith("--debug=")):
-            debug += (arg.split("=",1)[1]).split(",")
-        elif (arg.startswith("--")):
-            usage("unrecognized option: %s" % arg)
-        elif (inputFilename == None):
-            inputFilename = arg
-        else:
-            usage("unrecognized option: %s" % arg)
-
-    #=== determine periods of interest ===
-
-    if (repeatPeriods == []):
-        usage("you gotta give me a repeat period")
-
-    if (additionalInfo == True):
-        if (referenceFileName == None):
-            usage("reference file path needed. use --ref=<reference> to indicate")
-
-    periodSeed = {}
-    for period in repeatPeriods:
-        if (period < 1): usage("period %d is not valid" % period)
-        periodSeed[period] = True
-
-    repeatPeriods = [period for period in periodSeed]
-    repeatPeriods.sort()
-
-    #=== determine input format ===
-
-    if   (inputFormat == "fasta"):           sequence_reader = fasta_sequences
-    elif (inputFormat == "fastq"):           sequence_reader = fastq_sequences
-    elif (inputFormat == "fastq:noquals"):   sequence_reader = fastq_sequences
-    elif (inputFormat == "sam"):             sequence_reader = sam_sequences
-    elif (inputFilename == None):            sequence_reader = fasta_sequences
-    elif (inputFilename.endswith(".fastq")): sequence_reader = fastq_sequences
-    elif (inputFilename.endswith(".fq")):    sequence_reader = fastq_sequences
-    elif (inputFilename.endswith(".sam")):   sequence_reader = sam_sequences
-    else:                                    sequence_reader = fasta_sequences
-
-    if (inputFilename != None): inputF = file(inputFilename,"rt")
-    else:                       inputF = stdin
-
-    if   (readNameSuffix == None) \
-     and (sequence_reader == fastq_sequences) \
-     and (inputFormat != "fastq:noquals"):
-        readNameSuffix = "1"
-
-    #=== process the sequences ===
-
-    refSequence = {}
-    rightName = ""
-    sequence = ""
-    if additionalInfo:
-        firstFasta = True
-        originalRefF = open(referenceFileName)
-        for line in originalRefF.readlines():
-            line = line.replace('\r','')
-            line = line.replace('\n','')
-            if line.startswith(">"):
-                if firstFasta:
-                    firstFasta = False
-                else:
-                    refSequence[rightName] = sequence
-                rightName = line[1:]
-                sequence = ""
-                continue
-            sequence += line
-        originalRefF.close()
-        refSequence[rightName] = sequence
-
-    sequenceSeen = {}
-
-    numSequences = 0
-    for seqInfo in sequence_reader(inputF):
-        numSequences += 1
-        if (headLimit != None) and (numSequences > headLimit):
-            print >>stderr, "limit of %d sequences reached" % headLimit
-            break
-
-        if (sequence_reader == sam_sequences):
-            #seqName,"".join(seqNucs).upper().translate(nonDnaMap), refName, pre_s, cigar
-            (name, sequence, refName, pre_s, cigar) = seqInfo
-            quals = None
-        elif (sequence_reader == fastq_sequences):
-            (name,sequence,quals) = seqInfo
-            if (inputFormat == "fastq:noquals"): quals = None
-        else:
-            (name,sequence) = seqInfo
-            quals = None
-
-        if (reportProgress != None) and (numSequences % reportProgress == 0):
-            print >>stderr, "%s %d" % (name,numSequences)
-
-        # if we're subsampling and not interested in this sequence, skip it
-
-        if (subsampleN != None):
-            if ((numSequences-1) % subsampleN != (subsampleK-1)):
-                continue
-
-        # if this sequence is shorter than the length of interest, skip it
-
-        seqLen = len(sequence)
-        if (seqLen < period) or (seqLen < lengthThreshold): continue
-
-        # if we're not interested in duplicates and this is one, skip it;
-        # note that we assume no hash collisions occur, i.e. that all hash
-        # matches are truly sequence matches
-
-        if (discardDuplicates):
-            h = hash108(sequence)
-            if (h in sequenceSeen): continue
-            sequenceSeen[h] = True
-        elif (discardNearDuplicates):
-            h = hash108(sequence[:nearDuplicatePrefix])
-            if (h in sequenceSeen): continue
-            sequenceSeen[h] = True
-
-        # split the sequence into chunks of valid nucleotides
-
-        if (splitByValidity):
-            chunks = [(start,end) for (start,end) in nucleotide_runs(sequence)]
-        else:
-            chunks = [(0,len(sequence))]
-
-        # evaluate for each period of interest
-
-        for period in repeatPeriods:
-
-            # operate on each chunk
-
-            for (chunkStart,chunkEnd) in chunks:
-                chunkLen = chunkEnd - chunkStart
-                if (chunkLen < period) or (chunkLen < lengthThreshold): continue
-
-                if ("validity" in debug) or ("correlation" in debug) or ("runs" in debug):
-                    print >>stderr, ">%s_%d_%d" % (name,chunkStart,chunkEnd)
-
-                # compute correlation sequence
-
-                corr = correlation_sequence(sequence,period,chunkStart,chunkEnd)
-
-                if ("correlation" in debug) or ("runs" in debug):
-                    print >>stderr, sequence[chunkStart:chunkEnd]
-                    print >>stderr, corr
-
-                # find runs (candidates for being a microsat)
-
-                if (reportMultipleRuns):
-                    runs = all_suitable_runs(corr,lengthThreshold-period,rateThreshold, hammingThreshold)
-                else:
-                    runs = longest_suitable_run(corr,lengthThreshold,rateThreshold)
-                if (runs == []): continue
-
-
-                if ("runs" in debug):
-                    for (start,end) in runs:
-                        run = [" "] * seqLen
-                        for ix in xrange(start-period,end):
-                            run[ix] = "*"
-                        print >>stderr, "".join(run)
-
-                if ("candidates" in debug):
-                    for (start,end) in runs:
-                        print >>stderr, "%s %d %d" % (name,start,end)
-
-                # process runs and report those that pass muster
-
-                runCount = 0
-                for (start,end) in runs:
-                    runCount += 1
-
-                    start = chunkStart + start - period
-                    end   = chunkStart + end
-
-                    (kmer,d,start,end) = find_repeat_element(hammingThreshold, period,sequence,start,end,allowPartials=allowPartialMotifs)
-                    if (kmer == None): continue    # (no useful repeat kmer was found)
-
-                    rptExtent = end - start
-                    prefixLen = start
-                    suffixLen = seqLen - end
-                    if (rptExtent <= period): continue
-                    if (hammingThreshold != None) and (d         > hammingThreshold): continue
-                    if (prefixThreshold  != None) and (prefixLen < prefixThreshold):  continue
-                    if (suffixThreshold  != None) and (suffixLen < suffixThreshold):  continue
-
-                    if (flankDisplayLimit == None):
-                        seq = sequence[:start] \
-                            + sequence[start:end].lower() \
-                            + sequence[end:]
-                    else:
-                        seq = sequence[max(chunkStart,start-flankDisplayLimit):start] \
-                            + sequence[start:end].lower() \
-                            + sequence[end:min(chunkEnd,end+flankDisplayLimit)]
-                    reportName = name
-                    if (readNameSuffix != None):
-                        reportName += "_"+readNameSuffix+"_per"+str(period)+"_"+str(runCount)
-                    if (quals == None or quals == "." or quals == "\t."): quals = "\t."
-                    else:               quals = "\t" + quals
-                    if not additionalInfo:
-                        print "%d\t%d\t%d\t%s\t%d\t%s\t%s%s" \
-                            % (rptExtent,prefixLen,suffixLen,kmer,d,reportName,seq,quals)
-                    else:
-                        #pre_e = pre_s + prefixLen - 1
-                        refPoint = pre_s
-                        donorPoint = 0
-
-                        donorBeforeStart = prefixLen - 1 #pre_e
-                        donorMicroStart = prefixLen     #tr_s
-                        donorMicroEnd = donorMicroStart + rptExtent - 1 #tr_e
-                        donorAfterMicro = donorMicroEnd + 1 #suf_s
-                        donorEnd = len(seq) - 1    #suf_e
-
-                        set_pre_e = False
-                        set_tr_s = False
-                        set_tr_e = False
-                        set_suf_s = False
-                        set_suf_e = False
-
-                        pre_e = 0
-                        tr_s = 0
-                        tr_e = 0
-                        suf_s = 0
-                        suf_e = 0
-
-                        matchList = re.findall('(\d+)([IDM])', cigar)
-                        unCognitiveCigar = False
-                        for matchN, matchType in matchList:
-                            matchNum = int(matchN)
-                            if matchType == "M":
-                                donorPoint = donorPoint + matchNum
-                                refPoint = refPoint + matchNum
-                            elif matchType == "D":
-                                refPoint = refPoint + matchNum
-                                continue
-                            elif matchType == "I":
-                                donorPoint = donorPoint + matchNum
-                            else:
-                                unCognitiveCigar = True
-                                break
-
-                            if not set_pre_e:
-                                if donorPoint >= donorBeforeStart:
-                                    pre_e = refPoint - (donorPoint - donorBeforeStart)
-                                    set_pre_e = True
-                                else:
-                                    continue
-
-                            if not set_tr_s:
-                                if donorPoint >= donorMicroStart:
-                                    tr_s = refPoint - (donorPoint - donorMicroStart)
-                                    set_tr_s = True
-                                else:
-                                    continue
-
-                            if not set_tr_e:
-                                if donorPoint >= donorMicroEnd:
-                                    tr_e = refPoint - (donorPoint - donorMicroEnd)
-                                    set_tr_e = True
-                                else:
-                                    continue
-
-                            if not set_suf_s:
-                                if donorPoint >= donorAfterMicro:
-                                    suf_s = refPoint - (donorPoint - donorAfterMicro)
-                                    set_suf_s = True
-                                else:
-                                    continue
-
-                            if not set_suf_e:
-                                if donorPoint >= donorEnd:
-                                    suf_e = refPoint - (donorPoint - donorEnd)
-                                    set_suf_e = True
-                                else:
-                                    continue
-
-                        if unCognitiveCigar:
-                            break
-                        tr_len = tr_e - tr_s + 1
-
-                        if refName not in refSequence:
-                            tr_ref_seq = "."
-                        else:
-                            if refSequence[refName] == "":
-                                tr_ref_seq = "."
-                            elif len(refSequence[refName]) <= tr_e:
-                                tr_ref_seq = "."
-                            else:
-                                tr_ref_seq = refSequence[refName][tr_s:tr_e+1]
-
-                        pre_e += 1
-                        tr_e += 1
-                        suf_e += 1
-                        print "%d\t%d\t%d\t%s\t%d\t%s\t%s%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s" \
-                            % (rptExtent,prefixLen,suffixLen,kmer,d,reportName,seq,quals,reportName,refName,pre_s,pre_e,tr_s,tr_e,suf_s,suf_e,tr_len,tr_ref_seq)
-
-    if (markEndOfFile):
-        print "# microsat_snoop end-of-file"
-
-    if (inputF != stdin):
-        inputF.close()
-
-# non_negative_intervals
-#     find intervals with exactly + and no -
-#     from string like this : +++++++++---+++++++++
-def non_negative_intervals(seq, minLength=None):
-
-    start = -1
-    end = -1
-    firstPlus = 1
-    #print seq
-    for ix in range(len(seq)): # for every char in seq
-        ch = seq[ix]
-        if(ch == "+"):
-            if(firstPlus):
-                firstPlus = 0
-                start = ix
-            else:
-                continue
-        elif(ch == "-"):
-            if(start >= 0):
-                end = ix-1
-                if((end - start + 1) >= minLength):
-                    yield (start,end+1)
-                start = -1
-                firstPlus = 1
-    if(start > 0):
-        if((ix - start + 1) >= minLength):
-            yield (start, ix+1)
-
-
-###################################################################
-# modified by Chen Sun on 7/11/2014
-# We do not want other modules, so parse these functions inside
-#
-###################################################################
-
-# parse a string of the form {positives}/{positives_and_neutrals}
-
-def parse_spec(s):
-    if ("/" not in s): raise ValueError
-    (n,d) = s.split("/",1)
-    if (not n.startswith("{")) or (not n.endswith("}")): raise ValueError
-    if (not d.startswith("{")) or (not d.endswith("}")): raise ValueError
-
-    positives = n[1:-1]
-    d         = d[1:-1]
-
-    for ch in positives:
-        if (ch not in d): raise ValueError
-
-    neutrals = [ch for ch in d if (ch not in positives)]
-    return (positives,neutrals)
-
-
-# convert a string to a number, allowing fractions
-
-def float_or_fraction(s):
-    if ("/" in s):
-        (numer,denom) = s.split("/",1)
-        return float(numer)/float(denom)
-    else:
-        return float(s)
-
-
-# dense_intervals--
-#    Find all non-overlapping runs with a good enough rate (of positives), and
-#    which meet our length threshold.
-#
-#    The algorithm used is adapted from Zhang, Berman, Miller, "Post-processing
-#    long pairwise alignments", Bioinformatics Vol. 15 no. 12 1999.
-#
-# $$$ we use the denominator as the threshold, but we really should use the
-# $$$ .. numerator, comparing it to minLength*rate
-
-def dense_intervals(seq,rate,positives,neutrals,blockers="",minLength=None):
-
-    if (blockers == None):
-        blockers = "".join([chr(n) for n in range(1,256)
-                                   if  (chr(n) not in positives)
-                                   and (chr(n) not in neutrals)])
-
-    stackLeft       = [None]    # stack with each entry containing five
-    stackRight      = [None]    # .. elements;  note that entry zero is not
-    stackLeftScore  = [None]    # .. used
-    stackRightScore = [None]
-    stackLower      = [None]
-    top   = 0
-    score = 0
-
-    for ix in range(len(seq)):
-        ch = seq[ix]
-        if (ch in blockers):
-            # emit intervals
-
-            for sp in range(1,top+1):
-                left  = stackLeft [sp] + 1
-                right = stackRight[sp]
-
-                while (left < right) and (seq[left]  not in positives): left  += 1
-                while (right > left) and (seq[right] not in positives): right -= 1
-
-                right += 1
-                if (minLength == None) or (right - left >= minLength):
-                    yield (left,right)
-
-            #empty stack
-
-            stackLeft       = [None]
-            stackRight      = [None]
-            stackLeftScore  = [None]
-            stackRightScore = [None]
-            stackLower      = [None]
-            top   = 0
-            score = 0
-            continue
-
-        if   (ch in positives): weight = 1-rate
-        elif (ch in neutrals):  weight = -rate
-        else: raise ValueError
-
-        score += weight
-        #if ("algorithm" in debug):
-        #    print >>sys.stderr, "%3d: %c %5.2f" % (ix, ch, score),
-
-        if (weight < 0):
-            #if ("algorithm" in debug):
-            #    print >>sys.stderr
-            continue
-
-        if (top > 0) and (stackRight[top] == ix-1):
-            # add this site to the interval on top of the stack
-
-            stackRight     [top] = ix
-            stackRightScore[top] = score
-
-            #if ("algorithm" in debug):
-            #    print >>sys.stderr, \
-            #          " extending [%d] %d-%d %4.1f %4.1f" \
-            #        % (top,
-            #           stackLeft     [top], stackRight     [top],
-            #           stackLeftScore[top], stackRightScore[top]),
-
-        else:
-            # create a one site interval
-
-            top += 1
-            if (top >= len(stackLeft)):
-                stackLeft       += [None]
-                stackRight      += [None]
-                stackLeftScore  += [None]
-                stackRightScore += [None]
-                stackLower      += [None]
-
-            stackLeft      [top] = ix - 1
-            stackLeftScore [top] = score - weight
-            stackRight     [top] = ix
-            stackRightScore[top] = score
-            stackLower     [top] = top - 1
-
-            while (stackLower[top] > 0) \
-              and (stackLeftScore[stackLower[top]] > stackLeftScore[top]):
-                stackLower[top] = stackLower[stackLower[top]]
-
-            #if ("algorithm" in debug):
-            #    print >>sys.stderr, \
-            #          " creating  [%d] %d-%d %4.1f %4.1f -> %d" \
-            #        % (top,
-            #           stackLeft     [top], stackRight     [top],
-            #           stackLeftScore[top], stackRightScore[top],
-            #           stackLower    [top]),
-
-        # merge intervals;  if there is a previous interval with a no-higher
-        # left score and no-higher right score, merge this interval (and all
-        # intervening ones) into that one
-
-        while (top > 1) \
-          and (stackLower[top] > 0) \
-          and (stackRightScore[stackLower[top]] <= stackRightScore[top]):
-            stackRight     [stackLower[top]] = stackRight     [top]
-            stackRightScore[stackLower[top]] = stackRightScore[top]
-            top = stackLower[top]
-
-            #if ("algorithm" in debug):
-            #    print >>sys.stderr, \
-            #          "\n%*s merging   [%d] %d-%d %4.1f %4.1f" \
-            #        % (13, "", top,
-            #           stackLeft[top],      stackRight     [top],
-            #           stackLeftScore[top], stackRightScore[top]),
-
-        #if ("algorithm" in debug):
-        #    print >>sys.stderr
-
-    # emit intervals
-
-    for sp in range(1,top+1):
-        left  = stackLeft [sp] + 1
-        right = stackRight[sp]
-
-        while (left < right) and (seq[left]  not in positives): left  += 1
-        while (right > left) and (seq[right] not in positives): right -= 1
-
-        right += 1
-        if (minLength == None) or (right - left >= minLength):
-            yield (left,right)
-
-
-###################################################################
-# modified by Chen Sun on 7/11/2014
-#
-###################################################################
-
-# correlation_sequence--
-#    Compute the correlation sequence for a given period.  This is a sequence
-#    of + and - indicating whether the base at a given position matches the one
-#    P positions earlier (where P is the period).  The first P positions are
-#    blank.  Positions with single character runs longer than the period are
-#    considered as non-matches, unless the period is 1.
-
-def correlation_sequence(sequence,period,start=None,end=None):
-    if (start == None): start = 0
-    if (end   == None): end   = len(sequence)
-
-    prevCh = sequence[start]
-    run    = 1
-    for ix in xrange(start+1,start+period):
-        ch = sequence[ix]
-        if (ch != prevCh): run =  1
-        else:              run += 1
-        prevCh = ch
-
-    corr = [" "] * period
-    for ix in xrange(start+period,end):
-        rptCh = sequence[ix-period]
-        ch    = sequence[ix]
-        if (ch != prevCh): run =  1
-        else:              run += 1
-        if    (ch    in "ACGT") \
-          and (ch == rptCh) \
-          and ((period == 1) or (run < period)):
-            corr += ["+"]
-        else:
-            corr += ["-"]
-        prevCh = ch
-
-    return "".join(corr)
-
-
-# longest_suitable_run--
-#    Find longest run with a good enough rate (of positives).
-#
-#    We score a "+" as 1-r and anything else as -r.  This is based on the fol-
-#    lowing derivation (p is the number of "+"s, n is the number of non-"+"s):
-#        p/(p+n) >= r
-#        ==> p >= rp + rn
-#        ==> (1-r)p - rn >= 0
-#
-#    We adapt an algorithm from "Programming Pearls", pg. 81 (2000 printing).
-#
-# $$$ we use the denominator as the threshold, but we really should use the
-# $$$ .. numerator, comparing it to minLength*rate
-#
-# $$$ this needs to account for $$$ this situation:
-# $$$   sequence: ACGACGACGACGTTATTATTATTA
-# $$$   matches:     +++++++++---+++++++++
-# $$$ this is currently considered to be one interval (if rate <= 6/7), but it
-# $$$ ought to be two;  we can't just post-process, though, because some other
-# $$$ interval might be longer than the longest half of this;  maybe what we
-# $$$ need to do is consider matches at distances -P and -2P, or if we match
-# $$$ -P but that itself was a mismatch, we should carry the mismatch forward
-
-def longest_suitable_run(seq,minLength,rate):
-    maxEndingHere = 0
-    maxSoFar      = 0
-    start         = None
-
-    for ix in xrange(len(seq)):
-        if (seq[ix] == "+"): s = 1-rate
-        else:                s = -rate
-
-        if (maxEndingHere+s < 0):
-            maxEndingHere = 0
-            block         = ix
-        else:
-            maxEndingHere += s
-            if (maxEndingHere >= maxSoFar):
-                maxSoFar = maxEndingHere
-                start    = block + 1
-                end      = ix + 1
-
-    if (start == None) or (end - start < minLength):
-        return []
-    else:
-        return [(start,end)]
-
-
-# all_suitable_runs--
-#    Find all non-overlapping runs with a good enough rate (of positives), and
-#    which meet our length threshold.
-# $$$ this needs to post-process the intervals, splitting them to account for
-# $$$ this situation:
-# $$$   sequence: ACGACGACGACGTTATTATTATTA
-# $$$   matches:     +++++++++---+++++++++
-# $$$ this is currently reported as one interval (if rate <= 6/7), but it
-# $$$ ought to be two
-
-def all_suitable_runs(seq,minCorrLength,rate, hammingThreshold):
-
-    ################################################################
-    # modified by Chen Sun on 07/11/2014
-    #
-    ################################################################
-
-    if hammingThreshold > 0:
-        return [(start,end) for (start,end) in dense_intervals(seq,rate,"+","-",blockers=None,minLength=minCorrLength)]
-    elif hammingThreshold == 0:
-        return [(start,end) for (start,end) in non_negative_intervals(seq, minLength=minCorrLength)]
-
-
-# find_repeat_element--
-#    Find the most plausible repeat element for a run, and nudge the ends of
-#    the run if needed.  Note that we will not consider kmers that represent
-#    shorter repeats.  For example, we won't report ACTACT as a 6-mer since we
-#    consider this to have a shorter period than 6.
-
-def find_repeat_element(hammingThreshold, period,seq,start,end,allowPartials=False):
-
-    if hammingThreshold > 0:
-        (kmer,bestD,bestStart,bestEnd) = find_hamming_repeat_element(period,seq,start,end,allowPartials)
-        return (kmer,bestD,bestStart,bestEnd)
-    # count the number of occurences of each k-mer;  note that we can't
-    # reject kmers containing smaller repeats yet, since for a sequence like
-    # ACACACACACAAACACACACACACACACAC we must first discover ACACAC as the best
-    # 6-mer, and THEN reject it;  if we reject ACACAC while counting, we'd end
-    # up reporting something like ACACAA as the best motif
-
-    if ("element" in debug):
-        print >>stderr, "find_repeat_element(%d,%d,%d)" % (period,start,end)
-
-    if ("partial" in debug):
-        print period, seq, start, end, allowPartials;
-        print seq[start:end]
-
-    kmerToCount = {}
-    kmerToFirst = {}
-    for ix in xrange(start,end-(period-1)):
-        kmer = seq[ix:ix+period]
-        if ("N" in kmer): continue
-        if (kmer not in kmerToCount):
-            kmerToCount[kmer] = 1
-            kmerToFirst[kmer] = ix
-        else:
-            kmerToCount[kmer] += 1
-        #if ("element" in debug):
-        #    print >>stderr, "    %d: %s" % (ix,kmer)
-
-    # choose the best k-mer;  this is simply the most frequently occurring one,
-    # with ties broken by whichever one came first
-
-    kmers = [(-kmerToCount[kmer],kmerToFirst[kmer],kmer) for kmer in kmerToCount]
-    if (kmers == []): return (None,None,start,end)
-    kmers.sort()
-
-    if ("element" in debug):
-        for (count,first,kmer) in kmers:
-            print >>stderr, "    %s: %d" % (kmer,-count)
-
-    (count,first,kmer) = kmers[0]
-    if (contains_repeat(kmer)): return (None,None,start,end)
-
-    # determine the hamming distance between the run and a simple repeat, for
-    # each "plausible" start and end;  we compute the distance for each such
-    # interval, and choose the one with the lowest hamming distance;  ties are
-    # broken in a deterministic-but-unspecified manner
-
-    bestD = bestStart = bestEnd = None
-    ###################################################################################
-    # modified by Chen Sun(cxs1031@cse.psu.edu) on 10/18/2013
-    #     since we do not allow hamming_distance > 0, which means we do not allow mutation,
-    # we do not need this section to produce bestStart and End
-    ###################################################################################
-
-    #for (s,e) in plausible_intervals(start,end,period,len(seq),allowPartials=allowPartials):
-    #    d = hamming_distance(seq,s,e,kmer)
-    #    if (d == None): continue
-    #    if (bestD == None) or (d <= bestD):
-    #        (bestD,bestStart,bestEnd) = (d,s,e)
-
-
-
-    bestStart = start
-
-    if(allowPartials):
-        bestEnd = end
-    elif(not allowPartials):
-        bestEnd = start
-        pattern = seq[start:start+period]
-        if ("partial" in debug):
-            print "kmer:", kmer
-            if(pattern != kmer):
-                print "pattern:", pattern
-
-        while(bestEnd <= end-period):
-            bestEnd += period
-
-    # bestD will always be 0, as we do not allow mutation
-    bestD = 0
-
-    if ("partial" in debug):
-        print bestD, bestStart, bestEnd
-
-    ###################################################################################
-    # modified by Chen Sun(cxs1031@cse.psu.edu) on 10/10
-    #
-    ###################################################################################
-    return (kmer,bestD,bestStart,bestEnd)
-
-
-def find_hamming_repeat_element(period,seq,start,end,allowPartials=False):
-
-    # count the number of occurences of each k-mer;  note that we can't
-    # reject kmers containing smaller repeats yet, since for a sequence like
-    # ACACACACACAAACACACACACACACACAC we must first discover ACACAC as the best
-    # 6-mer, and THEN reject it;  if we reject ACACAC while counting, we'd end
-    # up reporting something like ACACAA as the best motif
-
-    if ("element" in debug):
-        print >>stderr, "find_repeat_element(%d,%d,%d)" % (period,start,end)
-
-    kmerToCount = {}
-    kmerToFirst = {}
-    for ix in xrange(start,end-(period-1)):
-        kmer = seq[ix:ix+period]
-        if ("N" in kmer): continue
-        if (kmer not in kmerToCount):
-            kmerToCount[kmer] = 1
-            kmerToFirst[kmer] = ix
-        else:
-            kmerToCount[kmer] += 1
-        #if ("element" in debug):
-        #    print >>stderr, "    %d: %s" % (ix,kmer)
-
-    # choose the best k-mer;  this is simply the most frequently occurring one,
-    # with ties broken by whichever one came first
-
-    kmers = [(-kmerToCount[kmer],kmerToFirst[kmer],kmer) for kmer in kmerToCount]
-    if (kmers == []): return (None,None,start,end)
-    kmers.sort()
-
-    if ("element" in debug):
-        for (count,first,kmer) in kmers:
-            print >>stderr, "    %s: %d" % (kmer,-count)
-
-    (count,first,kmer) = kmers[0]
-    if (contains_repeat(kmer)): return (None,None,start,end)
-
-    # determine the hamming distance between the run and a simple repeat, for
-    # each "plausible" start and end;  we compute the distance for each such
-    # interval, and choose the one with the lowest hamming distance;  ties are
-    # broken in a deterministic-but-unspecified manner
-
-    bestD = bestStart = bestEnd = None
-
-    for (s,e) in plausible_intervals(start,end,period,len(seq),allowPartials=allowPartials):
-        d = hamming_distance(seq,s,e,kmer)
-        if (d == None): continue
-        if (bestD == None) or (d <= bestD):
-            (bestD,bestStart,bestEnd) = (d,s,e)
-
-    return (kmer,bestD,bestStart,bestEnd)
-
-# plausible_intervals--
-#    Yield all plausible intervals intersecting with a run.  We generate all
-#    starts within P bp of the run's start.  For each of these, we either (a) try
-#    all ends within P bp of run's end, or (b) trim the new interval to a whole
-#    multiple of the period, and report this short interval and the longer
-#    interval with one more period appended.  Case (a) allows partial motifs,
-#    while case (b) only allows whole motifs.
-
-def plausible_intervals(start,end,period,seqLen,allowPartials=False):
-
-    # generate intervals that allow a partial copy of the motif
-
-    if (allowPartials):
-        for candStart in xrange(start-(period-1),start+period):
-            if (candStart < 0): continue
-            for candEnd in xrange(end-(period-1),end+period):
-                if (candEnd > seqLen): continue
-                if (candEnd <= candStart+period): continue
-                yield (candStart,candEnd)
-
-    # -OR- generate intervals that allow only whole copies of the motif
-
-    else:
-        for candStart in xrange(start-(period-1),start+period):
-            if (candStart < 0): continue
-            candEnd = candStart + ((end-candStart)/period)*period
-            yield (candStart,candEnd)
-            candEnd += period
-            if (candEnd <= seqLen): yield (candStart,candEnd)
-
-
-# hamming_distance--
-#    Determine the hamming distance between the run and a simple repeat.
-# $$$ improve this by allowing gaps, and stopping when we reach a threshold
-
-kmerToDiffs = {}  # (this is used for memo-ization)
-
-def hamming_distance(seq,start,end,kmer):
-    period = len(kmer)
-    if (end < start + period): return None
-
-    wholeEnd = start + ((end-start)/period)*period
-
-    if (kmer not in kmerToDiffs):
-        kmerToDiffs[kmer] = { kmer:0 }
-
-    d = 0
-    for ix in xrange(start,wholeEnd,period):
-        qmer = seq[ix:ix+period]    # same size as the kmer motif
-        if (qmer in kmerToDiffs[kmer]):
-            d += kmerToDiffs[kmer][qmer]
-            continue
-        diffs = 0
-        for iy in xrange(0,period):
-            if (qmer[iy] != kmer[iy]): diffs += 1
-        kmerToDiffs[kmer][qmer] = diffs
-        d += diffs
-
-    if (end > wholeEnd):
-        qmer = seq[wholeEnd:end]    # shorter than the kmer motif
-        if (qmer in kmerToDiffs[kmer]):
-            d += kmerToDiffs[kmer][qmer]
-        else:
-            diffs = 0
-            for iy in xrange(0,len(qmer)):
-                if (qmer[iy] != kmer[iy]): diffs += 1
-            kmerToDiffs[kmer][qmer] = diffs
-            d += diffs
-
-    return d
-
-
-# fasta_sequences--
-#    Read the fasta sequences from a file.  Note that we convert to upper case,
-#    and convert any letter other than ACGT to N.
-
-nonDnaMap = maketrans("BDEFHIJKLMOPQRSUVWXYZ","NNNNNNNNNNNNNNNNNNNNN")
-
-def fasta_sequences(f):
-    seqName = None
-    seqNucs = None
-
-    for line in f:
-        line = line.strip()
-        if (line.startswith(">")):
-            if (seqName != None):
-                yield (seqName,"".join(seqNucs))
-            seqName = sequence_name(line)
-            seqNucs = []
-        elif (seqName == None):
-            assert (False), "first sequence has no header"
-        else:
-            seqNucs += [line]
-
-    if (seqName != None):
-        yield (seqName,"".join(seqNucs).upper().translate(nonDnaMap))
-
-
-# fastq_sequences--
-#    Read the fastq sequences from a file.  Note that we convert to upper case,
-#    and convert any letter other than ACGT to N.
-
-def fastq_sequences(f):
-    lineNum = 0
-    for line in f:
-        lineNum += 1
-        line = line.strip()
-
-        if (lineNum % 4 == 1):
-            assert (line.startswith("@")), \
-                   "bad read name at line %d" % lineNum
-            seqName = line[1:]
-            continue
-
-        if (lineNum % 4 == 2):
-            seqNucs = line
-            continue
-
-        if (lineNum % 4 == 3):
-            assert (line.startswith("+")), \
-                   "can't understand line %d:\n%s" % (lineNum,line)
-            continue
-
-        quals = line
-        assert (len(quals) == len(seqNucs)), \
-               "length mismatch read vs. qualities at line %d" % lineNum
-        yield (seqName,"".join(seqNucs).upper().translate(nonDnaMap),quals)
-
-    assert (lineNum % 4 == 0), \
-           "incomplete read at end of file"
-
-def sam_sequences(f):
-    lineNum = 0
-    for line in f:
-        lineNum += 1
-        line = line.strip()
-
-        if line.startswith("@"):
-            continue
-
-        columns = line.split("\t")
-        seqName = columns[0]
-        refName = columns[2]
-        pre_s = int(columns[3]) - 1
-        cigar = columns[5]
-        seqNucs = columns[9]
-
-        yield (seqName,"".join(seqNucs).upper().translate(nonDnaMap), refName, pre_s, cigar)
-
-# sequence_name--
-#    Extract the sequence name from a fasta header.
-#    $$$ this may need to be improved $$$
-
-def sequence_name(s):
-    s = s[1:].strip()
-    if (s == ""): return ""
-    else:         return s.split()[0]
-
-
-# nucleotide_runs--
-#    Yield (start,end) for all runs of valid nucleotides in a sequence.
-
-def nucleotide_runs(s):
-    runs  = []
-    start = None
-    for (ix,nuc) in enumerate(s):
-        if (nuc in "ACGT"):
-            if (start == None):
-                start = ix
-        else:
-            if (start != None):
-                yield (start,ix)
-                start = None
-
-    if (start != None): yield (start,len(s))
-
-
-# contains_repeat--
-#    Determine whether a short sequence contains a repeated element, such as a
-#    6-mer containing a repeated 2-mer (ACACAC) or 3-mer (ACTACT).  The repeat
-#    must cover the entire sequence, without mismatches.
-
-def contains_repeat(kmer):
-    kmerLength = len(kmer)
-    hasRepeat = False
-    rptLen = 1
-    while (not hasRepeat) and (2 * rptLen <= kmerLength):
-        if (kmerLength % rptLen != 0):
-            rptLen += 1
-            continue
-        isRepeat = True
-        for i in xrange(rptLen,kmerLength,rptLen):
-            if (kmer[i:i+rptLen] != kmer[:rptLen]):
-                isRepeat = False
-                break
-        if (isRepeat):
-            hasRepeat = True
-            break
-        rptLen += 1
-    return hasRepeat
-
-
-# hash108--
-#    Return a 108-bit hash "value" of a string
-
-def hash108(s):
-    m = md5_new()
-    m.update(s)
-    return m.hexdigest()[:27]
-
-
-# float_or_fraction--
-#    Convert a string to a number, allowing fractions
-
-def float_or_fraction(s):
-    if ("/" in s):
-        (numer,denom) = s.split("/",1)
-        return float(numer)/float(denom)
-    else:
-        return float(s)
-
-
-# int_with_unit--
-#    Parse a string as an integer, allowing unit suffixes
-
-def int_with_unit(s):
-    if (s.endswith("K")):
-        multiplier = 1000
-        s = s[:-1]
-    elif (s.endswith("M")):
-        multiplier = 1000 * 1000
-        s = s[:-1]
-    elif (s.endswith("G")):
-        multiplier = 1000 * 1000 * 1000
-        s = s[:-1]
-    else:
-        multiplier = 1
-
-    try:               return               int(s)   * multiplier
-    except ValueError: return int(math.ceil(float(s) * multiplier))
-
-
-if __name__ == "__main__": main()
-
--- a/test-data/microsatellite.xml	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,178 +0,0 @@
-<tool id="microsatellite" name="Microsatellite detection" version="1.0.0">
-	<description>for short read, reference, and mapped data</description>
-	<command interpreter="python2.7"> microsatellite.py
-	"${filePath}"
-	#if $inputFileSource.inputFileType == "fasta"
-		--fasta
-    #elif $inputFileSource.inputFileType == "fastq"
-		--fastq
-    #elif $inputFileSource.inputFileType == "fastq_noquals"
-		--fastq:noquals
-	#elif $inputFileSource.inputFileType == "sam"
-		--sam
-    #end if
-
-	#if $inputFileSource.inputFileType == "sam"
-		#if $inputFileSource.referenceFileSource.requireReference
-			--r --ref="${inputFileSource.referenceFileSource.referencePath}"
-		#end if
-    #end if
-
-	--period="${period}"
-
-	#if $partialmotifs == "true"
-		--partialmotifs
-    #end if
-
-	--minlength="${minlength}"
-
-
-	--prefix="${prefix}"
-	--suffix="${surfix}"
-
-	--hamming="${hammingThreshold}"
-
-	#if $multipleruns
-		--multipleruns
-        #end if
-
-	#if $flankSetting.noflankdisplay
-		--noflankdisplay
-	#else
-		--flankdisplay=${flankSetting.flankdisplay}
-	#end if
-	&gt; $stdout
-	</command>
-
-  <inputs>
-	<param name="filePath" label="Select input file" type="data"/>
-	<conditional name="inputFileSource">
-		<param name="inputFileType" type="select" label="Select input file type">
-			<option value="fasta">Fasta File</option>
-			<option value="fastq">Fastq File</option>
-			<option value="fastq_noquals">Fastq File without Quality Information</option>
-			<option value="sam">SAM File</option>
-		</param>
-		<when value="sam">
-		    <conditional name="referenceFileSource">
-				<param name="requireReference" label="Do you want to extract the corresponding microsatellites in the reference for comparison?" type="boolean">
-				</param>
-				<when value="true">
-					<param name="referencePath" label="Select reference file" type="data"/>
-				</when>
-			</conditional>
-		</when>
-	</conditional>
-
-	<param name="period" label="Motif size of microsatellites of interest (e.g. Mononucleotide microsatellite =1) (must be less than 10)" type="integer" size="2" value="1"/>
-  <param name="partialmotifs" label="Consider microsatellites with a partial motif?" type="boolean" checked="True"/>
-	<param name="minlength" label="Minimal length (bp) of microsatellite sequence reported" type="integer" size="2" value="5"/>
-
-
-	<param name="prefix" label="Do not report candidate repeat intervals that have left flanking region less than (bp):" type="integer" size="4" value="20"/>
-	<param name="surfix" label="Do not report candidate repeat intervals that have right flanking region less than (bp):" type="integer" size="4" value="20"/>
-
-
-	<param name="hammingThreshold" label="Hamming threshold of microsatellite, If greater than 0,  interrupted microsatellites will also be reported" type="integer" size="2" value="0"/>
-	<param name="multipleruns" label="Consider all candidate intervals in a sequence. If not checked, only the longest one will be considered" type="boolean" checked="True"> </param>
-	<conditional name="flankSetting">
-        	<param name="noflankdisplay" label="Show the entire flanking regions" type="boolean" checked="True"/>
-		<when value="false">
-			<param name="flankdisplay" label="Limit length (bp) of flanking regions shown" type="integer" size="4" value="5"/>
-		</when>
-	</conditional>
-
-  </inputs>
-  <outputs>
-    <data name="stdout" format="tabular"/>
-  </outputs>
-  <tests>
-    <!-- Test data with valid values -->
-    <test>
-      <param name="filePath" value="C_sample_fastq"/>
-	  <param name="period" value="1"/>
-      <param name="partialmotifs" value="true" />
-	  <param name="minlength" value="3" />
-	  <param name="prefix" value="5"/>
-	  <param name="surfix" value="5"/>
-	  <param name="hammingThreshold"  value="0"/>
-	  <param name="multipleruns" value="true"> </param>
-      <output name="microsatellite" file="C_sample_snoope"/>
-    </test>
-
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**What it does**
-
-We use different algorithms to detect microsatellites depending on the hamming distance parameter.
-If the hamming distance is set to zero, the program will only consider uninterrupted microsatellites. The process works as follows.
-
-1) Scanning reads using sliding windows. For a given repeat period ‘k’ (e.g. k=2 for dinucleotide TRs), we compared consecutive k-mer window size sequences, with a step size of k. If a base at a given position matches one k positions earlier it was marked with a plus, if corresponding sites had different bases it was marked with a minus. The first k position is blank.
-
-2) Since we do not allow mutations in reported TR, consecutive “+” signal sequence means that a k-mer TR is present in this sample.
-
-3) Report k-mer TRs if the length is larger than a threshold provided by the user.
-
-If the hamming distance is set to an integer greater than zero, the program will consider both uninterrupted and interrupted microsatellites. The process works as follows:
-
-(1) Identify intervals that are highly correlated with the interval shifted by ‘k’ (the repeat period).  These intervals are called "runs" or "candidates". The allowed level of correlation is 6/7. Depending on whether we want to look for more than one microsat, we either find the longest such run (simple algorithm) or many runs (more complicated algorithm). The following steps are then performed on each run.
-
-(2) Find the most likely repeat motif in the run.  This is done by counting all kmers (of length P) and choosing the most frequent.  If that kmer is itself covered by a sub-repeat we discard this run.  The idea is that we can ignore a 6-mer like ACGACG because we will find it when we are looking for 3-mers.
-
-(3) Once we identify the most likely repeat motif, we then modify the interval, adjusting start and end to find the interval that has the fewest mismatches vs. a sequence of the motif repeated (hamming distance).
-
-(4) At this point we have a valid microsat interval (in the eyes of the program). It is subjected to some filtering stages (hamming distance or too close to an end), and if it satisfies those conditions, it's reported to the user
-
-For more options, the script that runs this program can be downloaded and run with Python independently of Galaxy. More options are available in script mode. A help page is built into the script.
-
-**Citation**
-
-When you use this tool, please cite **Fungtammasan A, Ananda G, Hile SE, Su MS, Sun C, Harris R, Medvedev P, Eckert K, Makova KD. 2015. Accurate Typing of Short Tandem Repeats from Genome-wide Sequencing Data and its Applications, Genome Research**
-This tool is developed by Chen Sun (cxs1031@cse.psu.edu) and Bob Harris (rsharris@bx.psu.edu)
-
-**Input**
-
-- The input files can be fastq, fasta, fastq without quality score, and SAM format.
-
-**Output**
-
-For fastq, the output will contain the following columns:
-
-- Column 1 = length of microsatellites (bp)
-- Column 2 = length of left flanking regions (bp)
-- Column 3 = length of right flanking regions (bp)
-- Column 4 = repeat motif (bp)
-- Column 5 = hamming distance
-- Column 6 = read name
-- Column 7 = read sequence with soft masking of microsatellites
-- Column 8 = read quality (the same Phred score scale as input)
-
-For fasta, fastq without quality score and sam format, column 8 will be replaced with dot(.).
-
-If users have a mapped file (SAM) and would like to profile microsatellites from premapped data instead of using the flank-based mapping approach, they can select SAM format input and specify that they want the corresponding microsatellites in the reference for comparison. The output will be as follows:
-
-- Column 1 = length of microsatellites (bp)
-- Column 2 = length of left flanking regions (bp)
-- Column 3 = length of right flanking regions (bp)
-- Column 4 = repeat motif (bp)
-- Column 5 = hamming distance
-- Column 6 = read name
-- Column 7 = read sequence with soft masking of microsatellites
-- Column 8 = read quality (the same Phred score scale as input)
-- Column 9 = read name (The same as column 6)
-- Column 10 = chromosome
-- Column 11 = left flanking region start
-- Column 12 = left flanking region stop
-- Column 13 = microsatellite start as inferred from paired-end data
-- Column 14 = microsatellite stop as inferred from paired-end data
-- Column 15 = right flanking region start
-- Column 16 = right flanking region stop
-- Column 17 = microsatellite length in reference
-- Column 18 = microsatellite sequence in reference
-
-</help>
-</tool>
--- a/test-data/microsatpurity.py	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-import sys
-# remove all read that have impure microsat
-# check only one line at a time
-
-
-fd=open(sys.argv[1])
-lines=fd.xreadlines()
-##motifIx=int(sys.argv[2])
-period=int(sys.argv[2])
-tr_ref_seqIx=int(sys.argv[3])-1
-##output=(sys.argv[4])
-##fout=open(output,'w')
-for line in lines:
-    temp=line.strip().split('\t')
-    temp=filter(None,temp)
-    #motif=temp[motifIx]
-    tr_ref_seq=temp[tr_ref_seqIx]
-    ##period=len(motif)
-    cand_motif=tr_ref_seq[:period]
-    len_microsat=len(tr_ref_seq)
-    expand_microsat_cand=cand_motif*(len_microsat/period) + cand_motif[:(len_microsat%period)]
-    if tr_ref_seq == expand_microsat_cand:
-    	print line.strip()
-        ##print line.strip() >> fout
\ No newline at end of file
--- a/test-data/microsatpurity.xml	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-<tool id="microsatpurity" name="Select uninterrupted microsatellites" version="1.0.0">
-  <description> of a specific column</description>
-  <command interpreter="python">microsatpurity.py $input $period $column_n > $output </command>
-
-  <inputs>
-    <param name="input" type="data" label="Select input" />
-    <param name="period" type="integer" label="motif size" value="1"/>
-    <param name="column_n" type="integer" value="0" label="Select column that contains microsatellites of interest (0 = last column)" />
-  </inputs>
-  <outputs>
-    <data format="tabular" name="output" />
-
-  </outputs>
-  <tests>
-    <!-- Test data with valid values -->
-    <test>
-      <param name="input" value="microsatpurity_in.txt"/>
-      <param name="period" value="2"/>
-      <param name="column_n" value="0"/>
-      <output name="output" file="microsatpurity_out.txt"/>
-    </test>
-
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**What it does**
-
-This tool is used to select only the uninterrupted microsatellites. Interrupted microsatellites (e.g. ATATATATAATATAT) or sequences of microsatellites with non-microsatellite parts (e.g. ATATATATATG) will be removed.
-
-For the TRFM pipeline (profiling microsatellites in short read data), this tool can be used to exclude cases in which flanking bases were misread as part of the microsatellite. Thus, the read profile will only reflect the variation of TR length from expansion/contraction.
-For example, suppose that the sequence around a microsatellite is AGCGACGaaaaaaGCGATCA. If we observe a read with sequence AGCGACGaaaaaaaaaaGCGATCA, we can infer that this is a microsatellite expansion. However, if we observe AGCGACGaaaaaaaCGATCA, this is more likely a substitution of G to A. These cases can be removed with this tool.
-You can use the tool **combine mapped flanked bases** to get the microsatellites in the reference that correspond to the sequence between mapped reads. If the user maps these reads around the uninterrupted microsatellites in the reference, the corresponding sequences between these pairs should be uninterrupted microsatellites regardless of expansion/contraction of microsatellites in the short read data. However, if a flanking base is substituted, or if the fluorescent signal from the previous run makes it look like a substitution, the corresponding sequences in the reference between the pairs will not be uninterrupted microsatellites. Thus this tool can remove those cases and keep only microsatellite expansion/contraction.
-
-
-**Citation**
-
-When you use this tool, please cite **Fungtammasan A, Ananda G, Hile SE, Su MS, Sun C, Harris R, Medvedev P, Eckert K, Makova KD. 2015. Accurate Typing of Short Tandem Repeats from Genome-wide Sequencing Data and its Applications, Genome Research**
-
-**Input**
-
-The input files can be any tab delimited file.
-
-If this tool is used in TRFM microsatellite profiling, it should contain:
-
-- Column 1 = microsatellite location in reference chromosome
-- Column 2 = microsatellite location in reference start
-- Column 3 = microsatellite location in reference stop
-- Column 4 = microsatellite location in reference motif
-- Column 5 = microsatellite location in reference length
-- Column 6 = microsatellite location in reference motif size
-- Column 7 = length of microsatellites (bp)
-- Column 8 = length of left flanking regions (bp)
-- Column 9 = length of right flanking regions (bp)
-- Column 10 = repeat motif (bp)
-- Column 11 = hamming distance
-- Column 12 = read name
-- Column 13 = read sequence with soft masking of microsatellites
-- Column 14 = read quality (the same Phred score scale as input)
-- Column 15 = read name (The same as column 12)
-- Column 16 = chromosome
-- Column 17 = left flanking region start
-- Column 18 = left flanking region stop
-- Column 19 = microsatellite start as inferred from paired-end data
-- Column 20 = microsatellite stop as inferred from paired-end data
-- Column 21 = right flanking region start
-- Column 22 = right flanking region stop
-- Column 23 = microsatellite length in reference
-- Column 24 = microsatellite sequence in reference
-
-**Output**
-
-The same as input format.
-
-
-</help>
-</tool>
--- a/test-data/pair_fetch_DNA_ff.py	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-#!/usr/bin/env python
-# pair_fetch_DNA_ff.py
-# Function: filter microsat and flanking region by quality score;
-# remove read with any base that has lower quality score than "quality_require" within "flanking_base" and convert from snoope to fastq
-# Note that require flanking length need to be screen by Bob snoope script first
-
-# Author: Arkarachai Fungtammasan
-# Version 1.0.0 (15 July 2012)
-# Input format: length_of_repeat[0] 	 left_flank_length[1]	right_flank_length[2]	repeat_motif[3]	hamming_distance[4]	read_name[5]	read_sequence[6]	read_quality[7]
-# Output format: two fastq file. First file contain left flank. Second file contain right flank.
-# Command: python pair_fetch_DNA_ff.py input.txt
-
-import sys
-from galaxy import eggs
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-# read file name
-
-
-
-filename=sys.argv[1]
-L_filename=sys.argv[2]
-R_filename=sys.argv[3]
-quality_require=sys.argv[4]
-flanking_base=sys.argv[5]
-try:
-	quality_require=int(quality_require)
-	flanking_base=int(flanking_base)
-except Exception, eee:
-	print eee
-	stop_err("Quality score cutoff and Length of flanking regions that require quality screening must be integer")
-
-fd=open(filename)
-fdd1=open(L_filename,'w')
-fdd2=open(R_filename,'w')
-lines=fd.xreadlines()
-for line in lines:
-    temp=line.strip().split('\t')
-    temp=filter(None,temp)
-    #get index
-    left_flank=(0,int(temp[1]))
-    microsat=(int(temp[1]),int(temp[1])+int(temp[0]))
-    right_flank=(int(temp[1])+int(temp[0]),int(temp[1])+int(temp[0])+int(temp[2]))
-    flag=0
-    #filter length of left and right flank
-    if (right_flank[1]-right_flank[0])<flanking_base:
-    	continue
-    if (left_flank[1]-left_flank[0])<flanking_base:
-    	continue
-    #filter quality score
-    for i in temp[7][microsat[0]-flanking_base:microsat[1]+flanking_base]:
-        if ord(i)<(quality_require+33):
-            flag=1
-        else:
-            flag=flag
-    #print out to seperated files
-    if flag ==0:
-        newname= temp[5]##+'_'+temp[3]+'_'+temp[0]
-        fdd1.writelines('@'+newname+'\n')
-        fdd2.writelines('@'+newname+'\n')
-        fdd1.writelines(temp[6][left_flank[0]:left_flank[1]]+'\n')
-        fdd2.writelines(temp[6][right_flank[0]:right_flank[1]]+'\n')
-        fdd1.writelines('+'+newname+'\n')
-        fdd2.writelines('+'+newname+'\n')
-        fdd1.writelines(temp[7][left_flank[0]:left_flank[1]]+'\n')
-        fdd2.writelines(temp[7][right_flank[0]:right_flank[1]]+'\n')
-
-fd.close()
-fdd1.close()
-fdd2.close()
-
-
--- a/test-data/probvalueforhetero.xml	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-<tool id="heteroprob" name="Evaluate the probability of the allele combination to generate read profile" version="2.0.0">
-  <description></description>
-  <command interpreter="python2.7">heteroprob.py  $microsat_raw $microsat_error_profile  $expectedminorallele > $microsat_corrected </command>
-
-  <inputs>
-    <param name="microsat_raw" type="data" label="Select microsatellite length profile and allele combination file" />
-    <param name="microsat_error_profile" type="data" label="Select microsatellite error profile that correspond to this dataset" />
-	<param name="expectedminorallele" type="float" value="0.5" label="Expected contribution of minor allele when present (0.5 for genotyping)" />
-
-  </inputs>
-  <outputs>
-    <data name="microsat_corrected" format="tabular" />
-  </outputs>
-  <tests>
-    <!-- Test data with valid values -->
-    <test>
-      <param name="microsat_raw" value="probvalueforhetero_in.txt"/>
-      <param name="microsat_error_profile" value="PCRinclude.allrate.bymajorallele"/>
-      <param name="expectedminorallele" value="0.5"/>
-      <output name="microsat_corrected" file="probvalueforhetero_out.txt"/>
-    </test>
-
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**What it does**
-
-- This tool will calculate the probability that the allele combination can generate the given read profile. This tool is part of the pipeline to estimate minimum read depth.
-- The calculation of probability is very similar to that of the tool **Correct genotype for microsatellite errors**. However, this tool will restrict the calculation to only the allele combination indicated in the input. Also, when it encounters an allele combination that cannot be generated from the error profile, the total probability will be zero instead of using the base substitution rate.
-
-**Citation**
-
-When you use this tool, please cite **Fungtammasan A, Ananda G, Hile SE, Su MS, Sun C, Harris R, Medvedev P, Eckert K, Makova KD. 2015. Accurate Typing of Short Tandem Repeats from Genome-wide Sequencing Data and its Applications, Genome Research**
-
-**Input**
-
-The input format is the same as output from **Correct genotype for microsatellite errors** tool.
-
-- Column 1 = location of microsatellite locus.
-- Column 2 = length profile (length of microsatellite in each read that mapped to this location in comma separated format).
-- Column 3 = motif of microsatellite in this locus. The input file can contain more than three columns.
-- Column 4 = homozygous/heterozygous label.
-- Column 5 = log based 10 of (the probability of homozygous/the probability of heterozygous)
-- Column 6 = Allele for most probable homozygous form.
-- Column 7 = Allele 1 for most probable heterozygous form.
-- Column 8 = Allele 2 for most probable heterozygous form.
-
-Only column 2,3,7,8 were used in calculation.
-
-**Output**
-
-
-The output will contain the original eight columns from the input. However, it will also add the following columns.
-- Column 9 = Probability of the allele combination to generate given read profile.
-- Column 10 = Number of possible rearrangement of given read profile.
-- Column 11 = Probability of the allele combination to generate read profile with any rearrangement (Product of column 9 and column 10)
-- Column 12 = Read depth
-
-
-
-
-</help>
-</tool>
--- a/test-data/profilegenerator.py	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-import collections
-import itertools
-import sys
-
-filename=sys.argv[1]
-MOTIF=sys.argv[2]
-MOTIFSIZE=len(MOTIF)
-MaxDEPTH=int(sys.argv[3])
-MINIMUMPROB=float(sys.argv[4])##1.0/(10**4)
-MININUMCOUNT=1
-fd=open(filename)
-lines=fd.readlines()
-countbymajorallele=collections.defaultdict(list)
-for line in lines:
-    temp=line.strip().split('\t')
-    t_major=int(temp[0])
-    t_count=int(temp[2])
-    countbymajorallele[t_major].append(t_count)
-fd.close()
-sumbymajorallele=collections.defaultdict(int)
-for t_majorallele in countbymajorallele.keys():
-    sumbymajorallele[t_majorallele]=sum(countbymajorallele[t_majorallele])
-
-fd=open(filename)
-##fd=open('PCRinclude.mono.A.bymajorallele')
-lines=fd.readlines()
-allmajor=collections.defaultdict(list)
-for line in lines:
-    temp=line.strip().split()
-    if int(temp[0])%MOTIFSIZE==0:
-        if (int(temp[2])/(sumbymajorallele[int(temp[0])]*1.0))>=MINIMUMPROB:
-            if int(temp[2])>=MININUMCOUNT:
-                allmajor[int(temp[0])].append(int(temp[1]))
-##print allmajor
-allkey=allmajor.keys()
-allkey.sort()
-#print allkey
-keycount=0
-combinelist_collection=[]
-for dummycount in range(len(allkey)-1):
-    pair1,pair2=allkey[keycount],allkey[keycount+1]
-    pair1list=allmajor[pair1]
-    pair2list=allmajor[pair2]
-    #print pair1list,pair2list
-    pair1list.extend(pair2list)
-    combinelist=list(set(pair1list))
-    combinelist.sort()
-    ##print combinelist
-    combinelist_collection.append(tuple(combinelist))
-    keycount+=1
-combinelist_collection=list(set(combinelist_collection))
-newcombinelist_collection=combinelist_collection[:]
-#combinelist_collection=set(combinelist_collection)
-for smallset1 in combinelist_collection:
-    for smallset2 in combinelist_collection:
-        if set(smallset1).issubset(set(smallset2)) and smallset1 != smallset2:
-            newcombinelist_collection.remove(smallset1)
-            break
-##print combinelist_collection
-
-for depth in range(2,MaxDEPTH+1):
-    for member_list in newcombinelist_collection:
-        for member in itertools.combinations_with_replacement(member_list,depth):
-            print 'chr'+'\t'+','.join(map(str,member))+'\t'+MOTIF
-
-
--- a/test-data/profilegenerator.xml	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,110 +0,0 @@
-<tool id="Profilegenerator" name="Generate all possible combination of read profile" version="2.0.0">
-  <description> of the consecutive allele from given error profile </description>
-  <command interpreter="python2.7">profilegenerator.py  $error_profile $MOTIF $Maxdepth $minprob > $output </command>
-
-  <inputs>
-    <param name="error_profile" type="data" label="Select error profile" />
-    <param name="MOTIF" type="text" value="A" label="Type in a motif of interest (e.g. AGC)" />
-	<param name="Maxdepth" type="integer" value="30" label="Maximum read depth of interest" />
-	<param name="minprob" type="float" value="0.00000001" label="Minimum error rate to be considered" />
-
-  </inputs>
-  <outputs>
-    <data name="output" format="tabular" />
-  </outputs>
-  <tests>
-    <!-- Test data with valid values -->
-    <test>
-      <param name="error_profile" value="sampleprofilegenerator_in"/>
-      <param name="MOTIF" value="A"/>
-      <param name="Maxdepth" value="3"/>
-      <param name="minprob" file="0.00000001"/>
-      <output name="output" file="sampleprofilegenerator_out"/>
-    </test>
-
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**What it does**
-
-This tool will generate all possible combinations of observed read profiles of consecutive alleles from a given error profile. The range of observed read lengths can be filtered to contain only those that occur frequently using the "Minimum error rate to be considered" parameter.
-
-This program will collect the lists of valid (passing the "Minimum error rate to be considered" threshold) observed length profiles from combinations of consecutive allele lengths. Lists that are equivalent to, or subsets of, other lists will be removed. For each depth and each list, length profiles are generated using combinations with replacement, which is available in Python 2.7. Redundant length profiles can be generated from different lists if more than one combination of alleles is generated due to overlapping ranges of observed microsatellite lengths. The user needs to remove them, which can be done easily using the **sort | uniq** command in Unix.
-
-
-**Citation**
-
-When you use this tool, please cite **Fungtammasan A, Ananda G, Hile SE, Su MS, Sun C, Harris R, Medvedev P, Eckert K, Makova KD. 2015. Accurate Typing of Short Tandem Repeats from Genome-wide Sequencing Data and its Applications, Genome Research**
-
-**Input**
-
-- The error profile needs to contain these three columns.
-- Column 1 = Correct microsatellite length
-- Column 2 = Observed microsatellite length
-- Column 3 = Number of observation
-
-**Output**
-
-- Column 1 = Place holder for location of microsatellite locus. (just "chr")
-- Column 2 = length profile (length of microsatellite in each read that mapped to this location in comma separated format).
-- Column 3 = motif of microsatellite in this locus.
-
-**Example**
-
-- Suppose that we provide the following read profile ::
-
-	9	9	100000
-	10	10	91456
-	10	9	1259
-	11	11	39657
-	11	10	1211
-	11	12	514
-
-
-- Using the default minimum probability to be considered and motif = A, all observed read lengths are valid. The program will generate lists of observed length profiles from consecutive allele lengths. ::
-
-	9:10 = [9,10]
-	10:11 = [9,10,11,12]
-
-- Lists that are subsets of other lists will be removed. Thus, [9,10] will not be considered.
-
-- Then the program will generate all combinations with replacement for each depth from each list. Using **maximum read depth =3**, we will get the following output. ::
-
-
-	chr	9,9	A
-	chr	9,10	A
-	chr	9,11	A
-	chr	9,12	A
-	chr	10,10	A
-	chr	10,11	A
-	chr	10,12	A
-	chr	11,11	A
-	chr	11,12	A
-	chr	12,12	A
-	chr	9,9,9	A
-	chr	9,9,10	A
-	chr	9,9,11	A
-	chr	9,9,12	A
-	chr	9,10,10	A
-	chr	9,10,11	A
-	chr	9,10,12	A
-	chr	9,11,11	A
-	chr	9,11,12	A
-	chr	9,12,12	A
-	chr	10,10,10	A
-	chr	10,10,11	A
-	chr	10,10,12	A
-	chr	10,11,11	A
-	chr	10,11,12	A
-	chr	10,12,12	A
-	chr	11,11,11	A
-	chr	11,11,12	A
-	chr	11,12,12	A
-	chr	12,12,12	A
-
-
-</help>
-</tool>
--- a/test-data/readdepth2sequencingdepth.xml	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-<tool id="readdepth2seqdepth" name="Convert informative read depth to sequencing depth" version="1.0.0">
-  <description>for flank-based mapping of microsatellites</description>
-  <command interpreter="python2.7">sequencingdepthconversion_G.py $repeatlength $flanksize $readlength $infodepth $probprediction > $output </command>
-
-  <inputs>
-    <param name="repeatlength" type="integer" value="10" label="Repeat length (bp)" />
-    <param name="flanksize" type="integer" value="20" label="Required flank bases on each side in mapping" />
-    <param name="readlength" type="integer" value="100" label="Read length (treat all read as single end read)" />
-    <param name="infodepth" type="integer" value="5" label="Required read depth" />
-    <param name="probprediction" type="float" value="0.9" label="Proportion of genome that need certain level of read depth" />
-  </inputs>
-  <outputs>
-    <data format="input" name="output" />
-
-  </outputs>
-  <tests>
-    <!-- Test data with valid values -->
-    <test>
-		<param name="repeatlength" value="10"/>
-    	<param name="flanksize" value="20" />
-    	<param name="readlength" value="100" />
-    	<param name="infodepth" value="5" />
-		<param name="probprediction"  value="0.9" />
-		<output name="output" file="readdepth2seqdepth.out"/>
-    </test>
-
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**What it does**
-
-This tool is used to convert informative read depth (specified by user) to sequencing depth when the microsatellites are mapped using the TRFM pipeline.
-The locus specific sequencing depth is the sequencing depth that will give a certain locus a certain read depth, assuming reads are mapped uniformly. It is calculated as: ::
-
-	yrequired = ( X * L ) / (L - (2F+r-1))
-
-Where X = read depth, L = read length, F = the number of flanked bases required on each flanking regions, r = the expected repeat length of microsatellite of interest.
-
-The genome wide sequencing depth is the sequencing depth at which a certain percentage of the genome (e.g. 90 percent or 95 percent) reaches the required locus specific sequencing depth. It is calculated by numerical search for the smallest lambda such that: ::
-
-	 1 - 0.90 (where 0.90 may be another proportion specified by user) &gt;= P(Y=0) + P(Y=1) + … + P(Y=yrequired-1)
-
-	 P(Y=y) = (lambda^(y) * e ^(-lambda)) /y!
-
- y = specific level of sequencing depth. Lambda = genome wide sequencing depth
-
-
-**Citation**
-
-When you use this tool, please cite **Fungtammasan A, Ananda G, Hile SE, Su MS, Sun C, Harris R, Medvedev P, Eckert K, Makova KD. 2015. Accurate Typing of Short Tandem Repeats from Genome-wide Sequencing Data and its Applications, Genome Research**
-
-
-</help>
-</tool>
--- a/test-data/sequencingdepthconversion_G.py	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit()
-
-def info2require(X,L,F,r):
-    '''infodepth,readlength,flanksize,repeatlength
-    '''
-    return int(math.ceil((X*L*1.0)/(L-(1*((2*F)+r-1)))))
-
-def poissondef(meancov,specificcov):
-    nominator=1.0*(meancov**specificcov)*(math.e**(-1*meancov))
-    denominator=math.factorial(specificcov)
-    return nominator/denominator
-
-def require2recommend(needprob,mindepth):
-    i=mindepth
-    reverseneedprob=1-needprob
-    sumprob=1
-    while sumprob>reverseneedprob: #mean cov
-        sumprob=0
-        for j in range(0,mindepth): #specific cov
-            sumprob+=poissondef(i,j)
-        i+=1
-
-    return i-1
-
-import sys,math
-
-repeatlength=int(sys.argv[1])
-flanksize=int(sys.argv[2])#20
-readlength=int(sys.argv[3])#100
-infodepth=int(sys.argv[4])#5
-probdetection=float(sys.argv[5])#0.90
-
-if probdetection >1:
-    try:
-        probvalue=int('probvalue')
-    except Exception, eee:
-        print eee
-        stop_err("Proportion of genome to have certain locus specific must be between 0 and 1")
-
-print 'repeat_length'+'\t'+'read_length'+'\t'+'informative_read_depth''\t'+'=locus_specific_sequencing_depth'+'\t'+'=genome_wide_sequencing_depth'
-t_requiredepth=info2require(infodepth,readlength,flanksize,repeatlength)
-t_recomendseq=require2recommend(probdetection,t_requiredepth)
-preplotlist=[repeatlength,readlength,infodepth,t_requiredepth,t_recomendseq]
-plotlist=map(str,preplotlist)
-print '\t'.join(plotlist)
-
-#print info2require(infodepth,readlength,flanksize,repeatlength)
-#print poissondef(10,3)
-#print require2recommend(0.90,80)
-#informative_read_depth
-#required_seq_depth
-#recommend_seq_depth
\ No newline at end of file
--- a/test-data/space2underscore_readname.xml	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-<tool id="space2underscore_readname" name="Read name modifier" version="1.0.0">
-  <description>--change space to underscore of a specific column</description>
-  <command interpreter="python">changespacetounderscore_readname.py  $input $output $column_n </command>
-
-  <inputs>
-    <param name="input" type="data" label="Select input" />
-    <param name="column_n" type="integer" value="6" label="Select column to modify" />
-  </inputs>
-  <outputs>
-    <data format="tabular" name="output" />
-
-  </outputs>
-  <tests>
-    <!-- Test data with valid values -->
-    <test>
-      <param name="input" value="samplefq.snoope"/>
-      <param name="column_n" value="6"/>
-      <output name="output" file="samplefq.snoope.new"/>
-    </test>
-
-  </tests>
-  <help>
-
-
-.. class:: infomark
-
-**What it does**
-
-This tool is used to change spaces to underscores. For the TRFM pipeline (profiling microsatellites in short read data), this tool is used to change spaces in read names to underscores, so that downstream tools do not miscount columns because of spaces in read names. If the input does not have spaces in read names, this step can be skipped.
-
-**Citation**
-
-When you use this tool, please cite **Fungtammasan A, Ananda G, Hile SE, Su MS, Sun C, Harris R, Medvedev P, Eckert K, Makova KD. 2015. Accurate Typing of Short Tandem Repeats from Genome-wide Sequencing Data and its Applications, Genome Research**
-
-**Input**
-
-The input files can be any tab delimited file.
-
-If this tool is used in TRFM microsatellite profiling, it should be in the same format as output from **microsatellite detection program**. This format contains **length of repeat**, **length of left flanking region**, **length of right flanking region**, **repeat motif**, **hamming (editing) distance**, **read name**, **read sequence**, **read quality score**
-
-**Output**
-
-The same as input format.
-
-
-</help>
-</tool>
Binary file test-data/test-data/.DS_Store has changed
--- a/test-data/test-data/C_sample_fastq	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-@IL2_40_2_1_735_755
-ATTTTCCAGCACCGTCATGTGGTTCCAGAGGTTAAAGTGCTGAAATAACAT
-+
-IIIIIIIIIIIIIIIIIIIIIIII4IIIIIIIII5IIDI)'7%*8%%%%5*
-@IL2_40_2_1_919_700
-ATAAGGAAAAAAAAAAAAAAAACCAGGTCTTTTTTTTTTTTTTTTTGTTAT
-+
-IIIIIIIIIIIIIIIIIIIIII@IIII2III4-II47I?CII>-%:C-;$&
--- a/test-data/test-data/C_sample_snoope	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-3	33	15	A	0	IL2_40_2_1_735_755_1_per1_2	ATTTTCCAGCACCGTCATGTGGTTCCAGAGGTTaaaGTGCTGAAATAACAT	IIIIIIIIIIIIIIIIIIIIIIII4IIIIIIIII5IIDI)'7%*8%%%%5*
-3	42	6	A	0	IL2_40_2_1_735_755_1_per1_3	ATTTTCCAGCACCGTCATGTGGTTCCAGAGGTTAAAGTGCTGaaaTAACAT		IIIIIIIIIIIIIIIIIIIIIIII4IIIIIIIII5IIDI)'7%*8%%%%5*
-16	6	29	A	0	IL2_40_2_1_919_700_1_per1_1	ATAAGGaaaaaaaaaaaaaaaaCCAGGTCTTTTTTTTTTTTTTTTTGTTAT	IIIIIIIIIIIIIIIIIIIIII@IIII2III4-II47I?CII>-%:C-;$&
-17	29	5	T	0	IL2_40_2_1_919_700_1_per1_2	ATAAGGAAAAAAAAAAAAAAAACCAGGTCtttttttttttttttttGTTAT		IIIIIIIIIIIIIIIIIIIIII@IIII2III4-II47I?CII>-%:C-;$&
--- a/test-data/test-data/PCRinclude.allrate.bymajorallele	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,997 +0,0 @@
-10	10	91456	A
-10	9	1259	A
-10	11	605	A
-10	8	16	A
-10	12	8	A
-10	7	2	A
-11	11	39657	A
-11	10	1211	A
-11	12	514	A
-11	9	54	A
-11	13	9	A
-11	8	3	A
-11	14	1	A
-12	12	18850	A
-12	11	986	A
-12	13	417	A
-12	10	73	A
-12	14	8	A
-12	9	1	A
-12	8	1	A
-13	13	10201	A
-13	12	885	A
-13	14	320	A
-13	11	83	A
-13	15	12	A
-13	10	8	A
-14	14	3649	A
-14	13	409	A
-14	15	151	A
-14	12	62	A
-14	11	6	A
-14	16	5	A
-14	10	1	A
-15	15	847	A
-15	14	140	A
-15	16	60	A
-15	13	20	A
-15	17	4	A
-15	12	3	A
-16	16	182	A
-16	15	60	A
-16	17	14	A
-16	14	12	A
-16	13	1	A
-16	12	1	A
-16	18	1	A
-17	17	11	A
-17	16	5	A
-17	15	2	A
-17	18	1	A
-18	18	4	A
-18	17	2	A
-5	5	10047169	A
-5	6	44	A
-6	6	2808071	A
-6	5	195	A
-6	7	69	A
-7	7	1097174	A
-7	6	313	A
-7	8	83	A
-7	5	6	A
-8	8	369496	A
-8	7	387	A
-8	9	248	A
-8	6	3	A
-8	10	2	A
-9	9	184958	A
-9	8	707	A
-9	10	486	A
-9	7	5	A
-9	11	4	A
-10	10	46	C
-10	9	3	C
-5	5	1354993	C
-5	6	7	C
-6	6	193431	C
-6	5	14	C
-6	7	2	C
-7	7	22171	C
-7	6	4	C
-8	8	2966	C
-8	9	3	C
-8	7	3	C
-9	9	638	C
-9	8	8	C
-9	7	1	C
-10	10	21211	AC
-10	8	3	AC
-10	12	1	AC
-11	11	15048	AC
-11	9	10	AC
-12	12	6043	AC
-12	10	15	AC
-12	14	1	AC
-13	13	5070	AC
-13	11	40	AC
-13	15	1	AC
-14	14	3093	AC
-14	12	44	AC
-14	10	1	AC
-15	15	2848	AC
-15	13	31	AC
-15	17	1	AC
-16	16	1273	AC
-16	14	30	AC
-16	12	2	AC
-17	17	1297	AC
-17	15	27	AC
-18	18	1269	AC
-18	16	43	AC
-18	20	2	AC
-18	14	1	AC
-19	19	679	AC
-19	17	17	AC
-19	21	1	AC
-20	20	645	AC
-20	18	34	AC
-20	22	2	AC
-20	16	1	AC
-21	21	723	AC
-21	19	28	AC
-21	17	1	AC
-21	23	1	AC
-22	22	499	AC
-22	20	29	AC
-22	18	3	AC
-23	23	540	AC
-23	21	30	AC
-23	19	2	AC
-23	25	1	AC
-24	24	385	AC
-24	22	38	AC
-24	26	2	AC
-24	20	1	AC
-25	25	407	AC
-25	23	22	AC
-25	27	2	AC
-25	21	1	AC
-26	26	257	AC
-26	24	30	AC
-26	22	3	AC
-26	28	1	AC
-26	20	1	AC
-27	27	339	AC
-27	25	28	AC
-27	23	3	AC
-27	29	2	AC
-28	28	202	AC
-28	26	17	AC
-28	30	6	AC
-29	29	277	AC
-29	27	29	AC
-29	31	6	AC
-29	25	3	AC
-30	30	117	AC
-30	28	12	AC
-30	32	3	AC
-30	18	1	AC
-31	31	144	AC
-31	29	18	AC
-31	27	4	AC
-31	33	2	AC
-32	32	101	AC
-32	30	23	AC
-32	28	2	AC
-32	34	2	AC
-32	26	1	AC
-33	33	106	AC
-33	31	15	AC
-33	35	3	AC
-33	29	1	AC
-34	34	33	AC
-34	32	7	AC
-35	35	21	AC
-35	33	4	AC
-35	31	1	AC
-36	36	12	AC
-36	34	1	AC
-37	37	10	AC
-37	35	3	AC
-37	31	1	AC
-37	39	1	AC
-38	38	4	AC
-38	36	1	AC
-6	6	1521439	AC
-7	7	513952	AC
-8	8	134603	AC
-8	6	2	AC
-9	9	60741	AC
-9	7	3	AC
-9	11	1	AC
-10	10	21772	AG
-10	8	3	AG
-10	12	1	AG
-11	11	13880	AG
-11	9	10	AG
-11	13	1	AG
-12	12	5628	AG
-12	10	13	AG
-12	14	4	AG
-13	13	4494	AG
-13	11	17	AG
-14	14	1898	AG
-14	12	15	AG
-15	15	2427	AG
-15	13	18	AG
-16	16	1076	AG
-16	14	24	AG
-16	12	1	AG
-17	17	874	AG
-17	15	12	AG
-17	19	1	AG
-17	13	1	AG
-18	18	536	AG
-18	16	20	AG
-18	14	1	AG
-19	19	563	AG
-19	17	25	AG
-20	20	201	AG
-20	18	14	AG
-21	21	260	AG
-21	19	10	AG
-22	22	83	AG
-22	20	5	AG
-23	23	147	AG
-23	21	5	AG
-23	25	1	AG
-24	24	99	AG
-24	22	4	AG
-24	18	1	AG
-25	25	62	AG
-25	23	3	AG
-25	27	1	AG
-26	26	38	AG
-26	24	8	AG
-27	27	24	AG
-27	25	3	AG
-27	23	1	AG
-28	28	14	AG
-28	26	2	AG
-29	29	12	AG
-29	27	5	AG
-29	31	1	AG
-30	30	7	AG
-30	28	2	AG
-31	31	7	AG
-31	27	3	AG
-31	23	1	AG
-32	32	4	AG
-32	28	1	AG
-6	6	1880822	AG
-7	7	684837	AG
-7	9	1	AG
-8	8	183381	AG
-9	9	75547	AG
-9	7	6	AG
-9	11	1	AG
-10	10	18179	AT
-10	8	7	AT
-10	12	4	AT
-11	11	8969	AT
-11	9	5	AT
-11	13	2	AT
-12	12	4888	AT
-12	10	8	AT
-12	14	2	AT
-13	13	2785	AT
-13	11	17	AT
-13	15	1	AT
-14	14	2310	AT
-14	12	40	AT
-14	16	4	AT
-14	10	2	AT
-15	15	1461	AT
-15	13	33	AT
-15	11	1	AT
-15	17	1	AT
-16	16	879	AT
-16	14	42	AT
-16	18	2	AT
-16	12	1	AT
-17	17	599	AT
-17	15	38	AT
-17	19	2	AT
-17	13	1	AT
-18	18	367	AT
-18	16	29	AT
-18	20	7	AT
-18	14	1	AT
-19	19	223	AT
-19	17	34	AT
-19	21	3	AT
-20	20	97	AT
-20	18	14	AT
-20	16	2	AT
-20	22	1	AT
-21	21	60	AT
-21	19	18	AT
-21	17	1	AT
-22	22	53	AT
-22	20	15	AT
-22	24	5	AT
-22	18	3	AT
-23	23	11	AT
-23	21	1	AT
-24	24	7	AT
-24	20	2	AT
-24	22	2	AT
-6	6	1671932	AT
-6	8	1	AT
-7	7	595145	AT
-8	8	195533	AT
-8	10	5	AT
-8	6	2	AT
-9	9	52576	AT
-9	7	3	AT
-10	10	17	CG
-11	11	17	CG
-12	12	6	CG
-6	6	4097	CG
-7	7	678	CG
-8	8	184	CG
-9	9	19	CG
-10	10	19552	AAC
-11	11	19003	AAC
-12	12	6245	AAC
-12	9	1	AAC
-13	13	3406	AAC
-14	14	8448	AAC
-14	11	2	AAC
-15	15	2356	AAC
-15	12	6	AAC
-16	16	1373	AAC
-16	13	4	AAC
-17	17	3140	AAC
-17	14	5	AAC
-18	18	944	AAC
-18	15	2	AAC
-19	19	456	AAC
-19	16	1	AAC
-20	20	1474	AAC
-20	17	3	AAC
-21	21	328	AAC
-21	18	1	AAC
-22	22	178	AAC
-23	23	538	AAC
-23	26	1	AAC
-24	24	112	AAC
-25	25	60	AAC
-26	26	239	AAC
-26	23	1	AAC
-27	27	45	AAC
-28	28	58	AAC
-28	25	2	AAC
-29	29	77	AAC
-30	30	17	AAC
-31	31	38	AAC
-31	28	1	AAC
-32	32	94	AAC
-32	29	3	AAC
-33	33	15	AAC
-35	35	55	AAC
-35	32	1	AAC
-38	38	12	AAC
-41	41	6	AAC
-9	9	57212	AAC
-10	10	31455	AAG
-11	11	11876	AAG
-12	12	3458	AAG
-12	9	6	AAG
-13	13	1141	AAG
-14	14	928	AAG
-15	15	548	AAG
-15	12	4	AAG
-16	16	189	AAG
-17	17	235	AAG
-18	18	63	AAG
-19	19	66	AAG
-20	20	122	AAG
-22	22	11	AAG
-23	23	33	AAG
-9	9	104524	AAG
-10	10	69106	AAT
-11	11	30381	AAT
-12	12	12001	AAT
-12	9	1	AAT
-13	13	7168	AAT
-13	10	2	AAT
-14	14	5470	AAT
-14	11	3	AAT
-15	15	2524	AAT
-15	12	3	AAT
-16	16	1733	AAT
-16	13	1	AAT
-17	17	1324	AAT
-17	14	3	AAT
-18	18	1022	AAT
-18	15	3	AAT
-19	19	502	AAT
-19	16	3	AAT
-20	20	570	AAT
-20	17	2	AAT
-21	21	370	AAT
-21	18	1	AAT
-22	22	98	AAT
-23	23	164	AAT
-23	20	3	AAT
-24	24	143	AAT
-24	21	1	AAT
-25	25	122	AAT
-25	22	1	AAT
-26	26	45	AAT
-26	23	2	AAT
-27	27	32	AAT
-27	24	1	AAT
-28	28	6	AAT
-29	29	64	AAT
-29	26	1	AAT
-30	30	28	AAT
-30	24	1	AAT
-31	31	9	AAT
-32	32	9	AAT
-32	29	1	AAT
-38	38	6	AAT
-9	9	179182	AAT
-9	12	1	AAT
-10	10	14290	ACC
-11	11	5692	ACC
-12	12	1795	ACC
-13	13	1141	ACC
-14	14	545	ACC
-15	15	308	ACC
-16	16	162	ACC
-17	17	107	ACC
-18	18	23	ACC
-19	19	35	ACC
-20	20	44	ACC
-21	21	5	ACC
-22	22	5	ACC
-22	19	1	ACC
-23	23	11	ACC
-25	25	7	ACC
-26	26	7	ACC
-27	27	10	ACC
-28	28	24	ACC
-28	25	1	ACC
-35	35	5	ACC
-9	9	46614	ACC
-10	10	2865	ACG
-11	11	900	ACG
-12	12	325	ACG
-13	13	82	ACG
-14	14	83	ACG
-9	9	9465	ACG
-10	10	6269	ACT
-11	11	2284	ACT
-12	12	634	ACT
-13	13	441	ACT
-14	14	295	ACT
-15	15	118	ACT
-16	16	60	ACT
-17	17	71	ACT
-18	18	58	ACT
-19	19	42	ACT
-20	20	24	ACT
-24	24	5	ACT
-37	37	8	ACT
-41	41	5	ACT
-41	35	1	ACT
-9	9	20025	ACT
-10	10	2897	AGC
-11	11	948	AGC
-12	12	320	AGC
-13	13	97	AGC
-14	14	87	AGC
-15	15	13	AGC
-16	16	9	AGC
-17	17	25	AGC
-17	14	1	AGC
-9	9	9579	AGC
-10	10	21141	AGG
-11	11	8128	AGG
-12	12	2964	AGG
-13	13	1209	AGG
-14	14	860	AGG
-15	15	320	AGG
-16	16	190	AGG
-17	17	225	AGG
-18	18	147	AGG
-20	20	80	AGG
-21	21	9	AGG
-22	22	35	AGG
-23	23	27	AGG
-24	24	8	AGG
-26	26	9	AGG
-9	9	57350	AGG
-10	10	5964	ATC
-11	11	2346	ATC
-12	12	789	ATC
-13	13	386	ATC
-14	14	285	ATC
-15	15	165	ATC
-16	16	93	ATC
-17	17	149	ATC
-18	18	51	ATC
-19	19	6	ATC
-20	20	15	ATC
-21	21	15	ATC
-22	22	29	ATC
-23	23	25	ATC
-24	24	24	ATC
-26	26	34	ATC
-27	27	9	ATC
-28	28	30	ATC
-29	29	8	ATC
-30	30	8	ATC
-31	31	11	ATC
-34	34	11	ATC
-34	31	1	ATC
-36	36	5	ATC
-9	9	19837	ATC
-10	10	11	CCG
-11	11	24	CCG
-14	14	5	CCG
-16	16	5	CCG
-9	9	135	CCG
-12	12	10192	AAAC
-13	13	4917	AAAC
-14	14	4704	AAAC
-15	15	12713	AAAC
-16	16	2415	AAAC
-17	17	1431	AAAC
-18	18	1861	AAAC
-18	14	2	AAAC
-19	19	5254	AAAC
-19	15	2	AAAC
-19	23	1	AAAC
-20	20	913	AAAC
-20	16	1	AAAC
-21	21	615	AAAC
-22	22	509	AAAC
-22	18	2	AAAC
-23	23	2249	AAAC
-23	19	5	AAAC
-23	15	1	AAAC
-24	24	329	AAAC
-24	20	2	AAAC
-25	25	230	AAAC
-25	21	1	AAAC
-26	26	175	AAAC
-27	27	548	AAAC
-27	23	2	AAAC
-28	28	195	AAAC
-28	24	1	AAAC
-29	29	62	AAAC
-30	30	67	AAAC
-31	31	165	AAAC
-31	27	1	AAAC
-32	32	64	AAAC
-33	33	63	AAAC
-34	34	21	AAAC
-35	35	40	AAAC
-36	36	55	AAAC
-37	37	6	AAAC
-38	38	8	AAAC
-39	39	10	AAAC
-40	40	7	AAAC
-45	45	7	AAAC
-12	12	12855	AAAG
-12	16	13	AAAG
-12	20	9	AAAG
-12	18	2	AAAG
-13	13	6727	AAAG
-14	14	3699	AAAG
-14	13	8	AAAG
-15	15	3858	AAAG
-15	17	6	AAAG
-15	13	1	AAAG
-16	16	1244	AAAG
-17	17	750	AAAG
-17	13	1	AAAG
-18	18	380	AAAG
-18	20	5	AAAG
-18	14	1	AAAG
-19	19	1164	AAAG
-19	15	1	AAAG
-20	20	153	AAAG
-21	21	186	AAAG
-22	22	115	AAAG
-23	23	321	AAAG
-23	19	1	AAAG
-24	24	82	AAAG
-25	25	89	AAAG
-26	26	26	AAAG
-26	13	3	AAAG
-27	27	64	AAAG
-28	28	36	AAAG
-29	29	32	AAAG
-31	31	31	AAAG
-33	33	19	AAAG
-35	35	10	AAAG
-36	36	11	AAAG
-38	38	16	AAAG
-41	41	5	AAAG
-12	12	23143	AAAT
-13	13	10045	AAAT
-14	14	6815	AAAT
-15	15	8439	AAAT
-16	16	3102	AAAT
-16	12	2	AAAT
-17	17	2018	AAAT
-17	13	2	AAAT
-18	18	2044	AAAT
-19	19	2955	AAAT
-19	15	1	AAAT
-19	14	1	AAAT
-20	20	909	AAAT
-21	21	711	AAAT
-21	17	2	AAAT
-22	22	500	AAAT
-22	18	2	AAAT
-23	23	993	AAAT
-23	19	3	AAAT
-24	24	382	AAAT
-24	20	3	AAAT
-25	25	190	AAAT
-26	26	185	AAAT
-26	22	1	AAAT
-27	27	281	AAAT
-27	23	2	AAAT
-28	28	165	AAAT
-28	24	2	AAAT
-29	29	48	AAAT
-30	30	46	AAAT
-31	31	101	AAAT
-32	32	28	AAAT
-33	33	19	AAAT
-34	34	24	AAAT
-34	30	1	AAAT
-35	35	41	AAAT
-35	31	2	AAAT
-36	36	16	AAAT
-37	37	6	AAAT
-38	38	5	AAAT
-39	39	20	AAAT
-39	35	1	AAAT
-40	40	5	AAAT
-41	41	10	AAAT
-42	42	6	AAAT
-45	45	6	AAAT
-12	12	1468	AACC
-13	13	590	AACC
-14	14	318	AACC
-15	15	163	AACC
-16	16	102	AACC
-17	17	106	AACC
-18	18	18	AACC
-19	19	34	AACC
-20	20	7	AACC
-22	22	7	AACC
-23	23	13	AACC
-24	24	16	AACC
-25	25	9	AACC
-31	31	9	AACC
-12	12	214	AACG
-13	13	135	AACG
-14	14	39	AACG
-15	15	45	AACG
-12	12	522	AACT
-13	13	142	AACT
-14	14	143	AACT
-15	15	88	AACT
-16	16	16	AACT
-17	17	51	AACT
-18	18	7	AACT
-20	20	21	AACT
-21	21	27	AACT
-23	23	7	AACT
-24	24	11	AACT
-30	30	5	AACT
-12	12	346	AAGC
-13	13	83	AAGC
-14	14	60	AAGC
-15	15	40	AAGC
-16	16	21	AAGC
-18	18	9	AAGC
-19	19	7	AAGC
-12	12	4943	AAGG
-13	13	2714	AAGG
-14	14	1385	AAGG
-14	15	3	AAGG
-15	15	949	AAGG
-16	16	612	AAGG
-16	14	4	AAGG
-17	17	331	AAGG
-18	18	362	AAGG
-19	19	204	AAGG
-20	20	138	AAGG
-21	21	149	AAGG
-22	22	68	AAGG
-23	23	49	AAGG
-24	24	27	AAGG
-25	25	44	AAGG
-26	26	8	AAGG
-27	27	14	AAGG
-28	28	14	AAGG
-29	29	14	AAGG
-30	30	12	AAGG
-31	31	23	AAGG
-34	34	11	AAGG
-43	43	6	AAGG
-12	12	2676	AAGT
-13	13	1438	AAGT
-14	14	940	AAGT
-15	15	649	AAGT
-16	16	305	AAGT
-17	17	291	AAGT
-18	18	181	AAGT
-19	19	55	AAGT
-20	20	73	AAGT
-21	21	8	AAGT
-22	22	43	AAGT
-22	26	1	AAGT
-23	23	32	AAGT
-23	19	1	AAGT
-24	24	18	AAGT
-25	25	19	AAGT
-26	26	8	AAGT
-27	27	12	AAGT
-29	29	18	AAGT
-30	30	12	AAGT
-31	31	12	AAGT
-32	32	11	AAGT
-33	33	35	AAGT
-34	34	9	AAGT
-35	35	6	AAGT
-12	12	594	AATC
-13	13	205	AATC
-14	14	88	AATC
-15	15	112	AATC
-16	16	20	AATC
-17	17	81	AATC
-18	18	23	AATC
-21	21	13	AATC
-22	22	8	AATC
-24	24	19	AATC
-26	26	7	AATC
-28	28	9	AATC
-33	33	6	AATC
-12	12	2293	AATG
-13	13	1226	AATG
-14	14	678	AATG
-15	15	455	AATG
-16	16	222	AATG
-17	17	211	AATG
-18	18	104	AATG
-19	19	79	AATG
-20	20	40	AATG
-21	21	33	AATG
-22	22	73	AATG
-23	23	24	AATG
-24	24	16	AATG
-25	25	18	AATG
-26	26	15	AATG
-27	27	22	AATG
-27	23	1	AATG
-28	28	5	AATG
-32	32	17	AATG
-33	33	16	AATG
-12	12	2633	AATT
-13	13	1086	AATT
-14	14	1052	AATT
-15	15	386	AATT
-16	16	393	AATT
-17	17	98	AATT
-18	18	104	AATT
-19	19	105	AATT
-20	20	34	AATT
-21	21	12	AATT
-22	22	20	AATT
-25	25	18	AATT
-26	26	25	AATT
-27	27	7	AATT
-29	29	7	AATT
-35	35	12	AATT
-12	12	1406	ACAG
-13	13	964	ACAG
-14	14	300	ACAG
-15	15	130	ACAG
-16	16	102	ACAG
-17	17	49	ACAG
-18	18	30	ACAG
-19	19	88	ACAG
-20	20	5	ACAG
-23	23	5	ACAG
-12	12	4868	ACAT
-12	15	4	ACAT
-13	13	3216	ACAT
-14	14	957	ACAT
-15	15	1052	ACAT
-16	16	588	ACAT
-17	17	422	ACAT
-18	18	239	ACAT
-19	19	238	ACAT
-19	15	1	ACAT
-20	20	25	ACAT
-21	21	79	ACAT
-22	22	20	ACAT
-23	23	38	ACAT
-27	27	42	ACAT
-29	29	18	ACAT
-31	31	5	ACAT
-32	32	5	ACAT
-35	35	6	ACAT
-36	36	9	ACAT
-41	41	14	ACAT
-44	44	8	ACAT
-44	40	1	ACAT
-50	50	12	ACAT
-12	12	833	ACCC
-13	13	345	ACCC
-14	14	190	ACCC
-15	15	60	ACCC
-16	16	12	ACCC
-17	17	15	ACCC
-19	19	8	ACCG
-12	12	416	ACCT
-13	13	123	ACCT
-14	14	140	ACCT
-15	15	69	ACCT
-16	16	41	ACCT
-17	17	45	ACCT
-19	19	18	ACCT
-20	20	27	ACCT
-21	21	19	ACCT
-22	22	6	ACCT
-27	27	13	ACCT
-28	28	7	ACCT
-29	29	9	ACCT
-30	30	7	ACCT
-34	34	6	ACCT
-45	45	5	ACCT
-12	12	84	ACGC
-13	13	52	ACGC
-15	15	63	ACGC
-12	12	433	ACGG
-13	13	163	ACGG
-14	14	38	ACGG
-15	15	44	ACGG
-16	16	7	ACGG
-17	17	11	ACGG
-19	19	6	ACGG
-25	25	10	ACGG
-12	12	1119	ACGT
-13	13	509	ACGT
-14	14	338	ACGT
-15	15	16	ACGT
-16	16	66	ACGT
-17	17	7	ACGT
-19	19	27	ACGT
-12	12	2211	ACTC
-13	13	685	ACTC
-14	14	188	ACTC
-15	15	151	ACTC
-16	16	91	ACTC
-18	18	17	ACTC
-19	19	24	ACTC
-20	20	23	ACTC
-21	21	13	ACTC
-23	23	19	ACTC
-45	45	8	ACTC
-12	12	161	ACTG
-13	13	69	ACTG
-14	14	7	ACTG
-15	15	14	ACTG
-16	16	15	ACTG
-12	12	3118	AGAT
-13	13	1216	AGAT
-14	14	1084	AGAT
-15	15	869	AGAT
-16	16	508	AGAT
-17	17	322	AGAT
-18	18	159	AGAT
-19	19	258	AGAT
-20	20	63	AGAT
-21	21	84	AGAT
-22	22	69	AGAT
-22	14	6	AGAT
-23	23	112	AGAT
-24	24	107	AGAT
-25	25	36	AGAT
-26	26	113	AGAT
-27	27	42	AGAT
-28	28	58	AGAT
-29	29	37	AGAT
-30	30	16	AGAT
-31	31	32	AGAT
-32	32	24	AGAT
-33	33	10	AGAT
-34	34	43	AGAT
-35	35	6	AGAT
-36	36	13	AGAT
-36	32	1	AGAT
-37	37	35	AGAT
-38	38	34	AGAT
-39	39	20	AGAT
-39	35	2	AGAT
-40	40	27	AGAT
-41	41	29	AGAT
-42	42	30	AGAT
-43	43	87	AGAT
-44	44	67	AGAT
-45	45	20	AGAT
-46	46	15	AGAT
-47	47	28	AGAT
-48	48	26	AGAT
-49	49	13	AGAT
-50	50	11	AGAT
-52	52	5	AGAT
-54	54	6	AGAT
-12	12	236	AGCC
-13	13	109	AGCC
-14	14	17	AGCC
-15	15	14	AGCC
-16	16	8	AGCC
-18	18	12	AGCC
-21	21	18	AGCC
-23	23	13	AGCC
-12	12	23	AGCG
-13	13	19	AGCG
-18	18	9	AGCG
-12	12	272	AGCT
-13	13	89	AGCT
-14	14	108	AGCT
-15	15	49	AGCT
-16	16	19	AGCT
-17	17	19	AGCT
-18	18	19	AGCT
-19	19	44	AGCT
-22	22	12	AGCT
-27	27	16	AGCT
-12	12	87	AGGC
-13	13	19	AGGC
-14	14	16	AGGC
-18	18	7	AGGC
-12	12	3610	AGGG
-13	13	1980	AGGG
-14	14	1095	AGGG
-15	15	624	AGGG
-16	16	159	AGGG
-17	17	59	AGGG
-18	18	43	AGGG
-19	19	60	AGGG
-20	20	49	AGGG
-21	21	12	AGGG
-23	23	10	AGGG
-12	12	531	ATCC
-13	13	323	ATCC
-14	14	221	ATCC
-15	15	58	ATCC
-16	16	78	ATCC
-17	17	38	ATCC
-18	18	12	ATCC
-19	19	19	ATCC
-20	20	17	ATCC
-21	21	44	ATCC
-22	22	12	ATCC
-23	23	39	ATCC
-24	24	11	ATCC
-25	25	12	ATCC
-27	27	10	ATCC
-32	32	6	ATCC
-39	39	8	ATCC
-40	40	6	ATCC
-48	48	7	ATCC
-12	12	272	ATCG
-13	13	89	ATCG
-14	14	108	ATCG
-15	15	49	ATCG
-16	16	19	ATCG
-17	17	19	ATCG
-18	18	19	ATCG
-19	19	44	ATCG
-22	22	12	ATCG
-27	27	16	ATCG
-12	12	1119	ATGC
-13	13	509	ATGC
-14	14	338	ATGC
-15	15	16	ATGC
-16	16	66	ATGC
-17	17	7	ATGC
-19	19	27	ATGC
-12	12	13	CCCG
-12	12	178	AGTC
-13	13	77	AGTC
-14	14	13	AGTC
-15	15	12	AGTC
--- a/test-data/test-data/combineprob_out.txt	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-read_depth	allele	heterozygous_prob	motif
-2	10_11	0.485943568663	A
-2	11_12	0.472130683091	A
-2	9_10	0.494635026326	A
-3	10_11	0.71878954705	A
-3	11_12	0.688571908761	A
-3	9_10	0.73801798345	A
--- a/test-data/test-data/microsatcompat_in.txt	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-15	64416346	64416378	AT	32	16	18	22	61	TA	0	ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1	TTCCTTTATAAGAAATCTTTACatatatatatatatatatGACTGTTTTGCTTTGTTTTGAGTTTCATAAAAATAGTATCATGGGGGCCGGTCACGGTGGC	CCCFFFFFGHHFFIJIHGHIGIGGEGGIGHEGBHIIIJIFGCHGGIIJJEEIEIADHGICBFIGIGCGIJIIIGIIHIGDHGIIJHF>C888=@DB92<@?	ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1	15	64416324	64416346	64416346	64416378	64416378	64416439	32	ATATATATATATATATATATATATATATATAT
-17	52191125	52191133	GA	8	4	8	26	67	AC	0	ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1	CTTCCAGGGCCCTTCCAATGCCAAAAacacacacCTTTTTCCCCTGACCCTCTGTCAGTCTTCTGAATTTAAAGCTGGGCTCTGGGACTTACCAGTGTGAG	CCCFFFFFHHHHHJJJJJJJJJJJJJJIHIIJIJJJJJJJJJJJIGIJJJJJJJHIJJIIJJJHHHHHHHFFFFFCEEDDDDDDDDBDDDDDDDDDCCCDC	ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1	17	52191099	52191125	52191125	52191133	52191133	52191200	8	ACACACAC
-17	52191125	52191133	AC	8	4	8	26	67	AG	0	ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1	CTTCCAGGGCCCTTCCAATGCCAAAAacacacacCTTTTTCCCCTGACCCTCTGTCAGTCTTCTGAATTTAAAGCTGGGCTCTGGGACTTACCAGTGTGAG	CCCFFFFFHHHHHJJJJJJJJJJJJJJIHIIJIJJJJJJJJJJJIGIJJJJJJJHIJJIIJJJHHHHHHHFFFFFCEEDDDDDDDDBDDDDDDDDDCCCDC	ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1	17	52191099	52191125	52191125	52191133	52191133	52191200	8	AGAGAGAG
--- a/test-data/test-data/microsatcompat_out.txt	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-15	64416346	64416378	AT	32	16	18	22	61	TA	0	ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1	TTCCTTTATAAGAAATCTTTACatatatatatatatatatGACTGTTTTGCTTTGTTTTGAGTTTCATAAAAATAGTATCATGGGGGCCGGTCACGGTGGC	CCCFFFFFGHHFFIJIHGHIGIGGEGGIGHEGBHIIIJIFGCHGGIIJJEEIEIADHGICBFIGIGCGIJIIIGIIHIGDHGIIJHF>C888=@DB92<@?	ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1	15	64416324	64416346	64416346	64416378	64416378	64416439	32	ATATATATATATATATATATATATATATATAT
--- a/test-data/test-data/microsatellite_flanking_L.fastq	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-@SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1
-TACCCTCCTGTCTTCCCAGACTGATTTCTGTTCCTGCCCT
-+SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1
-GGGGGGGGGGGGGGGGGFGGGGGGGGGFEGGGGGGGGGGG
--- a/test-data/test-data/microsatellite_flanking_R.fastq	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-@SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1
-TTCTTGACTCCTCTGAATGGGTACGGGAGTGTGGACCTCAGGGAGGCCCCCTTG
-+SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1
-GGGGG?FFFGGGGGDEGGEFFBEFCEEBD@BACB*?=9;(/=5'6=4:?>C*A<
--- a/test-data/test-data/microsatpurity_in.txt	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-15	64416346	64416378	AT	32	16	18	22	61	AT	0	ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1	TTCCTTTATAAGAAATCTTTACatatatatatatatatatGACTGTTTTGCTTTGTTTTGAGTTTCATAAAAATAGTATCATGGGGGCCGGTCACGGTGGC	CCCFFFFFGHHFFIJIHGHIGIGGEGGIGHEGBHIIIJIFGCHGGIIJJEEIEIADHGICBFIGIGCGIJIIIGIIHIGDHGIIJHF>C888=@DB92<@?	ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1	15	64416324	64416346	64416346	64416378	64416378	64416439	32	ATATATATATATATATATATATATATATATAT
-15	64416346	64416378	AT	32	16	18	22	61	AT	0	ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1	TTCCTTTATAAGAAATCTTTACatatatatatatatatatGACTGTTTTGCTTTGTTTTGAGTTTCATAAAAATAGTATCATGGGGGCCGGTCACGGTGGC	CCCFFFFFGHHFFIJIHGHIGIGGEGGIGHEGBHIIIJIFGCHGGIIJJEEIEIADHGICBFIGIGCGIJIIIGIIHIGDHGIIJHF>C888=@DB92<@?	ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1	15	64416324	64416346	64416346	64416378	64416378	64416439	32	ATATATATATATATATATTATATATATATAT
-17	52191125	52191133	AC	8	4	8	26	67	AC	0	ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1	CTTCCAGGGCCCTTCCAATGCCAAAAacacacacCTTTTTCCCCTGACCCTCTGTCAGTCTTCTGAATTTAAAGCTGGGCTCTGGGACTTACCAGTGTGAG	CCCFFFFFHHHHHJJJJJJJJJJJJJJIHIIJIJJJJJJJJJJJIGIJJJJJJJHIJJIIJJJHHHHHHHFFFFFCEEDDDDDDDDBDDDDDDDDDCCCDC	ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1	17	52191099	52191125	52191125	52191133	52191133	52191200	8	ACACACAC
--- a/test-data/test-data/microsatpurity_out.txt	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-15	64416346	64416378	AT	32	16	18	22	61	AT	0	ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1	TTCCTTTATAAGAAATCTTTACatatatatatatatatatGACTGTTTTGCTTTGTTTTGAGTTTCATAAAAATAGTATCATGGGGGCCGGTCACGGTGGC	CCCFFFFFGHHFFIJIHGHIGIGGEGGIGHEGBHIIIJIFGCHGGIIJJEEIEIADHGICBFIGIGCGIJIIIGIIHIGDHGIIJHF>C888=@DB92<@?	ERR194158.789781069_HSQ1008:176:D0UYCACXX:2:1201:4831:11242/1_1_per2_1	15	64416324	64416346	64416346	64416378	64416378	64416439	32	ATATATATATATATATATATATATATATATAT
-17	52191125	52191133	AC	8	4	8	26	67	AC	0	ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1	CTTCCAGGGCCCTTCCAATGCCAAAAacacacacCTTTTTCCCCTGACCCTCTGTCAGTCTTCTGAATTTAAAGCTGGGCTCTGGGACTTACCAGTGTGAG	CCCFFFFFHHHHHJJJJJJJJJJJJJJIHIIJIJJJJJJJJJJJIGIJJJJJJJHIJJIIJJJHHHHHHHFFFFFCEEDDDDDDDDBDDDDDDDDDCCCDC	ERR194158.781426177_HSQ1008:176:D0UYCACXX:2:1109:7175:90983/1_1_per2_1	17	52191099	52191125	52191125	52191133	52191133	52191200	8	ACACACAC
--- a/test-data/test-data/nice1tab.py	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-import sys
-fd=open(sys.argv[1])
-lines=fd.readlines()
-for line in lines:
-    temp=line.strip().split()
-    print '\t'.join(temp)
\ No newline at end of file
--- a/test-data/test-data/probvalueforhetero_in.txt	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-chr	9,10	A	hetero	-1.27220836321	10	10	9
-chr	10,11	A	hetero	-0.939119957032	11	11	10
-chr	11,12	A	hetero	-0.720375026792	12	12	11
-chr	9,9,10	A	hetero	-1.6841441619	9	9	10
-chr	9,10,10	A	hetero	-0.97233405327	10	10	9
-chr	10,10,11	A	hetero	-1.29451118958	10	10	11
-chr	10,11,11	A	hetero	-0.641022011041	11	11	10
-chr	11,11,12	A	hetero	-1.01921634129	11	11	12
-chr	11,12,12	A	hetero	-0.425116661902	12	12	11
--- a/test-data/test-data/probvalueforhetero_out.txt	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-chr	9,10	A	hetero	-1.27220836321	10	10	9	0.247317513163	2	0.494635026326	2
-chr	10,11	A	hetero	-0.939119957032	11	11	10	0.242971784331	2	0.485943568663	2
-chr	11,12	A	hetero	-0.720375026792	12	12	11	0.236065341545	2	0.472130683091	2
-chr	9,9,10	A	hetero	-1.6841441619	9	9	10	0.124528157268	3	0.373584471803	3
-chr	9,10,10	A	hetero	-0.97233405327	10	10	9	0.121477837216	3	0.364433511647	3
-chr	10,10,11	A	hetero	-1.29451118958	10	10	11	0.122575544751	3	0.367726634253	3
-chr	10,11,11	A	hetero	-0.641022011041	11	11	10	0.117020970932	3	0.351062912797	3
-chr	11,11,12	A	hetero	-1.01921634129	11	11	12	0.11865253007	3	0.35595759021	3
-chr	11,12,12	A	hetero	-0.425116661902	12	12	11	0.110871439517	3	0.332614318551	3
--- a/test-data/test-data/profilegenerator_in.txt	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-9	9	100000
-10	10	91456
-10	9	1259
-11	11	39657
-11	10	1211
-11	12	514
--- a/test-data/test-data/profilegenerator_out.txt	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-chr	9,9	A
-chr	9,10	A
-chr	9,11	A
-chr	9,12	A
-chr	10,10	A
-chr	10,11	A
-chr	10,12	A
-chr	11,11	A
-chr	11,12	A
-chr	12,12	A
-chr	9,9,9	A
-chr	9,9,10	A
-chr	9,9,11	A
-chr	9,9,12	A
-chr	9,10,10	A
-chr	9,10,11	A
-chr	9,10,12	A
-chr	9,11,11	A
-chr	9,11,12	A
-chr	9,12,12	A
-chr	10,10,10	A
-chr	10,10,11	A
-chr	10,10,12	A
-chr	10,11,11	A
-chr	10,11,12	A
-chr	10,12,12	A
-chr	11,11,11	A
-chr	11,11,12	A
-chr	11,12,12	A
-chr	12,12,12	A
--- a/test-data/test-data/readdepth2seqdepth.out	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-repeat_length	read_length	informative_read_depth	=locus_specific_sequencing_depth	=genome_wide_sequencing_depth
-10	100	5	10	15
--- a/test-data/test-data/samplePESAM_2_profile_C.txt	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-M01368:22:000000000-A4T24:1:1101:10010:3775_1:N:0:2_1_per1_1	shifted	540	713	713	719	719	759	6	GGGGGG
-M01368:22:000000000-A4T24:1:1101:10015:2849_1:N:0:2_1_per1_2	shifted	4007	4082	4082	4088	4088	4258	6	TTTTTT
-M01368:22:000000000-A4T24:1:1101:10070:4955_1:N:0:2_1_per1_1	shifted	1849	1930	1930	1936	1936	2100	6	CCCCCC
-M01368:22:000000000-A4T24:1:1101:10070:4955_1:N:0:2_1_per1_2	shifted	1849	2025	2025	2030	2030	2100	5	GGGGG
-M01368:22:000000000-A4T24:1:1101:10126:5433_1:N:0:2_1_per1_1	shifted	1428	1517	1517	1522	1522	1543	5	AAAAA
--- a/test-data/test-data/sampleTRgenotypingcorrection	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-chr1	14,13,13,13	A	hetero	-0.429451855856	13	13	14
-chr1	5,6,6,6,6,7,7,8,8	A	hetero	-14.8744881854	7	6	8
--- a/test-data/test-data/sampleTRprofile_C.txt	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-chr1	14,13,13,13	A
-chr1	5,6,6,6,6,7,7,8,8	A
--- a/test-data/test-data/samplefq.snoope	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-6	40	54	G	0	SRR345592.75000006 HS2000-192_107:1:63:5822:176818_1_per1_1	TACCCTCCTGTCTTCCCAGACTGATTTCTGTTCCTGCCCTggggggTTCTTGACTCCTCTGAATGGGTACGGGAGTGTGGACCTCAGGGAGGCCCCCTTG	GGGGGGGGGGGGGGGGGFGGGGGGGGGFEGGGGGGGGGGG?FFDFGGGGGG?FFFGGGGGDEGGEFFBEFCEEBD@BACB*?=9;(/=5'6=4:?>C*A<
--- a/test-data/test-data/samplefq.snoope.new	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-6	40	54	G	0	SRR345592.75000006_HS2000-192_107:1:63:5822:176818_1_per1_1	TACCCTCCTGTCTTCCCAGACTGATTTCTGTTCCTGCCCTggggggTTCTTGACTCCTCTGAATGGGTACGGGAGTGTGGACCTCAGGGAGGCCCCCTTG	GGGGGGGGGGGGGGGGGFGGGGGGGGGFEGGGGGGGGGGG?FFDFGGGGGG?FFFGGGGGDEGGEFFBEFCEEBD@BACB*?=9;(/=5'6=4:?>C*A<
--- a/test-data/test-data/sampleprofilegenerator_in	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-9	9	100000
-10	10	91456
-10	9	1259
-11	11	39657
-11	10	1211
-11	12	514
--- a/test-data/test-data/sampleprofilegenerator_out	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-chr	9,9	A
-chr	9,10	A
-chr	9,11	A
-chr	9,12	A
-chr	10,10	A
-chr	10,11	A
-chr	10,12	A
-chr	11,11	A
-chr	11,12	A
-chr	12,12	A
-chr	9,9,9	A
-chr	9,9,10	A
-chr	9,9,11	A
-chr	9,9,12	A
-chr	9,10,10	A
-chr	9,10,11	A
-chr	9,10,12	A
-chr	9,11,11	A
-chr	9,11,12	A
-chr	9,12,12	A
-chr	10,10,10	A
-chr	10,10,11	A
-chr	10,10,12	A
-chr	10,11,11	A
-chr	10,11,12	A
-chr	10,12,12	A
-chr	11,11,11	A
-chr	11,11,12	A
-chr	11,12,12	A
-chr	12,12,12	A
--- a/test-data/test-data/samplesortedPESAM_C.sam	Wed Apr 22 12:19:28 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-M01368:22:000000000-A4T24:1:1101:10010:3775_1:N:0:2_1_per1_1	113	shifted	720	37	40M	=	541	-46	TTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACC	HHFG@IIHHHHHIHHFHHGFGGGGDBDDEDDDBBB?????	XT:A:U	NM:i:0	SM:i:37	AM:i:37	X0:i:1	X1:i:0	XM:i:0	XO:i:0	XG:i:0	MD:Z:40
-M01368:22:000000000-A4T24:1:1101:10010:3775_1:N:0:2_1_per1_1	177	shifted	541	37	173M	=	720	46	CTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAAC	::GECC:*:)D<GEGGGECCCEC?00E?::CCCCEEECC:C*GEC4'.>ACGGEC:CC?>><DCE?C:EC?GECE?:CCECGEEC*GEECEC:GEEGE?GGECC:ECA2CC*CCC8DEGGEGC=CGECEAEGEEDGGEDEGD=EBGGGFDHHHHHHHHEEHHHHHIIHFIIHH	XT:A:U	NM:i:0	SM:i:37	AM:i:37	X0:i:1	X1:i:0	XM:i:0	XO:i:0	XG:i:0	MD:Z:173
-M01368:22:000000000-A4T24:1:1101:10015:2849_1:N:0:2_1_per1_2	113	shifted	4089	37	170M	=	4008	-176	GCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGTAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAG	GECGGGGGGGGGGGGEGEGGGGD>2GEGGGGGEEGGGGGGGGGGGGGEEECEGEAGGEEGEB>=GGFGEAGHHHEHHHFHFF?ED;HFIHHIIIIHIIHHHHIHHHHIHHHHHHHHIIIIHIHHHHIHHHHHIIHHIIHHIIHIIIIIGGGGGGDDDDDDDDBBB????<	XT:A:U	NM:i:0	SM:i:37	AM:i:37	X0:i:1	X1:i:0	XM:i:0	XO:i:0	XG:i:0	MD:Z:170
-M01368:22:000000000-A4T24:1:1101:10015:2849_1:N:0:2_1_per1_2	177	shifted	4008	37	75M	=	4089	176	TGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGC	CEGGEEEECC?:EEGECGGGGECGGGGEEGGEEGCCGEGGGGGGGGGGDGGGGGE>EEGGGGGGGGGGGAGGGGE	XT:A:U	NM:i:0	SM:i:37	AM:i:37	X0:i:1	X1:i:0	XM:i:0	XO:i:0	XG:i:0	MD:Z:75
-M01368:22:000000000-A4T24:1:1101:10070:4955_1:N:0:2_1_per1_1	129	shifted	1937	37	164M	=	1850	-87	TCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCTCGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGT	HHHHIHHHHHHHHHHHHHHHHHHHHHGGFGGGGGGGHGGGGGGGGGGGGEGGGGGGAEEGGGEGGGGGGEGEEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGECGGGGGGGGGGGGGGAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGCEGEGG	XT:A:U	NM:i:1	SM:i:37	AM:i:37	X0:i:1	X1:i:0	XM:i:1	XO:i:0	XG:i:0	MD:Z:138T25
-M01368:22:000000000-A4T24:1:1101:10070:4955_1:N:0:2_1_per1_1	65	shifted	1850	37	81M	=	1937	87	CCCTTAACAGTACATAGTACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGA	?????BBBEEDBBDDDGGGGGGIIIIIIIIIIIIIHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIHIHHHIIIIIIHGH	XT:A:U	NM:i:0	SM:i:37	AM:i:37	X0:i:1	X1:i:0	XM:i:0	XO:i:0	XG:i:0	MD:Z:81
-M01368:22:000000000-A4T24:1:1101:10070:4955_1:N:0:2_1_per1_2	129	shifted	2031	37	70M	=	1850	-181	TAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGT	GGGGGGGGECGGGGGGGGGGGGGGAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGCEGEGG	XT:A:U	NM:i:1	SM:i:37	AM:i:37	X0:i:1	X1:i:0	XM:i:1	XO:i:0	XG:i:0	MD:Z:44T25
-M01368:22:000000000-A4T24:1:1101:10070:4955_1:N:0:2_1_per1_2	65	shifted	1850	37	176M	=	2031	181	CCCTTAACAGTACATAGTACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCTCGCTCCGGGCCCATAACACTT	?????BBBEEDBBDDDGGGGGGIIIIIIIIIIIIIHHHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIHIHHHIIIIIIHGHIIIHHHHHHHIHHHHHHHHHHHHHHHHHHHHHGGFGGGGGGGHGGGGGGGGGGGGEGGGGGGAEEGGGEGGGGGGEGEEGGGGGGGGGGGGGGGG	XT:A:U	NM:i:0	SM:i:37	AM:i:37	X0:i:1	X1:i:0	XM:i:0	XO:i:0	XG:i:0	MD:Z:176
-M01368:22:000000000-A4T24:1:1101:10126:5433_1:N:0:2_1_per1_1	129	shifted	1523	37	21M	=	1429	-94	GTCTTTAACTCCACCATTAGC	GGGEGGEGGGGGCGGGGGEGG	XT:A:U	NM:i:0	SM:i:37	AM:i:37	X0:i:1	X1:i:0	XM:i:0	XO:i:0	XG:i:0	MD:Z:21
-M01368:22:000000000-A4T24:1:1101:10126:5433_1:N:0:2_1_per1_1	65	shifted	1429	37	89M	=	1523	94	CTATGCATCCAACGCGTTGGGAGCTCTCCCATATGGTCGACCTGCAGGCGGCCGCGAATTCACTAGTGATTTCCAAGGACAAATCAGAG	?????BBBDDDDDDDDGGGFGGFEHIIIIIIIHIIIHIHHHHHIIHFHHHHHHHHHHHHHHHHHHHHGGGGGGGGGGGGGGGGGGEGEE	XT:A:U	NM:i:0	SM:i:37	AM:i:37	X0:i:1	X1:i:0	XM:i:0	XO:i:0	XG:i:0	MD:Z:89
Binary file test-data/test-data/shifted.2bit has changed