# HG changeset patch # User tyty # Date 1418112191 18000 # Node ID 1c325ff557d93db7e9d2b18305ade522b1020038 # Parent 11f0f526dca9e707416af375d330b1e4bed6f68c Uploaded diff -r 11f0f526dca9 -r 1c325ff557d9 Iterative_mapping/iterative_map.py --- a/Iterative_mapping/iterative_map.py Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,127 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -import os -from read_file import * -from read_s_file import * -import random -import string - -type_input = sys.argv[1] -seq_file = sys.argv[2] -ref_file = sys.argv[3] -shift = sys.argv[4] -length = sys.argv[5] -t_end = sys.argv[6] -map_type = sys.argv[7] -output_file = sys.argv[8] - - -if map_type!="default": - s = "" - sm = "" - s = s+"-v "+sys.argv[9] - sm = sm+"-v "+sys.argv[9] - sm = sm+" -5 "+sys.argv[10] - sm = sm+" -3 "+sys.argv[11] - s = s+" -k "+sys.argv[12] - sm = sm+" -k "+sys.argv[12] - if sys.argv[13]: - s = s+" -a" - sm = sm+" -a" - if int(sys.argv[14])>=1: - s = s+" -m "+sys.argv[14] - sm = sm+" -m "+sys.argv[14] - if sys.argv[15]: - s = s+" --best --strata " - sm = sm+" --best --strata " - -else: - s = "-v 3 -a --best --strata " - sm = "-v 3 -a --best --strata " - -ospath = os.path.realpath(sys.argv[0]) -ost = ospath.split('/') -syspath = "" -for i in range(len(ost)-1): - syspath = syspath+ost[i].strip() - syspath = syspath+'/' - -syspathrs = os.getcwd() -syspathrs = syspathrs+'/' - -os.system("bowtie-build -f "+ref_file+" "+syspathrs+"ref > "+syspathrs+"log.txt") - -os.system("cp "+seq_file+" "+syspathrs+"seq0.fa") - -if type_input == "fasta": - tp = 'fasta' -if type_input == "fastq": - tp = 'fastq' - -k = 0 - -if type_input == "fasta": - os.system("bowtie "+sm+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam") -if type_input == "fastq": - os.system("bowtie "+sm+"-q "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam") - -while(True): - os.system("samtools view -Sb -F 0xfff "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get mapped reads - os.system("samtools view -Sb -f 0x4 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"umapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get unmapped reads - os.system("samtools view -Sb -f 0x10 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"rmapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get reversed mapped reads - os.system("samtools merge -f "+syspathrs+"unmapped"+str(k)+".bam "+syspathrs+"umapped"+str(k)+".bam "+syspathrs+"rmapped"+str(k)+".bam") #get reversed mapped reads - os.system("samtools view -h -o "+syspathrs+"unmapped"+str(k)+".sam "+syspathrs+"unmapped"+str(k)+".bam") #get reversed mapped reads - if k>0: - os.system("samtools view -h -o "+syspathrs+"mapped"+str(k)+".sam "+syspathrs+"mapped"+str(k)+".bam") #get reversed mapped reads - os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"unmapped"+str(k)+".txt") - os.system("cut -f 1 "+syspathrs+"mapped"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".txt") - os.system("python "+syspath+"remove_map.py "+syspathrs+"unmapped"+str(k)+".txt "+syspathrs+"mapped"+str(k)+".txt "+syspathrs+"runmapped"+str(k)+".txt") - os.system("rm "+syspathrs+"mapped"+str(k)+".sam") - os.system("rm "+syspathrs+"mapped"+str(k)+".txt") - os.system("rm "+syspathrs+"unmapped"+str(k)+".txt") - else: - os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"runmapped"+str(k)+".txt") - - os.system("rm "+syspathrs+"unmapped"+str(k)+".bam") - os.system("rm "+syspathrs+"umapped"+str(k)+".bam") - os.system("rm "+syspathrs+"rmapped"+str(k)+".bam") - os.system("python "+syspath+"seq_track.py "+syspathrs+"runmapped"+str(k)+".txt "+syspathrs+"seq"+str(k)+".fa "+syspathrs+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence - os.system("python "+syspath+"truncate.py "+syspathrs+"unmap_seq"+str(k)+".fa "+shift+" "+syspathrs+"seq"+str(k+1)+".fa "+length+" "+t_end) #truncate unmapped sequence - os.system("rm "+syspathrs+"seq"+str(k)+".fa") #Remove sequences being mapped - os.system("rm "+syspathrs+"map"+str(k)+".sam") #Remove mapping file - os.system("rm "+syspathrs+"unmap_seq"+str(k)+".fa") #Remove unmapped sequnce - os.system("rm "+syspathrs+"runmapped"+str(k)+".txt") - os.system("rm "+syspathrs+"unmapped"+str(k)+".sam") - - os.system("wc -l "+syspathrs+"seq"+str(k+1)+".fa > "+syspathrs+"count"+str(k+1)+".txt") - c = read_sp_file(syspathrs+"count"+str(k+1)+".txt") - if c[0][0] == '0': #If no reads is in the sequence file, stop - os.system("rm "+syspathrs+"count"+str(k+1)+".txt") - os.system("rm "+syspathrs+"seq"+str(k+1)+".fa") - break - os.system("rm "+syspathrs+"count"+str(k+1)+".txt") - k = k+1 - if type_input == "fasta": - os.system("bowtie "+s+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam") - if type_input == "fastq": - os.system("bowtie "+s+"-q "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam") - - -ss = "" -for i in range(0,k+1): - ss = ss+" "+syspathrs+"mapped"+str(i)+".bam" - - -os.system("samtools merge -f "+syspathrs+"combine.bam"+" "+ss) -os.system("samtools sort "+syspathrs+"combine.bam sorted") -os.system("samtools view -b -h sorted.bam > " + output_file) -#print("samtools merge mapped_all.bam"+ss) -os.system("rm "+syspathrs+"mapped*.bam") -os.system("rm "+syspathrs+"combine.bam") -os.system("rm "+syspathrs+"sorted.bam") -os.system("rm "+syspathrs+"ref*") -#os.system("rm -r "+syspathrs) - - diff -r 11f0f526dca9 -r 1c325ff557d9 Iterative_mapping/iterative_map.xml --- a/Iterative_mapping/iterative_map.xml Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ - - - - #if $mapping_file.type == "user" - iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output $mapping_file.param_v $mapping_file.param_five $mapping_file.param_three $mapping_file.param_k $mapping_file.param_a $mapping_file.param_m $mapping_file.param_best - #else - iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output - #end if - - - biopython - numpy - samtools - bowtie - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**TIPS**: - ------ - -**Input**: - -* 1. Sequence file type (FASTA/FASTQ) -* 2. Sequence file (fasta/fastq format) -* 3. Reference file (fasta) used to map the reads to -* 4. “Shift” (The length of the sequence that will be trimmed at the 3’end of the reads before each round of mapping) -* 5. “Length” (The minimum length of the reads for mapping after trimming) -* [Optional] -* 1. Bowtie mapping flags (options) [Default: -v 0 -a --best --strata] (-v flag indicates the number of allowed mismatches. Use -5/-3 flag to trim the nucleotides from 5'/3' end of the reads) - ------ - -**Output**: - -A bam file with all of the reads that are mapped - - - - - diff -r 11f0f526dca9 -r 1c325ff557d9 Iterative_mapping/map_ex.py --- a/Iterative_mapping/map_ex.py Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from read_file import * -from Bio import SeqIO - -map_file = sys.argv[1] -result_file = sys.argv[2] - - -#reads = read_t_file(read_file); - -f = open(map_file); -h = file(result_file, 'w') - -for aline in f.readlines(): - tline = aline.strip(); - tl = tline.split('\t'); - if len(tl)>4: - if int(tl[1].strip())== 0: - h.write(tline) - h.write('\n') - - -f.close(); -h.close() - - - - diff -r 11f0f526dca9 -r 1c325ff557d9 Iterative_mapping/rRNA.txt --- a/Iterative_mapping/rRNA.txt Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ ->25s rRNA 3375nts -GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTAAGCATATCAATAAGCGGAGGAAAAGAAACTAACAAGGATTCCCTTAGTAACGGCGAGCGAACCGGGAAGAGCCCAGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGGAAAGGGGCGCCAGAGAGGGTGAGAGCCCGTCGTGCCCGGACCCTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGCAGCCCCAATCGGGCGGTAAATTCCGTCCAAGGCTAAATACGGGCGAGAGACCGATAGCGAACAAGTACCGCGAGGTAAAGATGAAAAGGACTTTGAAAAGAGAGTCAAAGAGTGCTTGAAATTGTCGGGAGGGAAGCGGATGGGGGCCGGCGATGCGTCCTGGTCGGATGCGGAACGGAGCAATCCGGTCCGCCGATCGATTCGGGGCGTGGACCGACGCGGATTACGGTGGCGGCCTAAGCCCGGGCTTTTGATACGCTTGTGGAGACGTCGCTGCCGTGATCGTGGTCTGCAGCACGCGCCTAACGGCGTGCCTCGGCATCAGCGTGCTCCGGGCGTCGGCCTGTGGGCTCCCCATTCGACCCGTCTTGAAACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAACGGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTGGCGGGATCCTCGCGGGTGCACCGCCGACCGACCTTGATCTTCTGAGAAGGGTTCGAGTGTGAGCATGCCTGTCGGGACCCGAAAGATGGTGAACTATGCCTGAGCGGGGTAAAGCCAGAGGAAACTCTGGTGGAAGCCCGCAGCGATACTGACGTGCAAATCGTTCGTCTGACTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGAGCTCGGACGCGAGTTCTATCGGGTAAAGCCAATGATTAGAGGCATTGGGGGCGCAACGCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACGTGTCGGCTGCTTTGTTGAGCCGTCACACGGAATCGAGAGCTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGGAAGCCGGGTTACGGTGCCCAACTGCGCGCTAACCTAGAACCCACAAAGGGTGTTGGTCGATTAAGACAGCAGGACGGTGGTCATGGAAGTCGAAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATGGCGCTTAAGCGCGACCTATACCCGGCCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGGGCGCGGCGGTCGCTGCAAAACCTAGGGCGCGAGGCGCGGAGCGGCCGTCGGTGCAGATCTTGGTGGTAGTAGCAAATATTCAAATGAGAACTTTGAAGGCCGAAGAGGGGAAAGGTTCCATGTGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGGGGGCCTCGGGAAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGGAGGTAGGGTCCAGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCGGTGCGCCCCCGGGCGCCCTTGAAAATCCGGAGGACCGAGTGCCGCTCACGCCCGGTCGTACTCATAACCGCATCAGGTCTCCAAGGTGAACAGCCTCTGGTCGATGGAACAATGTAGGCAAGGGAAGTCGGCAAAATGGATCCGTAACTTCGGGAAAAGGATTGGCTCTGAGGGCTGGGCTCGGGGGTCCCAGTTCCGAACCCGTCGGCTGTCAGCGGACTGCTCGAGCTGCTTCCGCGGCGAGAGCGGGTCGCCGGCTGCCGGCCGGGGGACGACTGGGAACGGCTCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGCTCAGAACTGGTACGGACAAGGGGAATCCGACTGTTTAATTAAAACAAAGCATTGCGATGGTCCCTGCGGATGCTAACGCAATGTGATTTCTGCCCAGTGCTCTGAATGTCAAAGTGAAGAAATTCAACCAAGCGCGGGTAAACGGCGGGAGTAACTATGACTCTCTTAAGGTAGCCAAATGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATTAACGAGATTCCCACTGTCCCTGTCTACTATCCAGCGAAACCACAGCCAAGGGAACGGGCTTGGCAGAATCAGCGGGGAAAGAAGACCCTGTTGAGCTTGACTCTAGTCCGACTTTGTGAAATGACTTGAGAGGTGTAGGATAAGTGGGAGCTTCGGCGCAAGTGAAATACCACTACTTTTAACGTTATTTTACTTACTCCGTGAATCGGAGGCCGGGGTACAACCCCTGTTTTTGGTCCCAAGGCTCGCTTCGGCGGGTCGATCCGGGCGGAGGACATTGTCAGGTGGGGAGTTTGGCTGGGGCGGCACATCTGTTAAAAGATAACGCAGGTGTCCTAAGATGAGCTCAACGAGAACAGAAATCTCGTGTGGAACAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGTACGAATACGAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACTTCGGAATTTGAAGCTAGAGGTGTCAGAAAAGTTACCACAGGGATAACTGGCTTGTGGCAGCCAAGCGTTCATAGCGACGTTGCTTTTTGATCCTTCGATGTCGGCTCTTCCTATCATTGTGAAGCAGAATTCACCAAGTGTTGGATTGTTCACCCACCAATAGGGAACGTGAGCTGGGTTTAGACCGTCGTGAGACAGGTTAGTTTTACCCTACTGATGCCCGCGTCGCGATAGTAATTCAACCTAGTACGAGAGGAACCGTTGATTCGCACAATTGGTCATCGCGCTTGGTTGAAAAGCCAGTGGCGCGAAGCTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTAGAAGCGACGCATGCGCCCGCCGCCCGATTGCCGACCCTCAGTAGGAGCTTAGGCTCCAAAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAACGGTCGTTCGGACCGCCTTGAATTATAATTACCACCGAGCGGCGGGTAGAATCCTTTGCAGACGACTTAAATACGCGACGGGGTATTGTAAGTGGCAGAGTGGCCTTGCTGCCACGATCCACTGAGATTCAGCCCTTTGTCGCTAAGATTCGA ->gi|20197903:2706-4513 Arabidopsis thaliana chromosome 2 BAC F23H14 genomic sequence, complete sequence -TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTGTAAGTATGAACGAATTCAGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTTTGATGGTAACTACTACTCGGATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAAACCCCGACTTATGGAAGGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGGCCTACCATGGTGGTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAGAGGGAGCCTGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTACCCAATCCTGACACGGGGAGGTAGTGACAATAAATAACAATACTGGGCTCTTTCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCCCTTAACGAGGATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGATAACATCATAGGATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAGGGACAGTCGGGGGCATTCGTATTTCATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCTAGTCTCAACCATAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTATAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTTTTGGGTTCCGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACATAGTAAGGATTGACAGACTGAGAGCTCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGCGATTTGTCTGGTTAATTCCGTTAATGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAGGCATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTTTAGGCCAAGGAAGTTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACACTGATGTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAATCTTTGAAATTTCATCGTGATGGGGATAGATCATTGCAATTGTTGGTCTTCAACGAGGAATTCCTAGTAAGCGCGAGTCATCAGCTCGCGTTGACTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAATGATCCGGTGAAGTGTTCGGATCGCGGCGACGTGGGTGGTTCGCCGCCCGCGACGTCGCGAGAAGTCCACTAAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTG ->Arabidopsis thaliana 1 -GGATGCGATCATACCAGCACTAATGCACCGGATCCCATCAGAACTCCGCAGTTAAGCGTGCTTGGGCGAGAGTAGTACTAGGATGGGTGACCTCCTGGGAAGTCCTCGTGTTGCATCCCTC ->gi|186498419|ref|NR_022453.1| Arabidopsis thaliana (AT2G01020) rRNA -AAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCCAAGCCTTCTGGCCGAGGGCACGTCTGCCTGGGTGTCACAA \ No newline at end of file diff -r 11f0f526dca9 -r 1c325ff557d9 Iterative_mapping/read_file.py --- a/Iterative_mapping/read_file.py Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys - - - -def read_t_file(in_file): - f = open(in_file); - result = []; - for aline in f.readlines(): - temp = []; - tline = aline.strip(); - tl = tline.split('\t'); - for i in range(0, len(tl)): - temp.append(tl[i].strip()); - result.append(temp); - f.close(); - return result; - - diff -r 11f0f526dca9 -r 1c325ff557d9 Iterative_mapping/read_file.pyc Binary file Iterative_mapping/read_file.pyc has changed diff -r 11f0f526dca9 -r 1c325ff557d9 Iterative_mapping/read_s_file.py --- a/Iterative_mapping/read_s_file.py Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys - - - -def read_sp_file(in_file): - f = open(in_file); - result = []; - for aline in f.readlines(): - temp = []; - tline = aline.strip(); - tl = tline.split(' '); - for i in range(0, len(tl)): - if len(tl[i].strip())>0: - temp.append(tl[i].strip()); - result.append(temp); - f.close(); - return result; - - diff -r 11f0f526dca9 -r 1c325ff557d9 Iterative_mapping/read_s_file.pyc Binary file Iterative_mapping/read_s_file.pyc has changed diff -r 11f0f526dca9 -r 1c325ff557d9 Iterative_mapping/remove_map.py --- a/Iterative_mapping/remove_map.py Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,29 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from read_file import * - - -unmap_file = sys.argv[1] -map_file = sys.argv[2] -result_file = sys.argv[3] - - -unmap = read_t_file(unmap_file) -mapped = read_t_file(map_file) -h = file(result_file, 'w') - -maps = set() -for i in range(len(mapped)): - maps.add(mapped[i][0]) - - -for i in range(len(unmap)): - name = unmap[i][0] - if name not in maps: - h.write(name) - h.write('\n') - - -h.close() diff -r 11f0f526dca9 -r 1c325ff557d9 Iterative_mapping/sample.fasta --- a/Iterative_mapping/sample.fasta Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1000 +0,0 @@ ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1415:1662#0/1 -TCATTCATCCATTTCCAGTGCTCAGCTAACCCCAACT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1359:1735#0/1 -TGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1469:1743#0/1 -TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1750:1642#0/1 -AACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1899:1627#0/1 -AAAAACGACTCTCGGCAACGGATATCTCGGCTCTCGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1859:1668#0/1 -TACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCCCCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1830:1700#0/1 -ATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2000:1705#0/1 -TTAATGATTAACAGGGACAGTCGGGGGCATTCGTATT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2149:1632#0/1 -GATCATACATTACTGACTAAAAGAAGCAAAATCTTGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2052:1637#0/1 -AGAACATGAAACCGTAAGCTCCCAAGCAGTGGGAGGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2036:1706#0/1 -AAAACTTAGCTGAGACGACGCAGAAACAGGTGAGATC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2183:1706#0/1 -TTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2068:1721#0/1 -AAGTAGCACGTCCCTCAGGAAAGAAGCTCTTCAGATT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2162:1740#0/1 -ATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2339:1628#0/1 -TGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCCGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2338:1670#0/1 -TCGCGCGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2459:1678#0/1 -CCTCGTCGCTGCGTCTCTCTCCCGCAACCTTCGATTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2492:1685#0/1 -AAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2286:1687#0/1 -AAGGCGGTGGAGGCCGCCCAGCATTGCCCGCACCTAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2283:1704#0/1 -ACTCTTGTTACAATGATTGTATGACATTCCTGATGGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2475:1719#0/1 -TTATGCAGAAATTGCTATAAGAAGAAACCTAAACTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2299:1733#0/1 -CTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2464:1744#0/1 -GTGTTTTTATCCAAATCCGGGGATAAACACATTTTGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2547:1662#0/1 -AGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2512:1664#0/1 -TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2675:1664#0/1 -GTACAGATCGGAAGAGCACACGTCTGAACTCCAGTCA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2694:1675#0/1 -ACTACGAGAGGAACCGTTGATTCGCACAATTGGTCAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2558:1692#0/1 -ATCTTTCATATCCAGAGAGAGAGAAAGAGAACAAAAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2587:1709#0/1 -CAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2560:1727#0/1 -TTTCCGAGAGTATGCAGATTTTGTTTTCCAAGAATAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2960:1646#0/1 -CTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2981:1669#0/1 -GATTCATCCCAAAACATTACAAAACGTTACAATGGCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2884:1740#0/1 -TCTCGCGCTTGTACGGCTTTGGCTCGGATTCGTCCGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3172:1696#0/1 -TCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3046:1703#0/1 -AGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3218:1717#0/1 -TCTCGCGCTTGTACGGCTTTGGCTCGGATTCGTCCGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3140:1732#0/1 -ATAAGAAGGTTATTGATTTGGTTAAAGAATACAATGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3212:1744#0/1 -TCCTGGTCTTAATTGGCCGGGTCGTGGCCCCCGCGCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3277:1654#0/1 -TACTCGGATAACCGTAGTAATTCTAGAGCTAATACGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3373:1695#0/1 -CGTTTAGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3255:1702#0/1 -ACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3391:1733#0/1 -AAATAGAAGGGTCAAAAGCTAAGGAAGAAAAGAAAGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3606:1635#0/1 -TTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3710:1652#0/1 -CCCCGTGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3551:1662#0/1 -AGAACATGAAACCGTAAGCTCCCAAGCAGTGGGAGGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3696:1663#0/1 -ACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3679:1667#0/1 -ACGGGGGGCAGCAGTGGGGAATCTTGGACAATGGGCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3622:1671#0/1 -TCATCCAATTGGAGACGAATCATATCGAGCGATGGAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3506:1682#0/1 -TGCGTTGTGGAAGTCGAAGAGGTTGATGAATATTGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3540:1687#0/1 -AAATTAAACCTCACCGACGGATTCAATTCTCTCGTTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3627:1725#0/1 -TCATCCAGATCTCAACTTTCTCTCATCTTCAAATTAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3710:1735#0/1 -GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3787:1636#0/1 -AGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3996:1694#0/1 -AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3942:1696#0/1 -TGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4104:1635#0/1 -AGGTCGGCGGTTCCATCACCACAACGCCGGACGACAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4243:1713#0/1 -AGATACTCCTTTGACCGAAGAAACCATAACGGAAGCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4091:1714#0/1 -AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4229:1733#0/1 -GGGCCTGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4060:1745#0/1 -TGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4308:1693#0/1 -TCCTCGGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4384:1697#0/1 -AAAATTTCATCGTGATGGGGATAGATCATTGCAATTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4506:1651#0/1 -GATAATAAAGAGATGGAGATTGTTTTGGAGCAACGGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4535:1654#0/1 -AAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4592:1677#0/1 -CTACCTGGTTAATCCTGCCAGTAGTCATATGCTTGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4726:1686#0/1 -TATGTTTTTGTTTGTTCGTAAATTCTTGACATCACTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4544:1694#0/1 -TGGAGCTCCGCTGAATTTTCTTTGCTCCATTTCCGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4604:1707#0/1 -TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4996:1665#0/1 -CTTATTTTACGAAGCTCCCCTCGGTTACAGCATTGAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4860:1668#0/1 -AGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4786:1671#0/1 -TATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4889:1691#0/1 -ATGGGGGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4843:1693#0/1 -AGGCAGTCCGATCCAACGGCTAGGCACTTACATGGCA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4977:1703#0/1 -ATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4791:1723#0/1 -TTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5019:1686#0/1 -ACAATGGAGCAACCACTCCTTCCGGCCTCGTCGCCGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5134:1686#0/1 -TCTGCTGTTGTAGAGCCTTACAACAGTGTGCTTTCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5307:1664#0/1 -ACGTGCGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5278:1674#0/1 -GCATCAAAATCCTCCGACGATGACAACCATAGCTGCA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5486:1715#0/1 -TTGTGGTGGCGACGCATCATTCAAATTTCTGCCCTAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5541:1641#0/1 -TTCTCCCCGAAATGCGTTGAGGCGCAGCAGTTGACTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5706:1646#0/1 -ACGAGGATCCATTGGAGGGCAAGTCTGGTGCCTGCAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5593:1651#0/1 -AAGGGGCTTCTTGTCATTGATGATGAGCTAGCCACCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5503:1695#0/1 -GAGAGGGAACTAATCATAAGAGATGCAATGAGTGTGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5581:1706#0/1 -ATAAAAACCAGAAGAATCCGTATAAATTATCCTAACA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5713:1729#0/1 -TCTCTTGGAGAGTTCGATCCTGGCTCAGGATGAACGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5831:1656#0/1 -TTCACGTGGCTCAAGTCACTAGCAATGCTCTTGCTTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5942:1680#0/1 -TCCGGTGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5772:1711#0/1 -AGCGCGAACTTCGAAAGGGGATCTGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5957:1723#0/1 -TTACAATAAGTCTGCCTATAGTGGGAGAGGTGACAGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5981:1739#0/1 -GAGAAATTTGGAGTTTCGCCGGAATCTTCCTCTGTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6231:1644#0/1 -AATTGGAAAAGGTGGAGGAGTTGGCGGTGGCATCGGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6152:1724#0/1 -TGCAATTCTACCACGACCTCATCGACGAGCTCATAAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6100:1731#0/1 -TCCTCTGCGTTTTCACGAAACAGAGAACCTTATCGTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6174:1733#0/1 -TCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6310:1648#0/1 -AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6287:1659#0/1 -TCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6332:1666#0/1 -TTTGGGTTCTCTCCTTATAGTTTGATGAACATTGTTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6295:1704#0/1 -AGATAATCGGAACCTTCGTCCTTGTCTACACGGTCTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6590:1655#0/1 -AGAAGCCACCTCCGGTTCCGGTTTACAAGCCCCCGCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6585:1691#0/1 -CTTTTCCCAGAGAAGAAGCAATGACGGTATCTGGGGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6722:1719#0/1 -AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6820:1642#0/1 -AAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7000:1662#0/1 -GGGAAGCGTTGAGGCTTCACCCTCCACTCATCGTGTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6873:1673#0/1 -AGCACTTTAGGATGGCATAGCCTTAAAGTTAAGGGTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6770:1699#0/1 -AACTGTCTACAAAATCATTGAAGATCTACCCAAAAAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6782:1735#0/1 -GATCCGTCAAATTCAATTGATCCTCTCTCCAAATCAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7159:1658#0/1 -ACCCACCTCCGGTTCCGGTTTACAAGCCACCGCCAAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7183:1660#0/1 -ATTTCATCGTGATGGGGATAGATCATTGCAATTGTTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7167:1679#0/1 -TTCTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7223:1688#0/1 -TTGGACGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7410:1647#0/1 -ACCATGCAGTATCGGTTTATATAACATCCACATTGTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7387:1710#0/1 -TCTTCGAGTTTTGCTACTTGTATGGGATGATAAGACT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7397:1741#0/1 -TTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7624:1680#0/1 -AATTGTTGTTGTTATCTTCAACGATAGCGTTGTGCTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7600:1744#0/1 -AGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7789:1676#0/1 -ACCCGTCTGATAGCGCTTAAGCGCGAACTTCGAAAGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7950:1684#0/1 -TGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7926:1706#0/1 -ACGGTATCTGGGGAATAAGCATCGGCTAACTCTGTGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7757:1711#0/1 -TTGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:7970:1747#0/1 -TCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8054:1672#0/1 -CGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8245:1672#0/1 -GATATATAATCATAGATTCCAGAATTTGACATTTTCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8088:1710#0/1 -GAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8244:1713#0/1 -GGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8069:1727#0/1 -TCGTCTACACCGTCTACGCCACAGCCGTTGACCCCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8018:1745#0/1 -TTCTCTACTGGATTGCTCAGCTTCTTGGCTCCGTCGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8394:1647#0/1 -CTACAAGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8475:1658#0/1 -ACACTTGATCGGAATAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8367:1664#0/1 -GGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8488:1695#0/1 -GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8442:1727#0/1 -CGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8328:1744#0/1 -TTGTCATTCTCTTCGCCGGAATCTAATCTCTCTCGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8520:1660#0/1 -TTGGCCGTTGTTTCCTTCTTCTTCTTCTTCTTCTTTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8563:1665#0/1 -AATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8739:1667#0/1 -TCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8596:1682#0/1 -TAGCCCACGAATGCGGTCACAACGCATTCAGCGACTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8643:1688#0/1 -TTGCACGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8727:1745#0/1 -AGGGAGAGCTAATGCTTCTTGGGTATTTAGGTTTGAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8767:1649#0/1 -TCGGTACCAAATCGAGGCAAACTCTGAATACTAGATA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8932:1656#0/1 -GCTGGCGACGCATCATTCAAATTTCTGCCCTATCAAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8841:1666#0/1 -TCAAACGAGGAAAGGCTTACGGTGGATACCTAGGCAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8951:1673#0/1 -TGGTGGAGCTGGAGGAGGATTTGGTGGAGGAGCTGGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8876:1674#0/1 -AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:8993:1737#0/1 -TTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:9241:1664#0/1 -AAGTCACTCTTTCTTGTTGCCTTACTTGTCGGCTCTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:9095:1710#0/1 -AGAACGAAAGTTGGGGGCTCGAAGACGATAAGATACC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:9231:1716#0/1 -TCGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:9171:1733#0/1 -AAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:9471:1658#0/1 -GACACATACACACATAGCCATGGCCTCTTCTTTCTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:9383:1680#0/1 -TTCAGTGTTGATTCGTCTTCCTTCACGCCGTTCTTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:9627:1730#0/1 -ATTGTTTGTAAAGGGTGAGAGATTATTTTTCAGTGAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:9921:1680#0/1 -ATGGCTTCTCCGGTGAGATACCTGACTCGATTGGTTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:9931:1727#0/1 -AAAGCATCGGCTAACTCTGTGCCAGCAGCCGCGGTAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:10235:1668#0/1 -AAAAATCTTGAGTAAAAACAAATTTTCCTGTATCTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:10040:1686#0/1 -AAAACCCGTCTGATAGCGCTTAAGCGCGAACTTCGAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:10169:1689#0/1 -ACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:10213:1743#0/1 -GGCGCTTAAGCGCGCGACCTATACCCGGCCGTCGGGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:10021:1744#0/1 -CAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:10294:1712#0/1 -TTCTCTCTCTAACATTCTTCAGAGAGGGAGACTTTAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:10392:1736#0/1 -TAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:10582:1655#0/1 -ACGGTACTGGACAATGTGGAAGCTTCCCTTGTTCGGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:10777:1673#0/1 -CACTCTCACCCACAAGTTAGTCATAAAAAAAAAAAAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:10980:1714#0/1 -GTGGAGAGACTGAAAACCGCGAAGAGGATGTGAATGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:10931:1720#0/1 -TTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11171:1717#0/1 -TGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11458:1668#0/1 -GATCACAAGTTTTAAGCAGTATTTGTAAGAAAATGGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11388:1696#0/1 -TTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11317:1701#0/1 -ACAACTCAAGCTTCCATCAACTTGACCCACACCGAAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11403:1706#0/1 -TAGATAACATCAAGACAACAACCGTCGGTCCCGGAAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11291:1739#0/1 -GAAAATCAAATCTTTTCATTTACAATTATCTTTCTTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11593:1673#0/1 -TCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11537:1726#0/1 -CCATCTCCGTATTGTCTTCTACGTAGACAATGTGCCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11709:1733#0/1 -CGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11851:1657#0/1 -TAATCAAGATCGAAAGTTGGGGGCTCGAAGACGATCA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11822:1687#0/1 -TTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11950:1691#0/1 -TTATGTTTACAGCTCTCCTCCTCCTCCGGTGAAGTCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11752:1704#0/1 -TGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11895:1708#0/1 -TTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:11756:1739#0/1 -TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12136:1709#0/1 -ATGCGAAAAGTGTAAAGGTGGGAAGATCGGAATTGCA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12070:1715#0/1 -ACGAGCAGCTCCCAACCACAGACTACTGAATTAATCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12204:1724#0/1 -GAAAATTTTCCCCATTAAACAAAAAAAAATCAAATCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12140:1746#0/1 -ACTACTCTCTCCACTAAACAAAAACACTAGAGTTAAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12332:1664#0/1 -GCGAAAGCATTTGCCAAGGATGCTTTCATTAATCAAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12455:1665#0/1 -ACAAGTCTTGTCTCTCGAGTGTTCTTCAAATGTTAGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12252:1728#0/1 -GGAACTCCATGCGAATATGAAGCGCATGGATACAAGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12315:1747#0/1 -TGGACCGGTAATTTCATTACATCGCCGGACGGCCGGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12640:1665#0/1 -TCTCTTCTACAGTAAACAAAAAATGGCAATGAATGGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12666:1668#0/1 -TTATTCTGCACTTGGAAGAAGAACTAGAAAAAGGAAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12735:1728#0/1 -ATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12751:1677#0/1 -TCCTCCTGTTTACAAGTCCCCACCACCACCGGTTAAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12821:1679#0/1 -TCTTCTCCGGTGATTTACTTACTTAACAATCATGGCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12935:1691#0/1 -ACTGCCTTCGGATTTGATTTGGTTCGTGGCACCAAGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:12996:1719#0/1 -TAATCTCTCTGTTTAATCTTATGATCTGCTGTTTTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13223:1661#0/1 -GTTGATGTGTTTATTCCCAGAGATCGAAGGACTGGTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13442:1665#0/1 -TTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13331:1684#0/1 -GGGGGTCGCAGTGACCAGGCCCGGGCGACTGTTTACC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13451:1685#0/1 -TGCTCAAATTTCTTCTTTAGAAGATTAAAAATCTTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13309:1690#0/1 -TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13312:1742#0/1 -AAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13704:1696#0/1 -GACTCGGAGCAGAGATTTAGGGTCTGTAATTTGTATA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13563:1703#0/1 -TATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13735:1728#0/1 -AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13899:1667#0/1 -CGTTATTTTACTTACTCCGTGAATCGGAGGCGGGGTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13836:1707#0/1 -AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13886:1718#0/1 -GAACAGAAAATAGAAAGAAGTAACAAAACCAAAGCAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13866:1724#0/1 -TCAATGAGCCATGGGTTTTCTCGCACGCTGGCTATGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:13943:1728#0/1 -TCGAAGACGATCAGATACCGTCCTAGTCTCAACCATA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:14097:1712#0/1 -CCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:14116:1729#0/1 -GTTCTCACGTTCGAAGACCATTTCTTGTTCTTTGGAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:14699:1669#0/1 -AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:14594:1718#0/1 -AATTTCCTCATGTTGAGAGGTACTTCTGGACTGTGGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:14743:1729#0/1 -AACTCACCTGCCGAATCAACTAGCCCCGAAAATGGAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:14510:1734#0/1 -TTCACCCACCGCAGCTTCACCACCGGCACCTCCGACG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:14821:1705#0/1 -AACTGGCTAACACGTATTGGGTTGAGTATCTCTCTAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:14892:1717#0/1 -CTTTCGGATATTCCACCAGTCTCTCGCAATCTTCGCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:14835:1743#0/1 -AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:15239:1702#0/1 -TCTGACGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:15059:1717#0/1 -TGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:15152:1718#0/1 -GGGCAGGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:15444:1702#0/1 -GATATCTATTTATTCAATAACCCTTACAACACCGAAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:15407:1747#0/1 -TCGGATACGGGTCGGGCTTCTTAGCTGTTTGAAGAAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:15681:1670#0/1 -AATTGCCACAACATGGGCGTTCTTCTTAGCAAGAATT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:15630:1695#0/1 -TTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGGTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:15506:1715#0/1 -TTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:15976:1688#0/1 -ATCATCCATCACATCTCCTCTTGCAAACCAACGTAAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:15940:1745#0/1 -AACAAGGTAGCCGTACTGGAAGGTGCGGCTGGATCAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16043:1680#0/1 -ACGTATGTGGCAAAATACGGGGATGACTTGTGGCTAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16175:1683#0/1 -ATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16074:1695#0/1 -AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16238:1700#0/1 -AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16229:1727#0/1 -TCGAAGACGATCAGATACCGTCCTAGTCTCAACCATA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16025:1727#0/1 -CCGGGAAGGACGCACCTCTGGTGTACCAGTTATCGTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16429:1678#0/1 -ACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16255:1695#0/1 -CTAAGAAAGTTGATCCACCGCCGGTGCCAGTCCACAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16494:1739#0/1 -AGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16656:1687#0/1 -TTAAATCAGTTATAGTTTGTTTGATGGTAACTACTAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16685:1715#0/1 -TAAAACGTCGTGGACTTTTTGAGTCTGACGCTGCATT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16885:1690#0/1 -CTCAGGATCGGAAGAGCACACGTCTGAACTCCAGTCA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16994:1709#0/1 -ATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16768:1715#0/1 -TCGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:16899:1742#0/1 -GGTAAACTTCGAGCCTGTCCGACCCAGAAGGCACAAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:17201:1693#0/1 -TCTTTGGATAACAATATCCCAAACTGAAAATGGCTAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:17179:1704#0/1 -TTAAGTCCTCCGCTGCCTTCCCAGCCACCCGCAAGGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:17158:1711#0/1 -ACTAATGTAAAGGAAGCCTGTGCTTGGCTTGGATATA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:17051:1725#0/1 -ATAACGGTCCTAAGGTAGCGAAATTCCTTGTCGGGTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:17448:1685#0/1 -ACAATGACTGGGCCTCCACCTGCCATGGTGATGCCTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:17551:1710#0/1 -GGTTTGTCTTCAAAATCGTCAGAAGAAGAAGAAGAAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:17662:1746#0/1 -GCAACACGGGGAAACTTACCAGGTCCAGACATAGTAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:17823:1706#0/1 -TCCCCCGGTTGGATTGAAGGGTTGAAAAAATTAGACA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:17951:1723#0/1 -TGATCGGAGGATGTTGCGACGGAGCCGTCCTTTGACC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18174:1676#0/1 -ATTAAATCCTAAAATCCATTATTGATTGAATCTTCGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18241:1694#0/1 -CCAAATCTAATAAATCTAAGTGTAGTTTTCGGTGTAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18095:1727#0/1 -ACAAGCAACGGCGGAAGAGTTAACTGCATGCAGGTGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18074:1746#0/1 -ACCACCAAATCGCCGCCATGTTTAAACAAGCTTCTCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18318:1679#0/1 -GAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18410:1680#0/1 -GCAAGCCTACGCTCTGGATACATTAGCATGGGATAAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18290:1681#0/1 -TCAGGCCTTGGAGGACCGAACCCACGTATGTGGCAAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18280:1722#0/1 -GAGTTCGATCCTGGCTCAGGATGAACGCTGGCGGCAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18429:1742#0/1 -TAGACTACGGATGGGACACCGCCGGACTTTCAGCTGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18675:1682#0/1 -TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18535:1684#0/1 -CCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18657:1722#0/1 -GAGAAGATAAAGAGATAAAGGAAAGACTAACGTTAAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18618:1729#0/1 -TGAACACACACAAACACACACACACACAGCCTTTTTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:18823:1701#0/1 -ACCTGACCCAGATAGCGAGAAGTTTCATGGATAAGGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:19063:1699#0/1 -ACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:19104:1705#0/1 -AAAAACGACTCTCGGCAACGGATATCTCGGCTCTCGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:19019:1718#0/1 -AGACGATCAGATACCGTCCTGGTCTCAACCATAAACG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:19433:1728#0/1 -AAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:19355:1731#0/1 -AATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:19500:1683#0/1 -TTGTAACACGGACCAAGGAGTCTGACATGTGTGCGAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:19657:1698#0/1 -GGGATTGGCTTTGGGCTTTTCCTGCGCAGCTTAGGTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:19640:1749#0/1 -TTTAATTAATAACAATAAATGTTCTTTTTCAGTTTTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:19866:1690#0/1 -AAGCCCGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:19966:1707#0/1 -AAAATGCCAAGACGGACGATCAGGATACGAGGCTTAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20090:1682#0/1 -CAGGTCCCCGAGTGGCTCACACGATATGCTTCACGTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20183:1689#0/1 -GATATCGAGCGATGGATTTGGCAGAACTGTGGGCGAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20206:1747#0/1 -TCGAAAGTTGGGGGCTCGAAGACGATCAGATGCCGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20292:1695#0/1 -AGGAACGGAGACGGCAGGAACGATGAGTTCTATAGTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20306:1713#0/1 -CCGATGGTTGTGGACAACATGTATTATAAGAACATCA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20402:1713#0/1 -TTACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20275:1726#0/1 -AACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20403:1737#0/1 -AAATAAGAAGACATATTTATCAACTTGATCAACTTGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20539:1702#0/1 -CCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20612:1731#0/1 -AGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20566:1745#0/1 -AGTTCGGTCTCTCTGCCGGAGTCGGATCATTAAACGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20889:1691#0/1 -CTTTCATTTCAGAGTCTTGGTGTTGTTTATGGTGATT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20841:1709#0/1 -TTTGCTGGCATAATGGGAATGGGTTTTCCACCTTACA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:20980:1742#0/1 -TCCACTACTTTTAACGTTATTTTACTTACTCCGTGAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:21184:1708#0/1 -CGATCAGATACCGTCCTAGTCTCAACCATAAACGATG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:21173:1745#0/1 -TCGGTTGCTAATGGTTTGATCAATTTCCTCAACATAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:21427:1722#0/1 -CGTTAACGAACGAGACCTCAGCCTGCTAACTAGCTAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1449:1766#0/1 -TCAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1303:1793#0/1 -TAACCATAAACGATGCCGACCAGGGATCAGCGGATGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1430:1865#0/1 -TCTTCATCCTCGTCTACTGCACCGCCGGTATCTCTGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1450:1892#0/1 -AGCAACTGCCAAAGCACCCGCAACAAAATTATAAAGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1342:1935#0/1 -ATTGATGATGTCTTTACTTCTTCAAGAGGATCTACCA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1406:1941#0/1 -AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1676:1764#0/1 -TGGTGAGACAAAAACGACGATATTTTCCTCTTCCTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1512:1774#0/1 -AAAAACAATAAACGAAAACTGAGAGAGAGATTGAAAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1677:1808#0/1 -ATTGGATCACTTTCAATGAGCCATGGGTTTTCTCGCA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1579:1824#0/1 -TTTTGTGTTTGTCCACCACCTCCTCCATGTTATTCTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1744:1840#0/1 -GGGTGAGAGCCCCGTTGTGCCCGGACCCTGTCGCACC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1579:1852#0/1 -AGAAGAAGAAGAAGAAAAAGTGGAGCAAGCTTCGTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1550:1869#0/1 -AACTCTGTGCCAGCAGCCGCGGTAATACAGAGGATGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1565:1898#0/1 -AGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1738:1918#0/1 -ACATTCCTCACCAAACCCTCTCCAAAACACACCCACA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1547:1922#0/1 -TCGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1732:1964#0/1 -TCTCGCGCTTGTACGGCTTTGGCTCGGATTCGTCCGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1654:1991#0/1 -GATTATATAAGGGAATGTTCAGTTCCAGTGACTGAGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1518:1992#0/1 -GGTGGAGGTTTACAAACACCACCAATCTTCTCTCTTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1865:1796#0/1 -GATTCATCCCAAAACATTACAAAACGTTACAATGGCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1886:1815#0/1 -GGGCGTAAAGCGTCTGTAGGTGGCTTTTTAAGTCCGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1855:1840#0/1 -TGGTATCGGTGGACTCGGCGGTGCAGGTGGGCTAGGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1960:1855#0/1 -ACCTCACTGCCGCCGATTAGACTCCGCCGGATGCAAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1808:1879#0/1 -TTACCATAAACGATGCCGACCAGGGATCAGCGGATGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1869:1890#0/1 -TGCTTCTTGGGTATTTAGGTTTGACACAGCTTCACAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1898:1925#0/1 -TCCCATGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:1765:1950#0/1 -AACTCATATCAACCTTCGCCGGAAATAATGGCTTTCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2096:1773#0/1 -ATAGCACCGATAACAATCTACTCTCTTTAAAAGAAAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2186:1774#0/1 -TTGAGAAAATAAACGAGGAGGTGGCTCATCCTGAGAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2019:1774#0/1 -AAGACGATCAGATACCGTCCTAGTCTCAACCATAAAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2236:1797#0/1 -GTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2218:1825#0/1 -ACCTATCTCACTCTAAAATCTCTCTCTGCCAATCTCA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2067:1830#0/1 -ACATGTCTGTGAAGATGCGGACTACCTGCACCTGGAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2183:1833#0/1 -AGGGCGCGAGCCCGGGCGGAGCGGCCGTCGGTGCAGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2095:1843#0/1 -GACACAACACACCTCAAGCTTTATAACTTCTAAAACA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2184:1923#0/1 -AAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2086:1933#0/1 -AAAAACGACTCTCGGCAACGGATATCTCGGCTCTCGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2134:1944#0/1 -AGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2220:1966#0/1 -AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2066:1971#0/1 -TGAAGCATTTTCCAAAGAGAAAGAGAGAGAAATGGGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2239:1991#0/1 -ATTATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2113:1996#0/1 -TTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2369:1755#0/1 -TGATCCTTTCGTCTTTTTCTGACTCTTCAATCTCTCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2281:1791#0/1 -TTAATTCCGGAACCGGGACGTGGCGGTTGACGGCAAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2431:1837#0/1 -AAAATTTCATCGTGATGGGGATAGATCATTGCAATTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2272:1843#0/1 -GGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2316:1843#0/1 -TGGCTTGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2451:1850#0/1 -TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2264:1872#0/1 -ATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2443:1872#0/1 -ACAGGTCTGTGATGCCCTTAGATGTTCTGGGTCGCAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2297:1890#0/1 -ATAACATCATAGGATTTCGATCCTATTGTGTTGGCCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2472:1986#0/1 -ACTACCACTCTCCACCTCCTCCGGCGAAGTCCCCACC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2564:1768#0/1 -GGGGCCTGATAGGCGGTGGTTTACCCTGTGGCGGATG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2674:1786#0/1 -TAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2648:1802#0/1 -GCGATAATACAGAGGATGCAAGCGTTATCCGGAATGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2691:1831#0/1 -CAGTATAATCAATCAGAAAACAAGTAGAAACTTTAAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2560:1839#0/1 -TCGAAGACGATCAGATACCGTCCTAGTCTCGACCATA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2612:1876#0/1 -TCTTGGGCCTTTGCCACCAACTTTGTTCCCGGAAAGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2503:1919#0/1 -TCAGACGAGGAAAGGCTTACGGTGGATACCTAGGCAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2640:1953#0/1 -CACCGGGAGAAAATCCTCCTCCGCTGTCGTGAGAGCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2553:1969#0/1 -GATCATCACCTTCCCCACTAATACTCTATAGTTTGTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2669:1987#0/1 -GGATCGTGAGACTCCGATGGTTGTGGACAACATGTAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2782:1800#0/1 -GACATTTTCTGTTTCCGTCTACAAGAACCACTTTGTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2957:1811#0/1 -ATTAATTCACATTTAAACACTTCTCTGCATATATTTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2990:1818#0/1 -TATTTTACTTACTCCGTGAATCGGAGGCGGGGTACAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2806:1852#0/1 -TGAAAATAAGCGTAGATCCGGAGATTCCCGAATAGGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2899:1856#0/1 -ACTGATGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2903:1882#0/1 -AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2913:1931#0/1 -TTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2816:1932#0/1 -GCATCAGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2795:1958#0/1 -TCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:2993:1984#0/1 -GACGATCAGATACCGTCCTAGTCTCAACCATAAACGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3163:1754#0/1 -AGGCAAGAGACAACCTGGCGAACTGAAACATCTTAGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3082:1793#0/1 -AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3054:1825#0/1 -TAGATGTTGAGACTCTAATCCCTAACCACAATGACTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3100:1835#0/1 -CTATCTTAAAACTTCTTACCTAAAATAGAAATTTGAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3199:1865#0/1 -TTTCTTAAAAATTGGATTTTGTGTTGGGTTTTTCTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3032:1907#0/1 -AGAGAGAAGAAGTAGGCAGACAAAGAAGAAGAAGAAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3232:1919#0/1 -ATCACAAGCAAACAAGAGAAAACATTTTATTGTTATT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3075:1937#0/1 -ACAGACTGTGAAACTGCGAATGGCTCATTAAATCAGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3005:1941#0/1 -TTTTAAGTCCGCCGTCAAATCCCAGGGCTCAACCCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3026:1990#0/1 -AAGAAAGAGATTCCCCCGCCGGTTCCGGTTTACGATC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3131:1992#0/1 -GGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3319:1777#0/1 -GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3337:1789#0/1 -GACGCGGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3416:1846#0/1 -AACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3292:1846#0/1 -AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3491:1888#0/1 -ATCTTTTCGAAGAGCGGAAGCTAAGGAGAGGCGAATT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3266:1903#0/1 -TATAGAAATGGCCAAAGACGTGGAAGGACCTGAGGGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3339:1904#0/1 -GTGAAGACACCAGAGACGCCTAGTTTGGTGGGAAAGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3435:1918#0/1 -AGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3478:1938#0/1 -CCGGAGAATAATATCCGATCTGCTAGTGCGGTTAATA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3381:1938#0/1 -ATCAGCGACTCTCCCACTCGCTCGTGTCGTCGAAGTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3311:1972#0/1 -ACATTCATCCCAAAACATTACAAAACGTTACAATGGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3471:1996#0/1 -CCCATGTCCACCTAAATACAGTCCTCCTGTGGAGGTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3408:1998#0/1 -AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3714:1760#0/1 -TGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3734:1790#0/1 -ATACAGCAGCTGAAGCTGTGTATCGGAAAGCTCAATT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3699:1795#0/1 -ACAAAAGACAAGTTTTTAAACTGCAGAACCGCATTTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3651:1798#0/1 -TCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3557:1799#0/1 -TGATAGTGCTTAAGCGCGAACTTCGAAAGGGGATCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3531:1811#0/1 -TACAGACACACACGTGGCTCATCACCTGTTCTCGACA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3526:1856#0/1 -TCTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3693:1860#0/1 -AAAACAACTCTGCTTCACTCTCTATCTTTCTTAAGTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3616:1916#0/1 -AGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3599:1930#0/1 -AGACGATCAGATACCGTCCTAGTCTCAACCATAAACG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3620:1956#0/1 -GAATCTCTCTGTGTTTTTTCTATCTCTCTCTTTCTTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3596:1973#0/1 -TGTAAGAGCTAGGCAGCAGGGATTATGTGTACGCAAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3721:1974#0/1 -AAGCCTGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3507:1998#0/1 -ACAAGGTGCTGGTGCTGGAGGAGGATATGGAGGTGGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3756:1769#0/1 -ACGCCCTCGACCTATTCTCAAACTTTAAATAGGTAGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3901:1846#0/1 -TACAAGTCTCCTCCGCCACCAACTCCGACATATGTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3892:1871#0/1 -ATCACCGCCTCCATATTCTTTCGCGTCTTTTGCTTCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3765:1925#0/1 -AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3842:1953#0/1 -TCGAGCATTTTTGACGCCAAGGCTGGAATTGCATTCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3758:1970#0/1 -AACCCCGACTTATGGAAGGGACGCATTTATTAGATAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3931:1972#0/1 -CTCCGTGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:3828:1991#0/1 -CCTTCGGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4152:1771#0/1 -TCCTGAGATTTTTTATATATTTTCTCCAGATCTGCTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4212:1780#0/1 -GACTCAACCAATTTCTTCTCAGGTAATACTCGTAAGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4130:1806#0/1 -TTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4183:1813#0/1 -AAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4010:1857#0/1 -ACTCTGCTTCACTCTCTATCTTTCTTAAGTAAACAAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4206:1859#0/1 -TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4051:1895#0/1 -TCAAGCGTTATCCGGAATGATTGGGCGTAAAGCGTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4226:1939#0/1 -TGATCATCACTTTCACAATCTTCTTCATCGATTTCTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4381:1809#0/1 -GCAGCATCAGCAACAACAGAGCAGTCCTGGCTTTCTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4456:1814#0/1 -TTCATGGACGTTGATAAGATCTTTCCATTTAGCAGCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4311:1831#0/1 -AAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4464:1844#0/1 -TACGCTTGGGCCTTCGTCGCCAAGACTTCTCAAGTTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4254:1850#0/1 -AAAAGGAGGTAGGGGTGCAGAGACAGCCAGGAGGTTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4286:1883#0/1 -ATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4465:1887#0/1 -AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4329:1895#0/1 -CCGTTCGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4379:1906#0/1 -ACTACTCTCTCCACTAAACAAAAACACTAGAGTTAAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4486:1907#0/1 -TAAGAGCCAAAGGTTGAGAATGTGACTCTTGGACCAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4456:1996#0/1 -GAGGATGCGAAAAGATACATCCGGCAACTTCCCAACT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4674:1782#0/1 -TTTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4634:1791#0/1 -GAATCACTTCACTCTCTCTAATCAAAAAGCTTTTAAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4501:1806#0/1 -TCGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4629:1820#0/1 -TGATCGTTCTTATTGACCCTAGCCGCTACACACTTTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4590:1838#0/1 -ATTCATTTCAATCAATCTTCTTCTTCTTCTTCTTCTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4741:1847#0/1 -AACGACTCTCGGCAACGGATATCTCGGCTCTCGCATC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4708:1892#0/1 -TTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4508:1902#0/1 -TTAGCATGGGATAACATCATAGGATTTCGATCCTATT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4691:1903#0/1 -AGTGAAATACCACTACTTTTTACGTTATTTTACTTAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4648:1966#0/1 -AAATTTCAAAATCAGATCCAACAAATCTTCTTCTTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4963:1798#0/1 -AGACGTCGGCGGGGGCCTCGGAAAGAGTTATCTTTTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4766:1882#0/1 -CTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4860:1888#0/1 -ATACTCGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4947:1899#0/1 -CATCAATACTCCTGTGAGAACAAAATGAAGCTTTCTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4982:1926#0/1 -AGACTCGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4879:1942#0/1 -TCGAGTCAGGTAATTGGAATGAGTACAATCTAAATCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:4914:1944#0/1 -GTGCAAAGGTTTCCTCGGGCCGGACGGAGATTGGCCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5091:1754#0/1 -TTTATGTTTCTCATTATTACTGCGGGAATTTCAATTA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5145:1756#0/1 -TATCGGTAGGGGAGCGTTCCGCCTTAGGGGGAAGCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5015:1781#0/1 -TTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5166:1833#0/1 -TTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5035:1836#0/1 -TGGCGACGCATCATTCAAATTTCTGCCCTATCAACTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5195:1862#0/1 -TTCACCCATTTCTTGGCTTACAACAACAAATCTTAAA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5062:1871#0/1 -TTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5235:1878#0/1 -ACGGAAATGTCTTCATGGGTTCTTCCGAACATGTTCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5231:1897#0/1 -GGAACGAACACACCACCTTCTCTCCAATCTGGATCTG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5168:1901#0/1 -ACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5220:1915#0/1 -CCACCTCCGGTTCCGGTTTACAAGCCCCCGCCAAAGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5013:1950#0/1 -ACTTTCAATGAGCCATGGGTTTTCTCGCACGCTGGCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5175:1985#0/1 -CGTGAAATACCACTACTTTTAACGTTATTTTACTTAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5145:1985#0/1 -ATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5453:1770#0/1 -CATCAAAACCCAGACGCTGTATGGTTATGGGGAATGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5412:1781#0/1 -ATGTCGGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5412:1816#0/1 -TAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5496:1822#0/1 -AAAAACCTAAAACAAAAAAAATCTCTTTCCTTCTTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5346:1823#0/1 -AAGCCACCACCAAAGGTGGAGCTTCCACCGCCTATTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5333:1831#0/1 -TCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5305:1833#0/1 -AGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5371:1867#0/1 -GTAGCGTCCTCAGCAACGGACCGGGCCTAAGTTCCCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5374:1893#0/1 -ATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5377:1943#0/1 -TGTTCGGGAAACATTGACGACAAAGGAAAGTTTGGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5313:1973#0/1 -TGTATACAAGTCTCCTCCGCCACCAACTCCGACATAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5620:1756#0/1 -AAAAACTTTCTCTCAATTCTCTCTACCGTGATCAAGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5716:1779#0/1 -TCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5667:1857#0/1 -GTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5654:1887#0/1 -GTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5636:1908#0/1 -TTCTCGCGCTTGTACGGCTTTGGCTCGGATTCGTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5715:1927#0/1 -CCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5610:1936#0/1 -GAAGAAGCAATGACGGTATCTGGGGAATAAGCATCGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5832:1784#0/1 -GCCCGGATCGGAAGAGCACACGTCTGAACTCCAGTCA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5917:1794#0/1 -AGTCGCGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5769:1883#0/1 -AGCAAAGTTTTATGTAATCAAATCGTACAGTGAAGAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5796:1889#0/1 -TTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5992:1905#0/1 -ATGTGTGTATTAATTTATCTTCTTGTTTTAAAGAGAC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5885:1923#0/1 -AAGAAGAGCCAATGGCGATGAAGACATCACATGTTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5976:1937#0/1 -AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:5957:1978#0/1 -TTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6197:1781#0/1 -ACTTAGGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6180:1789#0/1 -ATTTTGGCTGGTGCTGAATACGGTAGTGGAAGTTCTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6038:1789#0/1 -TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6177:1825#0/1 -TTGATCCCGAGACTGAAGCTATGACCAGGAGAATTGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6159:1830#0/1 -CGGGCCGATCGGAAGAGCACACGTCTGAACTCCAGTC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6151:1878#0/1 -ATACGGTGTATGAATCCGAATTACACAGAGTTCAAGT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6123:1878#0/1 -AGCGCTAACTTCGAAAGGGGATCCGGTTAAAATTCCG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6196:1879#0/1 -ATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGG ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6237:1895#0/1 -AAGATCCCAGACGAAATGGCTCAGAAAGTGGTGCTGA ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6023:1896#0/1 -ACAACCGCTGAATATTTGGCTTATGAATGTGGAAAGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6123:1904#0/1 -ATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGC ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6125:1922#0/1 -ATGGATCGTTGCACGTCGTCGTTTTTGCTTCCCACAT ->DGM97JN1_120925_0255_AD166MACXX:4:1101:6184:1926#0/1 -GATCATCAAAAAACACCTCAAAGAATTATTCATTCAG diff -r 11f0f526dca9 -r 1c325ff557d9 Iterative_mapping/seq_track.py --- a/Iterative_mapping/seq_track.py Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from read_file import * -from Bio import SeqIO - -unmap_file = sys.argv[1] -reads_file = sys.argv[2] -result_file = sys.argv[3] -tp = sys.argv[4] - - -unmap = read_t_file(unmap_file); - -h = file(result_file, 'w') - -reads = SeqIO.parse(reads_file,tp) -um = set() -for i in range(0, len(unmap)): - id_r = unmap[i][0] - um.add(id_r) - -for read in reads: - if read.id in um: - h.write('>') - h.write(read.id) - h.write('\n') - h.write(read.seq.tostring()) - h.write('\n') - - - -h.close() - - - - diff -r 11f0f526dca9 -r 1c325ff557d9 Iterative_mapping/truncate.py --- a/Iterative_mapping/truncate.py Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from Bio import SeqIO - -fasta_file = sys.argv[1] -shift_in = sys.argv[2] -result_file = sys.argv[3] -length = sys.argv[4] -t_end = sys.argv[5] - -shift = int(shift_in) - -fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); -h = file(result_file,'w') -for seq in fasta_sequences: - nuc = seq.id; - sequence = seq.seq.tostring(); - if (len(sequence)-shift)>=int(length): - h.write('>'+nuc) - h.write('\n') - if t_end == 'three_end': - h.write(sequence[0:(len(sequence)-shift)]) - if t_end == 'five_end': - h.write(sequence[(shift):(len(sequence))]) - h.write('\n') - - - - -h.close() - - - - diff -r 11f0f526dca9 -r 1c325ff557d9 Iterative_mapping/unmap.py --- a/Iterative_mapping/unmap.py Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from read_file import * -from Bio import SeqIO - -map_file = sys.argv[1] -result_file = sys.argv[2] - - -#reads = read_t_file(read_file); - -f = open(map_file); -h = file(result_file, 'w') - -for aline in f.readlines(): - tline = aline.strip(); - tl = tline.split('\t'); - if len(tl)>4: - if int(tl[1].strip()) != 0: - h.write(tl[0].strip()); - h.write('\n'); - - -f.close(); -h.close() - - - - diff -r 11f0f526dca9 -r 1c325ff557d9 get_reads/get_read.py --- a/get_reads/get_read.py Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from Bio import SeqIO -import os -from read_file import * -import random -import string - -fasta_file = sys.argv[1] -map_file = sys.argv[2] -result_file = sys.argv[3] - -syspathrs = os.getcwd() - -os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > "+syspathrs+"map_info.txt") - -fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); -length_seq = {}; -for seq in fasta_sequences: - nuc = seq.id; - length_seq[nuc] = len(seq.seq.tostring()); - - - -mapping = {} -transcripts = [] - -f = open(syspathrs+"map_info.txt"); -for aline in f.readlines(): - tline = aline.strip(); - tl = tline.split('\t'); - if tl[0].strip() not in transcripts: - transcripts.append(tl[0].strip()); - mapping[tl[0].strip()] = []; - - mapping[tl[0].strip()].append(tl[1].strip()); - -distribution = {}; -coverage = {}; -for transcript in length_seq: - distribution[transcript] = []; - for i in range(0, length_seq[transcript]): - distribution[transcript].append(0); - sum_count = float(0); - if transcript in mapping: - for j in range(0, len(mapping[transcript])): - index = mapping[transcript][j]; - #count = reads[mapping[transcript][j][0]]; - sum_count = sum_count + 1; - distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1; - coverage[transcript] = float(sum_count)/float(length_seq[transcript]); - else: - coverage[transcript] = 0 - - - - - -h = file(result_file, 'w') -for transcript in length_seq: - h.write(transcript); - h.write('\n') - for i in range(0, length_seq[transcript]): - h.write(str(distribution[transcript][i])) - h.write('\t') - h.write('\n') - h.write('\n') - -#os.system("rm -r "+syspathrs) - - - -f.close(); -h.close() - - - - diff -r 11f0f526dca9 -r 1c325ff557d9 get_reads/get_read.xml --- a/get_reads/get_read.xml Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,44 +0,0 @@ - - - get_read.py $lib_file $map_file $output - - biopython - numpy - samtools - - - - - - - - - - - - - - - - - - -**TIPS**: - ------ - -**Input** - -* 1. A mapped (bam) file from Bowtie (or any other mapping program) -* 2. Reference library sequences (fasta) used to map the reads to - ------ - -**Output**: - -A text file with reverse transcription stop counts mapped to each nucleotide (RTSC file) - - - - - diff -r 11f0f526dca9 -r 1c325ff557d9 get_reads/read_file.py --- a/get_reads/read_file.py Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys - - - -def read_t_file(in_file): - f = open(in_file); - result = []; - for aline in f.readlines(): - temp = []; - tline = aline.strip(); - tl = tline.split('\t'); - for i in range(0, len(tl)): - temp.append(tl[i].strip()); - result.append(temp); - f.close(); - return result; - - diff -r 11f0f526dca9 -r 1c325ff557d9 get_reads/read_file.pyc Binary file get_reads/read_file.pyc has changed diff -r 11f0f526dca9 -r 1c325ff557d9 get_reads/test.bam Binary file get_reads/test.bam has changed diff -r 11f0f526dca9 -r 1c325ff557d9 reactivity_cal/.DS_Store Binary file reactivity_cal/.DS_Store has changed diff -r 11f0f526dca9 -r 1c325ff557d9 reactivity_cal/._.DS_Store Binary file reactivity_cal/._.DS_Store has changed diff -r 11f0f526dca9 -r 1c325ff557d9 reactivity_cal/parse_dis_react.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/parse_dis_react.py Tue Dec 09 03:03:11 2014 -0500 @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys + +def parse_dist(in_file): + result = [] + distribution = {} + name = [] + f = open(in_file) + flag = 0 + for aline in f.readlines(): + line = aline.strip() + dis = line.strip() + dist = dis.split('\t') + if len(dist) > 0: + if len(dist) == 1: + if dist[0].strip().find('coverage')==-1: + if flag == 0: + name.append(line) + flag = 1 + t_name = line + else: + distribution[t_name] = 'null' + name.append(line) + flag = 1 + t_name = line + else: + distri = [] + for i in range(0, len(dist)): + distri.append(dist[i].strip()) + distribution[t_name] = distri + flag = 0 + result.append(name) + result.append(distribution) + f.close() + return result + + + + + + + + + + + + + + + diff -r 11f0f526dca9 -r 1c325ff557d9 reactivity_cal/parse_dis_react.pyc Binary file reactivity_cal/parse_dis_react.pyc has changed diff -r 11f0f526dca9 -r 1c325ff557d9 reactivity_cal/rRNA.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/rRNA.txt Tue Dec 09 03:03:11 2014 -0500 @@ -0,0 +1,8 @@ +>25s rRNA 3375nts +GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTAAGCATATCAATAAGCGGAGGAAAAGAAACTAACAAGGATTCCCTTAGTAACGGCGAGCGAACCGGGAAGAGCCCAGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGGAAAGGGGCGCCAGAGAGGGTGAGAGCCCGTCGTGCCCGGACCCTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGCAGCCCCAATCGGGCGGTAAATTCCGTCCAAGGCTAAATACGGGCGAGAGACCGATAGCGAACAAGTACCGCGAGGTAAAGATGAAAAGGACTTTGAAAAGAGAGTCAAAGAGTGCTTGAAATTGTCGGGAGGGAAGCGGATGGGGGCCGGCGATGCGTCCTGGTCGGATGCGGAACGGAGCAATCCGGTCCGCCGATCGATTCGGGGCGTGGACCGACGCGGATTACGGTGGCGGCCTAAGCCCGGGCTTTTGATACGCTTGTGGAGACGTCGCTGCCGTGATCGTGGTCTGCAGCACGCGCCTAACGGCGTGCCTCGGCATCAGCGTGCTCCGGGCGTCGGCCTGTGGGCTCCCCATTCGACCCGTCTTGAAACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAACGGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTGGCGGGATCCTCGCGGGTGCACCGCCGACCGACCTTGATCTTCTGAGAAGGGTTCGAGTGTGAGCATGCCTGTCGGGACCCGAAAGATGGTGAACTATGCCTGAGCGGGGTAAAGCCAGAGGAAACTCTGGTGGAAGCCCGCAGCGATACTGACGTGCAAATCGTTCGTCTGACTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGAGCTCGGACGCGAGTTCTATCGGGTAAAGCCAATGATTAGAGGCATTGGGGGCGCAACGCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACGTGTCGGCTGCTTTGTTGAGCCGTCACACGGAATCGAGAGCTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATGCGGGATGAACCGGAAGCCGGGTTACGGTGCCCAACTGCGCGCTAACCTAGAACCCACAAAGGGTGTTGGTCGATTAAGACAGCAGGACGGTGGTCATGGAAGTCGAAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATGGCGCTTAAGCGCGACCTATACCCGGCCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGGGCGCGGCGGTCGCTGCAAAACCTAGGGCGCGAGGCGCGGAGCGGCCGTCGGTGCAGATCTTGGTGGTAGTAGCAAATATTCAAATGAGAACTTTGAAGGCCGAAGAGGGGAAAGGTTCCATGTGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGGGGGCCTCGGGAAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGGAGGTAGGGTCCAGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCGGTGCGCCCCCGGGCGCCCTTGAAAATCCGGAGGACCGAGTGCCGCTCACGCCCGGTCGTACTCATAACCGCATCAGGTCTCCAAGGTGAACAGCCTCTGGTCGATGGAACAATGTAGGCAAGGGAAGTCGGCAAAATGGATCCGTAACTTCGGGAAAAGGATTGGCTCTGAGGGCTGGGCTCGGGGGTCCCAGTTCCGAACCCGTCGGCTGTCAGCGGACTGCTCGAGCTGCTTCCGCGGCGAGAGCGGGTCGCCGGCTGCCGGCCGGGGGACGACTGGGAACGGCTCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGCTCAGAACTGGTACGGACAAGGGGAATCCGACTGTTTAATTAAAACAAAGCATTGCGATGGTCCCTGCGGATGCTAACGCAATGTGATTTCTGCCCAGTGCTCTGAATGTCAAAGTGAAGAAATTCAACCAAGCGCGGGTAAACGGCGGGAGTAACTATGACTCTCTTAAGGTAGCCAAATGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATTAACGAGATTCCCACTGTCCCTGTCTACTATCCAGCGAAACCACAGCCAAGGGAACGGGCTTGGCAGAATCAGCGGGGAAAGAAGACCCTGTTGAGCTTGACTCTAGTCCGACTTTGTGAAATGACTTGAGAGGTGTAGGATAAGTGGGAGCTTCGGCGCAAGTGAAATACCACTACTTTTAACGTTATTTTACTTACTCCGTGAATCGGAGGCCGGGGTACAACCCCTGTTTTTGGTCCCAAGGCTCGCTTCGGCGGGTCGATCCGGGCGGAGGACATTGTCAGGTGGGGAGTTTGGCTGGGGCGGCACATCTGTTAAAAGATAACGCAGGTGTCCTAAGATGAGCTCAACGAGAACAGAAATCTCGTGTGGAACAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGTACGAATACGAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACTTCGGAATTTGAAGCTAGAGGTGTCAGAAAAGTTACCACAGGGATAACTGGCTTGTGGCAGCCAAGCGTTCATAGCGACGTTGCTTTTTGATCCTTCGATGTCGGCTCTTCCTATCATTGTGAAGCAGAATTCACCAAGTGTTGGATTGTTCACCCACCAATAGGGAACGTGAGCTGGGTTTAGACCGTCGTGAGACAGGTTAGTTTTACCCTACTGATGCCCGCGTCGCGATAGTAATTCAACCTAGTACGAGAGGAACCGTTGATTCGCACAATTGGTCATCGCGCTTGGTTGAAAAGCCAGTGGCGCGAAGCTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTAGAAGCGACGCATGCGCCCGCCGCCCGATTGCCGACCCTCAGTAGGAGCTTAGGCTCCAAAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAACGGTCGTTCGGACCGCCTTGAATTATAATTACCACCGAGCGGCGGGTAGAATCCTTTGCAGACGACTTAAATACGCGACGGGGTATTGTAAGTGGCAGAGTGGCCTTGCTGCCACGATCCACTGAGATTCAGCCCTTTGTCGCTAAGATTCGA +>gi|20197903:2706-4513 Arabidopsis thaliana chromosome 2 BAC F23H14 genomic sequence, complete sequence +TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTGTAAGTATGAACGAATTCAGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTTTGATGGTAACTACTACTCGGATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAAACCCCGACTTATGGAAGGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGGCCTACCATGGTGGTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAGAGGGAGCCTGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTACCCAATCCTGACACGGGGAGGTAGTGACAATAAATAACAATACTGGGCTCTTTCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCCCTTAACGAGGATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGATAACATCATAGGATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAGGGACAGTCGGGGGCATTCGTATTTCATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCTAGTCTCAACCATAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTATAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTTTTGGGTTCCGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACATAGTAAGGATTGACAGACTGAGAGCTCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGCGATTTGTCTGGTTAATTCCGTTAATGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAGGCATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTTTAGGCCAAGGAAGTTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACACTGATGTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAATCTTTGAAATTTCATCGTGATGGGGATAGATCATTGCAATTGTTGGTCTTCAACGAGGAATTCCTAGTAAGCGCGAGTCATCAGCTCGCGTTGACTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAATGATCCGGTGAAGTGTTCGGATCGCGGCGACGTGGGTGGTTCGCCGCCCGCGACGTCGCGAGAAGTCCACTAAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTG +>Arabidopsis thaliana 1 +GGATGCGATCATACCAGCACTAATGCACCGGATCCCATCAGAACTCCGCAGTTAAGCGTGCTTGGGCGAGAGTAGTACTAGGATGGGTGACCTCCTGGGAAGTCCTCGTGTTGCATCCCTC +>gi|186498419|ref|NR_022453.1| Arabidopsis thaliana (AT2G01020) rRNA +AAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCCAAGCCTTCTGGCCGAGGGCACGTCTGCCTGGGTGTCACAA \ No newline at end of file diff -r 11f0f526dca9 -r 1c325ff557d9 reactivity_cal/react_cal.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/react_cal.py Tue Dec 09 03:03:11 2014 -0500 @@ -0,0 +1,135 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys +from Bio import SeqIO +import math +from parse_dis_react import * +from react_norm_function import * +import os +import random +import string + + +dist_file1 = sys.argv[1] #plus library +dist_file2 = sys.argv[2] #minus library +seq_file = sys.argv[3] #Reference library(genome/cDNA) +nt_spec = sys.argv[4] #only show reactivity for AC or ATCG +flag_in = sys.argv[5] # perform 2-8% normalization (1) or not (0) +threshold = sys.argv[6] #Threshold to cap the reactivities +output_file = sys.argv[7] + + +distri_p = parse_dist(dist_file1) +distri_m = parse_dist(dist_file2) +threshold = float(threshold) + + +syspathrs = os.getcwd() + +h = file(syspathrs+"react.txt",'w') +flag_in = int(flag_in) + +seqs = SeqIO.parse(open(seq_file),'fasta'); +nt_s = set() +for i in range(len(nt_spec)): + nt_s.add(nt_spec[i]) + +flag = 0 +trans = [] +distri_p = distri_p[1] +distri_m = distri_m[1] + +#thres = int(threshold) + + +transcripts = {} +for seq in seqs: + n = seq.id + trans.append(n) + transcripts[n] = seq.seq.tostring() + + +#print(distri_p) + + +for i in range(0, len(trans)): + h.write(trans[i]) + h.write('\n') + for j in range(len(distri_p[trans[i]])): + distri_p[trans[i]][j] = math.log((int(distri_p[trans[i]][j])+1),math.e) + for j in range(len(distri_m[trans[i]])): + distri_m[trans[i]][j] = math.log((int(distri_m[trans[i]][j])+1),math.e) + s_p = sum(distri_p[trans[i]]) + s_m = sum(distri_m[trans[i]]) + length = len(distri_p[trans[i]]) + if s_p!= 0 and s_m!= 0: + r = [] + for j in range(0, len(distri_p[trans[i]])): + f_p = (float(distri_p[trans[i]][j]))/float(s_p)*length + f_m = (float(distri_m[trans[i]][j]))/float(s_m)*length + raw_react = f_p-f_m + r.append(max(0, raw_react)) + + if s_p!= 0 and s_m!= 0: + for k in range(1,(len(r)-1)): + if transcripts[trans[i]][k-1] in nt_s: + h.write(str(r[k])) + h.write('\t') + else: + h.write('NA') + h.write('\t') + k = k+1 + if transcripts[trans[i]][k-1] in nt_s: + h.write(str(r[k])) + h.write('\n') + else: + h.write('NA') + h.write('\n') + + +h.close() + +if flag_in: + react_norm((syspathrs+"react.txt"),output_file, threshold) +else: + h_o = file(output_file, 'w') + f_i = open(syspathrs+"react.txt") + for aline in f_i.readlines(): + h_o.write(aline.strip()) + h_o.write('\n') +os.system("rm -f "+syspathrs+"react.txt") + +#os.system("rm -r "+syspathrs) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 11f0f526dca9 -r 1c325ff557d9 reactivity_cal/react_norm_function.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/react_norm_function.py Tue Dec 09 03:03:11 2014 -0500 @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import sys +from Bio import SeqIO +import math +from parse_dis_react import * + +def cap(a,value): + if a>=value: + return value + else: + return a + +def react_norm(react_file, result_file, capped_value): + print("Normalizing.....") + react1 = parse_dist(react_file) + react = react1[1] + h = file(result_file, 'w') + + capped = int(capped_value) + + all_react = [] + + + for t in react: + if react[t]!='null': + for i in range(len(react[t])): + if react[t][i]!='NA': + all_react.append(float(react[t][i])) + + + all_react.sort(reverse = True) + + + eight = all_react[int(len(all_react)*0.02):int(len(all_react)*0.1)] + meight = sum(eight)/len(eight) + + for t in react: + h.write(t) + h.write('\n') + if react[t]!='null': + for i in range((len(react[t])-1)): + if react[t][i]!='NA': + h.write(str(cap((float(react[t][i])/meight),capped))) + else: + h.write('NA') + h.write('\t') + if react[t][i+1]!='NA': + h.write(str(cap((float(react[t][i+1])/meight),capped))) + else: + h.write('NA') + h.write('\n') + + h.close() + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 11f0f526dca9 -r 1c325ff557d9 reactivity_cal/react_norm_function.pyc Binary file reactivity_cal/react_norm_function.pyc has changed diff -r 11f0f526dca9 -r 1c325ff557d9 reactivity_cal/reactivity_calculation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/reactivity_calculation.xml Tue Dec 09 03:03:11 2014 -0500 @@ -0,0 +1,60 @@ + + + react_cal.py $dist_file1 $dist_file2 $seq_file $nt_spec $flag_in $threshold $output + + biopython + numpy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**TIPS**: + +----- + +**Input**: + +* 1. RTSC files (Output of Get RT Stop Counts) for (+) and (-) library +* 2. Reference file (fasta) used to map the reads to +* 3. Nucleotide Specificity (Type of nucleotides to have reactivity, e.g. AC for DMS and ACTG for SHAPE) +* [Optional]: +* 1. A threshold to cap the structural reactivities. {Default: 7} +* 2. Flag that determines whether to perform 2%-8% normalization {Default: Yes} + +----- + +**Output**: + +A text file with structural reactivity for each nucleotide (Reactivity file) + + + + + diff -r 11f0f526dca9 -r 1c325ff557d9 reactivity_cal/read_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reactivity_cal/read_file.py Tue Dec 09 03:03:11 2014 -0500 @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + + + +def read_t_file(in_file): + f = open(in_file); + result = []; + for aline in f.readlines(): + temp = []; + tline = aline.strip(); + tl = tline.split('\t'); + for i in range(0, len(tl)): + temp.append(tl[i].strip()); + result.append(temp); + f.close(); + return result; + + diff -r 11f0f526dca9 -r 1c325ff557d9 tool_dependencies.xml --- a/tool_dependencies.xml Tue Dec 09 03:02:49 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ - - - - - - - - - - - - - - - - - -