# HG changeset patch # User tyty # Date 1416345844 18000 # Node ID d2817a631a7bd8e2f6ff9d77df3501be1ec79f6c # Parent 36d912d5b1acdc0351cbb0a0461bd4ddb03828fb Uploaded diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/.DS_Store Binary file Iterative_mapping/.DS_Store has changed diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/._.DS_Store Binary file Iterative_mapping/._.DS_Store has changed diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/._iterative_map.xml Binary file Iterative_mapping/._iterative_map.xml has changed diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/iterative_map.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/iterative_map.py Tue Nov 18 16:24:04 2014 -0500 @@ -0,0 +1,122 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +import os +from read_file import * +from read_s_file import * +import random +import string + +type_input = sys.argv[1] +seq_file = sys.argv[2] +ref_file = sys.argv[3] +shift = sys.argv[4] +length = sys.argv[5] +t_end = sys.argv[6] +map_type = sys.argv[7] +output_file = sys.argv[8] + + +if map_type!="default": + s = "" + sm = "" + s = s+"-v "+sys.argv[9] + sm = sm+"-v "+sys.argv[9] + sm = sm+" -5 "+sys.argv[10] + sm = sm+" -3 "+sys.argv[11] + s = s+" -k "+sys.argv[12] + sm = sm+" -k "+sys.argv[12] + if sys.argv[13]: + s = s+" -a" + sm = sm+" -a" + if int(sys.argv[14])>=1: + s = s+" -m "+sys.argv[14] + sm = sm+" -m "+sys.argv[14] + if sys.argv[15]: + s = s+" --best --strata " + sm = sm+" --best --strata " + +else: + s = "-v 3 -a --best --strata " + sm = "-v 3 -a --best --strata " + +ospath = os.path.realpath(sys.argv[0]) +ost = ospath.split('/') +syspath = "" +for i in range(len(ost)-1): + syspath = syspath+ost[i].strip() + syspath = syspath+'/' + +syspathrs = os.getcwd() + +os.system("bowtie-build -f "+ref_file+" "+syspathrs+"ref > "+syspathrs+"log.txt") + +os.system("cp "+seq_file+" "+syspathrs+"seq0.fa") + +if type_input == "fasta": + tp = 'fasta' +if type_input == 
"fastq": + tp = 'fastq' + +k = 0 + +if type_input == "fasta": + os.system("bowtie "+sm+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam") +if type_input == "fastq": + os.system("bowtie "+sm+"-q "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam") + +while(True): + os.system("samtools view -Sb -F 0xfff "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get mapped reads + os.system("samtools view -Sb -f 0x4 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"umapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get unmapped reads + os.system("samtools view -Sb -f 0x10 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"rmapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get reversed mapped reads + os.system("samtools merge -f "+syspathrs+"unmapped"+str(k)+".bam "+syspathrs+"umapped"+str(k)+".bam "+syspathrs+"rmapped"+str(k)+".bam") #get reversed mapped reads + os.system("samtools view -h -o "+syspathrs+"unmapped"+str(k)+".sam "+syspathrs+"unmapped"+str(k)+".bam") #get reversed mapped reads + if k>0: + os.system("samtools view -h -o "+syspathrs+"mapped"+str(k)+".sam "+syspathrs+"mapped"+str(k)+".bam") #get reversed mapped reads + os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"unmapped"+str(k)+".txt") + os.system("cut -f 1 "+syspathrs+"mapped"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".txt") + os.system("python "+syspath+"remove_map.py "+syspathrs+"unmapped"+str(k)+".txt "+syspathrs+"mapped"+str(k)+".txt "+syspathrs+"runmapped"+str(k)+".txt") + os.system("rm "+syspathrs+"mapped"+str(k)+".sam") + os.system("rm "+syspathrs+"mapped"+str(k)+".txt") + os.system("rm "+syspathrs+"unmapped"+str(k)+".txt") + else: + os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"runmapped"+str(k)+".txt") + + os.system("rm "+syspathrs+"unmapped"+str(k)+".bam") + os.system("rm "+syspathrs+"umapped"+str(k)+".bam") + os.system("rm 
"+syspathrs+"rmapped"+str(k)+".bam") + os.system("python "+syspath+"seq_track.py "+syspathrs+"runmapped"+str(k)+".txt "+syspathrs+"seq"+str(k)+".fa "+syspathrs+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence + os.system("python "+syspath+"truncate.py "+syspathrs+"unmap_seq"+str(k)+".fa "+shift+" "+syspathrs+"seq"+str(k+1)+".fa "+length+" "+t_end) #truncate unmapped sequence + os.system("rm "+syspathrs+"seq"+str(k)+".fa") #Remove sequences being mapped + os.system("rm "+syspathrs+"map"+str(k)+".sam") #Remove mapping file + os.system("rm "+syspathrs+"unmap_seq"+str(k)+".fa") #Remove unmapped sequnce + os.system("rm "+syspathrs+"runmapped"+str(k)+".txt") + os.system("rm "+syspathrs+"unmapped"+str(k)+".sam") + + os.system("wc -l "+syspathrs+"seq"+str(k+1)+".fa > "+syspathrs+"count"+str(k+1)+".txt") + c = read_sp_file(syspathrs+"count"+str(k+1)+".txt") + if c[0][0] == '0': #If no reads is in the sequence file, stop + os.system("rm "+syspathrs+"count"+str(k+1)+".txt") + os.system("rm "+syspathrs+"seq"+str(k+1)+".fa") + break + os.system("rm "+syspathrs+"count"+str(k+1)+".txt") + k = k+1 + if type_input == "fasta": + os.system("bowtie "+s+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam") + if type_input == "fastq": + os.system("bowtie "+s+"-q "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam") + + +ss = "" +for i in range(0,k+1): + ss = ss+" "+syspathrs+"mapped"+str(i)+".bam" + + +os.system("samtools merge -f "+output_file+" "+ss) +#print("samtools merge mapped_all.bam"+ss) +os.system("rm "+syspathrs+"mapped*.bam") +os.system("rm "+syspathrs+"ref*") +#os.system("rm -r "+syspathrs) + + diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/iterative_map.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/iterative_map.xml Tue Nov 18 16:24:04 2014 -0500 @@ -0,0 +1,96 @@ + + + + #if $mapping_file.type == "user" + iterative_map.py $file_format.type 
$file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output $mapping_file.param_v $mapping_file.param_five $mapping_file.param_three $mapping_file.param_k $mapping_file.param_a $mapping_file.param_m $mapping_file.param_best + #else + iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output + #end if + + + biopython + numpy + samtools + bowtie + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**TIPS**: + +----- + +**Input**: + +* 1. Sequence file type (FASTA/FASTQ) +* 2. Sequence file (fasta/fastq format) +* 3. Reference file (fasta) used to map the reads to +* 4. “Shift” (The length of the sequence that will be trimmed at the 3’end of the reads before each round of mapping) +* 5. “Length” (The minimum length of the reads for mapping after trimming) +* [Optional] +* 1. Bowtie mapping flags (options) [Default: -v 0 -a --best --strata] (-v flag indicates the number of allowed mismatches. Use -5/-3 flag to trim the nucleotides from 5'/3' end of the reads) + +----- + +**Output**: + +A bam file with all of the reads that are mapped + + + + + diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/log.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/log.txt Tue Nov 18 16:24:04 2014 -0500 @@ -0,0 +1,1 @@ +[samopen] SAM header is present: 4 sequences. 
diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/map_ex.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/map_ex.py Tue Nov 18 16:24:04 2014 -0500 @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * +from Bio import SeqIO + +map_file = sys.argv[1] +result_file = sys.argv[2] + + +#reads = read_t_file(read_file); + +f = open(map_file); +h = file(result_file, 'w') + +for aline in f.readlines(): + tline = aline.strip(); + tl = tline.split('\t'); + if len(tl)>4: + if int(tl[1].strip())== 0: + h.write(tline) + h.write('\n') + + +f.close(); +h.close() + + + + diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/rRNA.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/rRNA.txt Tue Nov 18 16:24:04 2014 -0500 @@ -0,0 +1,8 @@ +>25s rRNA 3375nts +GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTAAGCATATCAATAAGCGGAGGAAAAGAAACTAACAAGGATTCCCTTAGTAACGGCGAGCGAACCGGGAAGAGCCCAGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGGAAAGGGGCGCCAGAGAGGGTGAGAGCCCGTCGTGCCCGGACCCTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGCAGCCCCAATCGGGCGGTAAATTCCGTCCAAGGCTAAATACGGGCGAGAGACCGATAGCGAACAAGTACCGCGAGGTAAAGATGAAAAGGACTTTGAAAAGAGAGTCAAAGAGTGCTTGAAATTGTCGGGAGGGAAGCGGATGGGGGCCGGCGATGCGTCCTGGTCGGATGCGGAACGGAGCAATCCGGTCCGCCGATCGATTCGGGGCGTGGACCGACGCGGATTACGGTGGCGGCCTAAGCCCGGGCTTTTGATACGCTTGTGGAGACGTCGCTGCCGTGATCGTGGTCTGCAGCACGCGCCTAACGGCGTGCCTCGGCATCAGCGTGCTCCGGGCGTCGGCCTGTGGGCTCCCCATTCGACCCGTCTTGAAACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAACGGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTGGCGGGATCCTCGCGGGTGCACCGCCGACCGACCTTGATCTTCTGAGAAGGGTTCGAGTGTGAGCATGCCTGTCGGGACCCGAAAGATGGTGAACTATGCCTGAGCGGGGTAAAGCCAGAGGAAACTCTGGTGGAAGCCCGCAGCGATACTGACGTGCAAATCGTTCGTCTGACTTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGAGCTCGGACGCGAGTTCTATCGGGTAAAGCCAATGATTAGAGGCATTGGGGGCGCAACGCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACGTGTCGGCTGCTTTGTTGAGCCGTCACACGGAATCGAGAGCTCCAAGTGGGCCATTTTTGGTAAGCAGAACTGGCGATG
CGGGATGAACCGGAAGCCGGGTTACGGTGCCCAACTGCGCGCTAACCTAGAACCCACAAAGGGTGTTGGTCGATTAAGACAGCAGGACGGTGGTCATGGAAGTCGAAATCCGCTAAGGAGTGTGTAACAACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATGGCGCTTAAGCGCGACCTATACCCGGCCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGGGCGCGGCGGTCGCTGCAAAACCTAGGGCGCGAGGCGCGGAGCGGCCGTCGGTGCAGATCTTGGTGGTAGTAGCAAATATTCAAATGAGAACTTTGAAGGCCGAAGAGGGGAAAGGTTCCATGTGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGGGGGCCTCGGGAAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGGAGGTAGGGTCCAGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCGGTGCGCCCCCGGGCGCCCTTGAAAATCCGGAGGACCGAGTGCCGCTCACGCCCGGTCGTACTCATAACCGCATCAGGTCTCCAAGGTGAACAGCCTCTGGTCGATGGAACAATGTAGGCAAGGGAAGTCGGCAAAATGGATCCGTAACTTCGGGAAAAGGATTGGCTCTGAGGGCTGGGCTCGGGGGTCCCAGTTCCGAACCCGTCGGCTGTCAGCGGACTGCTCGAGCTGCTTCCGCGGCGAGAGCGGGTCGCCGGCTGCCGGCCGGGGGACGACTGGGAACGGCTCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGCTCAGAACTGGTACGGACAAGGGGAATCCGACTGTTTAATTAAAACAAAGCATTGCGATGGTCCCTGCGGATGCTAACGCAATGTGATTTCTGCCCAGTGCTCTGAATGTCAAAGTGAAGAAATTCAACCAAGCGCGGGTAAACGGCGGGAGTAACTATGACTCTCTTAAGGTAGCCAAATGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATTAACGAGATTCCCACTGTCCCTGTCTACTATCCAGCGAAACCACAGCCAAGGGAACGGGCTTGGCAGAATCAGCGGGGAAAGAAGACCCTGTTGAGCTTGACTCTAGTCCGACTTTGTGAAATGACTTGAGAGGTGTAGGATAAGTGGGAGCTTCGGCGCAAGTGAAATACCACTACTTTTAACGTTATTTTACTTACTCCGTGAATCGGAGGCCGGGGTACAACCCCTGTTTTTGGTCCCAAGGCTCGCTTCGGCGGGTCGATCCGGGCGGAGGACATTGTCAGGTGGGGAGTTTGGCTGGGGCGGCACATCTGTTAAAAGATAACGCAGGTGTCCTAAGATGAGCTCAACGAGAACAGAAATCTCGTGTGGAACAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGTACGAATACGAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACTTCGGAATTTGAAGCTAGAGGTGTCAGAAAAGTTACCACAGGGATAACTGGCTTGTGGCAGCCAAGCGTTCATAGCGACGTTGCTTTTTGATCCTTCGATGTCGGCTCTTCCTATCATTGTGAAGCAGAATTCACCAAGTGTTGGATTGTTCACCCACCAATAGGGAACGTGAGCTGGGTTTAGACCGTCGTGAGACAGGTTAGTTTTACCCTACTGATGCCCGCGTCGCGATAGTAATTCAACCTAGTACGAGAGGAACCGTTGATTCGCACAATTGGTCATCGCGCTTGGTTGAAAAGCCAGTGGCGCGAAGCTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTAGAAGCGACGCATGCGCCCGCCGCC
CGATTGCCGACCCTCAGTAGGAGCTTAGGCTCCAAAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAACGGTCGTTCGGACCGCCTTGAATTATAATTACCACCGAGCGGCGGGTAGAATCCTTTGCAGACGACTTAAATACGCGACGGGGTATTGTAAGTGGCAGAGTGGCCTTGCTGCCACGATCCACTGAGATTCAGCCCTTTGTCGCTAAGATTCGA +>gi|20197903:2706-4513 Arabidopsis thaliana chromosome 2 BAC F23H14 genomic sequence, complete sequence +TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCTCAAAGATTAAGCCATGCATGTGTAAGTATGAACGAATTCAGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTTTGATGGTAACTACTACTCGGATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAAACCCCGACTTATGGAAGGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGGCCTACCATGGTGGTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAGAGGGAGCCTGAGAAACGGCTACCACATCCAAGGAAGGCAGCAGGCGCGCAAATTACCCAATCCTGACACGGGGAGGTAGTGACAATAAATAACAATACTGGGCTCTTTCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCCCTTAACGAGGATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGATAACATCATAGGATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAGGGACAGTCGGGGGCATTCGTATTTCATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCTAGTCTCAACCATAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTATAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTTTTGGGTTCCGGGGGGAGTATGGTCGCAAGGCTGAAACTTAAAGGAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGACTCAACACGGGGAAACTTACCAGGTCCAGACATAGTAAGGATTGACAGACTGAGAGCTCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGCGATTTGTCTGGTTAATTCCGTTAATGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAGGCATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTTTAGGCCAAGGAAGTTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACACTGATGTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAATCTTTGAAATTTCATCGTGATGGGGATAGATCATTGCAATTGTTGGTCTTCAACGAGGAATTCCTAGTAAGCGCGAGTCATCAGCTCGCGTTGACTACGTCCCTGCCCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAATGAT
CCGGTGAAGTGTTCGGATCGCGGCGACGTGGGTGGTTCGCCGCCCGCGACGTCGCGAGAAGTCCACTAAACCTTATCATTTAGAGGAAGGAGAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTG +>Arabidopsis thaliana 1 +GGATGCGATCATACCAGCACTAATGCACCGGATCCCATCAGAACTCCGCAGTTAAGCGTGCTTGGGCGAGAGTAGTACTAGGATGGGTGACCTCCTGGGAAGTCCTCGTGTTGCATCCCTC +>gi|186498419|ref|NR_022453.1| Arabidopsis thaliana (AT2G01020) rRNA +AAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCCAAGCCTTCTGGCCGAGGGCACGTCTGCCTGGGTGTCACAA \ No newline at end of file diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/read_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/read_file.py Tue Nov 18 16:24:04 2014 -0500 @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + + + +def read_t_file(in_file): + f = open(in_file); + result = []; + for aline in f.readlines(): + temp = []; + tline = aline.strip(); + tl = tline.split('\t'); + for i in range(0, len(tl)): + temp.append(tl[i].strip()); + result.append(temp); + f.close(); + return result; + + diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/read_file.pyc Binary file Iterative_mapping/read_file.pyc has changed diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/read_s_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/read_s_file.py Tue Nov 18 16:24:04 2014 -0500 @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + + + +def read_sp_file(in_file): + f = open(in_file); + result = []; + for aline in f.readlines(): + temp = []; + tline = aline.strip(); + tl = tline.split(' '); + for i in range(0, len(tl)): + if len(tl[i].strip())>0: + temp.append(tl[i].strip()); + result.append(temp); + f.close(); + return result; + + diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/read_s_file.pyc Binary file Iterative_mapping/read_s_file.pyc has changed diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/remove_map.py --- /dev/null 
Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/remove_map.py Tue Nov 18 16:24:04 2014 -0500 @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * + + +unmap_file = sys.argv[1] +map_file = sys.argv[2] +result_file = sys.argv[3] + + +unmap = read_t_file(unmap_file) +mapped = read_t_file(map_file) +h = file(result_file, 'w') + +maps = set() +for i in range(len(mapped)): + maps.add(mapped[i][0]) + + +for i in range(len(unmap)): + name = unmap[i][0] + if name not in maps: + h.write(name) + h.write('\n') + + +h.close() diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/sample.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/sample.fasta Tue Nov 18 16:24:04 2014 -0500 @@ -0,0 +1,1000 @@ +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1415:1662#0/1 +TCATTCATCCATTTCCAGTGCTCAGCTAACCCCAACT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1359:1735#0/1 +TGCTGGCGACGCATCATTCAAATTTCTGCCCTATCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1469:1743#0/1 +TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1750:1642#0/1 +AACCGGGACGTGGCGGTTGACGGCAACGTTAGGGAGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1899:1627#0/1 +AAAAACGACTCTCGGCAACGGATATCTCGGCTCTCGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1859:1668#0/1 +TACGCTCCTGGTCTTAATTGGCCGGGTCGTGCCCCCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1830:1700#0/1 +ATTTCGATCCTATTGTGTTGGCCTTCGGGATCGGAGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2000:1705#0/1 +TTAATGATTAACAGGGACAGTCGGGGGCATTCGTATT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2149:1632#0/1 +GATCATACATTACTGACTAAAAGAAGCAAAATCTTGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2052:1637#0/1 +AGAACATGAAACCGTAAGCTCCCAAGCAGTGGGAGGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2036:1706#0/1 +AAAACTTAGCTGAGACGACGCAGAAACAGGTGAGATC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2183:1706#0/1 +TTCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2068:1721#0/1 +AAGTAGCACGTCCCTCAGGAAAGAAGCTCTTCAGATT 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:2162:1740#0/1 +ATTAATCAAGAACGAAAGTTGGGGGCTCGAAGACGGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2339:1628#0/1 +TGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCCGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2338:1670#0/1 +TCGCGCGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2459:1678#0/1 +CCTCGTCGCTGCGTCTCTCTCCCGCAACCTTCGATTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2492:1685#0/1 +AAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2286:1687#0/1 +AAGGCGGTGGAGGCCGCCCAGCATTGCCCGCACCTAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2283:1704#0/1 +ACTCTTGTTACAATGATTGTATGACATTCCTGATGGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2475:1719#0/1 +TTATGCAGAAATTGCTATAAGAAGAAACCTAAACTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2299:1733#0/1 +CTGTGAAACTGCGAATGGCTCATTAAATCAGTTATAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2464:1744#0/1 +GTGTTTTTATCCAAATCCGGGGATAAACACATTTTGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2547:1662#0/1 +AGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2512:1664#0/1 +TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2675:1664#0/1 +GTACAGATCGGAAGAGCACACGTCTGAACTCCAGTCA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2694:1675#0/1 +ACTACGAGAGGAACCGTTGATTCGCACAATTGGTCAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2558:1692#0/1 +ATCTTTCATATCCAGAGAGAGAGAAAGAGAACAAAAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2587:1709#0/1 +CAACCGTGAAAGCGTGGCCTATCGATCCTTTAGACCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2560:1727#0/1 +TTTCCGAGAGTATGCAGATTTTGTTTTCCAAGAATAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2960:1646#0/1 +CTTGCACATGGGTTAGTCGATCCTAAGAGTCGGGGGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2981:1669#0/1 +GATTCATCCCAAAACATTACAAAACGTTACAATGGCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2884:1740#0/1 +TCTCGCGCTTGTACGGCTTTGGCTCGGATTCGTCCGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3172:1696#0/1 +TCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCC 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:3046:1703#0/1 +AGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3218:1717#0/1 +TCTCGCGCTTGTACGGCTTTGGCTCGGATTCGTCCGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3140:1732#0/1 +ATAAGAAGGTTATTGATTTGGTTAAAGAATACAATGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3212:1744#0/1 +TCCTGGTCTTAATTGGCCGGGTCGTGGCCCCCGCGCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3277:1654#0/1 +TACTCGGATAACCGTAGTAATTCTAGAGCTAATACGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3373:1695#0/1 +CGTTTAGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3255:1702#0/1 +ACGCGGGCTCTGCCCGTTGCTCTGATGATTCATGATA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3391:1733#0/1 +AAATAGAAGGGTCAAAAGCTAAGGAAGAAAAGAAAGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3606:1635#0/1 +TTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3710:1652#0/1 +CCCCGTGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3551:1662#0/1 +AGAACATGAAACCGTAAGCTCCCAAGCAGTGGGAGGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3696:1663#0/1 +ACGCATTTATTAGATAAAAGGTCGACGCGGGCTCTGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3679:1667#0/1 +ACGGGGGGCAGCAGTGGGGAATCTTGGACAATGGGCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3622:1671#0/1 +TCATCCAATTGGAGACGAATCATATCGAGCGATGGAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3506:1682#0/1 +TGCGTTGTGGAAGTCGAAGAGGTTGATGAATATTGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3540:1687#0/1 +AAATTAAACCTCACCGACGGATTCAATTCTCTCGTTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3627:1725#0/1 +TCATCCAGATCTCAACTTTCTCTCATCTTCAAATTAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3710:1735#0/1 +GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3787:1636#0/1 +AGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3996:1694#0/1 +AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3942:1696#0/1 +TGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATC 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:4104:1635#0/1 +AGGTCGGCGGTTCCATCACCACAACGCCGGACGACAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4243:1713#0/1 +AGATACTCCTTTGACCGAAGAAACCATAACGGAAGCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4091:1714#0/1 +AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4229:1733#0/1 +GGGCCTGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4060:1745#0/1 +TGACTGTGAAACTGCGAATGGCTCATTAAATCAGTTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4308:1693#0/1 +TCCTCGGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4384:1697#0/1 +AAAATTTCATCGTGATGGGGATAGATCATTGCAATTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4506:1651#0/1 +GATAATAAAGAGATGGAGATTGTTTTGGAGCAACGGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4535:1654#0/1 +AAAAAGGGTAAAAGCTCGTTTGATTCTGATTTTCAGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4592:1677#0/1 +CTACCTGGTTAATCCTGCCAGTAGTCATATGCTTGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4726:1686#0/1 +TATGTTTTTGTTTGTTCGTAAATTCTTGACATCACTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4544:1694#0/1 +TGGAGCTCCGCTGAATTTTCTTTGCTCCATTTCCGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4604:1707#0/1 +TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4996:1665#0/1 +CTTATTTTACGAAGCTCCCCTCGGTTACAGCATTGAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4860:1668#0/1 +AGCGGCTGGAAGAGCACCGCACGTCGCGTGGTGTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4786:1671#0/1 +TATGAAAGACGAACAACTGCGAAAGCATTTGCCAAGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4889:1691#0/1 +ATGGGGGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4843:1693#0/1 +AGGCAGTCCGATCCAACGGCTAGGCACTTACATGGCA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4977:1703#0/1 +ATTAGATAAAAGGTCGACGCGGGCTCTGCCCGTTGCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4791:1723#0/1 +TTGTGTTGGCCTTCGGGATCGGAGTAATGATTAACAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5019:1686#0/1 +ACAATGGAGCAACCACTCCTTCCGGCCTCGTCGCCGC 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:5134:1686#0/1 +TCTGCTGTTGTAGAGCCTTACAACAGTGTGCTTTCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5307:1664#0/1 +ACGTGCGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5278:1674#0/1 +GCATCAAAATCCTCCGACGATGACAACCATAGCTGCA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5486:1715#0/1 +TTGTGGTGGCGACGCATCATTCAAATTTCTGCCCTAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5541:1641#0/1 +TTCTCCCCGAAATGCGTTGAGGCGCAGCAGTTGACTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5706:1646#0/1 +ACGAGGATCCATTGGAGGGCAAGTCTGGTGCCTGCAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5593:1651#0/1 +AAGGGGCTTCTTGTCATTGATGATGAGCTAGCCACCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5503:1695#0/1 +GAGAGGGAACTAATCATAAGAGATGCAATGAGTGTGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5581:1706#0/1 +ATAAAAACCAGAAGAATCCGTATAAATTATCCTAACA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5713:1729#0/1 +TCTCTTGGAGAGTTCGATCCTGGCTCAGGATGAACGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5831:1656#0/1 +TTCACGTGGCTCAAGTCACTAGCAATGCTCTTGCTTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5942:1680#0/1 +TCCGGTGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5772:1711#0/1 +AGCGCGAACTTCGAAAGGGGATCTGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5957:1723#0/1 +TTACAATAAGTCTGCCTATAGTGGGAGAGGTGACAGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5981:1739#0/1 +GAGAAATTTGGAGTTTCGCCGGAATCTTCCTCTGTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6231:1644#0/1 +AATTGGAAAAGGTGGAGGAGTTGGCGGTGGCATCGGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6152:1724#0/1 +TGCAATTCTACCACGACCTCATCGACGAGCTCATAAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6100:1731#0/1 +TCCTCTGCGTTTTCACGAAACAGAGAACCTTATCGTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6174:1733#0/1 +TCGAATTGTAGTCTGGAGAAGCGTCCTCAGCGACGGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6310:1648#0/1 +AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6287:1659#0/1 +TCAACGAGTTCACACCTTGGCCGACAGGCCCGGGTAA 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:6332:1666#0/1 +TTTGGGTTCTCTCCTTATAGTTTGATGAACATTGTTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6295:1704#0/1 +AGATAATCGGAACCTTCGTCCTTGTCTACACGGTCTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6590:1655#0/1 +AGAAGCCACCTCCGGTTCCGGTTTACAAGCCCCCGCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6585:1691#0/1 +CTTTTCCCAGAGAAGAAGCAATGACGGTATCTGGGGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6722:1719#0/1 +AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6820:1642#0/1 +AAGCATTTGCCAAGGATGTTTTCATTAATCAAGAACG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7000:1662#0/1 +GGGAAGCGTTGAGGCTTCACCCTCCACTCATCGTGTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6873:1673#0/1 +AGCACTTTAGGATGGCATAGCCTTAAAGTTAAGGGTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6770:1699#0/1 +AACTGTCTACAAAATCATTGAAGATCTACCCAAAAAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6782:1735#0/1 +GATCCGTCAAATTCAATTGATCCTCTCTCCAAATCAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7159:1658#0/1 +ACCCACCTCCGGTTCCGGTTTACAAGCCACCGCCAAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7183:1660#0/1 +ATTTCATCGTGATGGGGATAGATCATTGCAATTGTTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7167:1679#0/1 +TTCTGCCTCCGGCGCTGTTACTTTGAAGAAATTAGAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7223:1688#0/1 +TTGGACGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7410:1647#0/1 +ACCATGCAGTATCGGTTTATATAACATCCACATTGTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7387:1710#0/1 +TCTTCGAGTTTTGCTACTTGTATGGGATGATAAGACT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7397:1741#0/1 +TTTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7624:1680#0/1 +AATTGTTGTTGTTATCTTCAACGATAGCGTTGTGCTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7600:1744#0/1 +AGTTGTTGCAGTTAAAAAGCTCGTAGTTGAACCTTGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7789:1676#0/1 +ACCCGTCTGATAGCGCTTAAGCGCGAACTTCGAAAGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7950:1684#0/1 +TGAACGAGACCTCAGCCTGCTAACTAGCTACGTGGAG 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:7926:1706#0/1 +ACGGTATCTGGGGAATAAGCATCGGCTAACTCTGTGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7757:1711#0/1 +TTGCCCTTAGATGTTCTGGGCCGCACGCGCGCTACAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:7970:1747#0/1 +TCGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8054:1672#0/1 +CGAAAGGGGATCCGGTTAAAATTCCGGAACCGGGACG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8245:1672#0/1 +GATATATAATCATAGATTCCAGAATTTGACATTTTCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8088:1710#0/1 +GAAAGTTGGGGGCTCGAAGACGATCAGATACCGTCCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8244:1713#0/1 +GGGACGCATTTATTAGATAAAAGGTCGACGCGGGCTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8069:1727#0/1 +TCGTCTACACCGTCTACGCCACAGCCGTTGACCCCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8018:1745#0/1 +TTCTCTACTGGATTGCTCAGCTTCTTGGCTCCGTCGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8394:1647#0/1 +CTACAAGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8475:1658#0/1 +ACACTTGATCGGAATAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8367:1664#0/1 +GGGTGAGTAAACCCGTAAGGCGCAAGGAAGCTGATTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8488:1695#0/1 +GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8442:1727#0/1 +CGGTCGGCGATACGCTCCTGGTCTTAATTGGCCGGGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8328:1744#0/1 +TTGTCATTCTCTTCGCCGGAATCTAATCTCTCTCGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8520:1660#0/1 +TTGGCCGTTGTTTCCTTCTTCTTCTTCTTCTTCTTTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8563:1665#0/1 +AATTCAAATTTCTGCCCTATCAACTTTCGATGGTAGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8739:1667#0/1 +TCTCTCGGGAGCTTTCCCCGGGCGTCGAACAGTCAGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8596:1682#0/1 +TAGCCCACGAATGCGGTCACAACGCATTCAGCGACTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8643:1688#0/1 +TTGCACGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8727:1745#0/1 +AGGGAGAGCTAATGCTTCTTGGGTATTTAGGTTTGAC 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:8767:1649#0/1 +TCGGTACCAAATCGAGGCAAACTCTGAATACTAGATA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8932:1656#0/1 +GCTGGCGACGCATCATTCAAATTTCTGCCCTATCAAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8841:1666#0/1 +TCAAACGAGGAAAGGCTTACGGTGGATACCTAGGCAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8951:1673#0/1 +TGGTGGAGCTGGAGGAGGATTTGGTGGAGGAGCTGGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8876:1674#0/1 +AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:8993:1737#0/1 +TTACCGTGCGCTGGATTATGACTGAACGCCTCTAAGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:9241:1664#0/1 +AAGTCACTCTTTCTTGTTGCCTTACTTGTCGGCTCTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:9095:1710#0/1 +AGAACGAAAGTTGGGGGCTCGAAGACGATAAGATACC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:9231:1716#0/1 +TCGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:9171:1733#0/1 +AAAATTTCTGCCCTATCAACTTTCGATGGTAGGATAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:9471:1658#0/1 +GACACATACACACATAGCCATGGCCTCTTCTTTCTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:9383:1680#0/1 +TTCAGTGTTGATTCGTCTTCCTTCACGCCGTTCTTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:9627:1730#0/1 +ATTGTTTGTAAAGGGTGAGAGATTATTTTTCAGTGAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:9921:1680#0/1 +ATGGCTTCTCCGGTGAGATACCTGACTCGATTGGTTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:9931:1727#0/1 +AAAGCATCGGCTAACTCTGTGCCAGCAGCCGCGGTAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:10235:1668#0/1 +AAAAATCTTGAGTAAAAACAAATTTTCCTGTATCTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:10040:1686#0/1 +AAAACCCGTCTGATAGCGCTTAAGCGCGAACTTCGAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:10169:1689#0/1 +ACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:10213:1743#0/1 +GGCGCTTAAGCGCGCGACCTATACCCGGCCGTCGGGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:10021:1744#0/1 +CAAAGCAAGCCTACGCTCTGGATACATTAGCATGGGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:10294:1712#0/1 +TTCTCTCTCTAACATTCTTCAGAGAGGGAGACTTTAT 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:10392:1736#0/1 +TAGAGTGCTCAAAGCAAGCCTACGCTCTGGATACATT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:10582:1655#0/1 +ACGGTACTGGACAATGTGGAAGCTTCCCTTGTTCGGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:10777:1673#0/1 +CACTCTCACCCACAAGTTAGTCATAAAAAAAAAAAAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:10980:1714#0/1 +GTGGAGAGACTGAAAACCGCGAAGAGGATGTGAATGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:10931:1720#0/1 +TTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11171:1717#0/1 +TGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11458:1668#0/1 +GATCACAAGTTTTAAGCAGTATTTGTAAGAAAATGGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11388:1696#0/1 +TTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11317:1701#0/1 +ACAACTCAAGCTTCCATCAACTTGACCCACACCGAAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11403:1706#0/1 +TAGATAACATCAAGACAACAACCGTCGGTCCCGGAAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11291:1739#0/1 +GAAAATCAAATCTTTTCATTTACAATTATCTTTCTTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11593:1673#0/1 +TCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11537:1726#0/1 +CCATCTCCGTATTGTCTTCTACGTAGACAATGTGCCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11709:1733#0/1 +CGAGGCGCTGTCTACGAGTCGGGTTGTTTGGGAATGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11851:1657#0/1 +TAATCAAGATCGAAAGTTGGGGGCTCGAAGACGATCA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11822:1687#0/1 +TTGACGGCAACGTTAGGGAGTCCGGAGACGTCGGCGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11950:1691#0/1 +TTATGTTTACAGCTCTCCTCCTCCTCCGGTGAAGTCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11752:1704#0/1 +TGAACGGCACTTGCACATGGGTTAGTCGATCCTAAGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11895:1708#0/1 +TTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:11756:1739#0/1 +TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12136:1709#0/1 +ATGCGAAAAGTGTAAAGGTGGGAAGATCGGAATTGCA 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:12070:1715#0/1 +ACGAGCAGCTCCCAACCACAGACTACTGAATTAATCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12204:1724#0/1 +GAAAATTTTCCCCATTAAACAAAAAAAAATCAAATCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12140:1746#0/1 +ACTACTCTCTCCACTAAACAAAAACACTAGAGTTAAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12332:1664#0/1 +GCGAAAGCATTTGCCAAGGATGCTTTCATTAATCAAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12455:1665#0/1 +ACAAGTCTTGTCTCTCGAGTGTTCTTCAAATGTTAGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12252:1728#0/1 +GGAACTCCATGCGAATATGAAGCGCATGGATACAAGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12315:1747#0/1 +TGGACCGGTAATTTCATTACATCGCCGGACGGCCGGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12640:1665#0/1 +TCTCTTCTACAGTAAACAAAAAATGGCAATGAATGGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12666:1668#0/1 +TTATTCTGCACTTGGAAGAAGAACTAGAAAAAGGAAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12735:1728#0/1 +ATCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12751:1677#0/1 +TCCTCCTGTTTACAAGTCCCCACCACCACCGGTTAAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12821:1679#0/1 +TCTTCTCCGGTGATTTACTTACTTAACAATCATGGCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12935:1691#0/1 +ACTGCCTTCGGATTTGATTTGGTTCGTGGCACCAAGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:12996:1719#0/1 +TAATCTCTCTGTTTAATCTTATGATCTGCTGTTTTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:13223:1661#0/1 +GTTGATGTGTTTATTCCCAGAGATCGAAGGACTGGTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:13442:1665#0/1 +TTTAACAGCCTGCCCACCCTGGAAACGGCTCAGCCGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:13331:1684#0/1 +GGGGGTCGCAGTGACCAGGCCCGGGCGACTGTTTACC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:13451:1685#0/1 +TGCTCAAATTTCTTCTTTAGAAGATTAAAAATCTTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:13309:1690#0/1 +TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:13312:1742#0/1 +AAAACGATGCCGACCAGGGATCAGCGGATGTTGCTTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:13704:1696#0/1 +GACTCGGAGCAGAGATTTAGGGTCTGTAATTTGTATA 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:13563:1703#0/1 +TATAGTCAGAGGTGAAATTCTTGGATTTATGAAAGAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:13735:1728#0/1 +AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:13899:1667#0/1 +CGTTATTTTACTTACTCCGTGAATCGGAGGCGGGGTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:13836:1707#0/1 +AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:13886:1718#0/1 +GAACAGAAAATAGAAAGAAGTAACAAAACCAAAGCAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:13866:1724#0/1 +TCAATGAGCCATGGGTTTTCTCGCACGCTGGCTATGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:13943:1728#0/1 +TCGAAGACGATCAGATACCGTCCTAGTCTCAACCATA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:14097:1712#0/1 +CCCTCGACCTATTCTCAAACTTTAAATAGGTAGGACG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:14116:1729#0/1 +GTTCTCACGTTCGAAGACCATTTCTTGTTCTTTGGAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:14699:1669#0/1 +AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:14594:1718#0/1 +AATTTCCTCATGTTGAGAGGTACTTCTGGACTGTGGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:14743:1729#0/1 +AACTCACCTGCCGAATCAACTAGCCCCGAAAATGGAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:14510:1734#0/1 +TTCACCCACCGCAGCTTCACCACCGGCACCTCCGACG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:14821:1705#0/1 +AACTGGCTAACACGTATTGGGTTGAGTATCTCTCTAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:14892:1717#0/1 +CTTTCGGATATTCCACCAGTCTCTCGCAATCTTCGCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:14835:1743#0/1 +AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:15239:1702#0/1 +TCTGACGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:15059:1717#0/1 +TGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:15152:1718#0/1 +GGGCAGGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:15444:1702#0/1 +GATATCTATTTATTCAATAACCCTTACAACACCGAAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:15407:1747#0/1 +TCGGATACGGGTCGGGCTTCTTAGCTGTTTGAAGAAG 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:15681:1670#0/1 +AATTGCCACAACATGGGCGTTCTTCTTAGCAAGAATT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:15630:1695#0/1 +TTATTAGATAAAAGGTCGACGCGGGCTCTGCCCGGTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:15506:1715#0/1 +TTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:15976:1688#0/1 +ATCATCCATCACATCTCCTCTTGCAAACCAACGTAAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:15940:1745#0/1 +AACAAGGTAGCCGTACTGGAAGGTGCGGCTGGATCAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16043:1680#0/1 +ACGTATGTGGCAAAATACGGGGATGACTTGTGGCTAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16175:1683#0/1 +ATCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16074:1695#0/1 +AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16238:1700#0/1 +AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16229:1727#0/1 +TCGAAGACGATCAGATACCGTCCTAGTCTCAACCATA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16025:1727#0/1 +CCGGGAAGGACGCACCTCTGGTGTACCAGTTATCGTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16429:1678#0/1 +ACGGATCGCATGGCCTCTGTGCTGGCGACGCATCATT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16255:1695#0/1 +CTAAGAAAGTTGATCCACCGCCGGTGCCAGTCCACAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16494:1739#0/1 +AGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16656:1687#0/1 +TTAAATCAGTTATAGTTTGTTTGATGGTAACTACTAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16685:1715#0/1 +TAAAACGTCGTGGACTTTTTGAGTCTGACGCTGCATT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16885:1690#0/1 +CTCAGGATCGGAAGAGCACACGTCTGAACTCCAGTCA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16994:1709#0/1 +ATAACCGTAGTAATTCTAGAGCTAATACGTGCAACAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16768:1715#0/1 +TCGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:16899:1742#0/1 +GGTAAACTTCGAGCCTGTCCGACCCAGAAGGCACAAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:17201:1693#0/1 +TCTTTGGATAACAATATCCCAAACTGAAAATGGCTAA 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:17179:1704#0/1 +TTAAGTCCTCCGCTGCCTTCCCAGCCACCCGCAAGGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:17158:1711#0/1 +ACTAATGTAAAGGAAGCCTGTGCTTGGCTTGGATATA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:17051:1725#0/1 +ATAACGGTCCTAAGGTAGCGAAATTCCTTGTCGGGTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:17448:1685#0/1 +ACAATGACTGGGCCTCCACCTGCCATGGTGATGCCTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:17551:1710#0/1 +GGTTTGTCTTCAAAATCGTCAGAAGAAGAAGAAGAAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:17662:1746#0/1 +GCAACACGGGGAAACTTACCAGGTCCAGACATAGTAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:17823:1706#0/1 +TCCCCCGGTTGGATTGAAGGGTTGAAAAAATTAGACA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:17951:1723#0/1 +TGATCGGAGGATGTTGCGACGGAGCCGTCCTTTGACC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:18174:1676#0/1 +ATTAAATCCTAAAATCCATTATTGATTGAATCTTCGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:18241:1694#0/1 +CCAAATCTAATAAATCTAAGTGTAGTTTTCGGTGTAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:18095:1727#0/1 +ACAAGCAACGGCGGAAGAGTTAACTGCATGCAGGTGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:18074:1746#0/1 +ACCACCAAATCGCCGCCATGTTTAAACAAGCTTCTCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:18318:1679#0/1 +GAAAGCATTTGCCAAGGATGTTTTCATTAATCAAGAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:18410:1680#0/1 +GCAAGCCTACGCTCTGGATACATTAGCATGGGATAAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:18290:1681#0/1 +TCAGGCCTTGGAGGACCGAACCCACGTATGTGGCAAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:18280:1722#0/1 +GAGTTCGATCCTGGCTCAGGATGAACGCTGGCGGCAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:18429:1742#0/1 +TAGACTACGGATGGGACACCGCCGGACTTTCAGCTGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:18675:1682#0/1 +TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:18535:1684#0/1 +CCTTTGTACACACCGCCCGTCGCTCCTACCGATTGAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:18657:1722#0/1 +GAGAAGATAAAGAGATAAAGGAAAGACTAACGTTAAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:18618:1729#0/1 +TGAACACACACAAACACACACACACACAGCCTTTTTC 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:18823:1701#0/1 +ACCTGACCCAGATAGCGAGAAGTTTCATGGATAAGGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:19063:1699#0/1 +ACGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:19104:1705#0/1 +AAAAACGACTCTCGGCAACGGATATCTCGGCTCTCGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:19019:1718#0/1 +AGACGATCAGATACCGTCCTGGTCTCAACCATAAACG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:19433:1728#0/1 +AAATAGCGTATATTTAAGTTGTTGCAGTTAAAAAGCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:19355:1731#0/1 +AATCAAGAACGAAAGTTGGGGGCTCGAAGACGATCAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:19500:1683#0/1 +TTGTAACACGGACCAAGGAGTCTGACATGTGTGCGAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:19657:1698#0/1 +GGGATTGGCTTTGGGCTTTTCCTGCGCAGCTTAGGTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:19640:1749#0/1 +TTTAATTAATAACAATAAATGTTCTTTTTCAGTTTTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:19866:1690#0/1 +AAGCCCGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:19966:1707#0/1 +AAAATGCCAAGACGGACGATCAGGATACGAGGCTTAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:20090:1682#0/1 +CAGGTCCCCGAGTGGCTCACACGATATGCTTCACGTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:20183:1689#0/1 +GATATCGAGCGATGGATTTGGCAGAACTGTGGGCGAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:20206:1747#0/1 +TCGAAAGTTGGGGGCTCGAAGACGATCAGATGCCGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:20292:1695#0/1 +AGGAACGGAGACGGCAGGAACGATGAGTTCTATAGTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:20306:1713#0/1 +CCGATGGTTGTGGACAACATGTATTATAAGAACATCA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:20402:1713#0/1 +TTACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:20275:1726#0/1 +AACTGCGAATGGCTCATTAAATCAGTTATAGTTTGTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:20403:1737#0/1 +AAATAAGAAGACATATTTATCAACTTGATCAACTTGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:20539:1702#0/1 +CCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCGTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:20612:1731#0/1 +AGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGG 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:20566:1745#0/1 +AGTTCGGTCTCTCTGCCGGAGTCGGATCATTAAACGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:20889:1691#0/1 +CTTTCATTTCAGAGTCTTGGTGTTGTTTATGGTGATT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:20841:1709#0/1 +TTTGCTGGCATAATGGGAATGGGTTTTCCACCTTACA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:20980:1742#0/1 +TCCACTACTTTTAACGTTATTTTACTTACTCCGTGAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:21184:1708#0/1 +CGATCAGATACCGTCCTAGTCTCAACCATAAACGATG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:21173:1745#0/1 +TCGGTTGCTAATGGTTTGATCAATTTCCTCAACATAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:21427:1722#0/1 +CGTTAACGAACGAGACCTCAGCCTGCTAACTAGCTAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1449:1766#0/1 +TCAACTGCGAAAGCATTTGCCAAGGATGTTTTCATTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1303:1793#0/1 +TAACCATAAACGATGCCGACCAGGGATCAGCGGATGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1430:1865#0/1 +TCTTCATCCTCGTCTACTGCACCGCCGGTATCTCTGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1450:1892#0/1 +AGCAACTGCCAAAGCACCCGCAACAAAATTATAAAGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1342:1935#0/1 +ATTGATGATGTCTTTACTTCTTCAAGAGGATCTACCA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1406:1941#0/1 +AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1676:1764#0/1 +TGGTGAGACAAAAACGACGATATTTTCCTCTTCCTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1512:1774#0/1 +AAAAACAATAAACGAAAACTGAGAGAGAGATTGAAAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1677:1808#0/1 +ATTGGATCACTTTCAATGAGCCATGGGTTTTCTCGCA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1579:1824#0/1 +TTTTGTGTTTGTCCACCACCTCCTCCATGTTATTCTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1744:1840#0/1 +GGGTGAGAGCCCCGTTGTGCCCGGACCCTGTCGCACC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1579:1852#0/1 +AGAAGAAGAAGAAGAAAAAGTGGAGCAAGCTTCGTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1550:1869#0/1 +AACTCTGTGCCAGCAGCCGCGGTAATACAGAGGATGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1565:1898#0/1 +AGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCACGC 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:1738:1918#0/1 +ACATTCCTCACCAAACCCTCTCCAAAACACACCCACA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1547:1922#0/1 +TCGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1732:1964#0/1 +TCTCGCGCTTGTACGGCTTTGGCTCGGATTCGTCCGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1654:1991#0/1 +GATTATATAAGGGAATGTTCAGTTCCAGTGACTGAGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1518:1992#0/1 +GGTGGAGGTTTACAAACACCACCAATCTTCTCTCTTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1865:1796#0/1 +GATTCATCCCAAAACATTACAAAACGTTACAATGGCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1886:1815#0/1 +GGGCGTAAAGCGTCTGTAGGTGGCTTTTTAAGTCCGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1855:1840#0/1 +TGGTATCGGTGGACTCGGCGGTGCAGGTGGGCTAGGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1960:1855#0/1 +ACCTCACTGCCGCCGATTAGACTCCGCCGGATGCAAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1808:1879#0/1 +TTACCATAAACGATGCCGACCAGGGATCAGCGGATGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1869:1890#0/1 +TGCTTCTTGGGTATTTAGGTTTGACACAGCTTCACAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1898:1925#0/1 +TCCCATGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:1765:1950#0/1 +AACTCATATCAACCTTCGCCGGAAATAATGGCTTTCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2096:1773#0/1 +ATAGCACCGATAACAATCTACTCTCTTTAAAAGAAAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2186:1774#0/1 +TTGAGAAAATAAACGAGGAGGTGGCTCATCCTGAGAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2019:1774#0/1 +AAGACGATCAGATACCGTCCTAGTCTCAACCATAAAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2236:1797#0/1 +GTGTAGTCTGGAGAAGCGTCCTCAGCGACGGACCGGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2218:1825#0/1 +ACCTATCTCACTCTAAAATCTCTCTCTGCCAATCTCA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2067:1830#0/1 +ACATGTCTGTGAAGATGCGGACTACCTGCACCTGGAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2183:1833#0/1 +AGGGCGCGAGCCCGGGCGGAGCGGCCGTCGGTGCAGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2095:1843#0/1 +GACACAACACACCTCAAGCTTTATAACTTCTAAAACA 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:2184:1923#0/1 +AAGAGTCGGGGGAAACCCGTCTGATAGCGCTTAAGCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2086:1933#0/1 +AAAAACGACTCTCGGCAACGGATATCTCGGCTCTCGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2134:1944#0/1 +AGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2220:1966#0/1 +AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2066:1971#0/1 +TGAAGCATTTTCCAAAGAGAAAGAGAGAGAAATGGGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2239:1991#0/1 +ATTATCCATTGGAGGGCAAGTCTGGTGCCAGCAGCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2113:1996#0/1 +TTTGGTGTGCATTGGTCGGCTTGTCCCTTCGGTCGGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2369:1755#0/1 +TGATCCTTTCGTCTTTTTCTGACTCTTCAATCTCTCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2281:1791#0/1 +TTAATTCCGGAACCGGGACGTGGCGGTTGACGGCAAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2431:1837#0/1 +AAAATTTCATCGTGATGGGGATAGATCATTGCAATTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2272:1843#0/1 +GGGGATCCGGTTAAAATTCCGGAACCGGGACGTGGCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2316:1843#0/1 +TGGCTTGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2451:1850#0/1 +TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2264:1872#0/1 +ATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2443:1872#0/1 +ACAGGTCTGTGATGCCCTTAGATGTTCTGGGTCGCAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2297:1890#0/1 +ATAACATCATAGGATTTCGATCCTATTGTGTTGGCCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2472:1986#0/1 +ACTACCACTCTCCACCTCCTCCGGCGAAGTCCCCACC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2564:1768#0/1 +GGGGCCTGATAGGCGGTGGTTTACCCTGTGGCGGATG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2674:1786#0/1 +TAATAACAGGTCTGTGATGCCCTTAGATGTTCTGGGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2648:1802#0/1 +GCGATAATACAGAGGATGCAAGCGTTATCCGGAATGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2691:1831#0/1 +CAGTATAATCAATCAGAAAACAAGTAGAAACTTTAAA 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:2560:1839#0/1 +TCGAAGACGATCAGATACCGTCCTAGTCTCGACCATA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2612:1876#0/1 +TCTTGGGCCTTTGCCACCAACTTTGTTCCCGGAAAGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2503:1919#0/1 +TCAGACGAGGAAAGGCTTACGGTGGATACCTAGGCAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2640:1953#0/1 +CACCGGGAGAAAATCCTCCTCCGCTGTCGTGAGAGCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2553:1969#0/1 +GATCATCACCTTCCCCACTAATACTCTATAGTTTGTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2669:1987#0/1 +GGATCGTGAGACTCCGATGGTTGTGGACAACATGTAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2782:1800#0/1 +GACATTTTCTGTTTCCGTCTACAAGAACCACTTTGTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2957:1811#0/1 +ATTAATTCACATTTAAACACTTCTCTGCATATATTTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2990:1818#0/1 +TATTTTACTTACTCCGTGAATCGGAGGCGGGGTACAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2806:1852#0/1 +TGAAAATAAGCGTAGATCCGGAGATTCCCGAATAGGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2899:1856#0/1 +ACTGATGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2903:1882#0/1 +AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2913:1931#0/1 +TTGAGGCAATAACAGGTCTGTGATGCCCTTAGATGTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2816:1932#0/1 +GCATCAGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2795:1958#0/1 +TCTTTCTTGATTCTATGGGTGGTGGTGCATGGCCGTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:2993:1984#0/1 +GACGATCAGATACCGTCCTAGTCTCAACCATAAACGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3163:1754#0/1 +AGGCAAGAGACAACCTGGCGAACTGAAACATCTTAGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3082:1793#0/1 +AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3054:1825#0/1 +TAGATGTTGAGACTCTAATCCCTAACCACAATGACTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3100:1835#0/1 +CTATCTTAAAACTTCTTACCTAAAATAGAAATTTGAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3199:1865#0/1 +TTTCTTAAAAATTGGATTTTGTGTTGGGTTTTTCTCT 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:3032:1907#0/1 +AGAGAGAAGAAGTAGGCAGACAAAGAAGAAGAAGAAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3232:1919#0/1 +ATCACAAGCAAACAAGAGAAAACATTTTATTGTTATT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3075:1937#0/1 +ACAGACTGTGAAACTGCGAATGGCTCATTAAATCAGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3005:1941#0/1 +TTTTAAGTCCGCCGTCAAATCCCAGGGCTCAACCCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3026:1990#0/1 +AAGAAAGAGATTCCCCCGCCGGTTCCGGTTTACGATC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3131:1992#0/1 +GGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3319:1777#0/1 +GCGACCCCAGGTCAGGCGGGATTACCCGCTGAGTTTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3337:1789#0/1 +GACGCGGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3416:1846#0/1 +AACAGGTCTGTGATGCCCTTAGATGTTCTGGGCCGCA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3292:1846#0/1 +AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3491:1888#0/1 +ATCTTTTCGAAGAGCGGAAGCTAAGGAGAGGCGAATT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3266:1903#0/1 +TATAGAAATGGCCAAAGACGTGGAAGGACCTGAGGGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3339:1904#0/1 +GTGAAGACACCAGAGACGCCTAGTTTGGTGGGAAAGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3435:1918#0/1 +AGCGTCCTCAGCGACGGACCGGGCCTAAGTTCCCTGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3478:1938#0/1 +CCGGAGAATAATATCCGATCTGCTAGTGCGGTTAATA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3381:1938#0/1 +ATCAGCGACTCTCCCACTCGCTCGTGTCGTCGAAGTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3311:1972#0/1 +ACATTCATCCCAAAACATTACAAAACGTTACAATGGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3471:1996#0/1 +CCCATGTCCACCTAAATACAGTCCTCCTGTGGAGGTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3408:1998#0/1 +AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3714:1760#0/1 +TGTGCTGGCGACGCATCATTCAAATTTCTGCCCTATC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3734:1790#0/1 +ATACAGCAGCTGAAGCTGTGTATCGGAAAGCTCAATT 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:3699:1795#0/1 +ACAAAAGACAAGTTTTTAAACTGCAGAACCGCATTTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3651:1798#0/1 +TCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3557:1799#0/1 +TGATAGTGCTTAAGCGCGAACTTCGAAAGGGGATCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3531:1811#0/1 +TACAGACACACACGTGGCTCATCACCTGTTCTCGACA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3526:1856#0/1 +TCTTTGAAGAAATTAGAGTGCTCAAAGCAAGCCTACG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3693:1860#0/1 +AAAACAACTCTGCTTCACTCTCTATCTTTCTTAAGTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3616:1916#0/1 +AGCTTGAAAATCGGACGTCTTCGGCGTTCGAATTGTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3599:1930#0/1 +AGACGATCAGATACCGTCCTAGTCTCAACCATAAACG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3620:1956#0/1 +GAATCTCTCTGTGTTTTTTCTATCTCTCTCTTTCTTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3596:1973#0/1 +TGTAAGAGCTAGGCAGCAGGGATTATGTGTACGCAAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3721:1974#0/1 +AAGCCTGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3507:1998#0/1 +ACAAGGTGCTGGTGCTGGAGGAGGATATGGAGGTGGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3756:1769#0/1 +ACGCCCTCGACCTATTCTCAAACTTTAAATAGGTAGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3901:1846#0/1 +TACAAGTCTCCTCCGCCACCAACTCCGACATATGTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3892:1871#0/1 +ATCACCGCCTCCATATTCTTTCGCGTCTTTTGCTTCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3765:1925#0/1 +AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3842:1953#0/1 +TCGAGCATTTTTGACGCCAAGGCTGGAATTGCATTCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3758:1970#0/1 +AACCCCGACTTATGGAAGGGACGCATTTATTAGATAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3931:1972#0/1 +CTCCGTGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:3828:1991#0/1 +CCTTCGGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4152:1771#0/1 +TCCTGAGATTTTTTATATATTTTCTCCAGATCTGCTA 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:4212:1780#0/1 +GACTCAACCAATTTCTTCTCAGGTAATACTCGTAAGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4130:1806#0/1 +TTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4183:1813#0/1 +AAGGCACGTGTCGTTGGCTAAGTCCGTTCGGCGGAAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4010:1857#0/1 +ACTCTGCTTCACTCTCTATCTTTCTTAAGTAAACAAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4206:1859#0/1 +TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4051:1895#0/1 +TCAAGCGTTATCCGGAATGATTGGGCGTAAAGCGTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4226:1939#0/1 +TGATCATCACTTTCACAATCTTCTTCATCGATTTCTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4381:1809#0/1 +GCAGCATCAGCAACAACAGAGCAGTCCTGGCTTTCTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4456:1814#0/1 +TTCATGGACGTTGATAAGATCTTTCCATTTAGCAGCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4311:1831#0/1 +AAGAGTTATCTTTTCTGTTTAACAGCCTGCCCACCCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4464:1844#0/1 +TACGCTTGGGCCTTCGTCGCCAAGACTTCTCAAGTTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4254:1850#0/1 +AAAAGGAGGTAGGGGTGCAGAGACAGCCAGGAGGTTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4286:1883#0/1 +ATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4465:1887#0/1 +AACACGGACCAAGGAGTCTGACATGTGTGCGAGTCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4329:1895#0/1 +CCGTTCGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4379:1906#0/1 +ACTACTCTCTCCACTAAACAAAAACACTAGAGTTAAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4486:1907#0/1 +TAAGAGCCAAAGGTTGAGAATGTGACTCTTGGACCAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4456:1996#0/1 +GAGGATGCGAAAAGATACATCCGGCAACTTCCCAACT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4674:1782#0/1 +TTTATGACTGAACGCCTCTAAGTCAGAATCCGGGCTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4634:1791#0/1 +GAATCACTTCACTCTCTCTAATCAAAAAGCTTTTAAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4501:1806#0/1 +TCGAAAGTTGGGGGCTCGAAGACGATCAGATACCGTC 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:4629:1820#0/1 +TGATCGTTCTTATTGACCCTAGCCGCTACACACTTTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4590:1838#0/1 +ATTCATTTCAATCAATCTTCTTCTTCTTCTTCTTCTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4741:1847#0/1 +AACGACTCTCGGCAACGGATATCTCGGCTCTCGCATC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4708:1892#0/1 +TTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4508:1902#0/1 +TTAGCATGGGATAACATCATAGGATTTCGATCCTATT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4691:1903#0/1 +AGTGAAATACCACTACTTTTTACGTTATTTTACTTAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4648:1966#0/1 +AAATTTCAAAATCAGATCCAACAAATCTTCTTCTTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4963:1798#0/1 +AGACGTCGGCGGGGGCCTCGGAAAGAGTTATCTTTTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4766:1882#0/1 +CTGTCGCACCACGAGGCGCTGTCTACGAGTCGGGTTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4860:1888#0/1 +ATACTCGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4947:1899#0/1 +CATCAATACTCCTGTGAGAACAAAATGAAGCTTTCTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4982:1926#0/1 +AGACTCGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4879:1942#0/1 +TCGAGTCAGGTAATTGGAATGAGTACAATCTAAATCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:4914:1944#0/1 +GTGCAAAGGTTTCCTCGGGCCGGACGGAGATTGGCCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5091:1754#0/1 +TTTATGTTTCTCATTATTACTGCGGGAATTTCAATTA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5145:1756#0/1 +TATCGGTAGGGGAGCGTTCCGCCTTAGGGGGAAGCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5015:1781#0/1 +TTTTCATTAATCAAGAACGAAAGTTGGGGGCTCGAAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5166:1833#0/1 +TTTATGAAAGACGAACAACTGCGAAAGCATTTGCCAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5035:1836#0/1 +TGGCGACGCATCATTCAAATTTCTGCCCTATCAACTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5195:1862#0/1 +TTCACCCATTTCTTGGCTTACAACAACAAATCTTAAA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5062:1871#0/1 +TTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGAT 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:5235:1878#0/1 +ACGGAAATGTCTTCATGGGTTCTTCCGAACATGTTCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5231:1897#0/1 +GGAACGAACACACCACCTTCTCTCCAATCTGGATCTG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5168:1901#0/1 +ACTCACCTGCCGAATCAACTAGCCCCGAAAATGGATG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5220:1915#0/1 +CCACCTCCGGTTCCGGTTTACAAGCCCCCGCCAAAGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5013:1950#0/1 +ACTTTCAATGAGCCATGGGTTTTCTCGCACGCTGGCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5175:1985#0/1 +CGTGAAATACCACTACTTTTAACGTTATTTTACTTAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5145:1985#0/1 +ATTTAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGAAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5453:1770#0/1 +CATCAAAACCCAGACGCTGTATGGTTATGGGGAATGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5412:1781#0/1 +ATGTCGGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5412:1816#0/1 +TAGGACTCCGCTGGCACCTTATGAGAAATCAAAGTTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5496:1822#0/1 +AAAAACCTAAAACAAAAAAAATCTCTTTCCTTCTTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5346:1823#0/1 +AAGCCACCACCAAAGGTGGAGCTTCCACCGCCTATTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5333:1831#0/1 +TCGAGTCTGGTAATTGGAATGAGTACAATCTAAATCC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5305:1833#0/1 +AGAACGAAAGTTGGGGGCTCGAAGACGATCAGATACC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5371:1867#0/1 +GTAGCGTCCTCAGCAACGGACCGGGCCTAAGTTCCCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5374:1893#0/1 +ATGTTTTCATTAATCAAGAACGAAAGTTGGGGGCTCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5377:1943#0/1 +TGTTCGGGAAACATTGACGACAAAGGAAAGTTTGGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5313:1973#0/1 +TGTATACAAGTCTCCTCCGCCACCAACTCCGACATAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5620:1756#0/1 +AAAAACTTTCTCTCAATTCTCTCTACCGTGATCAAGA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5716:1779#0/1 +TCCCTTCACGGCCGGCTTCTTAGAGGGACTATGGCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5667:1857#0/1 +GTATTCAACGAGTTCACACCTTGGCCGACAGGCCCGG 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:5654:1887#0/1 +GTAACGGGTGACGGAGAATTAGGGTTCGATTCCGGAG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5636:1908#0/1 +TTCTCGCGCTTGTACGGCTTTGGCTCGGATTCGTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5715:1927#0/1 +CCGTCGGGGCAAGAGCCAGGCCTCGATGAGTAGGAGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5610:1936#0/1 +GAAGAAGCAATGACGGTATCTGGGGAATAAGCATCGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5832:1784#0/1 +GCCCGGATCGGAAGAGCACACGTCTGAACTCCAGTCA +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5917:1794#0/1 +AGTCGCGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5769:1883#0/1 +AGCAAAGTTTTATGTAATCAAATCGTACAGTGAAGAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5796:1889#0/1 +TTGTGCTGGCGACGCATCATTCAAATTTCTGCCCTAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5992:1905#0/1 +ATGTGTGTATTAATTTATCTTCTTGTTTTAAAGAGAC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5885:1923#0/1 +AAGAAGAGCCAATGGCGATGAAGACATCACATGTTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5976:1937#0/1 +AGCGCGAACTTCGAAAGGGGATCCGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:5957:1978#0/1 +TTAGTTGAACCTTGGGATGGGTCGGCCGGTCCGCCTT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6197:1781#0/1 +ACTTAGGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6180:1789#0/1 +ATTTTGGCTGGTGCTGAATACGGTAGTGGAAGTTCTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6038:1789#0/1 +TACCTGGTTGATCCTGCCAGTAGTCATATGCTTGTCT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6177:1825#0/1 +TTGATCCCGAGACTGAAGCTATGACCAGGAGAATTGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6159:1830#0/1 +CGGGCCGATCGGAAGAGCACACGTCTGAACTCCAGTC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6151:1878#0/1 +ATACGGTGTATGAATCCGAATTACACAGAGTTCAAGT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6123:1878#0/1 +AGCGCTAACTTCGAAAGGGGATCCGGTTAAAATTCCG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6196:1879#0/1 +ATTTCTGCCCTATCAACTTTCGATGGTAGGATAGTGG +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6237:1895#0/1 +AAGATCCCAGACGAAATGGCTCAGAAAGTGGTGCTGA 
+>DGM97JN1_120925_0255_AD166MACXX:4:1101:6023:1896#0/1 +ACAACCGCTGAATATTTGGCTTATGAATGTGGAAAGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6123:1904#0/1 +ATGATAACTCGACGGATCGCATGGCCTCTGTGCTGGC +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6125:1922#0/1 +ATGGATCGTTGCACGTCGTCGTTTTTGCTTCCCACAT +>DGM97JN1_120925_0255_AD166MACXX:4:1101:6184:1926#0/1 +GATCATCAAAAAACACCTCAAAGAATTATTCATTCAG
diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/seq_track.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/seq_track.py Tue Nov 18 16:24:04 2014 -0500
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Write the reads whose IDs are listed in a table out as FASTA records.
+
+Usage: seq_track.py <unmap_file> <reads_file> <result_file> <format>
+"""
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+unmap_file = sys.argv[1]   # table whose first column holds the read IDs to keep
+reads_file = sys.argv[2]   # sequence file to filter
+result_file = sys.argv[3]  # FASTA output path
+tp = sys.argv[4]           # format name handed to SeqIO.parse
+
+# read_t_file is supplied by the wildcard import from read_file; only the
+# first element of each row it returns is used, collected into a set for
+# the membership test below.
+unmap = read_t_file(unmap_file)
+um = set(row[0] for row in unmap)
+
+# open() replaces the Python-2-only file() builtin, and the with-block closes
+# the output even when parsing fails part-way through.
+with open(result_file, 'w') as h:
+    for read in SeqIO.parse(reads_file, tp):
+        if read.id in um:
+            # str(seq) replaces Seq.tostring(), which current Biopython
+            # releases no longer provide.
+            h.write('>' + read.id + '\n' + str(read.seq) + '\n')
diff -r 36d912d5b1ac -r d2817a631a7b Iterative_mapping/truncate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/truncate.py Tue Nov 18 16:24:04 2014 -0500
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Trim a fixed number of bases from one end of every FASTA record.
+
+Usage: truncate.py <fasta_file> <shift> <result_file> <length> <t_end>
+
+Records that would be shorter than <length> after trimming are dropped.
+<t_end> is 'three_end' (cut the last <shift> bases) or 'five_end' (cut the
+first <shift> bases).
+"""
+
+import sys
+from Bio import SeqIO
+
+fasta_file = sys.argv[1]
+shift_in = sys.argv[2]
+result_file = sys.argv[3]
+length = sys.argv[4]
+t_end = sys.argv[5]
+
+shift = int(shift_in)
+min_length = int(length)  # converted once instead of once per record
+
+# Both handles are managed by with-blocks so they are closed on every path;
+# open() replaces the Python-2-only file() builtin.
+with open(fasta_file) as src:
+    with open(result_file, 'w') as h:
+        for seq in SeqIO.parse(src, 'fasta'):
+            # str(seq) replaces Seq.tostring(), which current Biopython
+            # releases no longer provide.
+            sequence = str(seq.seq)
+            kept = len(sequence) - shift
+            if kept < min_length:
+                continue
+            h.write('>' + seq.id + '\n')
+            if t_end == 'three_end':
+                h.write(sequence[:kept])
+            if t_end == 'five_end':
+                h.write(sequence[shift:])
+            # Any other t_end value leaves the sequence line empty.
+            h.write('\n')
diff -r
36d912d5b1ac -r d2817a631a7b Iterative_mapping/unmap.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/unmap.py Tue Nov 18 16:24:04 2014 -0500
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""List the first field of every record whose second field is non-zero.
+
+Usage: unmap.py <map_file> <result_file>
+
+<map_file> is read as tab-separated text (presumably a SAM file, where the
+second column is the FLAG -- confirm against the caller).
+"""
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+map_file = sys.argv[1]
+result_file = sys.argv[2]
+
+# open() replaces the Python-2-only file() builtin; the with-blocks close
+# both handles on every path.
+with open(map_file) as f:
+    with open(result_file, 'w') as h:
+        # Iterate the handle directly so the whole input is never held in
+        # memory the way f.readlines() would.
+        for aline in f:
+            tl = aline.strip().split('\t')
+            # Lines with four or fewer fields are skipped.
+            if len(tl) > 4 and int(tl[1].strip()) != 0:
+                h.write(tl[0].strip() + '\n')