# HG changeset patch # User tyty # Date 1410806473 14400 # Node ID d56631911cc16db2c852e30f8db287717eb1897a Uploaded diff -r 000000000000 -r d56631911cc1 Iterative_mapping/.DS_Store Binary file Iterative_mapping/.DS_Store has changed diff -r 000000000000 -r d56631911cc1 Iterative_mapping/iterative_map.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/iterative_map.py Mon Sep 15 14:41:13 2014 -0400 @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +import os +from read_file import * +from read_s_file import * + +type_input = sys.argv[1] +seq_file = sys.argv[2] +ref_file = sys.argv[3] +shift = sys.argv[4] +length = sys.argv[5] +map_type = sys.argv[6] +output_file = sys.argv[7] + + +if map_type!="default": + s = "" + s = s+"-v "+sys.argv[8] + s = s+" -5 "+sys.argv[9] + s = s+" -3 "+sys.argv[10] + s = s+" -k "+sys.argv[11] + if sys.argv[12]: + s = s+" -a" + if int(sys.argv[13])>=1: + s = s+" -m "+sys.argv[13] + if sys.argv[14]: + s = s+" --best --strata " + +else: + s = "-v 3 -a --best --strata " + +ospath = os.path.realpath(sys.argv[0]) +ost = ospath.split('/') +syspath = "" +for i in range(len(ost)-1): + syspath = syspath+ost[i].strip() + syspath = syspath+'/' + +os.system("bowtie-build -f "+ref_file+" "+syspath+"ref > "+syspath+"log.txt") + +os.system("cp "+seq_file+" "+syspath+"seq0.fa") + +if type_input == "fasta": + tp = 'fasta' +if type_input == "fastq": + tp = 'fastq' + +k = 0 +print(type_input) +while(True): + if type_input == "fasta": + os.system("bowtie "+s+"-f "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam") + if type_input == "fastq": + os.system("bowtie "+s+"-q "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam") + os.system("samtools view -Sb -F 0xfff "+syspath+"map"+str(k)+".sam > "+syspath+"mapped"+str(k)+".bam 2>"+syspath+"log.txt") #get mapped reads + os.system("samtools view -Sb -f 0x4 "+syspath+"map"+str(k)+".sam > "+syspath+"umapped"+str(k)+".bam 2>"+syspath+"log.txt") #get unmapped reads + os.system("samtools view -Sb -f 0x10 "+syspath+"map"+str(k)+".sam > "+syspath+"rmapped"+str(k)+".bam 2>"+syspath+"log.txt") #get reversed mapped reads + os.system("samtools merge -f "+syspath+"unmapped"+str(k)+".bam "+syspath+"umapped"+str(k)+".bam "+syspath+"rmapped"+str(k)+".bam") #get reversed mapped reads + os.system("samtools view -h -o "+syspath+"unmapped"+str(k)+".sam "+syspath+"unmapped"+str(k)+".bam") #get reversed mapped reads + if k>0: + os.system("samtools view -h -o "+syspath+"mapped"+str(k)+".sam "+syspath+"mapped"+str(k)+".bam") #get reversed mapped reads + os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"unmapped"+str(k)+".txt") + os.system("cut -f 1 "+syspath+"mapped"+str(k)+".sam > "+syspath+"mapped"+str(k)+".txt") + os.system("python "+syspath+"remove_map.py "+syspath+"unmapped"+str(k)+".txt "+syspath+"mapped"+str(k)+".txt "+syspath+"runmapped"+str(k)+".txt") + os.system("rm "+syspath+"mapped"+str(k)+".sam") + os.system("rm "+syspath+"mapped"+str(k)+".txt") + os.system("rm "+syspath+"unmapped"+str(k)+".txt") + else: + os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"runmapped"+str(k)+".txt") + + os.system("rm "+syspath+"unmapped"+str(k)+".bam") + os.system("rm "+syspath+"umapped"+str(k)+".bam") + os.system("rm "+syspath+"rmapped"+str(k)+".bam") + os.system("python "+syspath+"seq_track.py "+syspath+"runmapped"+str(k)+".txt "+syspath+"seq"+str(k)+".fa "+syspath+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence + os.system("python "+syspath+"truncate.py "+syspath+"unmap_seq"+str(k)+".fa "+shift+" "+syspath+"seq"+str(k+1)+".fa "+length) #truncate unmapped sequence + os.system("rm "+syspath+"seq"+str(k)+".fa") #Remove sequences being mapped + os.system("rm "+syspath+"map"+str(k)+".sam") #Remove mapping file + os.system("rm "+syspath+"unmap_seq"+str(k)+".fa") #Remove unmapped sequnce + os.system("rm "+syspath+"runmapped"+str(k)+".txt") + os.system("rm "+syspath+"unmapped"+str(k)+".sam") + + os.system("wc -l "+syspath+"seq"+str(k+1)+".fa > "+syspath+"count"+str(k+1)+".txt") + c = read_sp_file(syspath+"count"+str(k+1)+".txt") + if c[0][0] == '0': #If no reads is in the sequence file, stop + os.system("rm "+syspath+"count"+str(k+1)+".txt") + os.system("rm "+syspath+"seq"+str(k+1)+".fa") + break + os.system("rm "+syspath+"count"+str(k+1)+".txt") + k = k+1 + +ss = "" +for i in range(0,k+1): + ss = ss+" "+syspath+"mapped"+str(i)+".bam" + + +os.system("samtools merge -f "+output_file+" "+ss) +#print("samtools merge mapped_all.bam"+ss) +os.system("rm "+syspath+"mapped*.bam") +os.system("rm "+syspath+"ref*") + + diff -r 000000000000 -r d56631911cc1 Iterative_mapping/iterative_map.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/iterative_map.xml Mon Sep 15 14:41:13 2014 -0400 @@ -0,0 +1,80 @@ + + + + #if $mapping_file.type == "user" + iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $mapping_file.type $output $mapping_file.param_v $mapping_file.param_five $mapping_file.param_three $mapping_file.param_k $mapping_file.param_a $mapping_file.param_m $mapping_file.param_best + #else + iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $mapping_file.type $output + #end if + + + biopython + numpy + samtools + bowtie + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**TIPS**: + +----- + +**Input**: + +* 1. Sequence file type (FASTA/FASTQ) +* 2. Sequence file (fasta/fastq format) {Default: fastq file} +* 3. Reference file (e.g. cDNA library [fasta]) +* 4. “Shift” (The length of the sequence that will be trimmed at the 3’end of the reads before each round of mapping) +* 5. “Length” (The minimum length of the reads for mapping after trimming) +* [Optional] +* 1. Bowtie mapping flags (options) [Default: -v 0 -a --best --strata] (-v flag indicates the number of allowed mismatches. use -5/-3 flag to trim nucleotides from 5'/3' end of the reads) + +----- + +**Output**: + +A bam file with all of the reads that are mapped + + + + + diff -r 000000000000 -r d56631911cc1 Iterative_mapping/map_ex.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/map_ex.py Mon Sep 15 14:41:13 2014 -0400 @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * +from Bio import SeqIO + +map_file = sys.argv[1] +result_file = sys.argv[2] + + +#reads = read_t_file(read_file); + +f = open(map_file); +h = file(result_file, 'w') + +for aline in f.readlines(): + tline = aline.strip(); + tl = tline.split('\t'); + if len(tl)>4: + if int(tl[1].strip())== 0: + h.write(tline) + h.write('\n') + + +f.close(); +h.close() + + + + diff -r 000000000000 -r d56631911cc1 Iterative_mapping/read_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/read_file.py Mon Sep 15 14:41:13 2014 -0400 @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + + + +def read_t_file(in_file): + f = open(in_file); + result = []; + for aline in f.readlines(): + temp = []; + tline = aline.strip(); + tl = tline.split('\t'); + for i in range(0, len(tl)): + temp.append(tl[i].strip()); + result.append(temp); + f.close(); + return result; + + diff -r 000000000000 -r d56631911cc1 Iterative_mapping/read_s_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/read_s_file.py Mon Sep 15 14:41:13 2014 -0400 @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + + + +def read_sp_file(in_file): + f = open(in_file); + result = []; + for aline in f.readlines(): + temp = []; + tline = aline.strip(); + tl = tline.split(' '); + for i in range(0, len(tl)): + if len(tl[i].strip())>0: + temp.append(tl[i].strip()); + result.append(temp); + f.close(); + return result; + + diff -r 000000000000 -r d56631911cc1 Iterative_mapping/remove_map.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/remove_map.py Mon Sep 15 14:41:13 2014 -0400 @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * + + +unmap_file = sys.argv[1] +map_file = sys.argv[2] +result_file = sys.argv[3] + + +unmap = read_t_file(unmap_file) +mapped = read_t_file(map_file) +h = file(result_file, 'w') + +maps = set() +for i in range(len(mapped)): + maps.add(mapped[i][0]) + + +for i in range(len(unmap)): + name = unmap[i][0] + if name not in maps: + h.write(name) + h.write('\n') + + +h.close() diff -r 000000000000 -r d56631911cc1 Iterative_mapping/seq_track.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/seq_track.py Mon Sep 15 14:41:13 2014 -0400 @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * +from Bio import SeqIO + +unmap_file = sys.argv[1] +reads_file = sys.argv[2] +result_file = sys.argv[3] +tp = sys.argv[4] + + +unmap = read_t_file(unmap_file); + +h = file(result_file, 'w') + +reads = SeqIO.parse(reads_file,tp) +um = set() +for i in range(0, len(unmap)): + id_r = unmap[i][0] + um.add(id_r) + +for read in reads: + if read.id in um: + h.write('>') + h.write(read.id) + h.write('\n') + h.write(read.seq.tostring()) + h.write('\n') + + + +h.close() + + + + diff -r 000000000000 -r d56631911cc1 Iterative_mapping/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/tool_dependencies.xml Mon Sep 15 14:41:13 2014 -0400 @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff -r 000000000000 -r d56631911cc1 Iterative_mapping/truncate.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/truncate.py Mon Sep 15 14:41:13 2014 -0400 @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from Bio import SeqIO + +fasta_file = sys.argv[1] +shift_in = sys.argv[2] +result_file = sys.argv[3] +length = sys.argv[4] + +shift = int(shift_in) + +fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); +h = file(result_file,'w') +for seq in fasta_sequences: + nuc = seq.id; + sequence = seq.seq.tostring(); + if (len(sequence)-shift)>=int(length): + h.write('>'+nuc) + h.write('\n') + h.write(sequence[0:(len(sequence)-shift)]) + h.write('\n') + + + + +h.close() + + + + diff -r 000000000000 -r d56631911cc1 Iterative_mapping/unmap.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/unmap.py Mon Sep 15 14:41:13 2014 -0400 @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * +from Bio import SeqIO + +map_file = sys.argv[1] +result_file = sys.argv[2] + + +#reads = read_t_file(read_file); + +f = open(map_file); +h = file(result_file, 'w') + +for aline in f.readlines(): + tline = aline.strip(); + tl = tline.split('\t'); + if len(tl)>4: + if int(tl[1].strip()) != 0: + h.write(tl[0].strip()); + h.write('\n'); + + +f.close(); +h.close() + + + +