annotate Iterative_mapping/iterative_map.py @ 3:f4cc06e92530 draft

Uploaded
author tyty
date Mon, 15 Sep 2014 14:52:20 -0400
parents d56631911cc1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d56631911cc1 Uploaded
tyty
parents:
diff changeset
1 #!/usr/bin/env python
d56631911cc1 Uploaded
tyty
parents:
diff changeset
2 # -*- coding: utf-8 -*-
d56631911cc1 Uploaded
tyty
parents:
diff changeset
3
d56631911cc1 Uploaded
tyty
parents:
diff changeset
4 import sys
d56631911cc1 Uploaded
tyty
parents:
diff changeset
5 import os
d56631911cc1 Uploaded
tyty
parents:
diff changeset
6 from read_file import *
d56631911cc1 Uploaded
tyty
parents:
diff changeset
7 from read_s_file import *
d56631911cc1 Uploaded
tyty
parents:
diff changeset
8
d56631911cc1 Uploaded
tyty
parents:
diff changeset
9 type_input = sys.argv[1]
d56631911cc1 Uploaded
tyty
parents:
diff changeset
10 seq_file = sys.argv[2]
d56631911cc1 Uploaded
tyty
parents:
diff changeset
11 ref_file = sys.argv[3]
d56631911cc1 Uploaded
tyty
parents:
diff changeset
12 shift = sys.argv[4]
d56631911cc1 Uploaded
tyty
parents:
diff changeset
13 length = sys.argv[5]
d56631911cc1 Uploaded
tyty
parents:
diff changeset
14 map_type = sys.argv[6]
d56631911cc1 Uploaded
tyty
parents:
diff changeset
15 output_file = sys.argv[7]
d56631911cc1 Uploaded
tyty
parents:
diff changeset
16
d56631911cc1 Uploaded
tyty
parents:
diff changeset
17
d56631911cc1 Uploaded
tyty
parents:
diff changeset
18 if map_type!="default":
d56631911cc1 Uploaded
tyty
parents:
diff changeset
19 s = ""
d56631911cc1 Uploaded
tyty
parents:
diff changeset
20 s = s+"-v "+sys.argv[8]
d56631911cc1 Uploaded
tyty
parents:
diff changeset
21 s = s+" -5 "+sys.argv[9]
d56631911cc1 Uploaded
tyty
parents:
diff changeset
22 s = s+" -3 "+sys.argv[10]
d56631911cc1 Uploaded
tyty
parents:
diff changeset
23 s = s+" -k "+sys.argv[11]
d56631911cc1 Uploaded
tyty
parents:
diff changeset
24 if sys.argv[12]:
d56631911cc1 Uploaded
tyty
parents:
diff changeset
25 s = s+" -a"
d56631911cc1 Uploaded
tyty
parents:
diff changeset
26 if int(sys.argv[13])>=1:
d56631911cc1 Uploaded
tyty
parents:
diff changeset
27 s = s+" -m "+sys.argv[13]
d56631911cc1 Uploaded
tyty
parents:
diff changeset
28 if sys.argv[14]:
d56631911cc1 Uploaded
tyty
parents:
diff changeset
29 s = s+" --best --strata "
d56631911cc1 Uploaded
tyty
parents:
diff changeset
30
d56631911cc1 Uploaded
tyty
parents:
diff changeset
31 else:
d56631911cc1 Uploaded
tyty
parents:
diff changeset
32 s = "-v 3 -a --best --strata "
d56631911cc1 Uploaded
tyty
parents:
diff changeset
33
d56631911cc1 Uploaded
tyty
parents:
diff changeset
34 ospath = os.path.realpath(sys.argv[0])
d56631911cc1 Uploaded
tyty
parents:
diff changeset
35 ost = ospath.split('/')
d56631911cc1 Uploaded
tyty
parents:
diff changeset
36 syspath = ""
d56631911cc1 Uploaded
tyty
parents:
diff changeset
37 for i in range(len(ost)-1):
d56631911cc1 Uploaded
tyty
parents:
diff changeset
38 syspath = syspath+ost[i].strip()
d56631911cc1 Uploaded
tyty
parents:
diff changeset
39 syspath = syspath+'/'
d56631911cc1 Uploaded
tyty
parents:
diff changeset
40
d56631911cc1 Uploaded
tyty
parents:
diff changeset
41 os.system("bowtie-build -f "+ref_file+" "+syspath+"ref > "+syspath+"log.txt")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
42
d56631911cc1 Uploaded
tyty
parents:
diff changeset
43 os.system("cp "+seq_file+" "+syspath+"seq0.fa")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
44
d56631911cc1 Uploaded
tyty
parents:
diff changeset
45 if type_input == "fasta":
d56631911cc1 Uploaded
tyty
parents:
diff changeset
46 tp = 'fasta'
d56631911cc1 Uploaded
tyty
parents:
diff changeset
47 if type_input == "fastq":
d56631911cc1 Uploaded
tyty
parents:
diff changeset
48 tp = 'fastq'
d56631911cc1 Uploaded
tyty
parents:
diff changeset
49
d56631911cc1 Uploaded
tyty
parents:
diff changeset
50 k = 0
d56631911cc1 Uploaded
tyty
parents:
diff changeset
51 print(type_input)
d56631911cc1 Uploaded
tyty
parents:
diff changeset
52 while(True):
d56631911cc1 Uploaded
tyty
parents:
diff changeset
53 if type_input == "fasta":
d56631911cc1 Uploaded
tyty
parents:
diff changeset
54 os.system("bowtie "+s+"-f "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
55 if type_input == "fastq":
d56631911cc1 Uploaded
tyty
parents:
diff changeset
56 os.system("bowtie "+s+"-q "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
57 os.system("samtools view -Sb -F 0xfff "+syspath+"map"+str(k)+".sam > "+syspath+"mapped"+str(k)+".bam 2>"+syspath+"log.txt") #get mapped reads
d56631911cc1 Uploaded
tyty
parents:
diff changeset
58 os.system("samtools view -Sb -f 0x4 "+syspath+"map"+str(k)+".sam > "+syspath+"umapped"+str(k)+".bam 2>"+syspath+"log.txt") #get unmapped reads
d56631911cc1 Uploaded
tyty
parents:
diff changeset
59 os.system("samtools view -Sb -f 0x10 "+syspath+"map"+str(k)+".sam > "+syspath+"rmapped"+str(k)+".bam 2>"+syspath+"log.txt") #get reversed mapped reads
d56631911cc1 Uploaded
tyty
parents:
diff changeset
60 os.system("samtools merge -f "+syspath+"unmapped"+str(k)+".bam "+syspath+"umapped"+str(k)+".bam "+syspath+"rmapped"+str(k)+".bam") #get reversed mapped reads
d56631911cc1 Uploaded
tyty
parents:
diff changeset
61 os.system("samtools view -h -o "+syspath+"unmapped"+str(k)+".sam "+syspath+"unmapped"+str(k)+".bam") #get reversed mapped reads
d56631911cc1 Uploaded
tyty
parents:
diff changeset
62 if k>0:
d56631911cc1 Uploaded
tyty
parents:
diff changeset
63 os.system("samtools view -h -o "+syspath+"mapped"+str(k)+".sam "+syspath+"mapped"+str(k)+".bam") #get reversed mapped reads
d56631911cc1 Uploaded
tyty
parents:
diff changeset
64 os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"unmapped"+str(k)+".txt")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
65 os.system("cut -f 1 "+syspath+"mapped"+str(k)+".sam > "+syspath+"mapped"+str(k)+".txt")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
66 os.system("python "+syspath+"remove_map.py "+syspath+"unmapped"+str(k)+".txt "+syspath+"mapped"+str(k)+".txt "+syspath+"runmapped"+str(k)+".txt")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
67 os.system("rm "+syspath+"mapped"+str(k)+".sam")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
68 os.system("rm "+syspath+"mapped"+str(k)+".txt")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
69 os.system("rm "+syspath+"unmapped"+str(k)+".txt")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
70 else:
d56631911cc1 Uploaded
tyty
parents:
diff changeset
71 os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"runmapped"+str(k)+".txt")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
72
d56631911cc1 Uploaded
tyty
parents:
diff changeset
73 os.system("rm "+syspath+"unmapped"+str(k)+".bam")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
74 os.system("rm "+syspath+"umapped"+str(k)+".bam")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
75 os.system("rm "+syspath+"rmapped"+str(k)+".bam")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
76 os.system("python "+syspath+"seq_track.py "+syspath+"runmapped"+str(k)+".txt "+syspath+"seq"+str(k)+".fa "+syspath+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence
d56631911cc1 Uploaded
tyty
parents:
diff changeset
77 os.system("python "+syspath+"truncate.py "+syspath+"unmap_seq"+str(k)+".fa "+shift+" "+syspath+"seq"+str(k+1)+".fa "+length) #truncate unmapped sequence
d56631911cc1 Uploaded
tyty
parents:
diff changeset
78 os.system("rm "+syspath+"seq"+str(k)+".fa") #Remove sequences being mapped
d56631911cc1 Uploaded
tyty
parents:
diff changeset
79 os.system("rm "+syspath+"map"+str(k)+".sam") #Remove mapping file
d56631911cc1 Uploaded
tyty
parents:
diff changeset
80 os.system("rm "+syspath+"unmap_seq"+str(k)+".fa") #Remove unmapped sequnce
d56631911cc1 Uploaded
tyty
parents:
diff changeset
81 os.system("rm "+syspath+"runmapped"+str(k)+".txt")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
82 os.system("rm "+syspath+"unmapped"+str(k)+".sam")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
83
d56631911cc1 Uploaded
tyty
parents:
diff changeset
84 os.system("wc -l "+syspath+"seq"+str(k+1)+".fa > "+syspath+"count"+str(k+1)+".txt")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
85 c = read_sp_file(syspath+"count"+str(k+1)+".txt")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
86 if c[0][0] == '0': #If no reads is in the sequence file, stop
d56631911cc1 Uploaded
tyty
parents:
diff changeset
87 os.system("rm "+syspath+"count"+str(k+1)+".txt")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
88 os.system("rm "+syspath+"seq"+str(k+1)+".fa")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
89 break
d56631911cc1 Uploaded
tyty
parents:
diff changeset
90 os.system("rm "+syspath+"count"+str(k+1)+".txt")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
91 k = k+1
d56631911cc1 Uploaded
tyty
parents:
diff changeset
92
d56631911cc1 Uploaded
tyty
parents:
diff changeset
93 ss = ""
d56631911cc1 Uploaded
tyty
parents:
diff changeset
94 for i in range(0,k+1):
d56631911cc1 Uploaded
tyty
parents:
diff changeset
95 ss = ss+" "+syspath+"mapped"+str(i)+".bam"
d56631911cc1 Uploaded
tyty
parents:
diff changeset
96
d56631911cc1 Uploaded
tyty
parents:
diff changeset
97
d56631911cc1 Uploaded
tyty
parents:
diff changeset
98 os.system("samtools merge -f "+output_file+" "+ss)
d56631911cc1 Uploaded
tyty
parents:
diff changeset
99 #print("samtools merge mapped_all.bam"+ss)
d56631911cc1 Uploaded
tyty
parents:
diff changeset
100 os.system("rm "+syspath+"mapped*.bam")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
101 os.system("rm "+syspath+"ref*")
d56631911cc1 Uploaded
tyty
parents:
diff changeset
102
d56631911cc1 Uploaded
tyty
parents:
diff changeset
103