| 0 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 import sys, tempfile, os | 
|  | 4 | 
|  | 5 assert sys.version_info[:2] >= (2.4) | 
|  | 6 | 
|  | 7 def stop_err(msg): | 
|  | 8     sys.stderr.write(msg) | 
|  | 9     sys.exit() | 
|  | 10 | 
|  | 11 def main(): | 
|  | 12 | 
|  | 13     out_fname = sys.argv[1] | 
|  | 14     in_fname = sys.argv[2] | 
|  | 15     chr_col = int(sys.argv[3])-1 | 
|  | 16     coord_col = int(sys.argv[4])-1 | 
|  | 17     track_type = sys.argv[5] | 
|  | 18     if track_type == 'coverage' or track_type == 'both': | 
|  | 19         coverage_col = int(sys.argv[6])-1 | 
|  | 20         cname = sys.argv[7] | 
|  | 21         cdescription = sys.argv[8] | 
|  | 22         ccolor = sys.argv[9].replace('-',',') | 
|  | 23         cvisibility = sys.argv[10] | 
|  | 24     if track_type == 'snp' or track_type == 'both': | 
|  | 25         if track_type == 'both': | 
|  | 26             j = 5 | 
|  | 27         else: | 
|  | 28             j = 0 | 
|  | 29         #sname = sys.argv[7+j] | 
|  | 30         sdescription = sys.argv[6+j] | 
|  | 31         svisibility = sys.argv[7+j] | 
|  | 32         #ref_col = int(sys.argv[10+j])-1 | 
|  | 33         read_col = int(sys.argv[8+j])-1 | 
|  | 34 | 
|  | 35 | 
|  | 36     # Sort the input file based on chromosome (alphabetically) and start co-ordinates (numerically) | 
|  | 37     sorted_infile = tempfile.NamedTemporaryFile() | 
|  | 38     try: | 
|  | 39         os.system("sort -k %d,%d -k %dn -o %s %s" %(chr_col+1,chr_col+1,coord_col+1,sorted_infile.name,in_fname)) | 
|  | 40     except Exception, exc: | 
|  | 41         stop_err( 'Initialization error -> %s' %str(exc) ) | 
|  | 42 | 
|  | 43     #generate chr list | 
|  | 44     sorted_infile.seek(0) | 
|  | 45     chr_vals = [] | 
|  | 46     for line in file( sorted_infile.name ): | 
|  | 47         line = line.strip() | 
|  | 48         if not(line): | 
|  | 49             continue | 
|  | 50         try: | 
|  | 51             fields = line.split('\t') | 
|  | 52             chr = fields[chr_col] | 
|  | 53             if chr not in chr_vals: | 
|  | 54                 chr_vals.append(chr) | 
|  | 55         except: | 
|  | 56             pass | 
|  | 57     if not(chr_vals): | 
|  | 58         stop_err("Skipped all lines as invalid.") | 
|  | 59 | 
|  | 60     if track_type == 'coverage' or track_type == 'both': | 
|  | 61         if track_type == 'coverage': | 
|  | 62             fout = open( out_fname, "w" ) | 
|  | 63         else: | 
|  | 64             fout = tempfile.NamedTemporaryFile() | 
|  | 65         fout.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \ | 
|  | 66                       % ( cname, cdescription, ccolor, cvisibility )) | 
|  | 67     if track_type == 'snp' or track_type == 'both': | 
|  | 68         fout_a = tempfile.NamedTemporaryFile() | 
|  | 69         fout_t = tempfile.NamedTemporaryFile() | 
|  | 70         fout_g = tempfile.NamedTemporaryFile() | 
|  | 71         fout_c = tempfile.NamedTemporaryFile() | 
|  | 72         fout_ref = tempfile.NamedTemporaryFile() | 
|  | 73 | 
|  | 74         fout_a.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \ | 
|  | 75                       % ( "Track A", sdescription, '255,0,0', svisibility )) | 
|  | 76         fout_t.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \ | 
|  | 77                       % ( "Track T", sdescription, '0,255,0', svisibility )) | 
|  | 78         fout_g.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \ | 
|  | 79                       % ( "Track G", sdescription, '0,0,255', svisibility )) | 
|  | 80         fout_c.write('''track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\n''' \ | 
|  | 81                       % ( "Track C", sdescription, '255,0,255', svisibility )) | 
|  | 82 | 
|  | 83 | 
|  | 84     sorted_infile.seek(0) | 
|  | 85     for line in file( sorted_infile.name ): | 
|  | 86         line = line.strip() | 
|  | 87         if not(line): | 
|  | 88             continue | 
|  | 89         try: | 
|  | 90             fields = line.split('\t') | 
|  | 91             chr = fields[chr_col] | 
|  | 92             start = int(fields[coord_col]) | 
|  | 93             assert start > 0 | 
|  | 94         except: | 
|  | 95             continue | 
|  | 96         try: | 
|  | 97             ind = chr_vals.index(chr)    #encountered chr for the 1st time | 
|  | 98             del chr_vals[ind] | 
|  | 99             prev_start = '' | 
|  | 100             header = "variableStep chrom=%s\n" %(chr) | 
|  | 101             if track_type == 'coverage' or track_type == 'both': | 
|  | 102                 coverage = int(fields[coverage_col]) | 
|  | 103                 line1 = "%s\t%s\n" %(start,coverage) | 
|  | 104                 fout.write("%s%s" %(header,line1)) | 
|  | 105             if track_type == 'snp' or track_type == 'both': | 
|  | 106                 a = t = g = c = 0 | 
|  | 107                 fout_a.write("%s" %(header)) | 
|  | 108                 fout_t.write("%s" %(header)) | 
|  | 109                 fout_g.write("%s" %(header)) | 
|  | 110                 fout_c.write("%s" %(header)) | 
|  | 111                 try: | 
|  | 112                     #ref_nt = fields[ref_col].capitalize() | 
|  | 113                     read_nt = fields[read_col].capitalize() | 
|  | 114                     try: | 
|  | 115                         nt_ind = ['A','T','G','C'].index(read_nt) | 
|  | 116                         if nt_ind == 0: | 
|  | 117                             a+=1 | 
|  | 118                         elif nt_ind == 1: | 
|  | 119                             t+=1 | 
|  | 120                         elif nt_ind == 2: | 
|  | 121                             g+=1 | 
|  | 122                         else: | 
|  | 123                             c+=1 | 
|  | 124                     except ValueError: | 
|  | 125                         pass | 
|  | 126                 except: | 
|  | 127                     pass | 
|  | 128             prev_start = start | 
|  | 129         except ValueError: | 
|  | 130             if start != prev_start: | 
|  | 131                 if track_type == 'coverage' or track_type == 'both': | 
|  | 132                     coverage = int(fields[coverage_col]) | 
|  | 133                     fout.write("%s\t%s\n" %(start,coverage)) | 
|  | 134                 if track_type == 'snp' or track_type == 'both': | 
|  | 135                     if a: | 
|  | 136                         fout_a.write("%s\t%s\n" %(prev_start,a)) | 
|  | 137                     if t: | 
|  | 138                         fout_t.write("%s\t%s\n" %(prev_start,t)) | 
|  | 139                     if g: | 
|  | 140                         fout_g.write("%s\t%s\n" %(prev_start,g)) | 
|  | 141                     if c: | 
|  | 142                         fout_c.write("%s\t%s\n" %(prev_start,c)) | 
|  | 143                     a = t = g = c = 0 | 
|  | 144                     try: | 
|  | 145                         #ref_nt = fields[ref_col].capitalize() | 
|  | 146                         read_nt = fields[read_col].capitalize() | 
|  | 147                         try: | 
|  | 148                             nt_ind = ['A','T','G','C'].index(read_nt) | 
|  | 149                             if nt_ind == 0: | 
|  | 150                                 a+=1 | 
|  | 151                             elif nt_ind == 1: | 
|  | 152                                 t+=1 | 
|  | 153                             elif nt_ind == 2: | 
|  | 154                                 g+=1 | 
|  | 155                             else: | 
|  | 156                                 c+=1 | 
|  | 157                         except ValueError: | 
|  | 158                             pass | 
|  | 159                     except: | 
|  | 160                         pass | 
|  | 161                 prev_start = start | 
|  | 162             else: | 
|  | 163                 if track_type == 'snp' or track_type == 'both': | 
|  | 164                     try: | 
|  | 165                         #ref_nt = fields[ref_col].capitalize() | 
|  | 166                         read_nt = fields[read_col].capitalize() | 
|  | 167                         try: | 
|  | 168                             nt_ind = ['A','T','G','C'].index(read_nt) | 
|  | 169                             if nt_ind == 0: | 
|  | 170                                 a+=1 | 
|  | 171                             elif nt_ind == 1: | 
|  | 172                                 t+=1 | 
|  | 173                             elif nt_ind == 2: | 
|  | 174                                 g+=1 | 
|  | 175                             else: | 
|  | 176                                 c+=1 | 
|  | 177                         except ValueError: | 
|  | 178                             pass | 
|  | 179                     except: | 
|  | 180                         pass | 
|  | 181 | 
|  | 182     if track_type == 'snp' or track_type == 'both': | 
|  | 183         if a: | 
|  | 184             fout_a.write("%s\t%s\n" %(prev_start,a)) | 
|  | 185         if t: | 
|  | 186             fout_t.write("%s\t%s\n" %(prev_start,t)) | 
|  | 187         if g: | 
|  | 188             fout_g.write("%s\t%s\n" %(prev_start,g)) | 
|  | 189         if c: | 
|  | 190             fout_c.write("%s\t%s\n" %(prev_start,c)) | 
|  | 191 | 
|  | 192         fout_a.seek(0) | 
|  | 193         fout_g.seek(0) | 
|  | 194         fout_t.seek(0) | 
|  | 195         fout_c.seek(0) | 
|  | 196 | 
|  | 197     if track_type == 'snp': | 
|  | 198         os.system("cat %s %s %s %s >> %s" %(fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_fname)) | 
|  | 199     elif track_type == 'both': | 
|  | 200         fout.seek(0) | 
|  | 201         os.system("cat %s %s %s %s %s | cat > %s" %(fout.name,fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_fname)) | 
|  | 202 if __name__ == "__main__": | 
|  | 203     main() |