#!/usr/bin/env python

"""
Extract important information from the standard output file and put it in some
standard format, like BED and tabular.

Usage: <script> <input> <bed_output> <tabular_output>
"""

import sys

ORGANISM_PREFIX = 'ORGANISM:'
CRISPR_PREFIX = 'CRISPR '


def convert(lines, bed, tabular):
    """Parse report lines and write BED and tabular records.

    Args:
        lines: iterable of text lines from the report (a file object works).
        bed: writable text stream; receives one ``organism<TAB>start<TAB>end``
            line for every ``CRISPR ... Range: start - end`` line.
        tabular: writable text stream; receives one six-column line for every
            input line that ends with ``]``.

    Raises:
        ValueError: if a CRISPR or repeat line appears before any
            ``ORGANISM:`` line, so no sequence name is known for the record.
    """
    organism = None
    for line in lines:
        # ORGANISM: gi|21226102|ref|NC_003901.1| Methanosarcina mazei Go1 chromosome, complete genome
        if line.startswith(ORGANISM_PREFIX):
            # Slice the prefix off. str.lstrip() treats its argument as a set
            # of characters and would also eat leading letters of the name
            # when no space follows the colon.
            organism = line[len(ORGANISM_PREFIX):].strip()
        # CRISPR 1 Range: 679197 - 682529
        elif line.startswith(CRISPR_PREFIX):
            if organism is None:
                raise ValueError('CRISPR line found before any ORGANISM line: %r' % line)
            start, end = [pos.strip() for pos in line.split('Range:')[1].split('-')]
            # NOTE(review): coordinates are written exactly as reported; BED is
            # 0-based, half-open -- confirm whether the start needs adjusting.
            bed.write('%s\t%s\t%s\n' % (organism, start, end))
        elif line.rstrip().endswith(']'):
            if organism is None:
                raise ValueError('Repeat line found before any ORGANISM line: %r' % line)
            cols = line.split()
            # Presumably cols 0-2 are position, repeat and spacer, and cols 4-5
            # are the two numbers inside the trailing "[ a, b ]" (cols[3] being
            # the "[" token) -- verify against the tool's output format.
            tabular.write("%s\t%s\t%s\t%s\t%s\t%s\n" % (
                organism, cols[0], cols[1], cols[2], cols[4].rstrip(','), cols[5]))


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: argument list shaped like ``sys.argv`` (program name, input
            path, BED output path, tabular output path). Defaults to
            ``sys.argv``.

    Returns:
        0 on success, 2 when too few arguments were supplied.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.stderr.write('Usage: <script> <input> <bed_output> <tabular_output>\n')
        return 2
    # Context managers guarantee all three files are flushed and closed, even
    # when parsing fails part-way through.
    with open(argv[2], 'w') as bed, open(argv[3], 'w') as tabular, open(argv[1]) as report:
        convert(report, bed, tabular)
    return 0


if __name__ == '__main__':
    sys.exit(main())