Mercurial > repos > brenninc > subread_featurecounts1_5_0_p1
comparison name_changer.py @ 0:58ad7b512590 draft default tip
Uploaded
| author | brenninc |
|---|---|
| date | Thu, 12 May 2016 09:48:15 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:58ad7b512590 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import optparse | |
| 4 import os.path | |
| 5 | |
| 6 | |
def fix_header_line(start_header, header_line, new_names):
    """Build a replacement header line that uses new_names for the file columns.

    The header is split on tabs. Its first len(start_header) columns must equal
    start_header exactly; every remaining column is treated as a per-file
    column and is replaced, in order, by the matching entry of new_names.

    Parameters:
        start_header: list of the fixed leading column names.
        header_line: the raw, tab separated header line read from the file.
        new_names: list of replacement names, one per file column.

    Returns:
        The new header: start_header followed by new_names, tab separated and
        terminated with a newline.

    Raises:
        Exception: if the line has no file columns, does not start with
            start_header, or has a different number of file columns than
            there are new_names.
    """
    header_parts = header_line.split("\t")
    fixed_count = len(start_header)
    if len(header_parts) <= fixed_count:
        raise Exception(
            "Only found {0} columns in second (header) line expected at least {1}.".format(
                len(header_parts), fixed_count + 1))
    data_headers = header_parts[:fixed_count]
    if data_headers != start_header:
        # Report what was actually read; the message used to end at "Found: ".
        raise Exception(
            "Unexpected start to second (header) line Found: {0} Expected: {1}".format(
                data_headers, start_header))
    file_headers = header_parts[fixed_count:]
    if len(file_headers) != len(new_names):
        raise Exception(
            "Found {0} file columns in header line, but {1} new_name paramters provided.".format(
                len(file_headers), len(new_names)))
    return "\t".join(list(start_header) + list(new_names)) + "\n"
| 23 | |
| 24 | |
def clean_names(prefix, old_names):
    """Shorten names by dropping the text they all share, then add prefix.

    Each name is stripped of surrounding whitespace. When more than one name
    is given, the longest leading text common to every name and the longest
    trailing text common to every name are removed. A single name (or an
    empty list) is only stripped and prefixed.

    The shared ending is measured on what is left after the shared start has
    been removed, so the two can never overlap. Measuring both on the full
    names made them overlap on the shorter name (for example "rep1" and
    "rep11"), which collapsed both names to the same empty string.

    Parameters:
        prefix: text placed in front of every resulting name.
        old_names: list of the original names.

    Returns:
        A list of new names, in the same order as old_names. A name that is
        made up entirely of shared text (for example identical names) still
        comes out as just the prefix.
    """
    stripped = [name.strip() for name in old_names]
    if len(stripped) > 1:
        # commonprefix compares character by character, not by path component.
        start = len(os.path.commonprefix(stripped))
        remainders = [name[start:] for name in stripped]
        # A common suffix is the common prefix of the reversed strings.
        end = len(os.path.commonprefix([rest[::-1] for rest in remainders]))
    else:
        start = 0
        end = 0
    return [prefix + name[start:len(name) - end] for name in stripped]
| 48 | |
| 49 | |
def _rewrite_header(raw_path, fixed_path, start_header, new_names, skip_lines=0):
    """Copy raw_path to fixed_path, replacing the header's file column names.

    The first skip_lines lines of raw_path are dropped, the next line is
    rewritten with fix_header_line, and all remaining lines are copied as is.
    The header is checked before fixed_path is opened, so a bad header does
    not leave an empty output file behind.
    """
    with open(raw_path, "r") as input_file:
        for _ in range(skip_lines):
            input_file.readline()
        header_line = fix_header_line(start_header, input_file.readline(), new_names)
        with open(fixed_path, "w") as output_file:
            output_file.write(header_line)
            for line in input_file:
                output_file.write(line)


def main():
    """Rename the per-file columns of a counts file and its summary file.

    The new names come either from --names_file (one name per line, cleaned
    with clean_names and given --names_prefix) or from repeated --new_name
    options, which are used exactly as given. Problems with the command line
    are reported through parser.error, which prints usage and exits.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option("--raw_count_file", action="store", type="string", default=None, help="path to file original with the counts")
    parser.add_option("--fixed_count_file", action="store", type="string", default=None, help="new path for renamaned counts file")
    parser.add_option("--raw_summary_file", action="store", type="string", default=None, help="path to file original with the summary")
    parser.add_option("--fixed_summary_file", action="store", type="string", default=None, help="new path for renamaned summary file")
    parser.add_option("--names_file", action="store", type="string", default=None, help="path to file which contains the names.")
    parser.add_option("--new_name", action="append", type="string", default=None,
                      help="Names to be used. Must be the same length as in the raw_count_file")
    parser.add_option("--names_prefix", action="store", type="string", default="", help="Prefix to add in from of every name.")

    (options, args) = parser.parse_args()

    # All four paths are used unconditionally below; without this check an
    # omitted option reaches os.path.exists/open as None and ends in a traceback.
    for option_name in ("raw_count_file", "fixed_count_file", "raw_summary_file", "fixed_summary_file"):
        if not getattr(options, option_name):
            parser.error("Missing required parameter --{0}.".format(option_name))

    if not os.path.exists(options.raw_count_file):
        parser.error("Unable to find raw_count_file {0}.".format(options.raw_count_file))
    # Checked up front so a missing summary is reported before anything is written.
    if not os.path.exists(options.raw_summary_file):
        parser.error("Unable to find raw_summary_file {0}.".format(options.raw_summary_file))

    if options.names_file:
        if options.new_name:
            parser.error("names_file parameter clashes with new_names paramter(s)")
        if not os.path.exists(options.names_file):
            parser.error("Unable to find names_file {0}.".format(options.names_file))
        new_names = []
        with open(options.names_file, "r") as names_file:
            for line in names_file:
                name = line.strip()
                # A blank line (typically a trailing one) is not a name; kept as
                # "" it would disable trimming and break the column count.
                if name:
                    new_names.append(name)
        new_names = clean_names(options.names_prefix, new_names)
    else:
        if not options.new_name:
            parser.error("No names_file or new_name paraters provided.")
        # NOTE(review): names_prefix is not applied to --new_name values; this
        # matches the existing behaviour - confirm that it is intended.
        new_names = options.new_name

    # Single-argument print emits the same text on Python 2 and Python 3.
    print("Changing column names to  {0}".format(new_names))

    # The counts file starts with a job line, which is dropped from the output.
    _rewrite_header(options.raw_count_file, options.fixed_count_file,
                    ["Geneid", "Chr", "Start", "End", "Strand", "Length"],
                    new_names, skip_lines=1)
    # The summary file starts directly with its header line.
    _rewrite_header(options.raw_summary_file, options.fixed_summary_file,
                    ["Status"], new_names)
| 100 | |
# Run the renaming only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
