Mercurial > repos > devteam > cufflinks
comparison mass.py @ 12:d080005cffe1 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/cufflinks/cufflinks commit a0b0845a9d1b3e7ecdeacd1e606133617e3918bd"
author | iuc |
---|---|
date | Tue, 16 Jun 2020 13:00:32 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
11:e04dbae2abe0 | 12:d080005cffe1 |
---|---|
1 import shutil | |
2 import sys | |
3 import tempfile | |
4 | |
5 | |
6 def parse_gff_attributes(attr_str): | |
7 """ | |
8 Parses a GFF/GTF attribute string and returns a dictionary of name-value | |
9 pairs. The general format for a GFF3 attributes string is | |
10 | |
11 name1=value1;name2=value2 | |
12 | |
13 The general format for a GTF attribute string is | |
14 | |
15 name1 "value1" ; name2 "value2" | |
16 | |
17 The general format for a GFF attribute string is a single string that | |
18 denotes the interval's group; in this case, method returns a dictionary | |
19 with a single key-value pair, and key name is 'group' | |
20 """ | |
21 attributes_list = attr_str.split(";") | |
22 attributes = {} | |
23 for name_value_pair in attributes_list: | |
24 # Try splitting by '=' (GFF3) first because spaces are allowed in GFF3 | |
25 # attribute; next, try double quotes for GTF. | |
26 pair = name_value_pair.strip().split("=") | |
27 if len(pair) == 1: | |
28 pair = name_value_pair.strip().split("\"") | |
29 if len(pair) == 1: | |
30 # Could not split for some reason -- raise exception? | |
31 continue | |
32 if pair == '': | |
33 continue | |
34 name = pair[0].strip() | |
35 if name == '': | |
36 continue | |
37 # Need to strip double quote from values | |
38 value = pair[1].strip(" \"") | |
39 attributes[name] = value | |
40 | |
41 if len(attributes) == 0: | |
42 # Could not split attributes string, so entire string must be | |
43 # 'group' attribute. This is the case for strictly GFF files. | |
44 attributes['group'] = attr_str | |
45 return attributes | |
46 | |
47 | |
48 def gff_attributes_to_str(attrs, gff_format): | |
49 """ | |
50 Convert GFF attributes to string. Supported formats are GFF3, GTF. | |
51 """ | |
52 if gff_format == 'GTF': | |
53 format_string = '%s "%s"' | |
54 # Convert group (GFF) and ID, parent (GFF3) attributes to transcript_id, gene_id | |
55 id_attr = None | |
56 if 'group' in attrs: | |
57 id_attr = 'group' | |
58 elif 'ID' in attrs: | |
59 id_attr = 'ID' | |
60 elif 'Parent' in attrs: | |
61 id_attr = 'Parent' | |
62 if id_attr: | |
63 attrs['transcript_id'] = attrs['gene_id'] = attrs[id_attr] | |
64 elif gff_format == 'GFF3': | |
65 format_string = '%s=%s' | |
66 attrs_strs = [] | |
67 for name, value in attrs.items(): | |
68 attrs_strs.append(format_string % (name, value)) | |
69 return " ; ".join(attrs_strs) | |
70 | |
71 | |
72 stderr = sys.argv[1] | |
73 global_model_file_name = sys.argv[2] | |
74 transcripts = sys.argv[3] | |
75 | |
76 # Read standard error to get total map/upper quartile mass. | |
77 total_map_mass = -1 | |
78 with open(stderr, 'r') as tmp_stderr2: | |
79 for line in tmp_stderr2: | |
80 if line.lower().find("map mass") >= 0 or line.lower().find("upper quartile") >= 0: | |
81 total_map_mass = float(line.split(":")[1].strip()) | |
82 break | |
83 | |
84 if global_model_file_name != "None": | |
85 # Global model is simply total map mass from original run. | |
86 with open(global_model_file_name, 'r') as global_model_file: | |
87 global_model_total_map_mass = float(global_model_file.readline()) | |
88 | |
89 # Ratio of global model's total map mass to original run's map mass is | |
90 # factor used to adjust FPKM. | |
91 fpkm_map_mass_ratio = total_map_mass / global_model_total_map_mass | |
92 | |
93 # Update FPKM values in transcripts.gtf file. | |
94 with open(transcripts, 'r') as transcripts_file: | |
95 with tempfile.NamedTemporaryFile(dir=".", delete=False) as new_transcripts_file: | |
96 for line in transcripts_file: | |
97 fields = line.split('\t') | |
98 attrs = parse_gff_attributes(fields[8]) | |
99 attrs["FPKM"] = str(float(attrs["FPKM"]) * fpkm_map_mass_ratio) | |
100 attrs["conf_lo"] = str(float(attrs["conf_lo"]) * fpkm_map_mass_ratio) | |
101 attrs["conf_hi"] = str(float(attrs["conf_hi"]) * fpkm_map_mass_ratio) | |
102 fields[8] = gff_attributes_to_str(attrs, "GTF") | |
103 new_transcripts_file.write("%s\n" % '\t'.join(fields)) | |
104 shutil.move(new_transcripts_file.name, transcripts) | |
105 | |
106 if total_map_mass > -1: | |
107 with open("global_model.txt", 'w') as f: | |
108 f.write("%f\n" % total_map_mass) |