comparison rsem.py @ 0:e5e836936d60 draft

planemo upload for repository https://github.com/artbio/tools-artbio/tree/master/tools/rsem commit d84a0359354698a4b29df12ab581c2618bffcf80
author artbio
date Sat, 31 Mar 2018 21:30:07 -0400
parents
children 49795544dac7
comparison
equal deleted inserted replaced
-1:000000000000 0:e5e836936d60
1 """
2 RSEM datatypes
3 """
4 import logging
5 import os
6 import os.path
7
8 from galaxy.datatypes.images import Html
9 from galaxy.datatypes.metadata import MetadataElement
10 from galaxy.datatypes.sniff import get_headers
11 from galaxy.datatypes.tabular import Tabular
12
13
14 log = logging.getLogger(__name__)
15
16
class RsemIsoformsResults(Tabular):
    """
    Tabular datatype for RSEM isoform-level results files.

    required columns:
    transcript_id gene_id length effective_length expected_count TPM
    FPKM IsoPct
    optional columns:
    pme_expected_count pme_TPM pme_FPKM IsoPct_from_pme_TPM TPM_ci_lower_bound
    TPM_ci_upper_bound FPKM_ci_lower_bound FPKM_ci_upper_bound
    """
    file_ext = "rsem.isoforms.results"

    def __init__(self, **kwd):
        """Initialize RsemResults datatype"""
        Tabular.__init__(self, **kwd)
        # sniff() expects the column names on the first line of the file.
        self.comment_lines = 1

    def sniff(self, filename):
        """
        Return True when the first line of `filename` looks like an RSEM
        isoforms header: at least 8 tab-separated fields, starting with
        "transcript_id" and "gene_id", with "FPKM" as the 7th field.
        """
        # Split on tabs: splitting a single line on '\n' would always give
        # a one-element list, so the column checks below could never pass.
        headers = get_headers(filename, '\t', count=1)
        if not headers:
            return False
        columns = headers[0]
        return (len(columns) >= 8 and
                columns[0] == "transcript_id" and
                columns[1] == "gene_id" and
                columns[6] == "FPKM")

    def set_meta(self, dataset, **kwd):
        """Delegate to Tabular.set_meta, always passing skip=None."""
        Tabular.set_meta(self, dataset, skip=None, **kwd)
41
42
class RsemGenesResults(Tabular):
    """
    Tabular datatype for RSEM gene-level results files.

    required columns:
    gene_id transcript_id(s) length effective_length expected_count TPM FPKM
    optional columns:
    pme_expected_count pme_TPM pme_FPKM TPM_ci_lower_bound TPM_ci_upper_bound
    FPKM_ci_lower_bound FPKM_ci_upper_bound
    """
    file_ext = "rsem.genes.results"

    def __init__(self, **kwd):
        """Initialize RsemResults datatype"""
        Tabular.__init__(self, **kwd)
        # sniff() expects the column names on the first line of the file.
        self.comment_lines = 1

    def sniff(self, filename):
        """
        Return True when the first line of `filename` looks like an RSEM
        genes header: at least 7 tab-separated fields, starting with
        "gene_id", followed by a field beginning with "transcript_id",
        with "FPKM" as the 7th field.
        """
        # Split on tabs: splitting a single line on '\n' would always give
        # a one-element list, so the column checks below could never pass.
        headers = get_headers(filename, '\t', count=1)
        if not headers:
            return False
        columns = headers[0]
        return (len(columns) >= 7 and
                columns[0] == "gene_id" and
                columns[1].startswith("transcript_id") and
                columns[6] == "FPKM")

    def set_meta(self, dataset, **kwd):
        """Delegate to Tabular.set_meta, always passing skip=None."""
        Tabular.set_meta(self, dataset, skip=None, **kwd)
67
68
class RsemReference(Html):
    """Class describing an RSEM reference"""
    MetadataElement(name='reference_name', default='rsem_ref',
                    desc='RSEM Reference Name', readonly=True, visible=True,
                    set_in_upload=True, no_value='rsem_ref')
    file_ext = 'rsem_ref'
    allow_datatype_change = False
    composite_type = 'auto_primary_file'

    def __init__(self, **kwd):
        """
        Register the composite files that make up an RSEM reference.

        Expecting files:
        extra_files_path/<reference_name>.grp
        extra_files_path/<reference_name>.ti
        extra_files_path/<reference_name>.seq
        extra_files_path/<reference_name>.transcripts.fa
        Optionally includes files:
        extra_files_path/<reference_name>.chrlist
        extra_files_path/<reference_name>.idx.fa
        extra_files_path/<reference_name>.1.ebwt
        extra_files_path/<reference_name>.2.ebwt
        extra_files_path/<reference_name>.3.ebwt
        extra_files_path/<reference_name>.4.ebwt
        extra_files_path/<reference_name>.rev.1.ebwt
        extra_files_path/<reference_name>.rev.2.ebwt
        """
        Html.__init__(self, **kwd)
        # Required text files: (name pattern, description).
        required_files = (
            ('%s.grp', 'Group File'),
            ('%s.ti', ''),
            ('%s.seq', ''),
            ('%s.transcripts.fa', ''),
        )
        for pattern, description in required_files:
            self.add_composite_file(
                pattern, description=description,
                substitute_name_with_metadata='reference_name',
                is_binary=False)
        # Optional files: (name pattern, is_binary).
        optional_files = (
            ('%s.chrlist', False),
            ('%s.idx.fa', False),
            ('%s.1.ebwt', True),
            ('%s.2.ebwt', True),
            ('%s.3.ebwt', True),
            ('%s.4.ebwt', True),
            ('%s.rev.1.ebwt', True),
            ('%s.rev.2.ebwt', True),
        )
        for pattern, is_binary in optional_files:
            self.add_composite_file(
                pattern, description='',
                substitute_name_with_metadata='reference_name',
                is_binary=is_binary, optional=True)

    def generate_primary_file(self, dataset=None):
        """
        This is called only at upload to write the file
        cannot rename the datasets here - they come with
        the default unfortunately
        """

    def regenerate_primary_file(self, dataset):
        """
        Rewrite the primary HTML file as a listing of the extra files.

        cannot do this until we are setting metadata

        Files whose extension is in `link_to_exts` are written as links and
        listed first; all other files follow as plain list items.
        """
        link_to_exts = ('.grp', '.ti', '.seq', '.fa', '.chrlist', '.log')
        ref_name = dataset.metadata.reference_name
        linked = []
        plain = []
        # os.listdir() returns entries in arbitrary order; sort them so the
        # generated page is the same on every run.
        for fname in sorted(os.listdir(dataset.extra_files_path)):
            shown = os.path.basename(fname)
            ext = os.path.splitext(fname)[1]
            if ext in link_to_exts:
                linked.append('<li><a href="%s">%s</a></li>' % (shown, shown))
            else:
                plain.append('<li>%s</li>' % shown)
        # NOTE(review): dataset.name and the file names are written into the
        # HTML unescaped - confirm whether escaping is needed here.
        header_template = ('<html><head><title>%s</title></head><body><p/>'
                           'RSEM Reference %s files:<p/><ul>')
        rval = [header_template % (dataset.name, ref_name)]
        rval.extend(linked)
        rval.extend(plain)
        rval.append('</ul></body></html>')
        # open() replaces the Python 2-only file() builtin; the with-block
        # closes the handle even if a write fails.
        with open(dataset.file_name, 'w') as fh:
            fh.write("\n".join(rval))
            fh.write('\n')

    def set_meta(self, dataset, **kwd):
        """
        Set metadata, take the reference name from the '.grp' file found in
        the extra files directory, then regenerate the primary HTML file.

        If no '.grp' file is present, reference_name is left unchanged.
        """
        Html.set_meta(self, dataset, **kwd)
        grp_suffix = '.grp'
        # Sorted so the same '.grp' file is chosen every time if several exist.
        for fname in sorted(os.listdir(dataset.extra_files_path)):
            if fname.endswith(grp_suffix):
                dataset.metadata.reference_name = fname[:-len(grp_suffix)]
                break
        self.regenerate_primary_file(dataset)