annotate tools/data_source/ucsc_filter.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 # runs after the job (and after the default post-filter)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 from galaxy import datatypes, jobs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def validate(incoming):
    """Validator hook for the incoming tool parameters.

    No checks are performed: every value of ``incoming`` is accepted and
    the function always returns ``None``.
    """
    return None
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def exec_before_job( app, inp_data, out_data, param_dict, tool=None):
    """Set the name, dbkey and datatype of every output dataset.

    Parameters:
        app: object exposing ``datatypes_registry.change_datatype(data, ext)``.
        inp_data: not used by this hook.
        out_data: dict mapping output names to dataset objects; each value is
            replaced by whatever ``change_datatype`` returns for it.
        param_dict: dict of request parameters. ``display`` and ``dbkey`` are
            copied onto each dataset (falling back to the current name and
            ``'???'``), and ``hgta_outputType`` together with the
            ``hgta_do*`` keys selects the datatype extension.
        tool: not used by this hook.
    """
    outputType = param_dict.get('hgta_outputType', None)
    # The parameter may arrive as a list of values; the last one is used.
    if isinstance(outputType, list) and len(outputType) > 0:
        outputType = outputType[-1]

    # The extension depends only on param_dict, so it is resolved once here
    # rather than once per output dataset.
    explicit_types = (
        ('wigData', 'wig'),
        ('maf', 'maf'),
        ('gff', 'gff'),
        ('gff3', 'gff3'),
    )
    # Checked in this order when the output type is not one of the above.
    request_flags = (
        ('hgta_doPrintSelectedFields', 'interval'),
        ('hgta_doGetBed', 'bed'),
        ('hgta_doGenomicDna', 'fasta'),
        ('hgta_doGenePredSequence', 'fasta'),
    )
    ext = "interval"
    for type_name, type_ext in explicit_types:
        # Compared with == (not a dict lookup) so an unhashable value such as
        # an empty list simply falls through to the defaults.
        if outputType == type_name:
            ext = type_ext
            break
    else:
        for flag, flag_ext in request_flags:
            if flag in param_dict:
                ext = flag_ext
                break

    # Iterate over a snapshot: out_data is reassigned inside the loop.
    for name, data in list(out_data.items()):
        data.name = param_dict.get('display', data.name)
        data.dbkey = param_dict.get('dbkey', '???')
        out_data[name] = app.datatypes_registry.change_datatype(data, ext)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
    """Check every output dataset after the run and record any problem found.

    For each dataset in ``out_data`` the size and peek are refreshed and the
    file at ``data.file_name`` is scanned for lines beginning with ``'-'``:

    * a ``'-'`` line exactly three lines before the end of the file, when no
      earlier ``'-'`` line was seen, is reported as a truncation warning and
      stops the scan;
    * any other ``'-'`` line is appended to an ``Errors:`` message together
      with its line number.

    An ``Interval`` dataset with missing metadata is switched to ``tabular``
    and stored back into ``out_data``.

    Any exception raised while processing a dataset, including the
    error/warning message built above, is appended to ``data.info`` and
    ``data.blurb`` is set to ``"error"``; nothing is propagated to the caller.

    Parameters:
        app: object exposing ``datatypes_registry.change_datatype(data, ext)``.
        out_data: dict mapping output names to dataset objects.
        inp_data, param_dict, tool, stdout, stderr: not used by this hook.
    """
    # Iterate over a snapshot: out_data may be reassigned inside the loop.
    for name, data in list(out_data.items()):
        data.set_size()
        try:
            err_msg, err_flag = 'Errors:', False
            # First pass only counts lines; stream it instead of loading the
            # whole file, and close the handle when done.
            with open(data.file_name) as handle:
                num_lines = sum(1 for _ in handle)
            with open(data.file_name) as handle:
                for line_count, line in enumerate(handle, 1):
                    if not line.startswith('-'):
                        continue
                    if line_count + 3 == num_lines and not err_flag:
                        err_flag = True
                        err_msg = "Warning: It appears that your results have been truncated by UCSC. View the bottom of your result file for details."
                        break
                    err_flag = True
                    err_msg = err_msg + " (line " + str(line_count) + ")" + line
            data.set_peek()
            if isinstance(data.datatype, datatypes.interval.Interval) and data.missing_meta():
                data = app.datatypes_registry.change_datatype(data, 'tabular')
                out_data[name] = data
            if err_flag:
                raise Exception(err_msg)
        except Exception as exc:
            # Deliberately broad: this hook records failures on the dataset
            # instead of raising. data.info may be unset (None).
            data.info = (data.info or "") + "\n" + str(exc)
            data.blurb = "error"