comparison infernal.py @ 8:c9e29ac5d099 draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit a812ed8de433ac4e8b42afd636e70cfbb180d2b9"
author bgruening
date Thu, 23 Sep 2021 19:38:58 +0000
parents 2c2c5e5e495b
children
comparison
equal deleted inserted replaced
7:477d829d3250 8:c9e29ac5d099
1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 2
3 from galaxy.datatypes.data import Text 3 import logging
4 from galaxy.datatypes.sniff import get_headers, get_test_fname 4 import os
5 from galaxy.datatypes.data import get_file_peek
6 import subprocess 5 import subprocess
7 import os
8 6
7 from galaxy.datatypes.data import get_file_peek, Text
9 from galaxy.datatypes.metadata import MetadataElement 8 from galaxy.datatypes.metadata import MetadataElement
10 from galaxy.datatypes import metadata
11 9
12 def count_special_lines( word, filename, invert = False ): 10
11 def count_special_lines(word, filename, invert=False):
13 """ 12 """
14 searching for special 'words' using the grep tool 13 searching for special 'words' using the grep tool
15 grep is used to speed up the searching and counting 14 grep is used to speed up the searching and counting
16 The number of hits is returned. 15 The number of hits is returned.
17 """ 16 """
18 try: 17 try:
19 cmd = ["grep", "-c"] 18 cmd = ["grep", "-c"]
20 if invert: 19 if invert:
21 cmd.append('-v') 20 cmd.append("-v")
22 cmd.extend([word, filename]) 21 cmd.extend([word, filename])
23 out = subprocess.Popen(cmd, stdout=subprocess.PIPE) 22 out = subprocess.Popen(cmd, stdout=subprocess.PIPE)
24 return int(out.communicate()[0].split()[0]) 23 return int(out.communicate()[0].split()[0])
25 except: 24 except Exception:
26 pass
27 return 0
28
29 def count_lines( filename, non_empty = False):
30 """
31 counting the number of lines from the 'filename' file
32 """
33 try:
34 if non_empty:
35 out = subprocess.Popen(['grep', '-cve', '^\s*$', filename], stdout=subprocess.PIPE)
36 else:
37 out = subprocess.Popen(['wc', '-l', filename], stdout=subprocess.PIPE)
38 return int(out.communicate()[0].split()[0])
39 except:
40 pass 25 pass
41 return 0 26 return 0
42 27
43 28
44 class Infernal_CM_1_1( Text ): 29 def count_lines(filename, non_empty=False):
30 """
31 counting the number of lines from the 'filename' file
32 """
33 try:
34 if non_empty:
35 out = subprocess.Popen(
36 ["grep", "-cve", "^\s*$", filename], stdout=subprocess.PIPE # noqa W605
37 )
38 else:
39 out = subprocess.Popen(["wc", "-l", filename], stdout=subprocess.PIPE)
40 return int(out.communicate()[0].split()[0])
41 except Exception:
42 pass
43 return 0
44
45
46 class Infernal_CM_1_1(Text):
45 file_ext = "cm" 47 file_ext = "cm"
46 48
47 MetadataElement( name="number_of_models", default=0, desc="Number of covariance models", readonly=True, visible=True, optional=True, no_value=0 ) 49 MetadataElement(
50 name="number_of_models",
51 default=0,
52 desc="Number of covariance models",
53 readonly=True,
54 visible=True,
55 optional=True,
56 no_value=0,
57 )
48 58
49 def set_peek( self, dataset, is_multi_byte=False ): 59 def set_peek(self, dataset, is_multi_byte=False):
50 if not dataset.dataset.purged: 60 if not dataset.dataset.purged:
51 dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) 61 dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
52 if (dataset.metadata.number_of_models == 1): 62 if dataset.metadata.number_of_models == 1:
53 dataset.blurb = "1 model" 63 dataset.blurb = "1 model"
54 else: 64 else:
55 dataset.blurb = "%s models" % dataset.metadata.number_of_models 65 dataset.blurb = "%s models" % dataset.metadata.number_of_models
56 dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) 66 dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
57 else: 67 else:
58 dataset.peek = 'file does not exist' 68 dataset.peek = "file does not exist"
59 dataset.blurb = 'file purged from disc' 69 dataset.blurb = "file purged from disc"
60 70
61 def sniff( self, filename ): 71 def sniff(self, filename):
62 if count_special_lines("^INFERNAL1/a", filename) > 0: 72 if count_special_lines("^INFERNAL1/a", filename) > 0:
63 return True 73 return True
64 else: 74 else:
65 return False 75 return False
66 76
67 def set_meta( self, dataset, **kwd ): 77 def set_meta(self, dataset, **kwd):
68 """ 78 """
69 Set the number of models in dataset. 79 Set the number of models in dataset.
70 """ 80 """
71 dataset.metadata.number_of_models = count_special_lines("^INFERNAL1/a", dataset.file_name) 81 dataset.metadata.number_of_models = count_special_lines(
82 "^INFERNAL1/a", dataset.file_name
83 )
72 84
73 def split( cls, input_datasets, subdir_generator_function, split_params): 85 def split(cls, input_datasets, subdir_generator_function, split_params):
74 """ 86 """
75 Split the input files by model records. 87 Split the input files by model records.
76 """ 88 """
77 if split_params is None: 89 if split_params is None:
78 return None 90 return None
80 if len(input_datasets) > 1: 92 if len(input_datasets) > 1:
81 raise Exception("CM-file splitting does not support multiple files") 93 raise Exception("CM-file splitting does not support multiple files")
82 input_files = [ds.file_name for ds in input_datasets] 94 input_files = [ds.file_name for ds in input_datasets]
83 95
84 chunk_size = None 96 chunk_size = None
85 if split_params['split_mode'] == 'number_of_parts': 97 if split_params["split_mode"] == "number_of_parts":
86 raise Exception('Split mode "%s" is currently not implemented for CM-files.' % split_params['split_mode']) 98 raise Exception(
87 elif split_params['split_mode'] == 'to_size': 99 'Split mode "%s" is currently not implemented for CM-files.'
88 chunk_size = int(split_params['split_size']) 100 % split_params["split_mode"]
101 )
102 elif split_params["split_mode"] == "to_size":
103 chunk_size = int(split_params["split_size"])
89 else: 104 else:
90 raise Exception('Unsupported split mode %s' % split_params['split_mode']) 105 raise Exception("Unsupported split mode %s" % split_params["split_mode"])
91 106
92 def _read_cm_records( filename ): 107 def _read_cm_records(filename):
93 lines = [] 108 lines = []
94 with open(filename) as handle: 109 with open(filename) as handle:
95 for line in handle: 110 for line in handle:
96 if line.startswith("INFERNAL1/a") and lines: 111 if line.startswith("INFERNAL1/a") and lines:
97 yield lines 112 yield lines
98 lines = [line] 113 lines = [line]
99 else: 114 else:
100 lines.append( line ) 115 lines.append(line)
101 yield lines 116 yield lines
102 117
103 def _write_part_cm_file( accumulated_lines ): 118 def _write_part_cm_file(accumulated_lines):
104 part_dir = subdir_generator_function() 119 part_dir = subdir_generator_function()
105 part_path = os.path.join( part_dir, os.path.basename( input_files[0] ) ) 120 part_path = os.path.join(part_dir, os.path.basename(input_files[0]))
106 part_file = open( part_path, 'w' ) 121 part_file = open(part_path, "w")
107 part_file.writelines( accumulated_lines ) 122 part_file.writelines(accumulated_lines)
108 part_file.close() 123 part_file.close()
109 124
110 try: 125 try:
111 cm_records = _read_cm_records( input_files[0] ) 126 cm_records = _read_cm_records(input_files[0])
112 cm_lines_accumulated = [] 127 cm_lines_accumulated = []
113 for counter, cm_record in enumerate( cm_records, start = 1): 128 for counter, cm_record in enumerate(cm_records, start=1):
114 cm_lines_accumulated.extend( cm_record ) 129 cm_lines_accumulated.extend(cm_record)
115 if counter % chunk_size == 0: 130 if counter % chunk_size == 0:
116 _write_part_cm_file( cm_lines_accumulated ) 131 _write_part_cm_file(cm_lines_accumulated)
117 cm_lines_accumulated = [] 132 cm_lines_accumulated = []
118 if cm_lines_accumulated: 133 if cm_lines_accumulated:
119 _write_part_cm_file( cm_lines_accumulated ) 134 _write_part_cm_file(cm_lines_accumulated)
120 except Exception, e: 135 except Exception as e:
121 log.error('Unable to split files: %s' % str(e)) 136 logging.error("Unable to split files: %s" % str(e))
122 raise 137 raise
138
123 split = classmethod(split) 139 split = classmethod(split)
124 140
125 if __name__ == '__main__': 141
142 if __name__ == "__main__":
126 Infernal_CM_1_1() 143 Infernal_CM_1_1()
127 Stockholm_1_0() 144 # Stockholm_1_0() # ???
128