comparison scripts/functions.py @ 0:acc3674e515b draft default tip

planemo upload for repository https://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author abims-sbr
date Fri, 01 Feb 2019 10:28:50 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:acc3674e515b
1 #!/usr/bin/env python
2 #coding: utf-8
3
4 import itertools, os
5
def dico(fasta_file, path_in):
    """
    Stores a fasta file in a dictionary : key/value -> header/sequence

    The file is read two lines at a time (one header line followed by one
    sequence line), so each sequence must be written on a single line.
    The key is the two characters that follow the first character of the
    header line (h[1:3]). When a key is met more than once, the first
    sequence is kept and the duplicated key is printed.

    Args:
        - fasta_file (String) : the name of fasta file
        - path_in (String) : path to the fasta file

    Return:
        - bash1 (dict) : the dictionary header/sequence

    Raises:
        - ValueError : if the last header line has no sequence line after it
    """
    # The pairing helper is named izip_longest in Python 2 and zip_longest
    # in Python 3 ; pick whichever one exists.
    zip_longest = getattr(itertools, 'zip_longest', None) or itertools.izip_longest

    bash1 = {}

    with open(os.path.join(path_in, fasta_file), 'r') as F1:
        # Passing the same file iterator twice yields (header, sequence) pairs
        for h, s in zip_longest(*[F1] * 2):
            if s is None:
                # Odd number of lines : only the very last pair can be incomplete
                if not h.strip():
                    continue  # harmless trailing blank line
                raise ValueError('No sequence line after header %r in %s'
                                 % (h.rstrip('\r\n'), fasta_file))
            fasta_name = h[1:3]
            # Remove the end-of-line only : the last line of a file may have
            # none, and blindly dropping one character would truncate it
            sequence = s.rstrip('\r\n')
            if fasta_name not in bash1:
                bash1[fasta_name] = sequence
            else:
                print(fasta_name)

    return bash1  # same length for all (alignment)
29
def write_output(names, sps_list, out_dir, results_dict):
    """ Write results in csv files. There is one file per counted element (one file per amino-acid, one file per indice ...)

    Each file is named '<name>.csv' and is written in out_dir (created if it
    does not exist). The first line is 'Group,' followed by the species ;
    then there is one line per key of results_dict, with the values of the
    species taken in sorted order of the species names.

    Args:
        - names (list) : list with the names of elems
        - sps_list (list or String) : species names, sorted alphabetically.
          A list is joined with commas ; a String is used as it is, minus
          its last character (it is expected to end with a comma)
        - out_dir (String) : output directory
        - results_dict (dict) : counts values of each element for each input file (keys names : elems from 'names argument')

    """
    # 'basestring' only exists in Python 2 ; fall back on 'str' in Python 3
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(sps_list, string_types):
        species_header = sps_list[0:-1]
    else:
        species_header = ','.join(sps_list)

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for name in names:
        # Write straight into out_dir : no shell command involved, so names
        # with spaces or shell metacharacters are safe
        with open(os.path.join(out_dir, name + ".csv"), 'w') as out:
            out.write('Group,' + species_header + '\n')
            for group in results_dict.keys():
                count_of_elems = ','.join(
                    str(results_dict[group][specs][name])
                    for specs in sorted(results_dict[group].keys())
                )
                out.write(group + ',' + count_of_elems + '\n')
50
def fill_with_NaN(what):
    """ Build the placeholder results of a species which is absent from an orthogroup

    Args:
        - what (list of Strings) : the names of the elements studied (nucleotide, amino-acids, indices of thermostability ...)

    Return:
        - (dict) : one key per elem of what, each one associated with the string 'NaN'
    """
    # The value is the text 'NaN', not a float
    return dict.fromkeys(what, 'NaN')