diff scripts/functions.py @ 0:acc3674e515b draft default tip

planemo upload for repository https://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author abims-sbr
date Fri, 01 Feb 2019 10:28:50 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/functions.py	Fri Feb 01 10:28:50 2019 -0500
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+#coding: utf-8
+
+import itertools, os
+
def dico(fasta_file, path_in):
    """
    Stores a fasta file in a dictionary : key/value -> header/sequence

    The file is read as consecutive (header line, sequence line) pairs, so
    each sequence has to be written on a single line. Only the two
    characters following the leading '>' of the header are kept as key.
    When a key is met more than once, the first sequence is kept and the
    duplicated key is printed.

    Args:
        - fasta_file (String) : the name of fasta file
        - path_in (String) : path to the fasta file

    Return:
        - bash1 (dict) : the dictionary header/sequence
    """
    bash1 = {}

    with open(os.path.join(path_in, fasta_file), 'r') as F1:
        for h in F1:
            if not h.strip():
                # Blank line where a header is expected (typically a
                # trailing empty line) : nothing to store.
                continue

            # The sequence is the line right after the header ; a header
            # sitting on the very last line gets an empty sequence instead
            # of crashing.
            s = next(F1, None)
            if s is None:
                s = ''

            fasta_name = h[1:3]
            # Strip only the end-of-line characters : a last line without
            # a trailing newline must keep its final residue.
            sequence = s.rstrip('\r\n')

            if fasta_name not in bash1:
                bash1[fasta_name] = sequence
            else:
                print(fasta_name)

    return bash1 # same length for all (alignment)
+
def write_output(names, sps_list, out_dir, results_dict):
    """ Write results in csv files. There is one file per counted element (one file per amino-acid, one file per indice ...)

    Each file is named '<name>.csv' and is written directly in out_dir. Its
    first line is 'Group,' followed by the species ; then comes one line per
    key of results_dict, holding the values of the element for each species
    (species sorted alphabetically).

    Args:
        - names (list) : list with the names of elems
        - sps_list (list or String) : species names, sorted alphabetically. A list/tuple is
          joined with commas. Any other value is handled as before : its last item is dropped
          (presumably a comma-separated string ending with a trailing comma - to confirm
          against the callers)
        - out_dir (String) : output directory (must already exist)
        - results_dict (dict) : vcounts values of each element for each input file (keys names : elems from 'names argument')

    """
    if isinstance(sps_list, (list, tuple)):
        species_header = ','.join(sps_list)
    else:
        species_header = sps_list[0:-1]

    for name in names:
        # Writing straight into out_dir avoids the former shell 'mv', which
        # broke on paths containing spaces or shell metacharacters.
        with open(os.path.join(out_dir, name + '.csv'), 'w') as out:
            out.write('Group,' + species_header + '\n')
            for group in results_dict:
                per_species = results_dict[group]
                # Species are sorted so that the columns match the header.
                counts = [str(per_species[specs][name]) for specs in sorted(per_species)]
                out.write(group + ',' + ','.join(counts) + '\n')
+
def fill_with_NaN(what):
    """ Build a placeholder dict whose values are all the string 'NaN' ; used when a species is not present in an orthogroup

    Args:
        - what (list of Strings) : the names of the elements studied (nucleotide, amino-acids, indices of thermostability ...)

    Return:
        - (dict) : one key per elem of what, each one mapped to 'NaN'
    """
    return dict.fromkeys(what, 'NaN')