Mercurial > repos > abims-sbr > mutcount
diff scripts/functions.py @ 0:acc3674e515b draft default tip
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author | abims-sbr |
---|---|
date | Fri, 01 Feb 2019 10:28:50 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
#coding: utf-8
# Reconstructed from the diff of scripts/functions.py (new file, @@ -0,0 +1,65 @@).

import itertools
import os


def dico(fasta_file, path_in):
    """
    Stores a fasta file in a dictionary : key/value -> header/sequence

    The file is read two lines at a time: one header line followed by one
    sequence line (sequences must therefore not be wrapped over several lines).
    The key is made of the two characters that follow the first character of
    the header line (presumably a two-letter species code after '>' -- confirm
    against the callers). When the same key shows up again, the first sequence
    is kept and the duplicated key is printed on stdout.

    Args:
        - fasta_file (String) : the name of fasta file
        - path_in (String) : path to the fasta file

    Return:
        - bash1 (dict) : the dictionary header/sequence

    Raises:
        - ValueError : if the last header line has no sequence line after it
    """
    bash1 = {}

    with open(path_in + '/' + fasta_file, 'r') as handle:
        for header in handle:
            # Pull the sequence line from the same iterator so that lines are
            # consumed strictly in (header, sequence) pairs.
            seq_line = next(handle, None)
            if seq_line is None:
                if header.strip():
                    raise ValueError(
                        "header %r has no sequence line in %s/%s"
                        % (header.rstrip('\r\n'), path_in, fasta_file))
                # A lone trailing blank line is harmless: stop quietly.
                break

            fasta_name = header[1:3]
            # Strip only the line terminator: blindly dropping the last
            # character would lose a residue when the file does not end
            # with a newline.
            sequence = seq_line.rstrip('\r\n')
            if fasta_name not in bash1:
                bash1[fasta_name] = sequence
            else:
                # Parenthesised single argument: prints the same thing under
                # Python 2 and Python 3.
                print(fasta_name)

    return bash1  # same length for all (alignment)


def write_output(names, sps_list, out_dir, results_dict):
    """ Write results in csv files. There is one file per counted element (one file per amino-acid, one file per indice ...)

    Each file is named '<name>.csv' and is created directly inside out_dir.
    Its first line is 'Group,' followed by the species header; then comes one
    line per key of results_dict, holding the value of the element for every
    species of that group, species being taken in sorted order.

    Args:
        - names (list) : list with the names of elems
        - sps_list (list or String) : species names, sorted alphabetically.
          A list/tuple is joined with commas. Any other value is used as an
          already formatted string whose last character (expected to be a
          trailing separator) is dropped, as before.
        - out_dir (String) : output directory (must already exist)
        - results_dict (dict) : counts values of each element for each input file,
          laid out as results_dict[group][species][name]

    """
    if isinstance(sps_list, (list, tuple)):
        species_header = ','.join(str(species) for species in sps_list)
    else:
        species_header = sps_list[0:-1]

    for name in names:
        # Write straight to the destination instead of creating the file in
        # the working directory and shelling out to `mv`: no dependency on a
        # shell, no breakage on names with spaces or metacharacters, and
        # failures surface as exceptions instead of being silently ignored.
        out_path = os.path.join(out_dir, name + '.csv')
        with open(out_path, 'w') as out:
            out.write('Group,' + species_header + '\n')
            for group in results_dict:
                per_species = results_dict[group]
                counts = ','.join(
                    str(per_species[specs][name])
                    for specs in sorted(per_species))
                out.write(group + ',' + counts + '\n')


def fill_with_NaN(what):
    """ Used to create a dict only with NaN values ; used when a species is not present in an orthogroup

    Args:
        - what (list of Strings) : the names of the elements studied (nucleotide, amino-acids, indices of thermostability ...)

    Return:
        - NaN_values (dict) : dictionary with keys=elems of what, values=NaN (the string 'NaN')
    """
    return dict.fromkeys(what, 'NaN')