view scripts/functions.py @ 0:acc3674e515b draft default tip

planemo upload for repository https://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author abims-sbr
date Fri, 01 Feb 2019 10:28:50 -0500
parents
children
line wrap: on
line source

#!/usr/bin/env python
#coding: utf-8

import itertools, os

def dico(fasta_file, path_in):
    """
    Stores a fasta file in a dictionary : key/value -> header/sequence

    The file is read as consecutive (header line, sequence line) pairs, so
    each sequence must be written on a single line. The key is NOT the full
    header : it is the two characters that follow the first character of the
    header line (header[1:3]).

    When a key is met more than once, the first sequence is kept and the
    duplicated key is printed on stdout.

    Args:
        - fasta_file (String) : the name of fasta file
        - path_in (String) : path to the fasta file

    Return:
        - bash1 (dict) : the dictionary header/sequence
    """
    bash1 = {}

    with open(os.path.join(path_in, fasta_file), 'r') as F1:
        lines = iter(F1)
        for header in lines:
            # Blank lines (typically at the end of the file) are not headers
            if not header.strip():
                continue
            # A header without a following line gets an empty sequence.
            # Only the end-of-line characters are removed, so the last
            # residue is kept when the file does not end with a newline.
            sequence = next(lines, '').rstrip('\r\n')
            fasta_name = header[1:3]
            if fasta_name not in bash1:
                bash1[fasta_name] = sequence
            else:
                # Duplicated key : keep the first sequence, report the name
                print(fasta_name)

    # Sequences are expected to share the same length (alignment) ; this is
    # not checked here
    return bash1

def write_output(names, sps_list, out_dir, results_dict):
    """ Write results in csv files. There is one file per counted element (one file per amino-acid, one file per indice ...)

    Each file is named '<name>.csv' and is written directly in out_dir (which
    must already exist). The first line is 'Group,' followed by the species
    header ; then there is one line per key of results_dict, with the values
    of the element for each species (species keys sorted alphabetically).

    Args:
        - names (list) : list with the names of elems
        - sps_list (String or list) : species names, sorted alphabetically.
          A String is written as is, minus its last character (presumably a
          trailing separator -- to be confirmed against the callers). A list
          or tuple of names is joined with commas.
        - out_dir (String) : output directory
        - results_dict (dict) : vcounts values of each element for each input file (keys names : elems from 'names argument')
          Layout read by the code : results_dict[group][species][name]

    """
    if isinstance(sps_list, (list, tuple)):
        species_header = ','.join(sps_list)
    else:
        species_header = sps_list[0:-1]

    for name in names:
        rows = ['Group,' + species_header + '\n']
        for group in results_dict.keys():
            per_species = results_dict[group]
            cells = [str(per_species[specs][name]) for specs in sorted(per_species.keys())]
            rows.append(group + ',' + ','.join(cells) + '\n')

        # Write straight into out_dir : no temporary file in the working
        # directory and no shell 'mv', so names with spaces are safe
        with open(os.path.join(out_dir, name + '.csv'), 'w') as out:
            out.writelines(rows)
        os.system('mv %s.csv %s/' %(name, out_dir))

def fill_with_NaN(what):
    """ Build a placeholder dict where every studied element maps to the string 'NaN' ; used when a species is not present in an orthogroup

    Args:
        - what (list of Strings) : the names of the elements studied (nucleotide, amino-acids, indices of thermostability ...)

    Return:
        - (dict) : a new dictionary with keys=elems of what, values='NaN'
    """
    # dict.fromkeys gives every key the same value ; sharing is harmless
    # here because the value is an immutable string
    return dict.fromkeys(what, 'NaN')