Mercurial > repos > laurenmarazzi > netisce_test
diff tools/myTools/bin/SFA_insilico.py @ 1:7e5c71b2e71f draft default tip
Uploaded
author | laurenmarazzi |
---|---|
date | Wed, 22 Dec 2021 16:00:34 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/myTools/bin/SFA_insilico.py Wed Dec 22 16:00:34 2021 +0000 @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 + + +import os +import numpy as np +import pandas as pd +import networkx as nx +import random +import sfa +import csv +import sys +import statistics + + + +fpath = os.path.join(sys.argv[1]) #location of networkfile +qdata_all=pd.read_csv(sys.argv[2],index_col = 0) +all_samples=pd.read_csv(sys.argv[3],delim_whitespace=True, index_col = False) +npert=int(sys.argv[4]) +phenotypes = all_samples.phenotype.unique() +class ThreeNodeCascade(sfa.base.Data): + def __init__(self): + super().__init__() + self._abbr = "TNC" + self._name = "A simple three node cascade" + + signs = {'activates':1, 'inhibits':-1} + A, n2i, dg = sfa.read_sif(fpath, signs=signs, as_nx=True) + self._A = A + self._n2i = n2i + self._dg = dg + self._i2n = {idx: name for name, idx in n2i.items()} + + # end of def __init__ +# end of def class + + +def gen_basal_states(npert,nnode,pre,opts): + numpert=int(npert) # number of perturbations to generate + numnodes=int(nnode) # number of nodes to generate them for + prefix = pre # prefix for column labels + options=opts.split(',') + df1=pd.DataFrame() + if numpert > len(options)**numnodes: + numpert = len(options)**numnodes + while len(df1.index)<numpert: + temp=np.random.choice(a=options,size=[500000,numnodes]) # set to 200,000 to ensure unique random combinations in a timely manner (or 500,000 to run faster) + df1=pd.DataFrame(temp) + df1=df1.drop_duplicates() + if len(df1.index)>numpert: + df1=df1.iloc[0:numpert,] + l1=[] + for i in range(0,numpert): + l1.append(prefix+'_' + str(i+1).rjust(len(str(numpert)), '0')) + df1.index=l1 + + return df1 + + +if __name__ == "__main__": +## initalize parameters from SFA + data = ThreeNodeCascade() + algs = sfa.AlgorithmSet() + alg = algs.create('SP') + alg.data = data + alg.params.apply_weight_norm = True + alg.initialize() + alg.params.exsol_forbidden=True + alg.params.alpha=0.9 + + + + netnodes=list(data.dg.nodes) + expnodes=list(set(netnodes) & set(qdata_all.index)) + samples=gen_basal_states(npert,len(expnodes),'attr','0,-1,1') + samples.columns=expnodes + n = data.dg.number_of_nodes() #the number of nodes + b = np.zeros((n,)) + switch_num = int(len(samples)/len(phenotypes)) + (len(phenotypes) % len(samples) > 0) + l=0 + logss=pd.DataFrame(index=samples.index,columns=netnodes,copy=True) + for i in range(len(phenotypes)): + m=switch_num*i + qs = all_samples[all_samples.isin([phenotypes[i]]).any(axis=1)]['name'].tolist() + qdata=qdata_all.loc[:,qs] + pi=[] + samples2=samples.iloc[l:switch_num+m] + minv=pd.Series(index = qdata.index, data = [np.amin(qdata.loc[node,]) for node in qdata.index]) + maxv=pd.Series(index = qdata.index, data = [np.amax(qdata.loc[node,]) for node in qdata.index]) + q1=pd.Series(index = qdata.index, data = [np.quantile(qdata.loc[node,],.33) for node in qdata.index]) + q2=pd.Series(index = qdata.index, data = [np.quantile(qdata.loc[node,],.66) for node in qdata.index]) + + for name, item in samples2.iterrows(): #for each simulated initial condition + enodes=item.index.tolist() + for node in enodes: # set initial state to simulated value + if item.loc[node]==1: # if 1 + number=np.random.uniform(low=q2[node], high=maxv[node]) #generate a random value for the node in the upper quartile + elif item.loc[node]==-1: # if -1 + number=np.random.uniform(low=minv[node], high=q1[node]) #generate a random value for the node in the lower quartile + else: #item.loc[node]==0 + number=np.random.uniform(low=q1[node], high=q2[node]) #generate a random value for the node in the middle + b[data.n2i[node]]=number + x = alg.compute(b,pi) # Run SFA calculation + logss.loc[name,netnodes]=x[0] + l=switch_num+m + logss=logss.astype(float).round(3) + logss.to_csv('attrs_insilico.tsv', sep=' ',float_format='%.3f',index_label="name",chunksize=10000) \ No newline at end of file