diff tools/myTools/bin/SFA_virtscreen.py @ 1:7e5c71b2e71f draft default tip

Uploaded
author laurenmarazzi
date Wed, 22 Dec 2021 16:00:34 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/myTools/bin/SFA_virtscreen.py	Wed Dec 22 16:00:34 2021 +0000
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+import os
+import numpy as np
+import pandas as pd
+import networkx as nx
+import random
+import sfa
+import csv
+from sys import argv
+from sklearn.preprocessing import StandardScaler
+import itertools
+########INPUTS############
+fpath =  os.path.join( argv[1])            # location of networkfile
+samples=pd.read_csv(argv[2],index_col = 0)          # input samples (normexp initial states)
+inits=argv[3]                    #the initial states of interest
+
+phenotypes = pd.read_csv(argv[4],delim_whitespace=True, index_col = False)
+initz = phenotypes[phenotypes.isin([inits]).any(axis=1)]['name'].tolist()
+samples = samples[initz]
+
+FC_nodes=open(argv[5]).read().strip().split('\n')   # FC set
+FC_perts=pd.read_csv(argv[6],delim_whitespace=True,index_col=0,header=0,names=FC_nodes)
+
+
+class ThreeNodeCascade(sfa.base.Data):
+    def __init__(self):
+        super().__init__()
+        self._abbr = "TNC"
+        self._name = "A simple three node cascade"
+
+        signs = {'activates':1, 'inhibits':-1}
+        A, n2i, dg = sfa.read_sif(fpath, signs=signs, as_nx=True)
+        self._A = A
+        self._n2i = n2i
+        self._dg = dg
+        self._i2n = {idx: name for name, idx in n2i.items()}
+        
+    # end of def __init__
+# end of def class
+
+if __name__ == "__main__":
+## initalize parameters from SFA
+    data = ThreeNodeCascade()
+    algs = sfa.AlgorithmSet()
+    alg = algs.create('SP')
+    alg.data = data
+    alg.params.apply_weight_norm = True
+    alg.initialize()
+    alg.params.exsol_forbidden=True
+    alg.params.alpha=0.9
+
+ 
+    netnodes= list(data.dg.nodes)    
+    
+    n = data.dg.number_of_nodes() #the number of nodes
+    b = np.zeros((n,))
+    
+    alllogss=pd.DataFrame()
+    for name, item in samples.iteritems():                    #for each simulated initial condition
+        logss=pd.DataFrame(index=FC_perts.index,columns=netnodes,copy=True) # create dataframe for the FC perturbations
+        enodes=item.index.tolist()                            # get expressed nodes
+        for node in enodes:                                     # set initial state to simulated value
+            b[data.n2i[node]]=float(str(item.loc[node]))
+        for name2,item2 in FC_perts.iterrows():                  # for each FCnode perturbation
+            pnode=item2.index.tolist()
+            pi = []
+            for node in pnode: #if logFC.loc[node].at['logFC']>0: # if the logfc is postive (res > veh)
+                if int(item2.loc[node])==-1:          #if the perturbation is 0
+                    if node in enodes:
+                        b[data.n2i[node]]=float(str(item.loc[node]))-2.5*(float(str(item.loc[node])))        #downregulate the expression. fix to 0
+                    else:
+                        b[data.n2i[node]]=-2.5    #downregulate the expression. fix to 0
+                    pi.append(data.n2i[node])
+                if int(item2.loc[node])==1:          #if the perturbation is 1
+                    if node in enodes:
+                        b[data.n2i[node]]=float(str(item.loc[node]))+2.5*(float(str(item.loc[node]))) #upregulate the expression
+                    else:
+                        b[data.n2i[node]]=2.5 #upregulate the expression
+                    pi.append(data.n2i[node])
+            x = alg.compute(b,pi)                                      # Run SFA calculation
+            logss.loc[name2,netnodes]=x[0]
+        logss=logss.astype(float).round(3)
+        alllogss=pd.concat([alllogss,logss],axis=0)
+    multi=pd.MultiIndex.from_product([samples.columns.tolist(),FC_perts.index.tolist()],names=['replicate', 'perturbation'])
+    alllogss.set_index(multi,inplace=True)
+    alllogss.to_csv('pert_logss.txt', sep=' ',float_format='%.3f',chunksize=10000)
\ No newline at end of file