Mercurial > repos > laurenmarazzi > netisce_test
comparison tools/myTools/bin/SFA_virtscreen.py @ 1:7e5c71b2e71f draft default tip
Uploaded
| author | laurenmarazzi |
|---|---|
| date | Wed, 22 Dec 2021 16:00:34 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:f24d4892aaed | 1:7e5c71b2e71f |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 import os | |
| 3 import numpy as np | |
| 4 import pandas as pd | |
| 5 import networkx as nx | |
| 6 import random | |
| 7 import sfa | |
| 8 import csv | |
| 9 from sys import argv | |
| 10 from sklearn.preprocessing import StandardScaler | |
| 11 import itertools | |
| 12 ########INPUTS############ | |
| 13 fpath = os.path.join( argv[1]) # location of networkfile | |
| 14 samples=pd.read_csv(argv[2],index_col = 0) # input samples (normexp initial states) | |
| 15 inits=argv[3] #the initial states of interest | |
| 16 | |
| 17 phenotypes = pd.read_csv(argv[4],delim_whitespace=True, index_col = False) | |
| 18 initz = phenotypes[phenotypes.isin([inits]).any(axis=1)]['name'].tolist() | |
| 19 samples = samples[initz] | |
| 20 | |
| 21 FC_nodes=open(argv[5]).read().strip().split('\n') # FC set | |
| 22 FC_perts=pd.read_csv(argv[6],delim_whitespace=True,index_col=0,header=0,names=FC_nodes) | |
| 23 | |
| 24 | |
| 25 class ThreeNodeCascade(sfa.base.Data): | |
| 26 def __init__(self): | |
| 27 super().__init__() | |
| 28 self._abbr = "TNC" | |
| 29 self._name = "A simple three node cascade" | |
| 30 | |
| 31 signs = {'activates':1, 'inhibits':-1} | |
| 32 A, n2i, dg = sfa.read_sif(fpath, signs=signs, as_nx=True) | |
| 33 self._A = A | |
| 34 self._n2i = n2i | |
| 35 self._dg = dg | |
| 36 self._i2n = {idx: name for name, idx in n2i.items()} | |
| 37 | |
| 38 # end of def __init__ | |
| 39 # end of def class | |
| 40 | |
| 41 if __name__ == "__main__": | |
| 42 ## initalize parameters from SFA | |
| 43 data = ThreeNodeCascade() | |
| 44 algs = sfa.AlgorithmSet() | |
| 45 alg = algs.create('SP') | |
| 46 alg.data = data | |
| 47 alg.params.apply_weight_norm = True | |
| 48 alg.initialize() | |
| 49 alg.params.exsol_forbidden=True | |
| 50 alg.params.alpha=0.9 | |
| 51 | |
| 52 | |
| 53 netnodes= list(data.dg.nodes) | |
| 54 | |
| 55 n = data.dg.number_of_nodes() #the number of nodes | |
| 56 b = np.zeros((n,)) | |
| 57 | |
| 58 alllogss=pd.DataFrame() | |
| 59 for name, item in samples.iteritems(): #for each simulated initial condition | |
| 60 logss=pd.DataFrame(index=FC_perts.index,columns=netnodes,copy=True) # create dataframe for the FC perturbations | |
| 61 enodes=item.index.tolist() # get expressed nodes | |
| 62 for node in enodes: # set initial state to simulated value | |
| 63 b[data.n2i[node]]=float(str(item.loc[node])) | |
| 64 for name2,item2 in FC_perts.iterrows(): # for each FCnode perturbation | |
| 65 pnode=item2.index.tolist() | |
| 66 pi = [] | |
| 67 for node in pnode: #if logFC.loc[node].at['logFC']>0: # if the logfc is postive (res > veh) | |
| 68 if int(item2.loc[node])==-1: #if the perturbation is 0 | |
| 69 if node in enodes: | |
| 70 b[data.n2i[node]]=float(str(item.loc[node]))-2.5*(float(str(item.loc[node]))) #downregulate the expression. fix to 0 | |
| 71 else: | |
| 72 b[data.n2i[node]]=-2.5 #downregulate the expression. fix to 0 | |
| 73 pi.append(data.n2i[node]) | |
| 74 if int(item2.loc[node])==1: #if the perturbation is 1 | |
| 75 if node in enodes: | |
| 76 b[data.n2i[node]]=float(str(item.loc[node]))+2.5*(float(str(item.loc[node]))) #upregulate the expression | |
| 77 else: | |
| 78 b[data.n2i[node]]=2.5 #upregulate the expression | |
| 79 pi.append(data.n2i[node]) | |
| 80 x = alg.compute(b,pi) # Run SFA calculation | |
| 81 logss.loc[name2,netnodes]=x[0] | |
| 82 logss=logss.astype(float).round(3) | |
| 83 alllogss=pd.concat([alllogss,logss],axis=0) | |
| 84 multi=pd.MultiIndex.from_product([samples.columns.tolist(),FC_perts.index.tolist()],names=['replicate', 'perturbation']) | |
| 85 alllogss.set_index(multi,inplace=True) | |
| 86 alllogss.to_csv('pert_logss.txt', sep=' ',float_format='%.3f',chunksize=10000) |
