diff armdb_mirgene.py @ 1:d133caf020a7 draft

Uploaded
author glogobyte
date Wed, 13 Oct 2021 16:23:13 +0000
parents
children 41f5a0616dbb
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/armdb_mirgene.py	Wed Oct 13 16:23:13 2021 +0000
@@ -0,0 +1,90 @@
+import argparse
+import os
+import subprocess
+import time
+import urllib.request
+from multiprocessing import Process, Queue
+
+#--------------------------------------------Arguments-----------------------------------------------
+subprocess.call(['mkdir', 'out'])
+parser = argparse.ArgumentParser()
+parser.add_argument("-pos", "--positions", help="", action="store")
+parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
+parser.add_argument("-sym", "--symbol", help="",action="store")
+args = parser.parse_args()
+
+#------------------------------------Read Fasta files from MirGene-----------------------------------
+
+def read_url(path):
+
+    data = urllib.request.urlopen(path).read()
+    data1 = data.decode('utf-8')
+    data1 = data1.split("\n")
+    del data1[-1]
+    q.put(data1)
+
+#-------------------------------------Generation of Custom Arms--------------------------------------
+
+def custom_arms_mirgene(mat_mirna,pri_mirna):
+   mat_ext=[]
+   for i in range(0,len(mat_mirna),2):
+       if args.symbol in mat_mirna[i]:
+          mat_seq=mat_mirna[i+1]
+
+          for j in range(0,len(pri_mirna),2):
+              if mat_mirna[i].split("_")[0] == pri_mirna[j].split("_")[0]:
+                 temp_ext = pri_mirna[j+1].split(mat_seq)
+                 mat_ext.append([mat_mirna[i],temp_ext[0][-int(args.positions):]+mat_seq+temp_ext[1][0:int(args.positions)]])
+
+   print(str(args.positions)+" positions shifted")
+   return(mat_ext)
+
+#----------------------------------------Export of Fasta files ---------------------------------------
+
+def write_custom_arms(list,name,c):
+
+    f = open(name, "w")
+    for x in list:
+       if c==1:
+          f.write(x[0]+'\n')
+          f.write(x[1]+'\n')
+       else:
+          f.write(x+'\n')
+
+#==================================================================================================================================
+
+if __name__=='__main__':
+
+    starttime = time.time()
+    print(args.symbol)
+    q = Queue()
+
+    # Read of all Fasta files
+    mat_url = 'https://mirgenedb.org/fasta/ALL?mat=1'
+    p = [Process(target=read_url(mat_url))]
+    star_url = 'https://mirgenedb.org/fasta/ALL?star=1'
+    p.extend([Process(target=read_url(star_url))])
+    pri_url = 'https://mirgenedb.org/static/data/ALL/ALL--pri-30-30.fas'
+    p.extend([Process(target=read_url(pri_url))])
+    [x.start() for x in p]
+    [x.join() for x in p]
+
+    mat_mirna=q.get()
+    star_mirna=q.get()
+    pri_mirna=q.get()
+
+    # Replace U with T in seqeunces
+    mat_mirna.extend(star_mirna)
+    for i in range(1,len(mat_mirna),2):
+        mat_mirna[i]=mat_mirna[i].replace("U","T")
+
+    #Generation of Custom Arms
+    mat_ext=custom_arms_mirgene(mat_mirna,pri_mirna)
+
+    #Export of all Fasta files
+    p1 = [Process(target=write_custom_arms(mat_mirna,"shifted_mirnas.bed",0))]
+    p1.extend([Process(target=write_custom_arms(pri_mirna,"original_mirnas.bed",0))])
+    p1.extend([Process(target=write_custom_arms(mat_ext,"new_ref.fa",1))])
+    [x.start() for x in p1]
+    [x.join() for x in p1]
+
+    print('That runtime was {} seconds'.format(time.time() - starttime))