diff append_gene_name.py @ 0:12d69a5e4303 draft

Uploaded
author mandorodriguez
date Thu, 12 May 2016 21:47:50 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/append_gene_name.py	Thu May 12 21:47:50 2016 -0400
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+import argparse
+import os
+import sys
+import pdb
+
+
+
+
+
+
+#-------------------------------------------------------------------------------
+# Main function call
+def __main__():
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("-t", "--table", type=str,required=True,
+                        help="The table gene name conversion")
+    parser.add_argument("-e", "--ensgene", type=str,required=True,
+                        help="Ensegene IDs")
+    parser.add_argument("-o", "--out", type=str,default="table.txt",
+                        help="output file")
+    
+
+    args = parser.parse_args()
+
+
+    table_file = args.table
+    ensgene_file = args.ensgene
+    outfile = args.out
+    
+    ensgene = {}
+    gene_name = {}
+        
+    lines = None
+
+    with open(table_file,'r') as tf:
+        lines = tf.readlines()
+            
+        for line in lines:
+            
+            parts = line.split()
+
+            gene_name[ parts[0] ] = parts[1]
+
+
+
+    #---------------------------------------------------------------------------
+
+    new_ensgene = []
+
+    with open(ensgene_file,'r') as ef:
+        lines = ef.readlines()
+
+        for line in lines:
+            
+            parts = line.split()
+
+            if parts[0] == "tracking_id":
+
+                parts.append("gene_name")
+
+            else:
+
+                # check for the gene name in the ensign ids
+                if gene_name.has_key(parts[0]):
+
+                    parts.append( gene_name[parts[0]] )
+
+                else:
+
+                    parts.append("")
+
+            new_ensgene.append(parts)
+            
+
+
+    #--- done getting stuff ----------------------------------------------------
+
+    print "Writing %d lines to output file %s" % (len(new_ensgene),outfile)
+    
+    with open(outfile,'w') as of:
+
+        for row in new_ensgene:
+
+            of.write("\t".join(row)+"\n")
+
+
+
+    print "Done!"
+    
+# Entry point: run __main__() only when executed as a script, not on import.
+if __name__=="__main__": __main__()