#!/usr/bin/env python
# Provenance (recovered from the Mercurial changeset header that wrapped
# this script):
#   HG changeset patch
#   User mandorodriguez
#   Date 1463104070 14400
#   Node ID 12d69a5e430309f4b3d2a1f7aff71ec436b4f829
#   Uploaded
#   append_gene_name.py, Thu May 12 21:47:50 2016 -0400
"""Append a gene-name column to a whitespace-delimited table of IDs.

Two input files are read:

* the conversion table (``--table``): the first whitespace-separated field
  of each line is an ID and the second field is the name for that ID;
* the ID file (``--ensgene``): the first field of each line is looked up in
  the conversion table.

Every row of the ID file is written to the output file (``--out``) as
tab-joined fields with one extra trailing field: the matching name, an
empty string when the ID is not in the table, or the literal ``gene_name``
for the header row (the row whose first field is ``tracking_id``).
"""

import argparse
import os
import sys
import pdb


# First field that identifies the header row of the ID file.
_HEADER_ID = "tracking_id"
# Column title appended to the header row.
_HEADER_LABEL = "gene_name"


def _load_gene_names(table_path):
    """Return a dict mapping field 1 to field 2 for each line of the table.

    Lines with fewer than two fields (blank lines included) carry no
    mapping and are skipped instead of raising ``IndexError``.  When an ID
    occurs more than once, the last occurrence wins.
    """
    gene_name = {}
    with open(table_path, 'r') as table:
        for line in table:
            parts = line.split()
            if len(parts) < 2:
                continue
            gene_name[parts[0]] = parts[1]
    return gene_name


def _annotate_rows(ensgene_path, gene_name):
    """Return the rows of the ID file, each with a gene-name field appended.

    Blank lines are skipped: they have no first field to look up.
    """
    rows = []
    with open(ensgene_path, 'r') as ensgene:
        for line in ensgene:
            parts = line.split()
            if not parts:
                continue
            if parts[0] == _HEADER_ID:
                parts.append(_HEADER_LABEL)
            else:
                # Unknown IDs get an empty field so every row keeps the
                # same number of columns.
                parts.append(gene_name.get(parts[0], ""))
            rows.append(parts)
    return rows


#-------------------------------------------------------------------------------
# Main function call
def main(argv=None):
    """Parse the command line, annotate the ID file and write the result.

    Args:
        argv: Optional list of command-line arguments (without the program
            name).  ``None`` makes argparse fall back to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("-t", "--table", type=str, required=True,
                        help="The table gene name conversion")
    parser.add_argument("-e", "--ensgene", type=str, required=True,
                        help="Ensegene IDs")
    parser.add_argument("-o", "--out", type=str, default="table.txt",
                        help="output file")

    args = parser.parse_args(argv)

    gene_name = _load_gene_names(args.table)
    new_ensgene = _annotate_rows(args.ensgene, gene_name)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Writing %d lines to output file %s" % (len(new_ensgene), args.out))

    with open(args.out, 'w') as out:
        out.writelines("\t".join(row) + "\n" for row in new_ensgene)

    print("Done!")


# Backward-compatible alias: the entry point was originally named __main__.
__main__ = main

#-------------------------------------------------------------------------------
if __name__ == "__main__":
    __main__()