view predict/predict_RNAs.py @ 86:e581e89e3318 draft

Uploaded
author tyty
date Fri, 19 Dec 2014 13:22:36 -0500
parents 63c41304b221
children
line wrap: on
line source

#RNA structure prediction & Output and illustrate reactivities

import sys
import shlex
import subprocess
import tarfile
from parse_dis_pac import *
from read_file import *
from Bio import SeqIO
import os
from rtts_plot import *
import random
import string


id_file = sys.argv[1]
seq_file = sys.argv[2]
predict_type = sys.argv[3]
temperature = sys.argv[4]
output_file = sys.argv[5]


flag = False
if predict_type!='silico': #input reactivity file if provided
    react_file = sys.argv[6]
    slope = sys.argv[7]
    intercept = sys.argv[8]
    react = parse_dist(react_file)
    react = react[1]
    flag = True

syspath = os.getcwd()

ids = read_t_file(id_file)
sequences = SeqIO.parse(seq_file, 'fasta')


seqs = {}
for seq in sequences:
    seqs[seq.id] = seq.seq.tostring()

if len(ids)>100: #setup a limit of the number of sequence to be predicted
    print("Number of sequences exceeds limitation!")
    sys.exit(0)
    

#predict RNA structures
output_directory = os.path.join(syspath, "output_files")
if not os.path.exists(output_directory):
    os.makedirs(output_directory)
for i in range(len(ids)):
    flag2 = 0
    id_s = ids[i][0]
    #print(id_s)
    #Put RNA sequence and reactivities into files
    if id_s in seqs:
        fh = file(os.path.join(syspath,"temp.txt"), 'w')        
        fh.write('>'+id_s)
        fh.write('\n')
        fh.write(seqs[id_s])
        fh.close()
        if not flag:
            command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s)))
            subprocess.call(command)
        else:
            if id_s in react:
                fh = file(os.path.join(syspath, "constraint.txt"), 'w')
                make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
                for j in range(0, (len(react[id_s]))):
                    if react[id_s][j]!='NA':
                        fh.write(str(j+1))
                        fh.write('\t')
                        fh.write(str(react[id_s][j]))
                        fh.write('\n')
                    #h.write(str(react[id_s][j])) #Output the reactivities
                    #h.write('\t')
                fh.close()
                #h.write('\n')
                #h.write('\n')
                command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), 
                                                             os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, 
                                                             os.path.join(output_directory, "%s.ct" % id_s)))
                subprocess.call(command)
            else:
                print(id_s+" not in the data of react!")
                flag2 = 1
        if flag2 == 0:
            command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
            subprocess.call(command)
    else:
        print(id_s+" not in the data of sequences!")

#Remove the unnecessary files
tarball = tarfile.open(output_file, 'w:')
for filename in os.listdir(output_directory):
    filepath = os.path.join(output_directory, filename)
    print filepath
    tarball.add(filepath, arcname=filename)
#print os.listdir(syspath)
#print os.listdir(output_directory)
# tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s)
tarball.close()