Mercurial > repos > cpt > cpt_disruptin_table
comparison Disruptin_hydrophobicity_helicity_table_package.py @ 1:a99be535e99d draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:41:05 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:f3fc78cc4c43 | 1:a99be535e99d |
---|---|
1 """ | |
2 This program is intended to create the output table for the disruptin finder workflow | |
3 """ | |
4 from Bio import SeqIO | |
5 from Bio.SeqUtils.ProtParam import ProteinAnalysis | |
6 from Bio.SeqUtils import ProtParamData | |
7 import csv | |
8 import argparse | |
9 import sys | |
10 | |
11 | |
12 def disruptin_table(garnier_file, fasta_file): | |
13 # Iterable variables | |
14 position = 1 | |
15 net_charge = 0 | |
16 charge_res = 0 | |
17 record_number = 0 | |
18 | |
19 # loop structures | |
20 names = [] | |
21 sec_struct = [] | |
22 | |
23 # reading the lines from the garnier csv file | |
24 # with open(garnier_file,'r') as csvfile: | |
25 # garnierreader = csv.reader(csvfile) | |
26 for row in garnier_file: | |
27 if row[0] == "Sequence: ": | |
28 names += [row[1]] | |
29 elif row[0] in "HETC": | |
30 row = row.split("\t") | |
31 sec_struct += ["".join(row)] | |
32 | |
33 record = [] | |
34 p = [] | |
35 r = [] | |
36 c = [] | |
37 h = [] | |
38 s = [] | |
39 | |
40 # Parse the .fasta file and get the sequence | |
41 for rec in SeqIO.parse(fasta_file, "fasta"): | |
42 sequence = str(rec.seq) | |
43 | |
44 # Set up the information vectors: for position #, residue, hydrophobic/charge/polar/nonpolar, and secondary | |
45 # structure | |
46 record += [rec.id] | |
47 position_vec = [] | |
48 residue_vec = [] | |
49 charge_sym_vec = [] | |
50 sec_struct_vec = [] | |
51 | |
52 for aa in sequence: | |
53 position_vec += [str(position)] | |
54 residue_vec += [str(aa)] | |
55 sec_struct_vec += [str(sec_struct[record_number][position - 1])] | |
56 | |
57 # For R and K residues a positive charge is given | |
58 if aa in "RK": | |
59 symbol = "+" | |
60 # For D and E residues a negative charge is given | |
61 elif aa in "DE": | |
62 symbol = "-" | |
63 elif aa in "AVMILPWFG": | |
64 symbol = "N" | |
65 elif aa in "HSYTCQN": | |
66 symbol = "P" | |
67 charge_sym_vec += symbol | |
68 position += 1 | |
69 | |
70 # Calculating hyrophobicity based on Kyte and Doolittle scale. Using binning value of 9. Since the binning | |
71 # is 9, the first 4 residues and last 4 residues as set blank so as to center the values to their | |
72 # approximate position on the sequence. | |
73 prot_ana_seq = ProteinAnalysis(sequence) | |
74 hydro = [0] * 4 + prot_ana_seq.protein_scale(ProtParamData.kd, 9) + [0] * 4 | |
75 | |
76 record_number += 1 | |
77 position = 1 | |
78 | |
79 p += [position_vec] | |
80 r += [residue_vec] | |
81 c += [charge_sym_vec] | |
82 h += [hydro] | |
83 s += [sec_struct_vec] | |
84 | |
85 # returns values for name of the sequence | |
86 return record, p, r, c, h, s | |
87 | |
88 | |
89 if __name__ == "__main__": | |
90 # Grab all of the filters from our plugin loader | |
91 parser = argparse.ArgumentParser(description="Disruptin Table Output") | |
92 parser.add_argument( | |
93 "garnier_file", type=argparse.FileType("r"), help="csv file from garnier reader" | |
94 ) | |
95 parser.add_argument( | |
96 "fasta_file", | |
97 type=argparse.FileType("r"), | |
98 help="fasta file of disruptin candidates", | |
99 ) | |
100 args = parser.parse_args() | |
101 | |
102 # Set up output location | |
103 # f = open(sys.stdout, 'w', newline='') | |
104 # writer1 = csv.writer(f) | |
105 | |
106 iden, position, residue, charge, hydro, struct = disruptin_table(**vars(args)) | |
107 | |
108 for i in range(len(iden)): | |
109 # writer1.writerow(['Protein ID']+[iden[i]]) | |
110 # writer1.writerow(['Position'] + [format(x, 's') for x in position[i]]) | |
111 # writer1.writerow(['Residue'] + [format(x, 's') for x in residue[i]]) | |
112 # writer1.writerow(['Charge'] + [format(x, 's') for x in charge[i]]) | |
113 # writer1.writerow(['Hydrophobicity'] + [format(x, '.3f') for x in hydro[i]]) | |
114 # writer1.writerow(['Secondary Structure'] + [format(x, 's') for x in struct[i]]) | |
115 # writer1.writerow(['']) | |
116 | |
117 print(str(iden[i])) | |
118 print("Position \t " + "\t".join(position[i])) | |
119 print("Residue \t" + "\t".join(residue[i])) | |
120 print("Charge \t" + "\t".join(charge[i])) | |
121 print("Hydrophobicity \t" + "\t".join(format(x, ".3f") for x in hydro[i])) | |
122 print("Secondary Structure \t" + "\t".join(struct[i])) |