Mercurial > repos > glogobyte > mirviz
comparison viz_functions.py @ 7:77d835d85a2f draft
Uploaded
| author | glogobyte |
|---|---|
| date | Wed, 13 Oct 2021 11:19:52 +0000 |
| parents | |
| children | 3ba53d80714b |
comparison
equal
deleted
inserted
replaced
| 6:5e57bcdc731c | 7:77d835d85a2f |
|---|---|
| 1 import pandas as pd | |
| 2 import matplotlib.patches as mpatches | |
| 3 import matplotlib.font_manager as font_manager | |
| 4 import matplotlib.pyplot as plt | |
| 5 | |
| 6 ######################################################################################### | |
| 7 | |
| 8 # Read a file and return it as a list | |
def read(path, flag):
    """Read a text file and return its lines as a list.

    Parameters
    ----------
    path : str
        File to open (text mode, platform default encoding).
    flag : int
        ``0`` returns the lines with their line terminators kept
        (``readlines()``); ``1`` returns them with terminators removed
        (``read().splitlines()``).

    Returns
    -------
    list of str, or None
        ``None`` when ``flag`` is neither ``0`` nor ``1``; the file is not
        opened in that case.
    """
    if flag == 0:
        # The ``with`` block closes the file; no explicit close() is needed.
        with open(path) as fp:
            return fp.readlines()

    if flag == 1:
        with open(path) as fp:
            return fp.read().splitlines()

    # Unknown flag: keep the historical behaviour of returning None.
    return None
| 21 | |
| 22 # Write a list to a txt file | |
def write(path, list):
    """Write the inner fields of every entry of ``list`` to a text file.

    For each entry ``x`` the slice ``x[1:-1]`` (everything except the first
    and the last element) is joined with tab characters and written to
    ``path``. The file is created or truncated.

    NOTE(review): no separator or newline is written between entries, so
    consecutive entries are concatenated unless the sliced fields already
    contain a line terminator -- confirm against the callers.

    Parameters
    ----------
    path : str
        Destination file.
    list : iterable
        Entries that support slicing and whose ``x[1:-1]`` elements are
        strings. (The parameter name shadows the builtin but is kept so
        that keyword callers keep working.)
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(path, 'w') as fp:
        fp.writelines("\t".join(entry[1:-1]) for entry in list)
| 28 | |
| 29 | |
| 30 ################################################################################################################################################################> | |
| 31 | |
def top_diff(miRNA_info, number, flag, l):
    """Draw a horizontal bar chart of the entries with the largest |Log2FC|.

    The ``number`` entries with the largest absolute ``x[1]`` are kept,
    ordered by name (``x[0]``) and plotted as horizontal bars: green for
    positive values, red for negative values and black for anything else.

    Parameters
    ----------
    miRNA_info : list
        Records indexed as ``x[0]`` (name) and ``x[1]`` (numeric Log2FC).
        The list is not modified.
    number : int
        Maximum number of records to plot.
    flag : str
        ``'t'`` saves the figure as ``tem.png``, ``'nt'`` as ``non.png``;
        any other value leaves the figure unsaved.
    l : object
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    None
    """
    # sorted() works on a copy, so the caller's list keeps its order
    # (the previous in-place sort reordered the argument as a side effect).
    strongest = sorted(miRNA_info, key=lambda rec: abs(rec[1]), reverse=True)[:number]
    strongest.sort(key=lambda rec: rec[0])

    # True -> positive, False -> negative, "Zero" -> neither of the two.
    kinds = []
    for rec in strongest:
        if rec[1] > 0:
            kinds.append(True)
        elif rec[1] < 0:
            kinds.append(False)
        else:
            kinds.append("Zero")

    df_miRNA = pd.DataFrame(data={
        "Names": [rec[0] for rec in strongest],
        "Log2FC": [rec[1] for rec in strongest],
        "Kind": kinds,
    })
    df_miRNA = df_miRNA.sort_values(by=['Names'])

    # NOTE(review): the original indentation was not available; everything
    # below is assumed to belong to the "frame is not empty" branch, so
    # nothing is drawn or saved when no entries remain -- confirm.
    if df_miRNA.empty:
        return

    bar_colors = df_miRNA.Kind.map({True: 'g', False: 'r', 'Zero': 'k'})
    axes = df_miRNA.plot.barh(x='Names', y='Log2FC', color=bar_colors)
    plt.gcf().set_size_inches(5, 12)  # width x height in inches

    up_reg = mpatches.Patch(color='green', label='Upregulated')
    down_reg = mpatches.Patch(color='red', label='Downregulated')
    # Legend is placed outside the axes, to the right of the plot.
    plt.legend(handles=[up_reg, down_reg], bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    axes.set_ylabel(" ", fontsize=3, fontweight='bold')
    axes.set_xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.axvline(x=0, color="k")

    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    if flag == 't':
        plt.savefig('tem.png', bbox_inches='tight', dpi=300)
    if flag == 'nt':
        plt.savefig('non.png', bbox_inches='tight', dpi=300)
| 72 | |
| 73 | |
| 74 ################################################################################################################################################################> | |
| 75 | |
def unique(sequence):
    """Return the items of ``sequence`` without duplicates.

    The first occurrence of every item is kept and the original order is
    preserved. Items must be hashable.
    """
    already_seen = set()
    kept = []
    for item in sequence:
        if item in already_seen:
            continue
        already_seen.add(item)
        kept.append(item)
    return kept
| 79 | |
| 80 ################################################################################################################################################################> | |
| 81 | |
def top_scatter_non(matures,isoforms,non_temp,uni_names,number):
    """Scatter-plot Log2FC values per name for three record groups.

    Names are taken from ``uni_names`` in order. For every name, the
    ``y[1]`` value of each record whose identifier ``y[0]`` contains the
    name is collected, separately for ``matures``, ``isoforms`` and
    ``non_temp``. Collection stops once ``number`` names with at least one
    match have been processed. The figure is saved as ``a2.png``.

    NOTE(review): ``name in y[0]`` is a substring test, so a name that is a
    prefix of another identifier also matches -- confirm this is intended.

    Parameters
    ----------
    matures, isoforms, non_temp : list
        Records indexed as ``y[0]`` (identifier string) and ``y[1]``
        (Log2FC value).
    uni_names : iterable of str
        Candidate names, in priority order.
    number : int
        Maximum number of names to plot.

    Returns
    -------
    None
    """
    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []
    non_temp_names, non_temp_log2fc = [], []

    sources = (
        (matures, mat_names, mat_log2fc),
        (isoforms, iso_names, iso_log2fc),
        (non_temp, non_temp_names, non_temp_log2fc),
    )

    count = 0
    for name in uni_names:
        if count >= number:
            # Enough names collected; later names could not add anything.
            break
        matched = False
        for records, names_out, values_out in sources:
            for rec in records:
                if name in rec[0]:
                    values_out.append(rec[1])
                    names_out.append(name)
                    matched = True
        if matched:
            count += 1

    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))
    non_df = pd.DataFrame(dict(names=non_temp_names, log2fc=non_temp_log2fc))

    # sort_values() returns a new frame; the result has to be assigned,
    # otherwise the sort is silently lost.
    iso_df = iso_df.sort_values(by=['names'])
    mat_df = mat_df.sort_values(by=['names'])
    non_df = non_df.sort_values(by=['names'])

    fig, ax = plt.subplots()

    # Isoforms are drawn first, then matures, then non-templated records.
    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], edgecolors='k', linewidth=1, marker='o', c='red', alpha=0.4)
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], edgecolors='k', linewidth=1, marker='o', c='green', alpha=0.4)
    h2 = ax.scatter(non_df['log2fc'], non_df['names'], edgecolors='k', linewidth=1, marker='o', c='orange', alpha=0.4)

    # Legend is placed outside the axes, to the right of the plot.
    plt.legend([h1, h2, h3], ["RefSeq miRNA", "Non-templated isomiR", "Templated isomiR"], bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    fig.set_size_inches(16, 12)  # width x height in inches
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
| 140 | |
| 141 ######################################################################################################################################################################################################################################### | |
| 142 | |
def top_scatter_tem(matures,isoforms,uni_names,number):
    """Scatter-plot Log2FC values per name for matures and isoforms.

    Names are taken from ``uni_names`` in order. For every name, the
    ``y[1]`` value of each record whose identifier ``y[0]`` contains the
    name is collected, separately for ``matures`` and ``isoforms``.
    Collection stops once ``number`` names with at least one match have
    been processed. The figure is saved as ``a2.png``.

    NOTE(review): ``name in y[0]`` is a substring test, so a name that is a
    prefix of another identifier also matches -- confirm this is intended.

    Parameters
    ----------
    matures, isoforms : list
        Records indexed as ``y[0]`` (identifier string) and ``y[1]``
        (Log2FC value).
    uni_names : iterable of str
        Candidate names, in priority order.
    number : int
        Maximum number of names to plot.

    Returns
    -------
    None
    """
    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []

    sources = (
        (matures, mat_names, mat_log2fc),
        (isoforms, iso_names, iso_log2fc),
    )

    count = 0
    for name in uni_names:
        if count >= number:
            # Enough names collected; later names could not add anything.
            break
        matched = False
        for records, names_out, values_out in sources:
            for rec in records:
                if name in rec[0]:
                    values_out.append(rec[1])
                    names_out.append(name)
                    matched = True
        if matched:
            count += 1

    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))

    # sort_values() returns a new frame; the result has to be assigned,
    # otherwise the sort is silently lost.
    iso_df = iso_df.sort_values(by=['names'])
    mat_df = mat_df.sort_values(by=['names'])

    fig, ax = plt.subplots()

    # Isoforms are drawn first, then matures.
    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], edgecolors='k', linewidth=1, marker='o', c='red', alpha=0.4)
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], edgecolors='k', linewidth=1, marker='o', c='green', alpha=0.4)

    # Legend is placed outside the axes, to the right of the plot.
    plt.legend([h1, h3], ["RefSeq miRNA", "Templated isomiR"], bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    fig.set_size_inches(16, 12)  # width x height in inches
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
| 190 | |
| 191 | |
| 192 ############################################################################################################################################################################################################################################## | |
| 193 | |
def preproccess(non_templated,matures,isoforms,log2fc,pval,stat):
    """Filter the three record tables and derive the ranked unique names.

    Every input row is indexed as ``x[0]`` (identifier string), ``x[1]``
    and ``x[2]`` (both convertible with ``float``). A row passes the first
    filter when ``abs(x[1]) > log2fc`` and ``x[2]`` compares favourably to
    ``pval``: ``x[2] > pval`` when ``stat == "3"``, ``x[2] < pval``
    otherwise.

    Parameters
    ----------
    non_templated, matures, isoforms : list
        Input rows as described above.
    log2fc : float
        Threshold on the absolute value of ``x[1]`` for the first filter.
    pval : float
        Threshold compared against ``x[2]``.
    stat : str
        ``"3"`` inverts the ``x[2]`` comparison of the first filter.

    Returns
    -------
    tuple
        ``(diff_matures, diff_isoforms, diff_non_templated, uni_names,
        non_temp, mat_iso)`` where

        * ``uni_names`` are the identifier prefixes (text before the first
          ``"_"``) of all rows passing the first filter, de-duplicated and
          ordered by decreasing ``abs(x[1])``;
        * ``non_temp`` are the filtered non-templated rows, sorted by
          decreasing ``abs(x[1])``;
        * ``mat_iso`` are the filtered mature rows followed by the filtered
          isoform rows, in input order;
        * the ``diff_*`` lists are the rows of each input with
          ``abs(x[1]) > 1``, ``x[2] < pval`` and a prefix in ``uni_names``,
          sorted by decreasing ``abs(x[1])``.

    Raises
    ------
    SystemExit
        When no row of any input passes the first filter.
    """
    # Imported locally because ``sys`` is not among the imports visible at
    # the top of this file; without it the exit below raised NameError.
    import sys

    if stat=="3":
        def stat_ok(value):
            return value > pval
    else:
        def stat_ok(value):
            return value < pval

    def first_filter(rows):
        return [[x[0],float(x[1]),float(x[2])] for x in rows
                if abs(float(x[1]))>log2fc and stat_ok(float(x[2]))]

    def by_abs_log2fc(row):
        # row[1] is already a float at this point.
        return abs(row[1])

    non_temp = first_filter(non_templated)
    mat = first_filter(matures)
    iso = first_filter(isoforms)

    # Built before any sorting, so it keeps the input order of both tables.
    mat_iso = mat+iso

    if not non_temp and not mat and not iso:
        sys.exit("There aren't entries which meet these criteria")

    non_temp.sort(key=by_abs_log2fc, reverse=True)

    # The stable sort of the concatenation yields the same ranking as
    # pre-sorting ``mat`` and ``iso`` individually, so those sorts are omitted.
    ranked = sorted(mat+iso+non_temp, key=by_abs_log2fc, reverse=True)
    names = [x[0].split("_")[0] for x in ranked]
    uni_names = unique(names)
    wanted = set(uni_names)  # O(1) membership test for the filter below

    # NOTE(review): this second filter uses a fixed threshold of 1 and always
    # ``< pval``, regardless of ``log2fc`` and ``stat`` -- kept as is; confirm
    # whether it should follow the parameters.
    def second_filter(rows):
        selected = [[x[0],float(x[1]),float(x[2])] for x in rows
                    if abs(float(x[1]))>1 and float(x[2])<pval and x[0].split("_")[0] in wanted]
        selected.sort(key=by_abs_log2fc, reverse=True)
        return selected

    diff_non_templated = second_filter(non_templated)
    diff_matures = second_filter(matures)
    diff_isoforms = second_filter(isoforms)

    return diff_matures,diff_isoforms,diff_non_templated,uni_names,non_temp,mat_iso
| 228 | |
| 229 ################################################################################################################################################################################################################################################> | |
| 230 |
