-
-
-
-
-
--
+ #plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0 ) # get y labels printed horizontally
+ # cg.dendrogram_col.linkage # linkage matrix for columns
+ # cg.dendrogram_row.linkage # linkage matrix for rows
+ # plt.savefig(r"results/" + name + "_heatmap.png")
+ #plt.savefig(htmlresource + "/heatmap.png")
+ #if pdf == 'PDF_Yes':
+ # plt.savefig(htmlresource + "/heatmap.pdf")
+ # shutil.copyfile("heatmap.pdf",heatmapfn) #
+ #plt.legend()
+ fname = html_path+"/"+name+"_clustermap.png"
+ cg.savefig(fname)
+ if pdfExport == 'PDF_Yes':
+ fname = html_path + "/" + name + "_clustermap.pdf"
+ cg.savefig(fname)
+ #plt.show()
-
+def createHTML(name,htmlfn,htmlPath):
+ #assumes imgs are heatmap.png, dheatmap.png, vapPCA.png and already in htmlresource
+ htmlString = r"
- - - - - Watch - - 0 - - - - -
- - - - Star - - - 0 - - - - -
- - - - Fork - - - 0 - - - + htmlString += r'
Trypanosoma vivax Variant Antigen Profile
" + htmlString += name -
The Heat Map and Dendrogram
' + imgString = r"" + htmlString += imgString + print(htmlString) -
- - johnheap/VAPPER-Galaxy - - -
- -
-
-
-
+ binBlastResult_df = blastContigs(args[dict['name']], vivaxPath+r"/Database/Bin_2.fas")
+ binPresence_df = getCogsPresent(binBlastResult_df, args[dict['name']], vivaxPath+r"/Database/binlist.txt")
+ cogPresence_df = orthPresence_df.append(binPresence_df, ignore_index=True)
-
-
-
-
-
-
-
-
-
+ fname = args[dict['html_resource']]+r"/"+ args[dict['name']]+"_cogspresent.csv"
+ cogPresence_df.to_csv(fname)
+ current_df = addToCurrentData(cogPresence_df,args[dict['name']]) # load in Tvdatabase and add cogPresence column to it.
+ createClusterMap(current_df, args[dict['name']], args[dict['html_resource']], args[dict['pdfexport']])
+ createHTML(args[dict['name']],args[dict['html_file']],args[dict['html_resource']])
-
- Permalink
-
-
-
-
-
+if __name__ == "__main__":
+ sys.exit()
-
-
-
-
-
-
-
-
-
-
- Join GitHub today
-GitHub is home to over 36 million developers working together to host and review code, manage projects, and build software together.
- Sign up -
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- - Branch: - master -
- -
-
- Find file
-
-
- Copy path
-
-
-
- - VAPPER-Galaxy/Tryp_V.py -
- -
-
- Find file
-
-
- Copy path
-
-
-
- Fetching contributors…
-
-
-
-
- Cannot retrieve contributors at this time
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- - | """ | -
- | * Copyright 2018 University of Liverpool | -
- | * Author: John Heap, Computational Biology Facility, UoL | -
- | * Based on original scripts of Sara Silva Pereira, Institute of Infection and Global Health, UoL | -
- | * | -
- | * Licensed under the Apache License, Version 2.0 (the "License"); | -
- | * you may not use this file except in compliance with the License. | -
- | * You may obtain a copy of the License at | -
- | * | -
- | * http://www.apache.org/licenses/LICENSE-2.0 | -
- | * | -
- | * Unless required by applicable law or agreed to in writing, software | -
- | * distributed under the License is distributed on an "AS IS" BASIS, | -
- | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | -
- | * See the License for the specific language governing permissions and | -
- | * limitations under the License. | -
- | * | -
- | """ | -
- | - | -
- | import matplotlib as mpl | -
- | mpl.use('Agg') | -
- | - | -
- | import subprocess | -
- | import shutil | -
- | import re | -
- | import pandas as pd | -
- | import os | -
- | - | -
- | import sys | -
- | - | -
- | import matplotlib.pyplot as plt | -
- | from matplotlib.patches import Patch | -
- | import seaborn as sns | -
- | - | -
- | def assembleWithVelvet(name, kmers, inslen, covcut, fastq1name,fastq2name): | -
- | #argString = "velveth " + name + "_k65 65 -shortPaired -fastq " + name + "_R1.fastq " + name + "_R2.fastq" | -
- | argString = "velveth " + name + "_k"+ kmers+" "+ kmers + " -shortPaired -fastq " + fastq1name+" "+fastq2name | -
- | print(argString) | -
- | returncode = subprocess.call(argString, shell=True) | -
- | if returncode != 0: | -
- | return "Error in velveth" | -
- | argString = "velvetg " + name + "_k"+kmers+" -exp_cov auto -ins_length "+inslen+" -clean yes -ins_length_sd 50 -min_pair_count 20" | -
- | #argString = "velvetg " + name + "_k"+kmers+" -exp_cov auto -ins_length "+inslen+" -cov_cutoff "+covcut+" -clean yes -ins_length_sd 50 -min_pair_count 20" | -
- | #argString = "velvetg " + name + "_k65 -exp_cov auto -ins_length 400 -cov_cutoff 5 -clean yes -ins_length_sd 50 -min_pair_count 20"+quietString | -
- | print(argString) | -
- | returncode = subprocess.call(argString, shell = True) | -
- | if returncode != 0: | -
- | return "Error in velvetg" | -
- | shutil.copyfile(name + "_k"+kmers+"//contigs.fa",name + ".fa") # my $namechange = "mv ".$input."_k65/contigs.fa ".$input.".fa"; | -
- | return "ok" | -
- | - | -
- | - | -
- | def blastContigs(test_name,database): | -
- | print(test_name) | -
- | print(database) | -
- | #db_path = os.path.dirname(os.path.realpath(__file__))+database | -
- | db_path = database | -
- | argString = "blastn -db "+db_path+" -query "+test_name+".fa -outfmt 10 -out "+test_name+"_blast.txt" | -
- | print(argString) | -
- | returncode = subprocess.call(argString, shell = True) | -
- | if returncode != 0: | -
- | return "Error in blastall" | -
- | blast_df = pd.read_csv(""+test_name+"_blast.txt") | -
- | #print (blast_df) | -
- | #if ($temp[2] >= 98 & & $temp[3] > 100 & & $temp[10] < 0.001){ | -
- | #'qaccver saccver pident length mismatch gapopen qstart qend sstart send evalue bitscore' | -
- | blast_df.columns = ['qaccver', 'saccver', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue','bitscore'] | -
- | blastResult_df = blast_df[(blast_df['pident']>=98) & (blast_df['length'] > 100) & (blast_df['evalue']<0.001) ] | -
- | blastResult_df = blastResult_df[['qaccver', 'saccver', 'pident']] #query accession.version, subject accession.version, Percentage of identical matches | -
- | - | -
- | return blastResult_df | -
- | - | -
- | - | -
- | - | -
- | def getCogsPresent(blastResult_df,strain,cogOrBinList): | -
- | blastResult_df.sort_values('pident',axis = 0, ascending=False, inplace=True) | -
- | nodeList = blastResult_df['qaccver'].tolist() | -
- | cogList = blastResult_df['saccver'].tolist() | -
- | cogSet = set(cogList) #get unique values | -
- | cogList = sorted(cogSet) #sort them | -
- | - | -
- | #print (cogList) | -
- | #print (len(cogList)) | -
- | - | -
- | thereList = [] | -
- | dataList = [] | -
- | #dir_path = os.path.dirname(os.path.realpath(__file__)) | -
- | fname = cogOrBinList | -
- | cnt = 0 | -
- | with open (fname) as f: | -
- | for line in f: | -
- | dataList.append(line.rstrip('\n\r ')) | -
- | if line.rstrip('\n\r ') in cogList: | -
- | thereList.append('1') | -
- | cnt = cnt+1 | -
- | else: | -
- | thereList.append('0') | -
- | - | -
- | #print (thereList) | -
- | #print (cnt) | -
- | data = {'Cog': dataList, strain: thereList} | -
- | presence_df = pd.DataFrame(data) | -
- | #print (presence_df) | -
- | return presence_df | -
- | - | -
- | def addToCurrentData(cog_df, name): | -
- | dir_path = os.path.dirname(os.path.realpath(__file__)) | -
- | j_fname = dir_path + r"/data/vivax/TvDatabase.csv" | -
- | tv_df = pd.read_csv(j_fname) | -
- | - | -
- | cogList = cog_df[name].tolist() | -
- | #cogList.insert(0,'Test') | -
- | #print (len(tv_df)) | -
- | #print(len(cogList)) | -
- | - | -
- | #print(cogList) | -
- | tv_df.loc[:,name]=cogList | -
- | return tv_df | -
- | - | -
- | - | -
- | - | -
- | - | -
- | def createClusterMap(tv_df,name,html_path,pdfExport): | -
- | #Retrieve Data | -
- | dir_path = os.path.dirname(os.path.realpath(__file__)) | -
- | j_fname = dir_path+r"/data/vivax/geoTv.csv" | -
- | geo_df = pd.read_csv(j_fname) | -
- | geo_df.loc[len(geo_df)] =[name,name,'k'] | -
- | print(geo_df) | -
- | - | -
- | myStrains = tv_df.columns.values.tolist() #beware first entry is COG | -
- | myStrains = myStrains[1:] | -
- | print(myStrains) | -
- | myPal = [] | -
- | for s in myStrains: | -
- | col = geo_df[(geo_df['Strain'] == s)]['colour'].tolist() | -
- | myPal.append(col[0]) | -
- | print(myPal) | -
- | mycogmap = ['skyblue', 'orangered'] # blue absent,red present | -
- | tv_df.set_index('COG', inplace=True) | -
- | tv_df = tv_df[tv_df.columns].astype(float) | -
- | - | -
- | cg = sns.clustermap(tv_df, method='ward', col_colors=myPal, cmap=mycogmap, yticklabels = 1500, row_cluster=False, linewidths = 0) | -
- | #cg = sns.clustermap(tv_df, method='ward', row_cluster=False, linewidths = 0) | -
- | ax = cg.ax_heatmap | -
- | #xasix ticks and labels. | -
- | ax.xaxis.tick_top() #set ticks at top | -
- | newlabs = [] | -
- | - | -
- | labs = ax.xaxis.get_ticklabels() | -
- | for i in range(0, len(labs)): | -
- | print(labs[i]) | -
- | # labs[i].set_text(" "+labs[i].get_text()) #make enough room so label sits atop of col_color bars | -
- | newlabs.append(" " + labs[i].get_text()) | -
- | ax.xaxis.set_ticklabels(newlabs) | -
- | - | -
- | #labs = ax.xaxis.get_ticklabels() | -
- | #for i in range(0, len(labs)): | -
- | # print(labs[i]) | -
- | # labs[i].set_text(" "+labs[i].get_text()) #make enough room so label sits atop of col_color bars | -
- | # print(labs[i]) | -
- | #ax.xaxis.set_ticklabels(labs) | -
- | plt.setp(cg.ax_heatmap.xaxis.get_ticklabels(), rotation=90) # get x labels printed vertically | -
- | - | -
- | cg.cax.set_visible(False) | -
- | ax = cg.ax_heatmap | -
- | ax.set_yticklabels("") | -
- | ax.set_ylabel("") | -
- | ax = cg.ax_heatmap | -
- | # ax.set_title("Variant antigen profiles of T. vivax genomes.\nDendrogram reflects the VSG repertoire relationships of each strain inferred by the presence and absence of non-universal T. vivax VSG orthologs.", va = "top", wrap = "True") | -
- | b = len(tv_df) | -
- | print(b) | -
- | title = "Figure Legend: The Variant Antigen Profiles of $\itTrypanosoma$ $\itvivax$ " \ | -
- | "showing the \ncombination of present and absent diagnostic cluster of VSG orthologs " \ | -
- | "across the sample cohort. \nDendrogram reflects the relationships amongst the VSG" \ | -
- | " repertoires of each strain. " \ | -
- | "Strains were isolated \nfrom multiple African countries as shown in the key.\nData was produced with the " \ | -
- | "'Variant Antigen Profiler' (Silva Pereira et al., 2019)." | -
- | - | -
- | ax.text(-1.5, len(tv_df) + 8, | -
- | title, | -
- | ha="left", va="top", wrap="True") | -
- | col = cg.ax_col_dendrogram.get_position() | -
- | cg.ax_col_dendrogram.set_position([col.x0, col.y0*1.08, col.width, col.height*1.1]) | -
- | - | -
- | countryList = pd.unique(geo_df['Location']) | -
- | colourList = pd.unique(geo_df['colour']) | -
- | legend_elements = [Patch(facecolor='orangered', label='COG Present'), | -
- | Patch(facecolor='skyblue', label='COG Absent')] | -
- | - | -
- | for i in range(0, len(colourList)): | -
- | print("country = %s, colour = %s" % (countryList[i], colourList[i])) | -
- | p = Patch(facecolor=str(colourList[i]), label=countryList[i]) | -
- | legend_elements.append(p) | -
- | - | -
- | ax.legend(handles = legend_elements, bbox_to_anchor=(-0.3,1.2),loc = 'upper left') | -
- | - | -
- | - | -
- | - | -
- | - | -
- | - | -
- | - | -
- | - | -
- | #plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0 ) # get y labels printed horizontally | -
- | # cg.dendrogram_col.linkage # linkage matrix for columns | -
- | # cg.dendrogram_row.linkage # linkage matrix for rows | -
- | # plt.savefig(r"results/" + name + "_heatmap.png") | -
- | #plt.savefig(htmlresource + "/heatmap.png") | -
- | #if pdf == 'PDF_Yes': | -
- | # plt.savefig(htmlresource + "/heatmap.pdf") | -
- | # shutil.copyfile("heatmap.pdf",heatmapfn) # | -
- | #plt.legend() | -
- | fname = html_path+"/"+name+"_clustermap.png" | -
- | cg.savefig(fname) | -
- | if pdfExport == 'PDF_Yes': | -
- | fname = html_path + "/" + name + "_clustermap.pdf" | -
- | cg.savefig(fname) | -
- | #plt.show() | -
- | - | -
- | def createHTML(name,htmlfn,htmlPath): | -
- | #assumes imgs are heatmap.png, dheatmap.png, vapPCA.png and already in htmlresource | -
- | htmlString = r"<html><title>T.vivax VAP</title><body><div style='text-align:center'><h2><i>Trypanosoma vivax</i> Variant Antigen Profile</h2><h3>" | -
- | htmlString += name | -
- | - | -
- | - | -
- | htmlString += r'<p> <h3>The Heat Map and Dendrogram</h3></p>' | -
- | imgString = r"<img src = '"+name+"_clustermap.png' alt='Cog Clustermap' style='max-width:100%'><br><br>" | -
- | htmlString += imgString | -
- | print(htmlString) | -
- | - | -
- | with open(htmlfn, "w") as htmlfile: | -
- | htmlfile.write(htmlString) | -
- | - | -
- | - | -
- | def vivax_assemble(args,dict): | -
- | #argdict = {'name': 2, 'pdfexport': 3, 'kmers': 4, 'inslen': 5, 'covcut': 6, 'forward': 7, 'reverse': 8, 'html_file': 9,'html_resource': 10} | -
- | #assembleWithVelvet("V2_Test", '65', '400', '5', "data/TviBrRp.1.clean", "data/TviBrRp.2.clean") | -
- | - | -
- | vivaxPath = os.path.dirname(os.path.realpath(__file__))+r"/data/vivax" | -
- | assembleWithVelvet(args[dict['name']], args[dict['kmers']], args[dict['inslen']], args[dict['covcut']], | -
- | args[dict['forward']], args[dict['reverse']]) | -
- | blastResult_df = blastContigs(args[dict['name']], vivaxPath+r"/Database/COGs.fas") | -
- | orthPresence_df = getCogsPresent(blastResult_df, args[dict['name']], vivaxPath+r"/Database/COGlist.txt") | -
- | - | -
- | binBlastResult_df = blastContigs(args[dict['name']], vivaxPath+r"/Database/Bin_2.fas") | -
- | binPresence_df = getCogsPresent(binBlastResult_df, args[dict['name']], vivaxPath+r"/Database/binlist.txt") | -
- | cogPresence_df = orthPresence_df.append(binPresence_df, ignore_index=True) | -
- | - | -
- | fname = args[dict['html_resource']] +args[dict['name']]+"_cogspresent.csv" | -
- | cogPresence_df.to_csv(fname) | -
- | current_df = addToCurrentData(cogPresence_df,args[dict['name']]) # load in Tvdatabase and add cogPresence column to it. | -
- | createClusterMap(current_df, args[dict['name']],args[dict['html_resource']],args[dict['pdfexport']]) | -
- | createHTML(args[dict['name']],args[dict['html_file']],args[dict['html_resource']]) | -
- | - | -
- | def test_cluster(args,dict): | -
- | print ("name: %s",args[dict['name']]) | -
- | cogPresence_df = pd.read_csv("test_cogspresent.csv") | -
- | print(cogPresence_df) | -
- | current_df = addToCurrentData(cogPresence_df,args[dict['name']]) # load in Tvdatabase and add cogPresence column to it. | -
- | createClusterMap(current_df, args[dict['name']], args[dict['html_resource']], args[dict['pdfexport']]) | -
- | - | -
- | def vivax_contigs(args,dict): | -
- | # argdict = {'name': 2, 'pdfexport': 3, 'contigs': 4, 'html_file': 5, 'html_resource': 6} | -
- | - | -
- | #test_cluster(args,dict) | -
- | #return; | -
- | - | -
- | #subprocess.call('echo $PATH',shell = True) | -
- | #sys.exit(1) | -
- | - | -
- | vivaxPath = os.path.dirname(os.path.realpath(__file__))+r"/data/vivax" | -
- | shutil.copyfile(args[dict['contigs']], args[dict['name']]+".fa") | -
- | blastResult_df = blastContigs(args[dict['name']], vivaxPath+r"/Database/COGs.fas") | -
- | orthPresence_df = getCogsPresent(blastResult_df, args[dict['name']], vivaxPath+r"/Database/COGlist.txt") | -
- | - | -
- | binBlastResult_df = blastContigs(args[dict['name']], vivaxPath+r"/Database/Bin_2.fas") | -
- | binPresence_df = getCogsPresent(binBlastResult_df, args[dict['name']], vivaxPath+r"/Database/binlist.txt") | -
- | cogPresence_df = orthPresence_df.append(binPresence_df, ignore_index=True) | -
- | - | -
- | fname = args[dict['html_resource']]+r"/"+ args[dict['name']]+"_cogspresent.csv" | -
- | cogPresence_df.to_csv(fname) | -
- | current_df = addToCurrentData(cogPresence_df,args[dict['name']]) # load in Tvdatabase and add cogPresence column to it. | -
- | createClusterMap(current_df, args[dict['name']], args[dict['html_resource']], args[dict['pdfexport']]) | -
- | createHTML(args[dict['name']],args[dict['html_file']],args[dict['html_resource']]) | -
- | - | -
- | - | -
- | if __name__ == "__main__": | -
- | sys.exit() | -
- | - | -
- | - | -
- | - | -
- | - | -
-
-
-
-
- - -
--
-
Copy lines
- Copy permalink
- - View git blame -
- Reference in new issue -
-
-
-
-
-
-
-