comparison Tryp_G.py @ 3:4432e4183ebd draft

planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
author johnheap
date Wed, 11 Jul 2018 08:58:14 -0400
parents 36cb22bd911d
children e91e41380946
comparison
equal deleted inserted replaced
2:82770f07a036 3:4432e4183ebd
54 54
55 def contigTranslation(name): 55 def contigTranslation(name):
56 argString = "transeq " + name + ".fa " + name + "_6frame.fas -frame=6 " #+quietString 56 argString = "transeq " + name + ".fa " + name + "_6frame.fas -frame=6 " #+quietString
57 print(argString) 57 print(argString)
58 returncode = subprocess.call(argString, shell=True) 58 returncode = subprocess.call(argString, shell=True)
59 #subprocess.call('ls -l *.fa', shell = True)
60 #sys.exit(1)
61 #if returncode != 0:
62 # return "Error in Transeq"
63 #return 'ok'
64 59
65 60
66 def HMMerMotifSearch(name): 61 def HMMerMotifSearch(name):
67 motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b', 62 motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b',
68 '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c'] 63 '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c']
121 countList.append(totalCount) 116 countList.append(totalCount)
122 #print(countList) 117 #print(countList)
123 #print("--------") 118 #print("--------")
124 return countList 119 return countList
125 120
126 """
127 def HMMerMotifSearch(name):
128 motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b',
129 '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c']
130 lineCounts = []
131 compoundList = []
132 dir_path = os.path.dirname(os.path.realpath(__file__))
133 phylopath = dir_path+"/data/Motifs/Phylotype"
134 for m in motifs:
135 argString = "hmmsearch "+phylopath + m + ".hmm " + name + "_6frame.fas > Phy" + m + ".out" #+quietString
136 #argString = "hmmsearch "+phylopath + m + ".hmm " + dir_path+"/data/Test_6frame.fas > Phy" + m + ".out"
137 print(argString)
138 subprocess.call(argString, shell=True)
139
140 hmmResult = open("Phy" + m + ".out", 'r')
141 tempout = open(dir_path+"/data/"+"Phy" + m + ".txt", 'w')
142 regex = r"NODE_[0-9]{1,7}_length_[0-9]{1,7}_cov_[0-9]{1,10}.[0-9]{1,7}_[0-9]{1,2}"
143 n = 0
144 outList = []
145 for line in hmmResult:
146 m = re.search(regex, line)
147 if m:
148 tempout.write(m.group() + "\n")
149 outList.append(""+m.group()+"\n")
150 n += 1
151 if re.search(r"inclusion", line):
152 print("inclusion threshold reached")
153 break
154 compoundList.append(outList)
155 lineCounts.append(n)
156 hmmResult.close()
157 #tempout.close()
158 print(lineCounts)
159 motifGroups = [['1'], ['2a', '2b'], ['3'], ['4a', '4b', '4c'], ['5'], ['6'], ['7'], ['8a', '8b'], ['9a', '9b',
160 '9c'],
161 ['10a', '10b'], ['11a', '11b'], ['12'], ['13a', '13b', '13c', '13d'], ['14'], ['15a', '15b', '15c']]
162 concatGroups = [1, 2, 1, 3, 1, 1, 1, 2, 3, 2, 2, 1, 4, 1, 3]
163 countList = []
164 countIndex = 0
165 totalCount = 0
166
167 for c in concatGroups:
168 a = []
169 for n in range(0, c):
170 a = a + compoundList.pop(0)
171 t = set(a)
172 countList.append(len(t))
173 totalCount += len(t)
174 countList.append(totalCount)
175 print(countList)
176 print("--------")
177 return countList
178 """
179
180 121
181 122
182 def relativeFrequencyTable(countList, name, htmlresource): 123 def relativeFrequencyTable(countList, name, htmlresource):
183 relFreqList = [] 124 relFreqList = []
184 c = float(countList[15]) 125 c = float(countList[15])
221 j_fname = dir_path+"/data/congodata.csv" 162 j_fname = dir_path+"/data/congodata.csv"
222 #print(dir_path) 163 #print(dir_path)
223 congo_df = pd.read_csv(j_fname) 164 congo_df = pd.read_csv(j_fname)
224 congo_df.drop('Colour', axis=1, inplace=True) 165 congo_df.drop('Colour', axis=1, inplace=True)
225 congo_df.loc[congo_df.index.max() + 1] = localFreqList 166 congo_df.loc[congo_df.index.max() + 1] = localFreqList
167 ysize = len(congo_df) * 20 / 97.0 # make vertical size equivlanet 20' is ok for 97.
168
226 congo_df.set_index('Strain', inplace=True) 169 congo_df.set_index('Strain', inplace=True)
227 170
228 cg = sns.clustermap(congo_df, method='ward', cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values) 171 cg = sns.clustermap(congo_df, method='ward', cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize))
229 plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally 172 plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally
230 ax=cg.ax_heatmap 173 ax=cg.ax_heatmap
231 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ estimated as the phylotype proportion across the\nsample cohort. " 174 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ estimated as the phylotype proportion across the\nsample cohort. "
232 title += "Dendrogram reflects the relationships amongst the VSG repertoires of each strain. " 175 title += "Dendrogram reflects the relationships amongst the VSG repertoires of each strain. "
233 title += "Strains\nwere isolated from multiple African countries as described in Silva Pereira et al. (2018)." 176 title += "Strains\nwere isolated from multiple African countries as described in Silva Pereira et al. (2018)."
257 j_fname = dir_path+ "/data/congodata_deviationfromthemean.csv" 200 j_fname = dir_path+ "/data/congodata_deviationfromthemean.csv"
258 #j_fname = r"data/congodata_deviationfromthemean.csv" 201 #j_fname = r"data/congodata_deviationfromthemean.csv"
259 congo_df = pd.read_csv(j_fname) 202 congo_df = pd.read_csv(j_fname)
260 congo_df.drop('Colour', axis=1, inplace=True) 203 congo_df.drop('Colour', axis=1, inplace=True)
261 congo_df.loc[congo_df.index.max() + 1] = localDevList 204 congo_df.loc[congo_df.index.max() + 1] = localDevList
205 ysize = len(congo_df) * 20 / 97.0 # make vertical size equivlanet 20' is ok for 97.
262 congo_df.set_index('Strain', inplace=True) 206 congo_df.set_index('Strain', inplace=True)
263 cg = sns.clustermap(congo_df, method='ward',cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values) 207 cg = sns.clustermap(congo_df, method='ward',cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize))
264 plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally 208 plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally
265 ax = cg.ax_heatmap 209 ax = cg.ax_heatmap
266 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ expressed as the deviation from the mean phylotypes " 210 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ expressed as the deviation from the mean phylotypes "
267 title +="\nproportions of the sample cohort. Dendrogram reflects the relationships amongst the VSG repertoires of " 211 title +="\nproportions of the sample cohort. Dendrogram reflects the relationships amongst the VSG repertoires of "
268 title +="each \nstrain. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018)." 212 title +="each \nstrain. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018)."
304 for item in pcaResult.Y: 248 for item in pcaResult.Y:
305 col = myCountries.index(myColours[i]) 249 col = myCountries.index(myColours[i])
306 compoundList[col].append(-item[0]) 250 compoundList[col].append(-item[0])
307 compoundList[col].append(item[1]) 251 compoundList[col].append(item[1])
308 i = i + 1 252 i = i + 1
309 cols = ['r', 'g', 'b', 'c', 'm', 'y', 'grey', 'k'] 253 colormap = plt.cm.tab20 # nipy_spectral, Set1,Paired
310 254 cols = [colormap(i) for i in np.linspace(0, 1, 20)]
311 fig, ax = plt.subplots(figsize=(9, 6)) 255 fig, ax = plt.subplots(figsize=(9, 6))
312 #plt.figure(num=1,figsize=(12, 6)) 256 #plt.figure(num=1,figsize=(12, 6))
313 i = 0 257 i = 0
314 for d in myCountries: 258 for d in myCountries:
315 a = compoundList[i] 259 a = compoundList[i]