Mercurial > repos > johnheap > vapper
comparison Tryp_G.py @ 3:4432e4183ebd draft
planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
author | johnheap |
---|---|
date | Wed, 11 Jul 2018 08:58:14 -0400 |
parents | 36cb22bd911d |
children | e91e41380946 |
comparison
equal
deleted
inserted
replaced
2:82770f07a036 | 3:4432e4183ebd |
---|---|
54 | 54 |
55 def contigTranslation(name): | 55 def contigTranslation(name): |
56 argString = "transeq " + name + ".fa " + name + "_6frame.fas -frame=6 " #+quietString | 56 argString = "transeq " + name + ".fa " + name + "_6frame.fas -frame=6 " #+quietString |
57 print(argString) | 57 print(argString) |
58 returncode = subprocess.call(argString, shell=True) | 58 returncode = subprocess.call(argString, shell=True) |
59 #subprocess.call('ls -l *.fa', shell = True) | |
60 #sys.exit(1) | |
61 #if returncode != 0: | |
62 # return "Error in Transeq" | |
63 #return 'ok' | |
64 | 59 |
65 | 60 |
66 def HMMerMotifSearch(name): | 61 def HMMerMotifSearch(name): |
67 motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b', | 62 motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b', |
68 '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c'] | 63 '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c'] |
121 countList.append(totalCount) | 116 countList.append(totalCount) |
122 #print(countList) | 117 #print(countList) |
123 #print("--------") | 118 #print("--------") |
124 return countList | 119 return countList |
125 | 120 |
126 """ | |
127 def HMMerMotifSearch(name): | |
128 motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b', | |
129 '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c'] | |
130 lineCounts = [] | |
131 compoundList = [] | |
132 dir_path = os.path.dirname(os.path.realpath(__file__)) | |
133 phylopath = dir_path+"/data/Motifs/Phylotype" | |
134 for m in motifs: | |
135 argString = "hmmsearch "+phylopath + m + ".hmm " + name + "_6frame.fas > Phy" + m + ".out" #+quietString | |
136 #argString = "hmmsearch "+phylopath + m + ".hmm " + dir_path+"/data/Test_6frame.fas > Phy" + m + ".out" | |
137 print(argString) | |
138 subprocess.call(argString, shell=True) | |
139 | |
140 hmmResult = open("Phy" + m + ".out", 'r') | |
141 tempout = open(dir_path+"/data/"+"Phy" + m + ".txt", 'w') | |
142 regex = r"NODE_[0-9]{1,7}_length_[0-9]{1,7}_cov_[0-9]{1,10}.[0-9]{1,7}_[0-9]{1,2}" | |
143 n = 0 | |
144 outList = [] | |
145 for line in hmmResult: | |
146 m = re.search(regex, line) | |
147 if m: | |
148 tempout.write(m.group() + "\n") | |
149 outList.append(""+m.group()+"\n") | |
150 n += 1 | |
151 if re.search(r"inclusion", line): | |
152 print("inclusion threshold reached") | |
153 break | |
154 compoundList.append(outList) | |
155 lineCounts.append(n) | |
156 hmmResult.close() | |
157 #tempout.close() | |
158 print(lineCounts) | |
159 motifGroups = [['1'], ['2a', '2b'], ['3'], ['4a', '4b', '4c'], ['5'], ['6'], ['7'], ['8a', '8b'], ['9a', '9b', | |
160 '9c'], | |
161 ['10a', '10b'], ['11a', '11b'], ['12'], ['13a', '13b', '13c', '13d'], ['14'], ['15a', '15b', '15c']] | |
162 concatGroups = [1, 2, 1, 3, 1, 1, 1, 2, 3, 2, 2, 1, 4, 1, 3] | |
163 countList = [] | |
164 countIndex = 0 | |
165 totalCount = 0 | |
166 | |
167 for c in concatGroups: | |
168 a = [] | |
169 for n in range(0, c): | |
170 a = a + compoundList.pop(0) | |
171 t = set(a) | |
172 countList.append(len(t)) | |
173 totalCount += len(t) | |
174 countList.append(totalCount) | |
175 print(countList) | |
176 print("--------") | |
177 return countList | |
178 """ | |
179 | |
180 | 121 |
181 | 122 |
182 def relativeFrequencyTable(countList, name, htmlresource): | 123 def relativeFrequencyTable(countList, name, htmlresource): |
183 relFreqList = [] | 124 relFreqList = [] |
184 c = float(countList[15]) | 125 c = float(countList[15]) |
221 j_fname = dir_path+"/data/congodata.csv" | 162 j_fname = dir_path+"/data/congodata.csv" |
222 #print(dir_path) | 163 #print(dir_path) |
223 congo_df = pd.read_csv(j_fname) | 164 congo_df = pd.read_csv(j_fname) |
224 congo_df.drop('Colour', axis=1, inplace=True) | 165 congo_df.drop('Colour', axis=1, inplace=True) |
225 congo_df.loc[congo_df.index.max() + 1] = localFreqList | 166 congo_df.loc[congo_df.index.max() + 1] = localFreqList |
167 ysize = len(congo_df) * 20 / 97.0 # make vertical size equivlanet 20' is ok for 97. | |
168 | |
226 congo_df.set_index('Strain', inplace=True) | 169 congo_df.set_index('Strain', inplace=True) |
227 | 170 |
228 cg = sns.clustermap(congo_df, method='ward', cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values) | 171 cg = sns.clustermap(congo_df, method='ward', cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize)) |
229 plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally | 172 plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally |
230 ax=cg.ax_heatmap | 173 ax=cg.ax_heatmap |
231 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ estimated as the phylotype proportion across the\nsample cohort. " | 174 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ estimated as the phylotype proportion across the\nsample cohort. " |
232 title += "Dendrogram reflects the relationships amongst the VSG repertoires of each strain. " | 175 title += "Dendrogram reflects the relationships amongst the VSG repertoires of each strain. " |
233 title += "Strains\nwere isolated from multiple African countries as described in Silva Pereira et al. (2018)." | 176 title += "Strains\nwere isolated from multiple African countries as described in Silva Pereira et al. (2018)." |
257 j_fname = dir_path+ "/data/congodata_deviationfromthemean.csv" | 200 j_fname = dir_path+ "/data/congodata_deviationfromthemean.csv" |
258 #j_fname = r"data/congodata_deviationfromthemean.csv" | 201 #j_fname = r"data/congodata_deviationfromthemean.csv" |
259 congo_df = pd.read_csv(j_fname) | 202 congo_df = pd.read_csv(j_fname) |
260 congo_df.drop('Colour', axis=1, inplace=True) | 203 congo_df.drop('Colour', axis=1, inplace=True) |
261 congo_df.loc[congo_df.index.max() + 1] = localDevList | 204 congo_df.loc[congo_df.index.max() + 1] = localDevList |
205 ysize = len(congo_df) * 20 / 97.0 # make vertical size equivlanet 20' is ok for 97. | |
262 congo_df.set_index('Strain', inplace=True) | 206 congo_df.set_index('Strain', inplace=True) |
263 cg = sns.clustermap(congo_df, method='ward',cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values) | 207 cg = sns.clustermap(congo_df, method='ward',cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize)) |
264 plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally | 208 plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally |
265 ax = cg.ax_heatmap | 209 ax = cg.ax_heatmap |
266 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ expressed as the deviation from the mean phylotypes " | 210 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ expressed as the deviation from the mean phylotypes " |
267 title +="\nproportions of the sample cohort. Dendrogram reflects the relationships amongst the VSG repertoires of " | 211 title +="\nproportions of the sample cohort. Dendrogram reflects the relationships amongst the VSG repertoires of " |
268 title +="each \nstrain. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018)." | 212 title +="each \nstrain. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018)." |
304 for item in pcaResult.Y: | 248 for item in pcaResult.Y: |
305 col = myCountries.index(myColours[i]) | 249 col = myCountries.index(myColours[i]) |
306 compoundList[col].append(-item[0]) | 250 compoundList[col].append(-item[0]) |
307 compoundList[col].append(item[1]) | 251 compoundList[col].append(item[1]) |
308 i = i + 1 | 252 i = i + 1 |
309 cols = ['r', 'g', 'b', 'c', 'm', 'y', 'grey', 'k'] | 253 colormap = plt.cm.tab20 # nipy_spectral, Set1,Paired |
310 | 254 cols = [colormap(i) for i in np.linspace(0, 1, 20)] |
311 fig, ax = plt.subplots(figsize=(9, 6)) | 255 fig, ax = plt.subplots(figsize=(9, 6)) |
312 #plt.figure(num=1,figsize=(12, 6)) | 256 #plt.figure(num=1,figsize=(12, 6)) |
313 i = 0 | 257 i = 0 |
314 for d in myCountries: | 258 for d in myCountries: |
315 a = compoundList[i] | 259 a = compoundList[i] |