Mercurial > repos > johnheap > vapper
diff Tryp_G.py @ 3:4432e4183ebd draft
planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
author | johnheap |
---|---|
date | Wed, 11 Jul 2018 08:58:14 -0400 |
parents | 36cb22bd911d |
children | e91e41380946 |
line wrap: on
line diff
--- a/Tryp_G.py Tue Jul 10 09:22:53 2018 -0400 +++ b/Tryp_G.py Wed Jul 11 08:58:14 2018 -0400 @@ -56,11 +56,6 @@ argString = "transeq " + name + ".fa " + name + "_6frame.fas -frame=6 " #+quietString print(argString) returncode = subprocess.call(argString, shell=True) - #subprocess.call('ls -l *.fa', shell = True) - #sys.exit(1) - #if returncode != 0: - # return "Error in Transeq" - #return 'ok' def HMMerMotifSearch(name): @@ -123,60 +118,6 @@ #print("--------") return countList -""" -def HMMerMotifSearch(name): - motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b', - '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c'] - lineCounts = [] - compoundList = [] - dir_path = os.path.dirname(os.path.realpath(__file__)) - phylopath = dir_path+"/data/Motifs/Phylotype" - for m in motifs: - argString = "hmmsearch "+phylopath + m + ".hmm " + name + "_6frame.fas > Phy" + m + ".out" #+quietString - #argString = "hmmsearch "+phylopath + m + ".hmm " + dir_path+"/data/Test_6frame.fas > Phy" + m + ".out" - print(argString) - subprocess.call(argString, shell=True) - - hmmResult = open("Phy" + m + ".out", 'r') - tempout = open(dir_path+"/data/"+"Phy" + m + ".txt", 'w') - regex = r"NODE_[0-9]{1,7}_length_[0-9]{1,7}_cov_[0-9]{1,10}.[0-9]{1,7}_[0-9]{1,2}" - n = 0 - outList = [] - for line in hmmResult: - m = re.search(regex, line) - if m: - tempout.write(m.group() + "\n") - outList.append(""+m.group()+"\n") - n += 1 - if re.search(r"inclusion", line): - print("inclusion threshold reached") - break - compoundList.append(outList) - lineCounts.append(n) - hmmResult.close() - #tempout.close() - print(lineCounts) - motifGroups = [['1'], ['2a', '2b'], ['3'], ['4a', '4b', '4c'], ['5'], ['6'], ['7'], ['8a', '8b'], ['9a', '9b', - '9c'], - ['10a', '10b'], ['11a', '11b'], ['12'], ['13a', '13b', '13c', '13d'], ['14'], ['15a', '15b', '15c']] - concatGroups = [1, 2, 1, 3, 1, 1, 1, 2, 3, 2, 2, 1, 4, 1, 3] - countList = [] - countIndex = 0 - totalCount = 0 - - for c in concatGroups: - a = [] - for n in range(0, c): - a = a + compoundList.pop(0) - t = set(a) - countList.append(len(t)) - totalCount += len(t) - countList.append(totalCount) - print(countList) - print("--------") - return countList -""" - def relativeFrequencyTable(countList, name, htmlresource): @@ -223,9 +164,11 @@ congo_df = pd.read_csv(j_fname) congo_df.drop('Colour', axis=1, inplace=True) congo_df.loc[congo_df.index.max() + 1] = localFreqList + ysize = len(congo_df) * 20 / 97.0 # make vertical size equivlanet 20' is ok for 97. + congo_df.set_index('Strain', inplace=True) - cg = sns.clustermap(congo_df, method='ward', cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values) + cg = sns.clustermap(congo_df, method='ward', cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize)) plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally ax=cg.ax_heatmap title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ estimated as the phylotype proportion across the\nsample cohort. " @@ -259,8 +202,9 @@ congo_df = pd.read_csv(j_fname) congo_df.drop('Colour', axis=1, inplace=True) congo_df.loc[congo_df.index.max() + 1] = localDevList + ysize = len(congo_df) * 20 / 97.0 # make vertical size equivlanet 20' is ok for 97. congo_df.set_index('Strain', inplace=True) - cg = sns.clustermap(congo_df, method='ward',cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values) + cg = sns.clustermap(congo_df, method='ward',cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize)) plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally ax = cg.ax_heatmap title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ expressed as the deviation from the mean phylotypes " @@ -306,8 +250,8 @@ compoundList[col].append(-item[0]) compoundList[col].append(item[1]) i = i + 1 - cols = ['r', 'g', 'b', 'c', 'm', 'y', 'grey', 'k'] - + colormap = plt.cm.tab20 # nipy_spectral, Set1,Paired + cols = [colormap(i) for i in np.linspace(0, 1, 20)] fig, ax = plt.subplots(figsize=(9, 6)) #plt.figure(num=1,figsize=(12, 6)) i = 0