diff Tryp_G.py @ 3:4432e4183ebd draft

planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
author johnheap
date Wed, 11 Jul 2018 08:58:14 -0400
parents 36cb22bd911d
children e91e41380946
line wrap: on
line diff
--- a/Tryp_G.py	Tue Jul 10 09:22:53 2018 -0400
+++ b/Tryp_G.py	Wed Jul 11 08:58:14 2018 -0400
@@ -56,11 +56,6 @@
     argString = "transeq " + name + ".fa " + name + "_6frame.fas -frame=6 " #+quietString
     print(argString)
     returncode = subprocess.call(argString, shell=True)
-    #subprocess.call('ls -l *.fa', shell = True)
-    #sys.exit(1)
-    #if returncode != 0:
-    #    return "Error in Transeq"
-    #return 'ok'
 
 
 def HMMerMotifSearch(name):
@@ -123,60 +118,6 @@
     #print("--------")
     return countList
 
-"""
-def HMMerMotifSearch(name):
-    motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b',
-              '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c']
-    lineCounts = []
-    compoundList = []
-    dir_path = os.path.dirname(os.path.realpath(__file__))
-    phylopath = dir_path+"/data/Motifs/Phylotype"
-    for m in motifs:
-        argString = "hmmsearch "+phylopath + m + ".hmm " + name + "_6frame.fas > Phy" + m + ".out"  #+quietString
-        #argString = "hmmsearch "+phylopath + m + ".hmm " + dir_path+"/data/Test_6frame.fas > Phy" + m + ".out"
-        print(argString)
-        subprocess.call(argString, shell=True)
-
-        hmmResult = open("Phy" + m + ".out", 'r')
-        tempout = open(dir_path+"/data/"+"Phy" + m + ".txt", 'w')
-        regex = r"NODE_[0-9]{1,7}_length_[0-9]{1,7}_cov_[0-9]{1,10}.[0-9]{1,7}_[0-9]{1,2}"
-        n = 0
-        outList = []
-        for line in hmmResult:
-            m = re.search(regex, line)
-            if m:
-                tempout.write(m.group() + "\n")
-                outList.append(""+m.group()+"\n")
-                n += 1
-            if re.search(r"inclusion", line):
-                print("inclusion threshold reached")
-                break
-        compoundList.append(outList)
-        lineCounts.append(n)
-        hmmResult.close()
-        #tempout.close()
-    print(lineCounts)
-    motifGroups = [['1'], ['2a', '2b'], ['3'], ['4a', '4b', '4c'], ['5'], ['6'], ['7'], ['8a', '8b'], ['9a', '9b',
-                                                                                                   '9c'],
-               ['10a', '10b'], ['11a', '11b'], ['12'], ['13a', '13b', '13c', '13d'], ['14'], ['15a', '15b', '15c']]
-    concatGroups = [1, 2, 1, 3, 1, 1, 1, 2, 3, 2, 2, 1, 4, 1, 3]
-    countList = []
-    countIndex = 0
-    totalCount = 0
-
-    for c in concatGroups:
-        a = []
-        for n in range(0, c):
-            a = a + compoundList.pop(0)
-        t = set(a)
-        countList.append(len(t))
-        totalCount += len(t)
-    countList.append(totalCount)
-    print(countList)
-    print("--------")
-    return countList
-"""
-
 
 
 def relativeFrequencyTable(countList, name, htmlresource):
@@ -223,9 +164,11 @@
     congo_df = pd.read_csv(j_fname)
     congo_df.drop('Colour', axis=1, inplace=True)
     congo_df.loc[congo_df.index.max() + 1] = localFreqList
+    ysize = len(congo_df) * 20 / 97.0  # scale figure height with the row count: a height of 20 is ok for 97 rows.
+
     congo_df.set_index('Strain', inplace=True)
 
-    cg = sns.clustermap(congo_df, method='ward', cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values)
+    cg = sns.clustermap(congo_df, method='ward', cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize))
     plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, fontsize=8)  # get y labels printed horizontally
     ax=cg.ax_heatmap
     title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ estimated as the phylotype proportion across the\nsample cohort. "
@@ -259,8 +202,9 @@
     congo_df = pd.read_csv(j_fname)
     congo_df.drop('Colour', axis=1, inplace=True)
     congo_df.loc[congo_df.index.max() + 1] = localDevList
+    ysize = len(congo_df) * 20 / 97.0  # scale figure height with the row count: a height of 20 is ok for 97 rows.
     congo_df.set_index('Strain', inplace=True)
-    cg = sns.clustermap(congo_df, method='ward',cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values)
+    cg = sns.clustermap(congo_df, method='ward',cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize))
     plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8)  # get y labels printed horizontally
     ax = cg.ax_heatmap
     title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ expressed as the deviation from the mean phylotypes "
@@ -306,8 +250,8 @@
         compoundList[col].append(-item[0])
         compoundList[col].append(item[1])
         i = i + 1
-    cols = ['r', 'g', 'b', 'c', 'm', 'y', 'grey', 'k']
-
+    colormap = plt.cm.tab20  # nipy_spectral, Set1,Paired
+    cols = [colormap(i) for i in np.linspace(0, 1, 20)]
     fig, ax = plt.subplots(figsize=(9, 6))
     #plt.figure(num=1,figsize=(12, 6))
     i = 0