Repository 'pdaug_merge_dataframes'
hg clone https://toolshed.g2.bx.psu.edu/repos/jay/pdaug_merge_dataframes

Changeset 5:10c7d4807de6 (2021-01-28)
Previous changeset: 4:b5a9f541c6f8 (2021-01-12)
Next changeset: 6:757ed293de85 (2021-01-31)
Commit message:
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit ac4353ca5c0ac9ce60df9f4bf160ed08b99fbee3"
modified:
PDAUG_Basic_Plots/PDAUG_Basic_Plots.py
PDAUG_Peptide_CD_Spectral_Analysis/PDAUG_Peptide_CD_Spectral_Analysis.py
PDAUG_Sequence_Based_Peptide_Generation/PDAUG_Sequence_Based_Peptide_Generation.py
PDAUG_Sequence_Property_Based_Descriptors/PDAUG_Sequence_Property_Based_Descriptors.py
PDAUG_Sequence_Property_Based_Descriptors/test-data/Out.tsv
PDAUG_TSVtoFASTA/test-data/1.fasta
PDAUG_TSVtoFASTA/test-data/2.fasta
PDAUG_TSVtoFASTA/test-data/out.fasta
PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py
removed:
PDAUG_Basic_Plots/test-data/test5/report_dir/Out.png
PDAUG_Basic_Plots/test-data/test5/report_dir/report.html
b
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Basic_Plots/PDAUG_Basic_Plots.py
--- a/PDAUG_Basic_Plots/PDAUG_Basic_Plots.py Tue Jan 12 18:50:55 2021 +0000
+++ b/PDAUG_Basic_Plots/PDAUG_Basic_Plots.py Thu Jan 28 03:48:52 2021 +0000
[
@@ -182,4 +182,4 @@
         WordCloudPlot(args.InFile, args.FragSize, args.Workdirpath,  args.htmlOutDir, args.htmlFname)   
 
     else:
-        print("In Correct Option:")
+        print("Incorrect option provided: %s" % (sys.argv[1]))
b
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Basic_Plots/test-data/test5/report_dir/Out.png
b
Binary file PDAUG_Basic_Plots/test-data/test5/report_dir/Out.png has changed
b
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Basic_Plots/test-data/test5/report_dir/report.html
--- a/PDAUG_Basic_Plots/test-data/test5/report_dir/report.html Tue Jan 12 18:50:55 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,39 +0,0 @@
-
-
-    <!DOCTYPE html>
-    <html lang="en">
-    <head>
-      <title>Bootstrap Example</title>
-      <meta charset="utf-8">
-      <meta name="viewport" content="width=device-width, initial-scale=1">
-      <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.0/css/bootstrap.min.css">
-      <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.0/jquery.min.js"></script>
-      <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.0/js/bootstrap.min.js"></script>
-    <body>
-    <style>
-    div.container_1 {
-      width:600px;
-      margin: auto;
-     padding-right: 10; 
-    }
-    div.table {
-      width:600px;
-      margin: auto;
-     padding-right: 10; 
-    }
-    </style>
-    </head>
-    <div class="jumbotron text-center">
-      <h1> Word Cloud </h1>
-    </div>
-    <div class="container">
-      <div class="row">
-        <div class="col-sm-4">
-          <img src="Out.png" alt="Smiley face" height="1000" width="800">
-        </div>
-
-      </div>
-    </div>
-    </body>
-    </html>
-    
\ No newline at end of file
b
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Peptide_CD_Spectral_Analysis/PDAUG_Peptide_CD_Spectral_Analysis.py
--- a/PDAUG_Peptide_CD_Spectral_Analysis/PDAUG_Peptide_CD_Spectral_Analysis.py Tue Jan 12 18:50:55 2021 +0000
+++ b/PDAUG_Peptide_CD_Spectral_Analysis/PDAUG_Peptide_CD_Spectral_Analysis.py Thu Jan 28 03:48:52 2021 +0000
[
@@ -71,8 +71,6 @@
 
 if sys.argv[1] == "PlotData":
 
-    os.system('ls temp')
-
     if args.Type == "mean residue ellipticity":
 
         cd = CD(args.DirPath, wmin=int(args.WMin), wmax=int(args.Wmax), amide=args.Amide, pathlen=float(args.Pathlen))
@@ -119,7 +117,3 @@
     cd = CD(args.DirPath, wmin=int(args.WMin), wmax=int(args.Wmax), amide=args.Amide, pathlen=float(args.Pathlen))    
     cd.calc_meanres_ellipticity()
     cd.helicity(temperature=float(args.temperature), k=float(args.k), induction=args.Induction, filename=args.OutPut )
-
-
-
-
b
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Sequence_Based_Peptide_Generation/PDAUG_Sequence_Based_Peptide_Generation.py
--- a/PDAUG_Sequence_Based_Peptide_Generation/PDAUG_Sequence_Based_Peptide_Generation.py Tue Jan 12 18:50:55 2021 +0000
+++ b/PDAUG_Sequence_Based_Peptide_Generation/PDAUG_Sequence_Based_Peptide_Generation.py Thu Jan 28 03:48:52 2021 +0000
[
@@ -150,5 +150,5 @@
         SlidingWindowPeptide(args.InFile, args.winSize, args.FragSize, args.outputFile)
 
     else:
-        print("In Correct Option:")
+        print("Incorrect option provided: %s" % (sys.argv[1]))
 
b
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Sequence_Property_Based_Descriptors/PDAUG_Sequence_Property_Based_Descriptors.py
--- a/PDAUG_Sequence_Property_Based_Descriptors/PDAUG_Sequence_Property_Based_Descriptors.py Tue Jan 12 18:50:55 2021 +0000
+++ b/PDAUG_Sequence_Property_Based_Descriptors/PDAUG_Sequence_Property_Based_Descriptors.py Thu Jan 28 03:48:52 2021 +0000
[
@@ -43,10 +43,10 @@
 
     return df
 
-def Decriptor_generator(InFile, Lamda, Weight, DesType, Out_file):
+def Decriptor_generator(infile, lamda, weight, maxlag, destype, out_file):
 
     list_pep_name = []
-    f = open(InFile)
+    f = open(infile)
     lines = f.readlines()
     
     for line in lines:
@@ -62,62 +62,71 @@
         protein = PyPro()
         protein.ReadProteinSequence(seq)
 
-        if DesType == 'PAAC':
-            DS = protein.GetPAAC(lamda=int(Lamda), weight=float(Weight))

+        if destype == "GetAAComp":
+            DS = protein.GetAAComp()
             df  = pd.DataFrame(DS, index=[0])
-        elif DesType == 'APAAC':
-            DS = protein.GetAPAAC(lamda=int(Lamda), weight=float(Weight))
-            df  = pd.DataFrame(DS, index=[0])
-        elif DesType == 'CTD':
-            DS = protein.GetCTD()
-            df  = pd.DataFrame(DS, index=[0])
-        elif DesType == 'DPComp':
+        elif destype == "GetDPComp":
             DS = protein.GetDPComp()
             df  = pd.DataFrame(DS, index=[0])
-        elif DesType == 'GearyAuto':
-            DS = protein.GetGearyAuto()
+        elif destype == "GetTPComp":
+            DS = protein.GetTPComp()
             df  = pd.DataFrame(DS, index=[0])
-        elif DesType == 'MoranAuto':
-            DS = protein.GetMoranAuto()
-            df  = pd.DataFrame(DS, index=[0])
-        elif DesType == 'MoreauBrotoAuto':
+        elif destype == "GetMoreauBrotoAuto":
             DS = protein.GetMoreauBrotoAuto()
             df  = pd.DataFrame(DS, index=[0])
-        elif DesType == 'QSO':
-            DS = protein.GetQSO()
+        elif destype =="GetMoranAuto":
+            DS = protein.GetMoranAuto()
+            df  = pd.DataFrame(DS, index=[0])
+        elif destype =="GetGearyAuto":
+            DS = protein.GetGearyAuto()
+            df  = pd.DataFrame(DS, index=[0])
+        elif destype == "GetCTD":
+            DS = protein.GetCTD()
             df  = pd.DataFrame(DS, index=[0])
-        elif DesType == 'SOCN':
-            DS = protein.GetSOCN()
+        elif destype == "GetPAAC":
+            DS = protein.GetPAAC(lamda=int(lamda),  weight=float(weight))
             df  = pd.DataFrame(DS, index=[0])
-        elif DesType == 'TPComp':
-            DS = protein.GetTPComp()
+        elif destype == "GetAPAAC":
+            DS = protein.GetAPAAC(lamda=int(lamda), weight=float(weight))
+            df  = pd.DataFrame(DS, index=[0])
+        elif destype =="GetSOCN":
+            DS = protein.GetSOCN(maxlag=int(maxlag))
             df  = pd.DataFrame(DS, index=[0])
-        elif DesType == 'All':
-            DS_1 = protein.GetPAAC(lamda=int(Lamda), weight=float(Weight))
-            DS_2 = protein.GetAPAAC(lamda=int(Lamda), weight=float(Weight))
-            DS_3 = protein.GetCTD()
-            DS_4 = protein.GetDPComp()
-            DS_5 = protein.GetGearyAuto()
-            DS_6 = protein.GetMoranAuto()
-            DS_7 = protein.GetMoreauBrotoAuto()
-            DS_8 = protein.GetQSO()
-            DS_9 = protein.GetSOCN()
-            DS_10 = protein.GetTPComp()
+        elif destype =="GetQSO":
+            DS = protein.GetQSO(maxlag=int(maxlag),  weight=float(weight))
+            df  = pd.DataFrame(DS, index=[0])
+        elif destype == "GetTriad":
+            DS = protein.GetTriad()
+            df  = pd.DataFrame(DS, index=[0])
+        elif destype == "All":
+            DS1 = protein.GetAAComp()
+            DS2 = protein.GetDPComp()
+            DS3 = protein.GetTPComp()
+            DS4 = protein.GetMoreauBrotoAuto()
+            DS5 = protein.GetMoranAuto()
+            DS6 = protein.GetGearyAuto()
+            DS7 = protein.GetCTD()
+            DS8 = protein.GetPAAC(lamda=int(lamda),  weight=float(weight))
+            DS9 = protein.GetAPAAC(lamda=int(lamda), weight=float(weight))
+            DS10 = protein.GetSOCN(maxlag=int(maxlag))
+            DS11 = protein.GetQSO(maxlag=int(maxlag),  weight=float(weight))
+            DS12 = protein.GetTriad()
 
             DS = {}
 
-            for D in (DS_1, DS_2, DS_3, DS_4, DS_5, DS_6, DS_7, DS_8, DS_9, DS_10):
+            for D in (DS1,DS2,DS3,DS4,DS5,DS6,DS7,DS8,DS9,DS10,DS11,DS12):
+                print(D)
                 DS.update(D)
-
             df  = pd.DataFrame(DS, index=[0])
 
-        if DesType == 'BinaryDescriptor':
+        if destype == 'BinaryDescriptor':
             out_df = BinaryDescriptor(list_pep_name)
         else:
             out_df = pd.concat([out_df, df], axis=0)
 
 
-    out_df.to_csv(Out_file, index=False, sep='\t')
+    out_df.to_csv(out_file, index=False, sep='\t')
 
 
 if __name__=="__main__":
@@ -141,7 +150,12 @@
                         required=False,
                         default=0.5,
                         help="pep file")
-                        
+    
+    parser.add_argument("-m", "--MaxLag",
+                        required=False,
+                        default=10,
+                        help="pep file")  
+
     parser.add_argument("-t", "--DesType",
                         required=True,
                         default=None,
@@ -153,6 +167,6 @@
                         help="Path to target tsv file")  
                               
     args = parser.parse_args()
-    Decriptor_generator(args.InFile, args.Lamda, args.Weight, args.DesType, args.Out_file)
+    Decriptor_generator(args.InFile, args.Lamda, args.Weight, args.MaxLag, args.DesType, args.Out_file)
 
    
\ No newline at end of file
b
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Sequence_Property_Based_Descriptors/test-data/Out.tsv
--- a/PDAUG_Sequence_Property_Based_Descriptors/test-data/Out.tsv Tue Jan 12 18:50:55 2021 +0000
+++ b/PDAUG_Sequence_Property_Based_Descriptors/test-data/Out.tsv Thu Jan 28 03:48:52 2021 +0000
b
b'@@ -1,9 +1,9 @@\n-AA\tAAA\tAAC\tAAD\tAAE\tAAF\tAAG\tAAH\tAAI\tAAK\tAAL\tAAM\tAAN\tAAP\tAAQ\tAAR\tAAS\tAAT\tAAV\tAAW\tAAY\tAC\tACA\tACC\tACD\tACE\tACF\tACG\tACH\tACI\tACK\tACL\tACM\tACN\tACP\tACQ\tACR\tACS\tACT\tACV\tACW\tACY\tAD\tADA\tADC\tADD\tADE\tADF\tADG\tADH\tADI\tADK\tADL\tADM\tADN\tADP\tADQ\tADR\tADS\tADT\tADV\tADW\tADY\tAE\tAEA\tAEC\tAED\tAEE\tAEF\tAEG\tAEH\tAEI\tAEK\tAEL\tAEM\tAEN\tAEP\tAEQ\tAER\tAES\tAET\tAEV\tAEW\tAEY\tAF\tAFA\tAFC\tAFD\tAFE\tAFF\tAFG\tAFH\tAFI\tAFK\tAFL\tAFM\tAFN\tAFP\tAFQ\tAFR\tAFS\tAFT\tAFV\tAFW\tAFY\tAG\tAGA\tAGC\tAGD\tAGE\tAGF\tAGG\tAGH\tAGI\tAGK\tAGL\tAGM\tAGN\tAGP\tAGQ\tAGR\tAGS\tAGT\tAGV\tAGW\tAGY\tAH\tAHA\tAHC\tAHD\tAHE\tAHF\tAHG\tAHH\tAHI\tAHK\tAHL\tAHM\tAHN\tAHP\tAHQ\tAHR\tAHS\tAHT\tAHV\tAHW\tAHY\tAI\tAIA\tAIC\tAID\tAIE\tAIF\tAIG\tAIH\tAII\tAIK\tAIL\tAIM\tAIN\tAIP\tAIQ\tAIR\tAIS\tAIT\tAIV\tAIW\tAIY\tAK\tAKA\tAKC\tAKD\tAKE\tAKF\tAKG\tAKH\tAKI\tAKK\tAKL\tAKM\tAKN\tAKP\tAKQ\tAKR\tAKS\tAKT\tAKV\tAKW\tAKY\tAL\tALA\tALC\tALD\tALE\tALF\tALG\tALH\tALI\tALK\tALL\tALM\tALN\tALP\tALQ\tALR\tALS\tALT\tALV\tALW\tALY\tAM\tAMA\tAMC\tAMD\tAME\tAMF\tAMG\tAMH\tAMI\tAMK\tAML\tAMM\tAMN\tAMP\tAMQ\tAMR\tAMS\tAMT\tAMV\tAMW\tAMY\tAN\tANA\tANC\tAND\tANE\tANF\tANG\tANH\tANI\tANK\tANL\tANM\tANN\tANP\tANQ\tANR\tANS\tANT\tANV\tANW\tANY\tAP\tAPA\tAPAAC1\tAPAAC10\tAPAAC11\tAPAAC12\tAPAAC13\tAPAAC14\tAPAAC15\tAPAAC16\tAPAAC17\tAPAAC18\tAPAAC19\tAPAAC2\tAPAAC20\tAPAAC3\tAPAAC4\tAPAAC5\tAPAAC6\tAPAAC7\tAPAAC8\tAPAAC9\tAPC\tAPD\tAPE\tAPF\tAPG\tAPH\tAPI\tAPK\tAPL\tAPM\tAPN\tAPP\tAPQ\tAPR\tAPS\tAPT\tAPV\tAPW\tAPY\tAQ\tAQA\tAQC\tAQD\tAQE\tAQF\tAQG\tAQH\tAQI\tAQK\tAQL\tAQM\tAQN\tAQP\tAQQ\tAQR\tAQS\tAQT\tAQV\tAQW\tAQY\tAR\tARA\tARC\tARD\tARE\tARF\tARG\tARH\tARI\tARK\tARL\tARM\tARN\tARP\tARQ\tARR\tARS\tART\tARV\tARW\tARY\tAS\tASA\tASC\tASD\tASE\tASF\tASG\tASH\tASI\tASK\tASL\tASM\tASN\tASP\tASQ\tASR\tASS\tAST\tASV\tASW\tASY\tAT\tATA\tATC\tATD\tATE\tATF\tATG\tATH\tATI\tATK\tATL\tATM\tATN\tATP\tATQ\tATR\tATS\tATT\tATV\tATW\tATY\tAV\tAVA\tAVC\tAVD\tAVE\tAVF\tAVG\tAVH\tAV
I\tAVK\tAVL\tAVM\tAVN\tAVP\tAVQ\tAVR\tAVS\tAVT\tAVV\tAVW\tAVY\tAW\tAWA\tAWC\tAWD\tAWE\tAWF\tAWG\tAWH\tAWI\tAWK\tAWL\tAWM\tAWN\tAWP\tAWQ\tAWR\tAWS\tAWT\tAWV\tAWW\tAWY\tAY\tAYA\tAYC\tAYD\tAYE\tAYF\tAYG\tAYH\tAYI\tAYK\tAYL\tAYM\tAYN\tAYP\tAYQ\tAYR\tAYS\tAYT\tAYV\tAYW\tAYY\tCA\tCAA\tCAC\tCAD\tCAE\tCAF\tCAG\tCAH\tCAI\tCAK\tCAL\tCAM\tCAN\tCAP\tCAQ\tCAR\tCAS\tCAT\tCAV\tCAW\tCAY\tCC\tCCA\tCCC\tCCD\tCCE\tCCF\tCCG\tCCH\tCCI\tCCK\tCCL\tCCM\tCCN\tCCP\tCCQ\tCCR\tCCS\tCCT\tCCV\tCCW\tCCY\tCD\tCDA\tCDC\tCDD\tCDE\tCDF\tCDG\tCDH\tCDI\tCDK\tCDL\tCDM\tCDN\tCDP\tCDQ\tCDR\tCDS\tCDT\tCDV\tCDW\tCDY\tCE\tCEA\tCEC\tCED\tCEE\tCEF\tCEG\tCEH\tCEI\tCEK\tCEL\tCEM\tCEN\tCEP\tCEQ\tCER\tCES\tCET\tCEV\tCEW\tCEY\tCF\tCFA\tCFC\tCFD\tCFE\tCFF\tCFG\tCFH\tCFI\tCFK\tCFL\tCFM\tCFN\tCFP\tCFQ\tCFR\tCFS\tCFT\tCFV\tCFW\tCFY\tCG\tCGA\tCGC\tCGD\tCGE\tCGF\tCGG\tCGH\tCGI\tCGK\tCGL\tCGM\tCGN\tCGP\tCGQ\tCGR\tCGS\tCGT\tCGV\tCGW\tCGY\tCH\tCHA\tCHC\tCHD\tCHE\tCHF\tCHG\tCHH\tCHI\tCHK\tCHL\tCHM\tCHN\tCHP\tCHQ\tCHR\tCHS\tCHT\tCHV\tCHW\tCHY\tCI\tCIA\tCIC\tCID\tCIE\tCIF\tCIG\tCIH\tCII\tCIK\tCIL\tCIM\tCIN\tCIP\tCIQ\tCIR\tCIS\tCIT\tCIV\tCIW\tCIY\tCK\tCKA\tCKC\tCKD\tCKE\tCKF\tCKG\tCKH\tCKI\tCKK\tCKL\tCKM\tCKN\tCKP\tCKQ\tCKR\tCKS\tCKT\tCKV\tCKW\tCKY\tCL\tCLA\tCLC\tCLD\tCLE\tCLF\tCLG\tCLH\tCLI\tCLK\tCLL\tCLM\tCLN\tCLP\tCLQ\tCLR\tCLS\tCLT\tCLV\tCLW\tCLY\tCM\tCMA\tCMC\tCMD\tCME\tCMF\tCMG\tCMH\tCMI\tCMK\tCML\tCMM\tCMN\tCMP\tCMQ\tCMR\tCMS\tCMT\tCMV\tCMW\tCMY\tCN\tCNA\tCNC\tCND\tCNE\tCNF\tCNG\tCNH\tCNI\tCNK\tCNL\tCNM\tCNN\tCNP\tCNQ\tCNR\tCNS\tCNT\tCNV\tCNW\tCNY\tCP\tCPA\tCPC\tCPD\tCPE\tCPF\tCPG\tCPH\tCPI\tCPK\tCPL\tCPM\tCPN\tCPP\tCPQ\tCPR\tCPS\tCPT\tCPV\tCPW\tCPY\tCQ\tCQA\tCQC\tCQD\tCQE\tCQF\tCQG\tCQH\tCQI\tCQK\tCQL\tCQM\tCQN\tCQP\tCQQ\tCQR\tCQS\tCQT\tCQV\tCQW\tCQY\tCR\tCRA\tCRC\tCRD\tCRE\tCRF\tCRG\tCRH\tCRI\tCRK\tCRL\tCRM\tCRN\tCRP\tCRQ\tCRR\tCRS\tCRT\tCRV\tCRW\tCRY\tCS\tCSA\tCSC\tCSD\tCSE\tCSF\tCSG\tCSH\tCSI\tCSK\tCSL\tCSM\tCSN\tCSP\tCSQ\tCSR\tCSS\tCST\tCSV\tCSW\tCSY\tCT\tCTA\tCTC\tCTD\tCTE\tCTF\tCTG\tCTH\tCTI\tCTK\tCTL\tCTM\tCTN\tC
TP\tCTQ\tCTR\tCTS\tCTT\tCTV\tCTW\tCTY\tCV\tCVA\tCVC\tCVD\tCVE\tCVF\tCVG\tCVH\tCVI\tCVK\tCVL\tCVM\tCVN\tCVP\tCVQ\tCVR\tCVS\tCVT\tCVV\tCVW\tCVY\tCW\tCWA\tCWC\tCWD\tCWE\tCWF\tCWG\tCWH\tCWI\tCWK\tCWL\tCWM\tCWN\tCWP\tCWQ\tCWR\tCWS\tCWT\tCWV\tCWW\tCWY\tCY\tCYA\tCYC\tCYD\tCYE\tCYF\tCYG\tCYH\tCYI\tCYK\tCYL\tCYM\tCYN\tCYP\tCYQ\tCYR\tCYS\tCYT\tCYV\tCYW\tCYY\tDA\tDAA\tDAC\tDAD\tDAE\tDAF\tDAG\tDAH\tDAI\tDAK\tDAL\tDAM\tDAN\tDAP\tDAQ\tDAR\tDAS\tDAT\tDAV\tDAW\tDAY\tDC\tDCA\tDCC\tDCD\tDCE\tDCF\tDCG\tDCH\tDCI\tDCK\tDCL\tDCM\tDCN\tDCP\tDCQ\tDCR\tDCS\tDCT\tDCV\tDCW\tDCY\tDD\tDDA\tDDC\tDDD\tDDE\tDDF\tDDG\tDDH\tDDI\tDDK\tDDL\tDDM\tDDN\tDDP\tDDQ\tDDR\tDDS\tDDT\tDDV\tDDW\tDDY\tDE\tDEA\tDEC\tDED\tDEE\tDEF\tDEG\tDEH\tDEI\tDEK\tDEL\tDEM\tDEN\tDEP\tDEQ\tDER\tDES\tDET\tDEV\tDEW\tDEY\tDF\tDFA\tDFC\tDFD\tDFE\tDFF\tDFG\tDFH\tDFI\tDFK\tDFL\tDFM\tDFN\tDFP\tDFQ\tDFR\tDFS\tDFT\tDFV\tDFW\tDFY\tDG\tDGA\tDGC\tDGD\tDGE\tDGF\tDGG\tDGH\tDGI\tDGK\tDGL\tDGM\tDGN\tDGP\tDGQ\tDGR\tDGS\tDGT\tDGV\tDGW\tDGY\tDH\tDHA\tDHC\tDH'..b'\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t11.765\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t6.25\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t
0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t6.25\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t
0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.118\t0.824\t0.059\t41.176\t47.059\t41.176\t41.176\t47.059\t5.882\t17.647\t58.824\t76.471\t100.0\t23.529\t23.529\t23.529\t23.529\t23.529\t0.125\t0.0\t0.125\t0.176\t0.353\t0.471\t23.529\t47.059\t23.529\t41.176\t47.059\t5.882\t5.882\t64.706\t70.588\t94.118\t11.765\t17.647\t35.294\t76.471\t100.0\t0.0\t0.25\t0.313\t0.353\t0.412\t0.235\t5.882\t5.882\t58.824\t64.706\t94.118\t11.765\t11.765\t35.294\t76.471\t100.0\t17.647\t17.647\t41.176\t47.059\t70.588\t0.313\t0.125\t0.25\t0.471\t0.0\t0.176\t11.765\t17.647\t35.294\t76.471\t100.0\t0.0\t0.0\t0.0\t0.0\t0.0\t41.176\t70.588\t41.176\t47.059\t70.588\t0.0\t0.188\t0.0\t0.353\t0.412\t0.235\t5.882\t5.882\t58.824
\t64.706\t94.118\t11.765\t11.765\t35.294\t76.471\t100.0\t17.647\t17.647\t41.176\t47.059\t70.588\t0.313\t0.125\t0.25\t0.294\t0.412\t0.294\t11.765\t11.765\t41.176\t47.059\t70.588\t17.647\t17.647\t35.294\t76.471\t100.0\t5.882\t5.882\t23.529\t64.706\t94.118\t0.313\t0.188\t0.25\t0.647\t0.176\t0.176\t5.882\t11.765\t35.294\t64.706\t100.0\t23.529\t47.059\t23.529\t41.176\t47.059\t70.588\t94.118\t70.588\t88.235\t94.118\t0.25\t0.25\t0.0\t5.523\t2.186\t7.66\t4.911\t3.171\t4.7\t5.058\t3.807\t2.541\t3.165\t5.523\t2.186\t7.66\t4.911\t3.171\t4.7\t5.058\t3.807\t2.541\t3.165\n'
b
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_TSVtoFASTA/test-data/1.fasta
--- a/PDAUG_TSVtoFASTA/test-data/1.fasta Tue Jan 12 18:50:55 2021 +0000
+++ b/PDAUG_TSVtoFASTA/test-data/1.fasta Thu Jan 28 03:48:52 2021 +0000
b
@@ -19,4 +19,4 @@
 >21_AMP
 KLLLLKLLK
 >22_AMP
-GLFDIVKKVVGALG
\ No newline at end of file
+GLFDIVKKVVGALG
b
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_TSVtoFASTA/test-data/2.fasta
--- a/PDAUG_TSVtoFASTA/test-data/2.fasta Tue Jan 12 18:50:55 2021 +0000
+++ b/PDAUG_TSVtoFASTA/test-data/2.fasta Thu Jan 28 03:48:52 2021 +0000
b
@@ -21,4 +21,4 @@
 >10_TM
 KLLLLKLLK
 >11_TM
-GLFDIVKKVVGALG
\ No newline at end of file
+GLFDIVKKVVGALG
b
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_TSVtoFASTA/test-data/out.fasta
--- a/PDAUG_TSVtoFASTA/test-data/out.fasta Tue Jan 12 18:50:55 2021 +0000
+++ b/PDAUG_TSVtoFASTA/test-data/out.fasta Thu Jan 28 03:48:52 2021 +0000
b
@@ -43,4 +43,4 @@
 >21
 KLLLLKLLK
 >22
-GLFDIVKKVVGALG
\ No newline at end of file
+GLFDIVKKVVGALG
b
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py
--- a/PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py Tue Jan 12 18:50:55 2021 +0000
+++ b/PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py Thu Jan 28 03:48:52 2021 +0000
b
@@ -11,6 +11,7 @@
 parser.add_argument("-M", "--min_count", required=False, default=0, help="Path to target tsv file")
 parser.add_argument("-W", "--window", required=False, default=5, help="Path to target tsv file")
 parser.add_argument("-O", "--OutFile", required=False, default='model.txt', help="Path to target tsv file")
+parser.add_argument("-S", "--SG", required=False, default='skip-gram', help="Training algorithm: 1 for skip-gram; otherwise CBOW")
 
 args = parser.parse_args()
 
@@ -30,9 +31,14 @@
 #min_count = 0
 size = 200
 #window = 5
-sg = 1
+
+print (args.SG)
+if args.SG == 'skip-gram':
+    SG = 1
+elif args.SG == 'CBOW':
+    SG = 0
 
 sentences = ProteinSeq() 
-model = gensim.models.Word2Vec(sentences, min_count=int(args.min_count), size=size, window=int(args.window), sg = sg, workers = 10)
+model = gensim.models.Word2Vec(sentences, min_count=int(args.min_count), size=size, window=int(args.window), sg = SG, workers = 10)
 model.wv.save_word2vec_format(args.OutFile, binary=False)