Previous changeset 4:b5a9f541c6f8 (2021-01-12) Next changeset 6:757ed293de85 (2021-01-31) |
Commit message:
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit ac4353ca5c0ac9ce60df9f4bf160ed08b99fbee3" |
modified:
PDAUG_Basic_Plots/PDAUG_Basic_Plots.py PDAUG_Peptide_CD_Spectral_Analysis/PDAUG_Peptide_CD_Spectral_Analysis.py PDAUG_Sequence_Based_Peptide_Generation/PDAUG_Sequence_Based_Peptide_Generation.py PDAUG_Sequence_Property_Based_Descriptors/PDAUG_Sequence_Property_Based_Descriptors.py PDAUG_Sequence_Property_Based_Descriptors/test-data/Out.tsv PDAUG_TSVtoFASTA/test-data/1.fasta PDAUG_TSVtoFASTA/test-data/2.fasta PDAUG_TSVtoFASTA/test-data/out.fasta PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py |
removed:
PDAUG_Basic_Plots/test-data/test5/report_dir/Out.png PDAUG_Basic_Plots/test-data/test5/report_dir/report.html |
b |
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Basic_Plots/PDAUG_Basic_Plots.py --- a/PDAUG_Basic_Plots/PDAUG_Basic_Plots.py Tue Jan 12 18:50:55 2021 +0000 +++ b/PDAUG_Basic_Plots/PDAUG_Basic_Plots.py Thu Jan 28 03:48:52 2021 +0000 |
[ |
@@ -182,4 +182,4 @@ WordCloudPlot(args.InFile, args.FragSize, args.Workdirpath, args.htmlOutDir, args.htmlFname) else: - print("In Correct Option:") + print("Incorrect option provided: %s" % (sys.argv[1])) |
b |
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Basic_Plots/test-data/test5/report_dir/Out.png |
b |
Binary file PDAUG_Basic_Plots/test-data/test5/report_dir/Out.png has changed |
b |
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Basic_Plots/test-data/test5/report_dir/report.html --- a/PDAUG_Basic_Plots/test-data/test5/report_dir/report.html Tue Jan 12 18:50:55 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,39 +0,0 @@ - - - <!DOCTYPE html> - <html lang="en"> - <head> - <title>Bootstrap Example</title> - <meta charset="utf-8"> - <meta name="viewport" content="width=device-width, initial-scale=1"> - <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.0/css/bootstrap.min.css"> - <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.0/jquery.min.js"></script> - <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.0/js/bootstrap.min.js"></script> - <body> - <style> - div.container_1 { - width:600px; - margin: auto; - padding-right: 10; - } - div.table { - width:600px; - margin: auto; - padding-right: 10; - } - </style> - </head> - <div class="jumbotron text-center"> - <h1> Word Cloud </h1> - </div> - <div class="container"> - <div class="row"> - <div class="col-sm-4"> - <img src="Out.png" alt="Smiley face" height="1000" width="800"> - </div> - - </div> - </div> - </body> - </html> - \ No newline at end of file |
b |
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Peptide_CD_Spectral_Analysis/PDAUG_Peptide_CD_Spectral_Analysis.py --- a/PDAUG_Peptide_CD_Spectral_Analysis/PDAUG_Peptide_CD_Spectral_Analysis.py Tue Jan 12 18:50:55 2021 +0000 +++ b/PDAUG_Peptide_CD_Spectral_Analysis/PDAUG_Peptide_CD_Spectral_Analysis.py Thu Jan 28 03:48:52 2021 +0000 |
[ |
@@ -71,8 +71,6 @@ if sys.argv[1] == "PlotData": - os.system('ls temp') - if args.Type == "mean residue ellipticity": cd = CD(args.DirPath, wmin=int(args.WMin), wmax=int(args.Wmax), amide=args.Amide, pathlen=float(args.Pathlen)) @@ -119,7 +117,3 @@ cd = CD(args.DirPath, wmin=int(args.WMin), wmax=int(args.Wmax), amide=args.Amide, pathlen=float(args.Pathlen)) cd.calc_meanres_ellipticity() cd.helicity(temperature=float(args.temperature), k=float(args.k), induction=args.Induction, filename=args.OutPut ) - - - - |
b |
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Sequence_Based_Peptide_Generation/PDAUG_Sequence_Based_Peptide_Generation.py --- a/PDAUG_Sequence_Based_Peptide_Generation/PDAUG_Sequence_Based_Peptide_Generation.py Tue Jan 12 18:50:55 2021 +0000 +++ b/PDAUG_Sequence_Based_Peptide_Generation/PDAUG_Sequence_Based_Peptide_Generation.py Thu Jan 28 03:48:52 2021 +0000 |
[ |
@@ -150,5 +150,5 @@ SlidingWindowPeptide(args.InFile, args.winSize, args.FragSize, args.outputFile) else: - print("In Correct Option:") + print("Incorrect option provided: %s" % (sys.argv[1])) |
b |
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Sequence_Property_Based_Descriptors/PDAUG_Sequence_Property_Based_Descriptors.py --- a/PDAUG_Sequence_Property_Based_Descriptors/PDAUG_Sequence_Property_Based_Descriptors.py Tue Jan 12 18:50:55 2021 +0000 +++ b/PDAUG_Sequence_Property_Based_Descriptors/PDAUG_Sequence_Property_Based_Descriptors.py Thu Jan 28 03:48:52 2021 +0000 |
[ |
@@ -43,10 +43,10 @@ return df -def Decriptor_generator(InFile, Lamda, Weight, DesType, Out_file): +def Decriptor_generator(infile, lamda, weight, maxlag, destype, out_file): list_pep_name = [] - f = open(InFile) + f = open(infile) lines = f.readlines() for line in lines: @@ -62,62 +62,71 @@ protein = PyPro() protein.ReadProteinSequence(seq) - if DesType == 'PAAC': - DS = protein.GetPAAC(lamda=int(Lamda), weight=float(Weight)) + + if destype == "GetAAComp": + DS = protein.GetAAComp() df = pd.DataFrame(DS, index=[0]) - elif DesType == 'APAAC': - DS = protein.GetAPAAC(lamda=int(Lamda), weight=float(Weight)) - df = pd.DataFrame(DS, index=[0]) - elif DesType == 'CTD': - DS = protein.GetCTD() - df = pd.DataFrame(DS, index=[0]) - elif DesType == 'DPComp': + elif destype == "GetDPComp": DS = protein.GetDPComp() df = pd.DataFrame(DS, index=[0]) - elif DesType == 'GearyAuto': - DS = protein.GetGearyAuto() + elif destype == "GetTPComp": + DS = protein.GetTPComp() df = pd.DataFrame(DS, index=[0]) - elif DesType == 'MoranAuto': - DS = protein.GetMoranAuto() - df = pd.DataFrame(DS, index=[0]) - elif DesType == 'MoreauBrotoAuto': + elif destype == "GetMoreauBrotoAuto": DS = protein.GetMoreauBrotoAuto() df = pd.DataFrame(DS, index=[0]) - elif DesType == 'QSO': - DS = protein.GetQSO() + elif destype =="GetMoranAuto": + DS = protein.GetMoranAuto() + df = pd.DataFrame(DS, index=[0]) + elif destype =="GetGearyAuto": + DS = protein.GetGearyAuto() + df = pd.DataFrame(DS, index=[0]) + elif destype == "GetCTD": + DS = protein.GetCTD() df = pd.DataFrame(DS, index=[0]) - elif DesType == 'SOCN': - DS = protein.GetSOCN() + elif destype == "GetPAAC": + DS = protein.GetPAAC(lamda=int(lamda), weight=float(weight)) df = pd.DataFrame(DS, index=[0]) - elif DesType == 'TPComp': - DS = protein.GetTPComp() + elif destype == "GetAPAAC": + DS = protein.GetAPAAC(lamda=int(lamda), weight=float(weight)) + df = pd.DataFrame(DS, index=[0]) + elif destype =="GetSOCN": + DS = protein.GetSOCN(maxlag=int(maxlag)) df = pd.DataFrame(DS, index=[0]) - elif DesType == 'All': - DS_1 = protein.GetPAAC(lamda=int(Lamda), weight=float(Weight)) - DS_2 = protein.GetAPAAC(lamda=int(Lamda), weight=float(Weight)) - DS_3 = protein.GetCTD() - DS_4 = protein.GetDPComp() - DS_5 = protein.GetGearyAuto() - DS_6 = protein.GetMoranAuto() - DS_7 = protein.GetMoreauBrotoAuto() - DS_8 = protein.GetQSO() - DS_9 = protein.GetSOCN() - DS_10 = protein.GetTPComp() + elif destype =="GetQSO": + DS = protein.GetQSO(maxlag=int(maxlag), weight=float(weight)) + df = pd.DataFrame(DS, index=[0]) + elif destype == "GetTriad": + DS = protein.GetTriad() + df = pd.DataFrame(DS, index=[0]) + elif destype == "All": + DS1 = protein.GetAAComp() + DS2 = protein.GetDPComp() + DS3 = protein.GetTPComp() + DS4 = protein.GetMoreauBrotoAuto() + DS5 = protein.GetMoranAuto() + DS6 = protein.GetGearyAuto() + DS7 = protein.GetCTD() + DS8 = protein.GetPAAC(lamda=int(lamda), weight=float(weight)) + DS9 = protein.GetAPAAC(lamda=int(lamda), weight=float(weight)) + DS10 = protein.GetSOCN(maxlag=int(maxlag)) + DS11 = protein.GetQSO(maxlag=int(maxlag), weight=float(weight)) + DS12 = protein.GetTriad() DS = {} - for D in (DS_1, DS_2, DS_3, DS_4, DS_5, DS_6, DS_7, DS_8, DS_9, DS_10): + for D in (DS1,DS2,DS3,DS4,DS5,DS6,DS7,DS8,DS9,DS10,DS11,DS12): + print(D) DS.update(D) - df = pd.DataFrame(DS, index=[0]) - if DesType == 'BinaryDescriptor': + if destype == 'BinaryDescriptor': out_df = BinaryDescriptor(list_pep_name) else: out_df = pd.concat([out_df, df], axis=0) - out_df.to_csv(Out_file, index=False, sep='\t') + out_df.to_csv(out_file, index=False, sep='\t') if __name__=="__main__": @@ -141,7 +150,12 @@ required=False, default=0.5, help="pep file") - + + parser.add_argument("-m", "--MaxLag", + required=False, + default=10, + help="pep file") + parser.add_argument("-t", "--DesType", required=True, default=None, @@ -153,6 +167,6 @@ help="Path to target tsv file") args = parser.parse_args() - Decriptor_generator(args.InFile, args.Lamda, args.Weight, args.DesType, args.Out_file) + Decriptor_generator(args.InFile, args.Lamda, args.Weight, args.MaxLag, args.DesType, args.Out_file) \ No newline at end of file |
b |
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Sequence_Property_Based_Descriptors/test-data/Out.tsv --- a/PDAUG_Sequence_Property_Based_Descriptors/test-data/Out.tsv Tue Jan 12 18:50:55 2021 +0000 +++ b/PDAUG_Sequence_Property_Based_Descriptors/test-data/Out.tsv Thu Jan 28 03:48:52 2021 +0000 |
b |
b'@@ -1,9 +1,9 @@\n-AA\tAAA\tAAC\tAAD\tAAE\tAAF\tAAG\tAAH\tAAI\tAAK\tAAL\tAAM\tAAN\tAAP\tAAQ\tAAR\tAAS\tAAT\tAAV\tAAW\tAAY\tAC\tACA\tACC\tACD\tACE\tACF\tACG\tACH\tACI\tACK\tACL\tACM\tACN\tACP\tACQ\tACR\tACS\tACT\tACV\tACW\tACY\tAD\tADA\tADC\tADD\tADE\tADF\tADG\tADH\tADI\tADK\tADL\tADM\tADN\tADP\tADQ\tADR\tADS\tADT\tADV\tADW\tADY\tAE\tAEA\tAEC\tAED\tAEE\tAEF\tAEG\tAEH\tAEI\tAEK\tAEL\tAEM\tAEN\tAEP\tAEQ\tAER\tAES\tAET\tAEV\tAEW\tAEY\tAF\tAFA\tAFC\tAFD\tAFE\tAFF\tAFG\tAFH\tAFI\tAFK\tAFL\tAFM\tAFN\tAFP\tAFQ\tAFR\tAFS\tAFT\tAFV\tAFW\tAFY\tAG\tAGA\tAGC\tAGD\tAGE\tAGF\tAGG\tAGH\tAGI\tAGK\tAGL\tAGM\tAGN\tAGP\tAGQ\tAGR\tAGS\tAGT\tAGV\tAGW\tAGY\tAH\tAHA\tAHC\tAHD\tAHE\tAHF\tAHG\tAHH\tAHI\tAHK\tAHL\tAHM\tAHN\tAHP\tAHQ\tAHR\tAHS\tAHT\tAHV\tAHW\tAHY\tAI\tAIA\tAIC\tAID\tAIE\tAIF\tAIG\tAIH\tAII\tAIK\tAIL\tAIM\tAIN\tAIP\tAIQ\tAIR\tAIS\tAIT\tAIV\tAIW\tAIY\tAK\tAKA\tAKC\tAKD\tAKE\tAKF\tAKG\tAKH\tAKI\tAKK\tAKL\tAKM\tAKN\tAKP\tAKQ\tAKR\tAKS\tAKT\tAKV\tAKW\tAKY\tAL\tALA\tALC\tALD\tALE\tALF\tALG\tALH\tALI\tALK\tALL\tALM\tALN\tALP\tALQ\tALR\tALS\tALT\tALV\tALW\tALY\tAM\tAMA\tAMC\tAMD\tAME\tAMF\tAMG\tAMH\tAMI\tAMK\tAML\tAMM\tAMN\tAMP\tAMQ\tAMR\tAMS\tAMT\tAMV\tAMW\tAMY\tAN\tANA\tANC\tAND\tANE\tANF\tANG\tANH\tANI\tANK\tANL\tANM\tANN\tANP\tANQ\tANR\tANS\tANT\tANV\tANW\tANY\tAP\tAPA\tAPAAC1\tAPAAC10\tAPAAC11\tAPAAC12\tAPAAC13\tAPAAC14\tAPAAC15\tAPAAC16\tAPAAC17\tAPAAC18\tAPAAC19\tAPAAC2\tAPAAC20\tAPAAC3\tAPAAC4\tAPAAC5\tAPAAC6\tAPAAC7\tAPAAC8\tAPAAC9\tAPC\tAPD\tAPE\tAPF\tAPG\tAPH\tAPI\tAPK\tAPL\tAPM\tAPN\tAPP\tAPQ\tAPR\tAPS\tAPT\tAPV\tAPW\tAPY\tAQ\tAQA\tAQC\tAQD\tAQE\tAQF\tAQG\tAQH\tAQI\tAQK\tAQL\tAQM\tAQN\tAQP\tAQQ\tAQR\tAQS\tAQT\tAQV\tAQW\tAQY\tAR\tARA\tARC\tARD\tARE\tARF\tARG\tARH\tARI\tARK\tARL\tARM\tARN\tARP\tARQ\tARR\tARS\tART\tARV\tARW\tARY\tAS\tASA\tASC\tASD\tASE\tASF\tASG\tASH\tASI\tASK\tASL\tASM\tASN\tASP\tASQ\tASR\tASS\tAST\tASV\tASW\tASY\tAT\tATA\tATC\tATD\tATE\tATF\tATG\tATH\tATI\tATK\tATL\tATM\tATN\tATP\tATQ\tATR\tATS\tATT\tATV\tATW\tATY\tAV\tAVA\tAVC\tAVD\tAVE\tAVF\tAVG\tAVH\tAVI\tAVK\tAVL\tAVM\tAVN\tAVP\tAVQ\tAVR\tAVS\tAVT\tAVV\tAVW\tAVY\tAW\tAWA\tAWC\tAWD\tAWE\tAWF\tAWG\tAWH\tAWI\tAWK\tAWL\tAWM\tAWN\tAWP\tAWQ\tAWR\tAWS\tAWT\tAWV\tAWW\tAWY\tAY\tAYA\tAYC\tAYD\tAYE\tAYF\tAYG\tAYH\tAYI\tAYK\tAYL\tAYM\tAYN\tAYP\tAYQ\tAYR\tAYS\tAYT\tAYV\tAYW\tAYY\tCA\tCAA\tCAC\tCAD\tCAE\tCAF\tCAG\tCAH\tCAI\tCAK\tCAL\tCAM\tCAN\tCAP\tCAQ\tCAR\tCAS\tCAT\tCAV\tCAW\tCAY\tCC\tCCA\tCCC\tCCD\tCCE\tCCF\tCCG\tCCH\tCCI\tCCK\tCCL\tCCM\tCCN\tCCP\tCCQ\tCCR\tCCS\tCCT\tCCV\tCCW\tCCY\tCD\tCDA\tCDC\tCDD\tCDE\tCDF\tCDG\tCDH\tCDI\tCDK\tCDL\tCDM\tCDN\tCDP\tCDQ\tCDR\tCDS\tCDT\tCDV\tCDW\tCDY\tCE\tCEA\tCEC\tCED\tCEE\tCEF\tCEG\tCEH\tCEI\tCEK\tCEL\tCEM\tCEN\tCEP\tCEQ\tCER\tCES\tCET\tCEV\tCEW\tCEY\tCF\tCFA\tCFC\tCFD\tCFE\tCFF\tCFG\tCFH\tCFI\tCFK\tCFL\tCFM\tCFN\tCFP\tCFQ\tCFR\tCFS\tCFT\tCFV\tCFW\tCFY\tCG\tCGA\tCGC\tCGD\tCGE\tCGF\tCGG\tCGH\tCGI\tCGK\tCGL\tCGM\tCGN\tCGP\tCGQ\tCGR\tCGS\tCGT\tCGV\tCGW\tCGY\tCH\tCHA\tCHC\tCHD\tCHE\tCHF\tCHG\tCHH\tCHI\tCHK\tCHL\tCHM\tCHN\tCHP\tCHQ\tCHR\tCHS\tCHT\tCHV\tCHW\tCHY\tCI\tCIA\tCIC\tCID\tCIE\tCIF\tCIG\tCIH\tCII\tCIK\tCIL\tCIM\tCIN\tCIP\tCIQ\tCIR\tCIS\tCIT\tCIV\tCIW\tCIY\tCK\tCKA\tCKC\tCKD\tCKE\tCKF\tCKG\tCKH\tCKI\tCKK\tCKL\tCKM\tCKN\tCKP\tCKQ\tCKR\tCKS\tCKT\tCKV\tCKW\tCKY\tCL\tCLA\tCLC\tCLD\tCLE\tCLF\tCLG\tCLH\tCLI\tCLK\tCLL\tCLM\tCLN\tCLP\tCLQ\tCLR\tCLS\tCLT\tCLV\tCLW\tCLY\tCM\tCMA\tCMC\tCMD\tCME\tCMF\tCMG\tCMH\tCMI\tCMK\tCML\tCMM\tCMN\tCMP\tCMQ\tCMR\tCMS\tCMT\tCMV\tCMW\tCMY\tCN\tCNA\tCNC\tCND\tCNE\tCNF\tCNG\tCNH\tCNI\tCNK\tCNL\tCNM\tCNN\tCNP\tCNQ\tCNR\tCNS\tCNT\tCNV\tCNW\tCNY\tCP\tCPA\tCPC\tCPD\tCPE\tCPF\tCPG\tCPH\tCPI\tCPK\tCPL\tCPM\tCPN\tCPP\tCPQ\tCPR\tCPS\tCPT\tCPV\tCPW\tCPY\tCQ\tCQA\tCQC\tCQD\tCQE\tCQF\tCQG\tCQH\tCQI\tCQK\tCQL\tCQM\tCQN\tCQP\tCQQ\tCQR\tCQS\tCQT\tCQV\tCQW\tCQY\tCR\tCRA\tCRC\tCRD\tCRE\tCRF\tCRG\tCRH\tCRI\tCRK\tCRL\tCRM\tCRN\tCRP\tCRQ\tCRR\tCRS\tCRT\tCRV\tCRW\tCRY\tCS\tCSA\tCSC\tCSD\tCSE\tCSF\tCSG\tCSH\tCSI\tCSK\tCSL\tCSM\tCSN\tCSP\tCSQ\tCSR\tCSS\tCST\tCSV\tCSW\tCSY\tCT\tCTA\tCTC\tCTD\tCTE\tCTF\tCTG\tCTH\tCTI\tCTK\tCTL\tCTM\tCTN\tCTP\tCTQ\tCTR\tCTS\tCTT\tCTV\tCTW\tCTY\tCV\tCVA\tCVC\tCVD\tCVE\tCVF\tCVG\tCVH\tCVI\tCVK\tCVL\tCVM\tCVN\tCVP\tCVQ\tCVR\tCVS\tCVT\tCVV\tCVW\tCVY\tCW\tCWA\tCWC\tCWD\tCWE\tCWF\tCWG\tCWH\tCWI\tCWK\tCWL\tCWM\tCWN\tCWP\tCWQ\tCWR\tCWS\tCWT\tCWV\tCWW\tCWY\tCY\tCYA\tCYC\tCYD\tCYE\tCYF\tCYG\tCYH\tCYI\tCYK\tCYL\tCYM\tCYN\tCYP\tCYQ\tCYR\tCYS\tCYT\tCYV\tCYW\tCYY\tDA\tDAA\tDAC\tDAD\tDAE\tDAF\tDAG\tDAH\tDAI\tDAK\tDAL\tDAM\tDAN\tDAP\tDAQ\tDAR\tDAS\tDAT\tDAV\tDAW\tDAY\tDC\tDCA\tDCC\tDCD\tDCE\tDCF\tDCG\tDCH\tDCI\tDCK\tDCL\tDCM\tDCN\tDCP\tDCQ\tDCR\tDCS\tDCT\tDCV\tDCW\tDCY\tDD\tDDA\tDDC\tDDD\tDDE\tDDF\tDDG\tDDH\tDDI\tDDK\tDDL\tDDM\tDDN\tDDP\tDDQ\tDDR\tDDS\tDDT\tDDV\tDDW\tDDY\tDE\tDEA\tDEC\tDED\tDEE\tDEF\tDEG\tDEH\tDEI\tDEK\tDEL\tDEM\tDEN\tDEP\tDEQ\tDER\tDES\tDET\tDEV\tDEW\tDEY\tDF\tDFA\tDFC\tDFD\tDFE\tDFF\tDFG\tDFH\tDFI\tDFK\tDFL\tDFM\tDFN\tDFP\tDFQ\tDFR\tDFS\tDFT\tDFV\tDFW\tDFY\tDG\tDGA\tDGC\tDGD\tDGE\tDGF\tDGG\tDGH\tDGI\tDGK\tDGL\tDGM\tDGN\tDGP\tDGQ\tDGR\tDGS\tDGT\tDGV\tDGW\tDGY\tDH\tDHA\tDHC\tDH'..b'\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t11.765\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t6.25\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t6.25\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0.118\t0.824\t0.059\t41.176\t47.059\t41.176\t41.176\t47.059\t5.882\t17.647\t58.824\t76.471\t100.0\t23.529\t23.529\t23.529\t23.529\t23.529\t0.125\t0.0\t0.125\t0.176\t0.353\t0.471\t23.529\t47.059\t23.529\t41.176\t47.059\t5.882\t5.882\t64.706\t70.588\t94.118\t11.765\t17.647\t35.294\t76.471\t100.0\t0.0\t0.25\t0.313\t0.353\t0.412\t0.235\t5.882\t5.882\t58.824\t64.706\t94.118\t11.765\t11.765\t35.294\t76.471\t100.0\t17.647\t17.647\t41.176\t47.059\t70.588\t0.313\t0.125\t0.25\t0.471\t0.0\t0.176\t11.765\t17.647\t35.294\t76.471\t100.0\t0.0\t0.0\t0.0\t0.0\t0.0\t41.176\t70.588\t41.176\t47.059\t70.588\t0.0\t0.188\t0.0\t0.353\t0.412\t0.235\t5.882\t5.882\t58.824\t64.706\t94.118\t11.765\t11.765\t35.294\t76.471\t100.0\t17.647\t17.647\t41.176\t47.059\t70.588\t0.313\t0.125\t0.25\t0.294\t0.412\t0.294\t11.765\t11.765\t41.176\t47.059\t70.588\t17.647\t17.647\t35.294\t76.471\t100.0\t5.882\t5.882\t23.529\t64.706\t94.118\t0.313\t0.188\t0.25\t0.647\t0.176\t0.176\t5.882\t11.765\t35.294\t64.706\t100.0\t23.529\t47.059\t23.529\t41.176\t47.059\t70.588\t94.118\t70.588\t88.235\t94.118\t0.25\t0.25\t0.0\t5.523\t2.186\t7.66\t4.911\t3.171\t4.7\t5.058\t3.807\t2.541\t3.165\t5.523\t2.186\t7.66\t4.911\t3.171\t4.7\t5.058\t3.807\t2.541\t3.165\n' |
b |
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_TSVtoFASTA/test-data/1.fasta --- a/PDAUG_TSVtoFASTA/test-data/1.fasta Tue Jan 12 18:50:55 2021 +0000 +++ b/PDAUG_TSVtoFASTA/test-data/1.fasta Thu Jan 28 03:48:52 2021 +0000 |
b |
@@ -19,4 +19,4 @@ >21_AMP KLLLLKLLK >22_AMP -GLFDIVKKVVGALG \ No newline at end of file +GLFDIVKKVVGALG |
b |
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_TSVtoFASTA/test-data/2.fasta --- a/PDAUG_TSVtoFASTA/test-data/2.fasta Tue Jan 12 18:50:55 2021 +0000 +++ b/PDAUG_TSVtoFASTA/test-data/2.fasta Thu Jan 28 03:48:52 2021 +0000 |
b |
@@ -21,4 +21,4 @@ >10_TM KLLLLKLLK >11_TM -GLFDIVKKVVGALG \ No newline at end of file +GLFDIVKKVVGALG |
b |
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_TSVtoFASTA/test-data/out.fasta --- a/PDAUG_TSVtoFASTA/test-data/out.fasta Tue Jan 12 18:50:55 2021 +0000 +++ b/PDAUG_TSVtoFASTA/test-data/out.fasta Thu Jan 28 03:48:52 2021 +0000 |
b |
@@ -43,4 +43,4 @@ >21 KLLLLKLLK >22 -GLFDIVKKVVGALG \ No newline at end of file +GLFDIVKKVVGALG |
b |
diff -r b5a9f541c6f8 -r 10c7d4807de6 PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py --- a/PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py Tue Jan 12 18:50:55 2021 +0000 +++ b/PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py Thu Jan 28 03:48:52 2021 +0000 |
b |
@@ -11,6 +11,7 @@ parser.add_argument("-M", "--min_count", required=False, default=0, help="Path to target tsv file") parser.add_argument("-W", "--window", required=False, default=5, help="Path to target tsv file") parser.add_argument("-O", "--OutFile", required=False, default='model.txt', help="Path to target tsv file") +parser.add_argument("-S", "--SG", required=False, default='skip-gram', help="Training algorithm: 1 for skip-gram; otherwise CBOW") args = parser.parse_args() @@ -30,9 +31,14 @@ #min_count = 0 size = 200 #window = 5 -sg = 1 + +print (args.SG) +if args.SG == 'skip-gram': + SG = 1 +elif args.SG == 'CBOW': + SG = 0 sentences = ProteinSeq() -model = gensim.models.Word2Vec(sentences, min_count=int(args.min_count), size=size, window=int(args.window), sg = sg, workers = 10) +model = gensim.models.Word2Vec(sentences, min_count=int(args.min_count), size=size, window=int(args.window), sg = SG, workers = 10) model.wv.save_word2vec_format(args.OutFile, binary=False) |