Mercurial > repos > jjjjia > cpo_prediction
comparison cpo_galaxy_prediction.py @ 13:a14b12a71a53 draft
planemo upload
author | jjjjia |
---|---|
date | Sat, 25 Aug 2018 20:56:37 -0400 |
parents | cabceaa239e4 |
children | 596bf8a792de |
comparison
legend: equal | deleted | inserted | replaced
12:4b2738bc81ed | 13:a14b12a71a53 |
---|---|
455 pf.file = str(plasmidFinder.iloc[i,0]) | 455 pf.file = str(plasmidFinder.iloc[i,0]) |
456 pf.sequence = str(plasmidFinder.iloc[i,1]) | 456 pf.sequence = str(plasmidFinder.iloc[i,1]) |
457 pf.start = int(plasmidFinder.iloc[i,2]) | 457 pf.start = int(plasmidFinder.iloc[i,2]) |
458 pf.end = int(plasmidFinder.iloc[i,3]) | 458 pf.end = int(plasmidFinder.iloc[i,3]) |
459 pf.gene = str(plasmidFinder.iloc[i,4]) | 459 pf.gene = str(plasmidFinder.iloc[i,4]) |
460 pf.shortGene = pf.gene[:pf.gene.index("_")] | 460 if (pf.gene.find("_") > -1): |
461 pf.shortGene = pf.gene[:pf.gene.index("_")] | |
462 else: | |
463 pf.shortGene = pf.gene | |
461 pf.coverage = str(plasmidFinder.iloc[i,5]) | 464 pf.coverage = str(plasmidFinder.iloc[i,5]) |
462 pf.coverage_map = str(plasmidFinder.iloc[i,6]) | 465 pf.coverage_map = str(plasmidFinder.iloc[i,6]) |
463 pf.gaps = str(plasmidFinder.iloc[i,7]) | 466 pf.gaps = str(plasmidFinder.iloc[i,7]) |
464 pf.pCoverage = float(plasmidFinder.iloc[i,8]) | 467 pf.pCoverage = float(plasmidFinder.iloc[i,8]) |
465 pf.pIdentity = float(plasmidFinder.iloc[i,9]) | 468 pf.pIdentity = float(plasmidFinder.iloc[i,9]) |
526 ToJson(rFinder, "resfinder.json") #************* | 529 ToJson(rFinder, "resfinder.json") #************* |
527 | 530 |
528 rgiAMR = ParseRGIResult(rgi, plasmidContigs, likelyPlasmidContigs) # outputDir + "/predictions/" + ID + ".rgi.txt", plasmidContigs, likelyPlasmidContigs)#*********************** | 531 rgiAMR = ParseRGIResult(rgi, plasmidContigs, likelyPlasmidContigs) # outputDir + "/predictions/" + ID + ".rgi.txt", plasmidContigs, likelyPlasmidContigs)#*********************** |
529 ToJson(rgiAMR, "rgi.json") #************* | 532 ToJson(rgiAMR, "rgi.json") #************* |
530 | 533 |
531 carbapenamases = [] | 534 carbapenamases = [] |
535 resfinderCarbas = [] #list of rfinder objects for lindaout list | |
532 amrGenes = [] | 536 amrGenes = [] |
533 for keys in rFinder: | 537 for keys in rFinder: |
534 carbapenamases.append(rFinder[keys].shortGene + "(" + rFinder[keys].source + ")") | 538 carbapenamases.append(rFinder[keys].shortGene + "(" + rFinder[keys].source + ")") |
539 resfinderCarbas.append(rFinder[keys]) | |
535 for keys in rgiAMR: | 540 for keys in rgiAMR: |
536 if (rgiAMR[keys].Drug_Class.find("carbapenem") > -1): | 541 if (rgiAMR[keys].Drug_Class.find("carbapenem") > -1 and rgiAMR[keys].AMR_Gene_Family.find("beta-lactamase") > -1): |
537 if (rgiAMR[keys].Best_Hit_ARO not in carbapenamases): | 542 if (rgiAMR[keys].Best_Hit_ARO not in carbapenamases): |
538 carbapenamases.append(rgiAMR[keys].Best_Hit_ARO+ "(" + rgiAMR[keys].source + ")") | 543 carbapenamases.append(rgiAMR[keys].Best_Hit_ARO+ "(" + rgiAMR[keys].source + ")") |
539 else: | 544 else: |
540 if (rgiAMR[keys].Best_Hit_ARO not in amrGenes): | 545 if (rgiAMR[keys].Best_Hit_ARO not in amrGenes): |
541 amrGenes.append(rgiAMR[keys].Best_Hit_ARO+ "(" + rgiAMR[keys].source + ")") | 546 amrGenes.append(rgiAMR[keys].Best_Hit_ARO+ "(" + rgiAMR[keys].source + ")") |
596 | 601 |
597 | 602 |
598 #TSV output | 603 #TSV output |
599 lindaOut = [] | 604 lindaOut = [] |
600 tsvOut = [] | 605 tsvOut = [] |
601 lindaOut.append("new\tID\tQUALITY\tExpected Species\tMLST Scheme\tSequence Type\tMLST_ALLELE_1\tMLST_ALLELE_2\tMLST_ALLELE_3\tMLST_ALLELE_4\tMLST_ALLELE_5\tMLST_ALLELE_6\tMLST_ALLELE_7\tSEROTYPE\tK_CAPSULE\tPLASMID_1_FAMILY\tPLASMID_1_BEST_MATCH\tPLASMID_1_COVERAGE\tPLASMID_1_SNVS_TO_BEST_MATCH\tPLASMID_1_CARBAPENEMASE\tPLASMID_1_INC_GROUP\tPLASMID_2_RFLP\tPLASMID_2_FAMILY\tPLASMID_2_BEST_MATCH\tPLASMID_2_COVERAGE\tPLASMID_2_SNVS_TO_BEST_MATCH\tPLASMID_2_CARBAPENEMASE\tPLASMID_2_INC_GROUP") | 606 lindaOut.append("ID\tQUALITY\tExpected Species\tMLST Scheme\tSequence Type\tMLST_ALLELE_1\tMLST_ALLELE_2\tMLST_ALLELE_3\tMLST_ALLELE_4\tMLST_ALLELE_5\tMLST_ALLELE_6\tMLST_ALLELE_7\tSEROTYPE\tK_CAPSULE\tPLASMID_2_RFLP\tPLASMID_1_FAMILY\tPLASMID_1_BEST_MATCH\tPLASMID_1_COVERAGE\tPLASMID_1_SNVS_TO_BEST_MATCH\tPLASMID_1_CARBAPENEMASE\tPLASMID_1_INC_GROUP\tPLASMID_2_RFLP\tPLASMID_2_FAMILY\tPLASMID_2_BEST_MATCH\tPLASMID_2_COVERAGE\tPLASMID_2_SNVS_TO_BEST_MATCH\tPLASMID_2_CARBAPENEMASE\tPLASMID_2_INC_GROUP") |
607 lindaTemp = ID + "\t" #id | |
608 lindaTemp += "\t" #quality | |
609 lindaTemp += expectedSpecies + "\t" #expected | |
610 lindaTemp += mlstHit.species + "\t" #mlstscheme | |
611 lindaTemp += str(mlstHit.seqType) + "\t" #seq type | |
612 lindaTemp += "\t".join(mlstHit.scheme.split(";")) + "\t"#mlst alleles x 7 | |
613 lindaTemp += "\t\t" #sero and kcap | |
614 | |
615 #resfinderCarbas | |
616 for carbs in resfinderCarbas: | |
617 if (carbs.source == "plasmid"): # | |
618 lindaTemp += "\t\t\t\t\t" #plasmid 1 rflp plasmid 1 family information. PLASMID_1_FAMILY\tPLASMID_1_BEST_MATCH\tPLASMID_1_COVERAGE\tPLASMID_1_SNVS_TO_BEST_MATCH | |
619 lindaTemp += carbs.shortGene + "\t" #found an carbapenase | |
620 contig = carbs.sequence[6:] #this is the contig number | |
621 for i in mSuite.keys(): | |
622 if (str(mSuite[i].contig_num) == str(contig)): #found the right plasmid | |
623 lindaTemp += mSuite[i].rep_type | |
624 lindaOut.append(lindaTemp) | |
625 out = open("summary.linda.tsv", 'w') | |
626 for item in lindaOut: | |
627 out.write("%s\n" % item) | |
602 | 628 |
603 tsvOut.append("new\tID\tExpected Species\tMLST Species\tSequence Type\tMLST Scheme\tCarbapenem Resistance Genes\tOther AMR Genes\tTotal Plasmids\tPlasmids ID\tNum_Contigs\tPlasmid Length\tPlasmid RepType\tPlasmid Mobility\tNearest Reference\tDefinitely Plasmid Contigs\tLikely Plasmid Contigs") | 629 tsvOut.append("new\tID\tExpected Species\tMLST Species\tSequence Type\tMLST Scheme\tCarbapenem Resistance Genes\tOther AMR Genes\tTotal Plasmids\tPlasmids ID\tNum_Contigs\tPlasmid Length\tPlasmid RepType\tPlasmid Mobility\tNearest Reference\tDefinitely Plasmid Contigs\tLikely Plasmid Contigs") |
604 #start with ID | 630 #start with ID |
605 temp = "\t" | 631 temp = "\t" |
606 temp += (ID + "\t") | 632 temp += (ID + "\t") |