Mercurial > repos > estrain > srst2
comparison scoreProfiles.py @ 0:efc202baae1f draft
planemo upload
| author | estrain |
|---|---|
| date | Sun, 03 Dec 2017 13:28:11 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:efc202baae1f |
|---|---|
| 1 #!/usr/bin/env | |
| 2 | |
| 3 ## Generate basic summary stats for SRST2 (v2) allele score output. Generate summaries for each profile defined in the database | |
| 4 ## author: errol strain, estrain@gmail.com | |
| 5 | |
| 6 from argparse import (ArgumentParser, FileType) | |
| 7 import sys | |
| 8 import glob | |
| 9 from decimal import Decimal | |
| 10 | |
| 11 def parse_args(): | |
| 12 "Parse the input arguments, use '-h' for help." | |
| 13 | |
| 14 parser = ArgumentParser(description='Generate Summary Scores for SRST2 Allele Score Output') | |
| 15 | |
| 16 # Read inputs | |
| 17 parser.add_argument('--mlst_definitions', type=str, required=True, nargs=1, help='MLST Definitions') | |
| 18 parser.add_argument('--output', type=str, required=True, nargs=1, help='MLST Definitions') | |
| 19 parser.add_argument('--profile_cov', type=str, required=False, help='Minimum Average Coverage to Report ST Profile',default="98") | |
| 20 parser.add_argument('--profile_max_mismatch', type=str, required=False, help='Maximum Number of Mismatches (SNPs)', default="1") | |
| 21 | |
| 22 return parser.parse_args() | |
| 23 | |
| 24 args =parse_args() | |
| 25 | |
| 26 allHash = {} | |
| 27 | |
| 28 # Read in Profile Database | |
| 29 profiles = open(args.mlst_definitions[0],"r") | |
| 30 output = open(args.output[0],"w") | |
| 31 | |
| 32 # Minimum mean percent coverage for reporting profile | |
| 33 min_per=float(args.profile_cov) | |
| 34 # Maximum mean mismatch for reporting profile | |
| 35 max_mis=float(args.profile_max_mismatch) | |
| 36 | |
| 37 # Read in Allele Scores | |
| 38 # Scores should be in srts2*.scores file | |
| 39 # Column 0:Allele, 1:Score, 2:Avg Depth, 5:% Coverage, 7:Mismatches, 8:Indels | |
| 40 scoreFile = open(glob.glob("srst2*.scores")[0],"r") | |
| 41 scoreFile.readline() | |
| 42 | |
| 43 for line in scoreFile.readlines() : | |
| 44 els = line.split("\t") | |
| 45 vals = els[0].split("_") | |
| 46 allHash.update({els[0]:line}) | |
| 47 | |
| 48 | |
| 49 # Allele names in first row | |
| 50 als = profiles.readline().rstrip() | |
| 51 filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels\n") | |
| 52 output.write(filehead) | |
| 53 | |
| 54 genes = als.split("\t") | |
| 55 | |
| 56 for line in profiles.readlines() : | |
| 57 line = line.rstrip() | |
| 58 els = line.split("\t") | |
| 59 alleles = [] | |
| 60 for i in range(1,len(genes)) : | |
| 61 alleles.append(genes[i] + "_" + els[i]) | |
| 62 mscore=0 | |
| 63 mdepth=0 | |
| 64 mcover=0 | |
| 65 mmisma=0 | |
| 66 mindel=0 | |
| 67 for i in alleles : | |
| 68 if i in allHash : | |
| 69 vals=str(allHash[i]).split("\t") | |
| 70 mscore+=float(vals[1]) | |
| 71 mdepth+=float(vals[2]) | |
| 72 mcover+=float(vals[5]) | |
| 73 mmisma+=int(vals[7]) | |
| 74 mindel+=int(vals[8]) | |
| 75 | |
| 76 mscore=round(Decimal(mscore/(len(genes)-1)),5) | |
| 77 mdepth=round(Decimal(mdepth/(len(genes)-1)),2) | |
| 78 mcover=round(Decimal(mcover/(len(genes)-1)),2) | |
| 79 | |
| 80 if mmisma<=max_mis and mcover>=min_per : | |
| 81 output.write(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel) + "\n") | |
| 82 |
