comparison spring_minz.py @ 27:e34da554d415 draft

"planemo upload commit f12d400ea7827bb7d6b2ef31eb7f38e555bd053f-dirty"
author guerler
date Sat, 31 Oct 2020 22:46:10 +0000
parents 5d1ae615e4ec
children 41353488926c
comparison
equal deleted inserted replaced
26:124f82fbd986 27:e34da554d415
38 matchScores(targetFile=targetFile, 38 matchScores(targetFile=targetFile,
39 targetName=targetName, 39 targetName=targetName,
40 inputs=inputs, 40 inputs=inputs,
41 inputPath=inputPath, 41 inputPath=inputPath,
42 crossReference=crossReference, 42 crossReference=crossReference,
43 idLength=args.idlength,
43 minScore=args.minscore, 44 minScore=args.minscore,
44 logFile=logFile, 45 logFile=logFile,
45 interactions=interactions) 46 interactions=interactions)
46 if args.inputlist: 47 if args.inputlist:
47 for inputName in inputs: 48 for inputName in inputs:
51 targetName=inputName, 52 targetName=inputName,
52 inputs=targets, 53 inputs=targets,
53 inputPath=targetPath, 54 inputPath=targetPath,
54 crossReference=crossReference, 55 crossReference=crossReference,
55 minScore=args.minscore, 56 minScore=args.minscore,
57 idLength=args.idlength,
56 logFile=logFile, 58 logFile=logFile,
57 interactions=interactions) 59 interactions=interactions)
58 interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True) 60 interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True)
59 with open(args.output, 'w') as output_file: 61 with open(args.output, 'w') as output_file:
60 for entry in interactions: 62 for entry in interactions:
61 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"])) 63 output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"]))
62 logFile.close() 64 logFile.close()
63 65
64 def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, logFile, interactions): 66 def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, logFile, interactions):
65 targetTop, targetHits = getTemplateScores(targetFile, minScore) 67 targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength)
66 if not targetHits: 68 if not targetHits:
67 print("No targets found `%s`" % targetFile) 69 print("No targets found `%s`" % targetFile)
68 else: 70 else:
69 print ("Loaded target scores from `%s`." % targetFile) 71 print ("Loaded target scores from `%s`." % targetFile)
70 for inputName in inputs: 72 for inputName in inputs:
71 inputFile = "%s/%s" % (inputPath, inputName) 73 inputFile = "%s/%s" % (inputPath, inputName)
72 inputTop, inputHits = getTemplateScores(inputFile, minScore) 74 inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength)
73 minZ = 0 75 minZ = 0
74 minInfo = "" 76 minInfo = ""
75 for t in targetHits: 77 for t in targetHits:
76 if t in crossReference: 78 if t in crossReference:
77 partners = crossReference[t] 79 partners = crossReference[t]
90 if interactions[interactionKey]["minZ"] >= minZ: 92 if interactions[interactionKey]["minZ"] >= minZ:
91 continue 93 continue
92 interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo) 94 interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo)
93 logFile.write("Interaction between %s and %s [min-Z: %s].\n" % (targetName, inputName, minZ)) 95 logFile.write("Interaction between %s and %s [min-Z: %s].\n" % (targetName, inputName, minZ))
94 96
95 def getTemplateScores(hhrFile, minScore): 97 def getTemplateScores(hhrFile, minScore, idLength):
96 result = dict() 98 result = dict()
97 topTemplate = None 99 topTemplate = None
100 idLength = idLength + 4
98 if os.path.isfile(hhrFile): 101 if os.path.isfile(hhrFile):
99 with open(hhrFile) as file: 102 with open(hhrFile) as file:
100 for index, line in enumerate(file): 103 for index, line in enumerate(file):
101 if index > 8: 104 if index > 8:
102 if not line.strip(): 105 if not line.strip():
103 break 106 break
104 templateId = line[4:10] 107 templateId = line[4:idLength]
105 templateScore = float(line[57:63]) 108 templateScore = float(line[57:63])
106 if templateScore > minScore: 109 if templateScore > minScore:
107 if topTemplate is None: 110 if topTemplate is None:
108 topTemplate = templateId 111 topTemplate = templateId
109 result[templateId] = templateScore 112 result[templateId] = templateScore
117 parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=False) 120 parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=False)
118 parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True) 121 parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True)
119 parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True) 122 parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True)
120 parser.add_argument('-l', '--log', help='Log file', required=True) 123 parser.add_argument('-l', '--log', help='Log file', required=True)
121 parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) 124 parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10)
125 parser.add_argument('-idx', '--idlength', help='Length of identifier in reference', type=int, default=6)
122 args = parser.parse_args() 126 args = parser.parse_args()
123 main(args) 127 main(args)