Mercurial > repos > guerler > springsuite
changeset 16:16eb2acaaa20 draft
"planemo upload commit 0e4e1f8de9464b411152c44f4edd099db8ad9e0b"
author | guerler |
---|---|
date | Sat, 24 Oct 2020 17:48:06 +0000 |
parents | 4a4888bf0338 |
children | c790d25086dc |
files | spring_minz.py test-data/dbCAN_result.txt test-data/pdb70_result.txt |
diffstat | 3 files changed, 24 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/spring_minz.py Sun Sep 27 20:52:58 2020 +0000 +++ b/spring_minz.py Sat Oct 24 17:48:06 2020 +0000 @@ -13,12 +13,12 @@ for index, line in enumerate(file): columns = line.split() core = columns[0] - partner = columns[2] + partner = columns[-1] if core not in crossreference: crossreference[core] = [] crossreference[core].append(partner) print ("Loaded cross reference from `%s`." % args.crossreference) - targets = get_template_scores(args.target, args.minscore, args.idx) + toptarget, targets = get_template_scores(args.target, args.minscore, args.idx) interactions = [] if not targets: print("No targets found `%s`" % args.target) @@ -27,8 +27,9 @@ for name in names: input_directory = args.inputs.rstrip("/") input_file = "%s/%s" % (input_directory, name) - templates = get_template_scores(input_file, args.minscore, args.idx) + toptemplate, templates = get_template_scores(input_file, args.minscore, args.idx) minz = 0 + mint = "" for t in targets: if t in crossreference: partners = crossreference[t] @@ -37,16 +38,18 @@ score = min(targets[t], templates[p]) if score > minz: minz = score + mint = "%s\t%s\t%s\t%s" % (toptarget, toptemplate, t, p) if minz > args.minscore: - interactions.append((name, minz)) - print("Predicting: %s, min-Z: %s" % (name, minz)) + interactions.append((name, minz, mint)) + print("Predicting: %s, min-Z: %s, templates: %s" % (name, minz, mint)) interactions.sort(key=lambda tup: tup[1], reverse=True) with open(args.output, 'a+') as output_file: for i in interactions: - output_file.write("%s\t%s\t%s\n" % (args.name, i[0], i[1])) + output_file.write("%s\t%s\t%s\t%s\n" % (args.name, i[0], i[1], i[2])) def get_template_scores(hhr_file, min_score, identifier_length): result = {} + toptemplate = None identifier_length = identifier_length + 4 if os.path.isfile(hhr_file): with open(hhr_file) as file: @@ -57,8 +60,10 @@ template_id = line[4:identifier_length] template_score = float(line[57:63]) if template_score > min_score: + if toptemplate is None: + toptemplate = template_id result[template_id] = template_score - return result + return toptemplate, result if __name__ == "__main__": parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') @@ -68,7 +73,7 @@ parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6) parser.add_argument('-l', '--list', help='Text file containing identifiers.', required=True) parser.add_argument('-i', '--inputs', help='Directory containing `hhr` files', required=True) - parser.add_argument('-o', '--output', help='Output file containing minZ-scores`', required=True) + parser.add_argument('-o', '--output', help='Output file containing min-Z scores`', required=True) parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) args = parser.parse_args() main(args) \ No newline at end of file
--- a/test-data/dbCAN_result.txt Sun Sep 27 20:52:58 2020 +0000 +++ b/test-data/dbCAN_result.txt Sat Oct 24 17:48:06 2020 +0000 @@ -1,9 +1,9 @@ -6W4H_A.hhr 6W4H_A.hhr 24.7 -6W9Q_A.hhr 6W4H_B.hhr 22.3 -6W9Q_A.hhr 6W9C_A.hhr 21.0 -6W37_A.hhr 6W9C_A.hhr 21.3 -6W37_A.hhr 6W4H_B.hhr 16.0 -6W37_A.hhr 6W37_A.hhr 14.0 -7BQY_A.hhr 6W9C_A.hhr 24.2 -7BQY_A.hhr 6W4H_B.hhr 24.0 -7BQY_A.hhr 6W37_A.hhr 14.0 +6W4H_A.hhr 6W4H_A.hhr 24.7 CAA16605.1 CAA16605.1 CAA16605.1 CAA16605.1 +6W9Q_A.hhr 6W4H_B.hhr 22.3 AJP22991.1 CAR47543.1 AJP22991.1 ACI15933.1 +6W9Q_A.hhr 6W9C_A.hhr 21.0 AJP22991.1 ACY47587.1 ATY34423.1 AHA42547.2 +6W37_A.hhr 6W9C_A.hhr 21.3 CAG81347.1 ACY47587.1 CAG81347.1 AAO76127.1 +6W37_A.hhr 6W4H_B.hhr 16.0 CAG81347.1 CAR47543.1 ABO94043.1 ACY95489.1 +6W37_A.hhr 6W37_A.hhr 14.0 CAG81347.1 CAG81347.1 ABO94043.1 ACY95489.1 +7BQY_A.hhr 6W9C_A.hhr 24.2 QCE11709.1 ACY47587.1 QCE11709.1 AAW84061.1 +7BQY_A.hhr 6W4H_B.hhr 24.0 QCE11709.1 CAR47543.1 CAK38741.1 ACD98010.1 +7BQY_A.hhr 6W37_A.hhr 14.0 QCE11709.1 CAG81347.1 ABO94043.1 ACY95489.1
--- a/test-data/pdb70_result.txt Sun Sep 27 20:52:58 2020 +0000 +++ b/test-data/pdb70_result.txt Sat Oct 24 17:48:06 2020 +0000 @@ -1,2 +1,2 @@ -NP_000282.1.hhr NP_000282.1.hhr 875.1 -NP_000282.1.hhr NP_000290.2.hhr 86.5 +NP_000282.1.hhr NP_000282.1.hhr 875.1 2WZB_A 2WZB_A 2WZB_A 2WZB_A +NP_000282.1.hhr NP_000290.2.hhr 86.5 2WZB_A 1XM9_A 3UWD_A 3NMZ_A