Mercurial > repos > guerler > springsuite

--- a/spring_minz.py	Wed Oct 28 06:49:58 2020 +0000
+++ b/spring_minz.py	Thu Oct 29 13:04:47 2020 +0000
@@ -3,120 +3,125 @@
 import os

 def main(args):
-	inputs = list()
-	with open(args.inputlist) as file:
-		for index, line in enumerate(file):
-			name = line.strip()
-			inputs.append(name)
-	print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist))
-	targets = list()
-	duplicates = 0
-	with open(args.targetlist) as file:
-		for index, line in enumerate(file):
-			name = line.strip()
-			targets.append(name)
-			if name in inputs:
-				duplicates = duplicates + 1
-	print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist))
-	crossReference = dict()
-	with open(args.crossreference) as file:
-		for index, line in enumerate(file):
-			columns = line.split()
-			core = columns[0]
-			partner = columns[-1]
-			if core not in crossReference:
-				crossReference[core] = []
-			crossReference[core].append(partner)
-	print ("Loaded cross reference from `%s`." % args.crossreference)
-	interactions = dict()
-	for targetName in targets:
-		targetDirectory = args.targetpath.rstrip("/")
-		targetFile = "%s/%s" % (targetDirectory, targetName)
-		matchScores(targetFile=targetFile,
-					targetName=targetName,
-					inputs=sorted(inputs),
-					inputPath=args.inputpath,
-					crossReference=crossReference,
-					minScore=args.minscore,
-					idLength=args.idx,
-					interactions=interactions)
-	if duplicates != len(targets):
-		for inputName in inputs:
-			inputDirectory = args.inputpath.rstrip("/")
-			inputFile = "%s/%s" % (inputDirectory, inputName)
-			matchScores(targetFile=inputFile,
-						targetName=inputName,
-						inputs=targets,
-						inputPath=args.targetpath,
-						crossReference=crossReference,
-						minScore=args.minscore,
-						idLength=args.idx,
-						interactions=interactions)
-	interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True)
-	with open(args.output, 'w') as output_file:
-		for entry in interactions:
-			output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"]))
+    """Predict interactions between target and input profiles and write the min-Z table.
+
+    Loads the identifier lists and the cross reference, scores all pairs with
+    `matchScores` and writes them to `args.output` sorted by descending min-Z.
+    Without `args.inputlist` the targets are compared against themselves.
+    Raises ValueError if `args.inputlist` is given without `args.inputpath`.
+    """
+    if args.inputlist and not args.inputpath:
+        raise ValueError("`--inputpath` is required when `--inputlist` is given.")
+    targetPath = args.targetpath.rstrip("/")
+    with open(args.targetlist) as file:
+        # Blank lines would otherwise turn into empty identifiers.
+        targets = [line.strip() for line in file if line.strip()]
+    print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist))
+    if args.inputlist:
+        inputPath = args.inputpath.rstrip("/")
+        with open(args.inputlist) as file:
+            inputs = [line.strip() for line in file if line.strip()]
+        print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist))
+    else:
+        inputs = targets
+        inputPath = targetPath
+    crossReference = dict()
+    with open(args.crossreference) as file:
+        for line in file:
+            columns = line.split()
+            if columns:  # a blank line has no columns to index
+                crossReference.setdefault(columns[0], []).append(columns[-1])
+    print ("Loaded cross reference from `%s`." % args.crossreference)
+    interactions = dict()
+    # The context manager closes the log even when scoring raises.
+    with open(args.log, 'a+') as logFile:
+        for targetName in targets:
+            matchScores(targetFile="%s/%s" % (targetPath, targetName),
+                        targetName=targetName,
+                        inputs=inputs,
+                        inputPath=inputPath,
+                        crossReference=crossReference,
+                        minScore=args.minscore,
+                        idLength=args.idx,
+                        logFile=logFile,
+                        interactions=interactions)
+        if args.inputlist:
+            # Second pass with the two sets swapped.
+            for inputName in inputs:
+                matchScores(targetFile="%s/%s" % (inputPath, inputName),
+                            targetName=inputName,
+                            inputs=targets,
+                            inputPath=targetPath,
+                            crossReference=crossReference,
+                            minScore=args.minscore,
+                            idLength=args.idx,
+                            logFile=logFile,
+                            interactions=interactions)
+    # Strongest interactions first.
+    interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True)
+    with open(args.output, 'w') as output_file:
+        for entry in interactions:
+            output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"]))

-def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, interactions):
-	targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength)
-	if not targetHits:
-		print("No targets found `%s`" % targetFile)
-	else:
-		print ("Loaded target scores from `%s`." % targetFile)
-		for inputName in inputs:
-			inputDirectory = inputPath.rstrip("/")
-			inputFile = "%s/%s" % (inputDirectory, inputName)
-			inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength)
-			minZ = 0
-			minInfo = ""
-			for t in targetHits:
-				if t in crossReference:
-					partners = crossReference[t]
-					for p in partners:
-						if p in inputHits:
-							score = min(targetHits[t], inputHits[p])
-							if score > minZ:
-								minZ = score
-								minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p)
-			if minZ > minScore:
-				if targetName > inputName:
-					interactionKey = "%s_%s" % (targetName, inputName)
-				else:
-					interactionKey = "%s_%s" % (inputName, targetName)
-				if interactionKey in interactions:
-					if interactions[interactionKey]["minZ"] >= minZ:
-						continue
-				interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo)
-				print("Predicting: %s, min-Z: %s, templates: %s" % (inputName, minZ, minInfo))
-	return interactions
+def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, logFile, interactions):
+    """Score one target against every input and record the best template pairing.
+
+    Updates `interactions` in place and logs each recorded pair to `logFile`.
+    """
+    targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength)
+    if not targetHits:
+        print("No targets found `%s`" % targetFile)
+        return
+    print ("Loaded target scores from `%s`." % targetFile)
+    for inputName in inputs:
+        inputTop, inputHits = getTemplateScores("%s/%s" % (inputPath, inputName), minScore, idLength)
+        bestScore, bestInfo = 0, ""
+        for template, templateScore in targetHits.items():
+            for partner in crossReference.get(template, ()):
+                if partner not in inputHits:
+                    continue
+                # A template pair scores as its weaker hit.
+                pairScore = min(templateScore, inputHits[partner])
+                if pairScore > bestScore:
+                    bestScore = pairScore
+                    bestInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, template, partner)
+        if not bestScore > minScore:
+            continue
+        # Order the names so that A/B and B/A share one key.
+        pairKey = "%s_%s" % ((targetName, inputName) if targetName > inputName else (inputName, targetName))
+        if pairKey in interactions and interactions[pairKey]["minZ"] >= bestScore:
+            continue
+        interactions[pairKey] = dict(targetName=targetName, inputName=inputName, minZ=bestScore, minInfo=bestInfo)
+        logFile.write("Interaction between %s and %s [min-Z: %s].\n" % (targetName, inputName, bestScore))

 def getTemplateScores(hhrFile, minScore, identifierLength):
-	result = dict()
-	topTemplate = None
-	identifierLength = identifierLength + 4
-	if os.path.isfile(hhrFile):
-		with open(hhrFile) as file:
-			for index, line in enumerate(file):
-				if index > 8:
-					if not line.strip():
-						break
-					templateId = line[4:identifierLength]
-					templateScore = float(line[57:63])
-					if templateScore > minScore:
-						if topTemplate is None:
-							topTemplate = templateId
-						result[templateId] = templateScore
-	return topTemplate, result
+    """Return the top template and a dict of all template scores above `minScore` read from `hhrFile`."""
+    scores, firstTemplate = dict(), None
+    idEnd = identifierLength + 4  # identifiers are sliced starting at column 4
+    if not os.path.isfile(hhrFile):
+        return firstTemplate, scores  # a missing file yields no hits
+    with open(hhrFile) as handle:
+        for lineNumber, row in enumerate(handle):
+            if lineNumber <= 8:
+                continue  # lines 0-8 are skipped
+            if not row.strip():
+                break  # scanning stops at the first blank line
+            score = float(row[57:63])
+            if score > minScore:
+                firstTemplate = row[4:idEnd] if firstTemplate is None else firstTemplate
+                scores[row[4:idEnd]] = score
+    return firstTemplate, scores

 if __name__ == "__main__":
-	parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.')
-	parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=True)
-	parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=True)
-	parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True)
-	parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True)
-	parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True)
-	parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6)
-	parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True)
-	parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10)
-	args = parser.parse_args()
-	main(args)
\ No newline at end of file
+    # Command-line entry point: parse the options and hand them to main().
+    cli = argparse.ArgumentParser(description="This script identifies interactions by detecting matching HH-search results.")
+    cli.add_argument("-tl", "--targetlist", required=True, help="Text file containing identifiers.")
+    cli.add_argument("-tp", "--targetpath", required=True, help="Directory containing `hhr` files")
+    cli.add_argument("-il", "--inputlist", help="Text file containing identifiers.")
+    cli.add_argument("-ip", "--inputpath", help="Directory containing `hhr` files")
+    cli.add_argument("-c", "--crossreference", required=True, help="Cross Reference index file")
+    cli.add_argument("-x", "--idx", type=int, default=6, help="Length of identifier")
+    cli.add_argument("-o", "--output", required=True, help="Output file containing min-Z scores")
+    cli.add_argument("-l", "--log", required=True, help="Log file")
+    cli.add_argument("-m", "--minscore", type=int, default=10, help="min-Z score threshold")
+    main(cli.parse_args())
\ No newline at end of file
--- a/spring_minz.xml	Wed Oct 28 06:49:58 2020 +0000
+++ b/spring_minz.xml	Thu Oct 29 13:04:47 2020 +0000
@@ -2,59 +2,60 @@
     <description>filter operation</description>
     <code file="spring_minz.py"/>
     <command detect_errors="exit_code"><![CDATA[
-        mkdir -p inputs &&
-        #for input in $inputs
-            link '${str(input)}' 'inputs/${input.element_identifier}' &&
-            echo '${input.element_identifier}' >> 'input_list' &&
-        #end for
-		mkdir -p targets &&
+        mkdir -p targets &&
         #for target in $targets
             link '${str(target)}' 'targets/${target.element_identifier}' &&
             echo '${target.element_identifier}' >> 'target_list' &&
         #end for
-		python3 '$__tool_directory__/spring_minz.py' -il input_list -ip inputs -tl target_list -tp targets -m '$minscore' -c '$crossreference' -x '$idx' -o '$output'
+        #if str($input_type.input_type_selector) == "true":
+            mkdir -p inputs &&
+            #for input in $input_type.inputs
+                link '${str(input)}' 'inputs/${input.element_identifier}' &&
+                echo '${input.element_identifier}' >> 'input_list' &&
+            #end for
+            python3 '$__tool_directory__/spring_minz.py' -tl target_list -tp targets -il input_list -ip inputs -m '$minscore' -c '$crossreference' -x '$idx' -o '$output' -l '$log'
+        #else
+            python3 '$__tool_directory__/spring_minz.py' -tl target_list -tp targets -m '$minscore' -c '$crossreference' -x '$idx' -o '$output' -l '$log'
+        #end if
     ]]></command>
     <inputs>
-        <param format="txt" name="targets" type="data_collection" label="Target Profiles" help="Homology search result of target/query profiles `hhr`."/>
-        <param format="txt" name="inputs" type="data_collection" collection_type="list" label="Input Profiles" help="Homology search results of input profiles `hhr`."/>
+        <param format="txt" name="targets" type="data_collection" collection_type="list" label="Target Profiles" help="Homology search result of target/query profiles `hhr`."/>
+        <conditional name="input_type">
+            <param name="input_type_selector" type="boolean" label="Identify interactions across sets?" checked="true"/>
+            <when value="true">
+                <param format="txt" name="inputs" type="data_collection" collection_type="list" label="Input Profiles" help="Homology search results of input profiles `hhr`."/>
+            </when>
+        </conditional>
         <param format="txt" name="crossreference" type="data" label="Cross Reference" help="Cross reference of interacting proteins `first_id metadata_id second_id`."/>
         <param name="minscore" type="integer" label="Score threshold" value="10" min="1" max="200" help="Matching interaction pairs with a score lower than this threshold will be excluded."/>
         <param name="idx" type="integer" label="Identifier length" value="6" min="1" max="20" help="Specify the length of the identifier e.g. `1ACB_A` has length 6."/>
     </inputs>
     <outputs>
-        <data format="tabular" name="output" />
+        <data format="tabular" name="output" label="SPRING min-Z Table"/>
+        <data format="txt" name="log" label="SPRING min-Z Log" />
     </outputs>
     <tests>
         <test>
             <param name="targets">
                 <collection type="list">
-					<element name="6WEY_A.hhr" value="6WEY_A.hhr" />
+                    <element name="7BQY_A.hhr" value="7BQY_A.hhr" />
+                    <element name="6WLC_A.hhr" value="6WLC_A.hhr" />
                     <element name="6WJI_A.hhr" value="6WJI_A.hhr" />
-                    <element name="6WLC_A.hhr" value="6WLC_A.hhr" />
-                    <element name="7BQY_A.hhr" value="7BQY_A.hhr" />
+                    <element name="6WEY_A.hhr" value="6WEY_A.hhr" />
                     <element name="6W37_A.hhr" value="6W37_A.hhr" />
                     <element name="6W9C_A.hhr" value="6W9C_A.hhr" />
+                    <element name="6W4H_B.hhr" value="6W4H_B.hhr" />
                     <element name="6W4H_A.hhr" value="6W4H_A.hhr" />
-                    <element name="6W4H_B.hhr" value="6W4H_B.hhr" />
                     <element name="6W9Q_A.hhr" value="6W9Q_A.hhr" />
                 </collection>
             </param>
+            <conditional name="input_type">
+                <param name="input_type_selector" type="boolean" checked="false"/>
+            </conditional>
             <param name="idx" value="10"/>
-            <param name="inputs">
-                <collection type="list">
-					<element name="6WEY_A.hhr" value="6WEY_A.hhr" />
-                    <element name="6WJI_A.hhr" value="6WJI_A.hhr" />
-                    <element name="6WLC_A.hhr" value="6WLC_A.hhr" />
-                    <element name="7BQY_A.hhr" value="7BQY_A.hhr" />
-                    <element name="6W37_A.hhr" value="6W37_A.hhr" />
-                    <element name="6W9C_A.hhr" value="6W9C_A.hhr" />
-                    <element name="6W4H_A.hhr" value="6W4H_A.hhr" />
-                    <element name="6W4H_B.hhr" value="6W4H_B.hhr" />
-                    <element name="6W9Q_A.hhr" value="6W9Q_A.hhr" />
-                </collection>
-            </param>
             <param name="crossreference" value="dbCAN_random.txt"/>
             <output name="output" file="dbCAN_result.txt"/>
+            <output name="log" file="dbCAN_log.txt"/>
         </test>
         <test>
             <param name="targets">
@@ -62,16 +63,20 @@
                     <element name="NP_000282.1.hhr" value="NP_000282.1.hhr" />
                 </collection>
             </param>
-            <param name="inputs">
-                <collection type="list">
-                    <element name="NP_000282.1.hhr" value="NP_000282.1.hhr" />
-                    <element name="NP_000290.2.hhr" value="NP_000290.2.hhr" />
-                    <element name="NP_000548.2.hhr" value="NP_000548.2.hhr" />
-                    <element name="NP_000836.2.hhr" value="NP_000836.2.hhr" />
-                </collection>
-            </param>
+            <conditional name="input_type">
+                <param name="input_type_selector" type="boolean" checked="true"/>
+                <param name="inputs">
+                    <collection type="list">
+                        <element name="NP_000282.1.hhr" value="NP_000282.1.hhr" />
+                        <element name="NP_000290.2.hhr" value="NP_000290.2.hhr" />
+                        <element name="NP_000548.2.hhr" value="NP_000548.2.hhr" />
+                        <element name="NP_000836.2.hhr" value="NP_000836.2.hhr" />
+                    </collection>
+                </param>
+            </conditional>
             <param name="crossreference" value="pdb70_random.txt"/>
             <output name="output" file="pdb70_result.txt"/>
+            <output name="log" file="pdb70_log.txt"/>
         </test>
     </tests>
     <help><![CDATA[
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dbCAN_log.txt	Thu Oct 29 13:04:47 2020 +0000
@@ -0,0 +1,9 @@
+Interaction between 7BQY_A.hhr and 6W37_A.hhr [min-Z: 14.0].
+Interaction between 7BQY_A.hhr and 6W9C_A.hhr [min-Z: 24.2].
+Interaction between 7BQY_A.hhr and 6W4H_B.hhr [min-Z: 24.0].
+Interaction between 6W37_A.hhr and 6W37_A.hhr [min-Z: 14.0].
+Interaction between 6W37_A.hhr and 6W9C_A.hhr [min-Z: 21.3].
+Interaction between 6W37_A.hhr and 6W4H_B.hhr [min-Z: 16.0].
+Interaction between 6W4H_A.hhr and 6W4H_A.hhr [min-Z: 24.7].
+Interaction between 6W9Q_A.hhr and 6W9C_A.hhr [min-Z: 21.0].
+Interaction between 6W9Q_A.hhr and 6W4H_B.hhr [min-Z: 22.3].
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pdb70_log.txt	Thu Oct 29 13:04:47 2020 +0000
@@ -0,0 +1,2 @@
+Interaction between NP_000282.1.hhr and NP_000282.1.hhr [min-Z: 875.1].
+Interaction between NP_000282.1.hhr and NP_000290.2.hhr [min-Z: 86.5].