Galaxy |

Changeset 0:e41ec5af7472 (2018-08-02)

Next changeset 1:5f589c91566e (2018-10-30)

Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/heinz commit b0b2c64a46bdd9beebdfb7fc5312f75346483763

added:
bum.R
bum.xml
heinz.xml
heinz_scoring.py
heinz_scoring.xml
test-data/BUM_input.txt
test-data/BUM_output.txt
test-data/EdgesPCST.txt
test-data/Heinz_output.txt
test-data/Heinz_score_using_BUM_output.txt
test-data/Heinz_score_using_manual_BUM_params.txt
test-data/NodesPCST.txt
test-data/genes_with_a_p_value.csv
test-data/heinz_graph.pdf
visualization.py
visualization.xml

diff -r 000000000000 -r e41ec5af7472 bum.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bum.R Thu Aug 02 11:57:44 2018 -0400

@@ -0,0 +1,34 @@
+# Author: Cico Zhang
+# Usage: Rscript bum.R --input p-values.txt --output result.txt
+#
+# Fits a Beta-Uniform Mixture (BUM) model to the p-values read from --input
+# with BioNet's fitBumModel() and writes the two fitted parameters to
+# --output: lambda first, then a (alpha), one value per line.
+
+# Send R error messages to stderr and quit with exit status 1
+options(
+  show.error.messages = FALSE,
+  error = function() {
+    cat(geterrmessage(), file = stderr())
+    q("no", 1, FALSE)
+  }
+)
+
+# Avoid crashing Galaxy with an UTF8 error on German LC settings
+#loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+# Import required libraries
+suppressPackageStartupMessages({
+  library("getopt")
+  library("BioNet")
+})
+
+# Option specification, one row per option:
+# long name, short flag, argument mask (2 = optional argument), data type
+option_specification <- matrix(c(
+  "input", "i", 2, "character",
+  "output", "o", 2, "character"
+), byrow = TRUE, ncol = 4)
+
+# Parse options from the default source: commandArgs(TRUE)
+opts <- getopt(option_specification)
+
+# Both options are declared optional in the spec, so fail early with a clear
+# message instead of an obscure read.table()/write.table() error later on.
+if (is.null(opts$input)) {
+  stop("No input file given (--input).", call. = FALSE)
+}
+if (is.null(opts$output)) {
+  stop("No output file given (--output).", call. = FALSE)
+}
+
+pvals <- read.table(opts$input)
+bum <- fitBumModel(pvals, plot = FALSE)
+# Order matters: downstream tools read lambda from line 1 and alpha from line 2
+mat <- c(bum$lambda, bum$a)
+write.table(x = mat, file = opts$output, quote = FALSE,
+            row.names = FALSE, col.names = FALSE)
+message("Parameters have been estimated successfully!")

diff -r 000000000000 -r e41ec5af7472 bum.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bum.xml Thu Aug 02 11:57:44 2018 -0400

[

@@ -0,0 +1,41 @@
+<tool id="heinz_bum" name="Fit a BUM model" version="1.0">
+    <description>with p-values</description>
+    <requirements>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.34.0">bioconductor-bionet</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+Rscript '$__tool_directory__/bum.R' --input '$p_values' --output '$dist_params'
+    ]]></command>
+    <inputs>
+        <param type="data" name="p_values" label="Input file" format="txt" help="
+        The input file should only contain p-values, one per line." />
+    </inputs>
+    <outputs>
+        <data name="dist_params" format="txt" label="${tool.name} on ${on_string}: BUM params"/>
+    </outputs>
+    <tests>
+      
+        <test>
+            <param name="p_values" value="BUM_input.txt" />
+            <output name="dist_params">
+                <assert_contents>
+                    <has_text text="0.684" />
+                    <has_text text="0.376" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+BUM is the abbreviation for "Beta-Uniform Mixture".
+
+This tool (using the package "BioNet") fits a Beta-Uniform Mixture model
+to a distribution of p-values. The output is a text file holding the two
+model parameters: lambda on the first line and alpha on the second.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btq089</citation>
+        <citation type="doi">10.1093/bioinformatics/btn161</citation>
+    </citations>
+</tool>

diff -r 000000000000 -r e41ec5af7472 heinz.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/heinz.xml Thu Aug 02 11:57:44 2018 -0400

[

@@ -0,0 +1,76 @@
+<tool id="heinz" name="Identify optimal scoring subnetwork" version="1.0">
+    <description>using Heinz</description>
+    <requirements>
+        <requirement type="package" version="2.0">heinz</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+heinz -m \${GALAXY_SLOTS:-2} -n '$score' -e '$edge' 1> '$subnetwork'
+    ]]></command>
+    <inputs>
+        <param type="data" name="score" format="txt" label="File containing Heinz scores"
+        help="The file should contain two columns: the node identifier (e.g. gene) and the corresponding Heinz score" />
+        <param type="data" name="edge" format="txt" label="Edge file"
+        help="In this file, two nodes define one edge, one edge per line" />
+    </inputs>
+    <outputs>
+        <data name="subnetwork" format="txt" label="${tool.name} on ${on_string}: Optimal subgraph" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="score" value="NodesPCST.txt" />
+            <param name="edge" value="EdgesPCST.txt" />
+            <output name="subnetwork">
+                <assert_contents>
+                    <has_text text='label="a\n5\n0\n0"' />
+                    <has_text text='label="b\n-4\n0\n1",shape=box' />
+                    <has_text text="Total weight: 36" />
+                    <has_text text="0 -- 1" />
+                    <has_text text="1 -- 3" />
+                    <has_text text="5 -- 7" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**Note**: You are currently using a version of Heinz based on IBM CPLEX Community version,
+which is limiting the capacity of Heinz in handling big networks. For an unlimited version of Heinz,
+you need to get a license of IBM CPLEX and compile Heinz from scratch from here https://github.com/ls-cwi/heinz.
+We are currently looking for an alternative to IBM CPLEX in Heinz to make an unlimited version of Heinz available
+to the public as soon as possible.
+
+Score file --- the output file of the tool "Calculate Heinz scores": two columns delimited
+by a tab without headers, the first column is node identifier (e.g., genes, KEGG ORTHOLOGY (KO));
+the second, Heinz score.
+
+========= ===================
+BRCA2     -6.991782933819368
+--------- -------------------
+BRCA1     -5.206139799106934
+--------- -------------------
+AACS      -0.9299868303078357
+--------- -------------------
+ABCC11    -5.845009850430119
+========= ===================
+
+
+Edge file: the background network Heinz uses in the form of a list of edges; each line is made up of
+two node identifiers (e.g., genes, KEGG ORTHOLOGY (KO)) delimited by a tab. In practice, we could
+prepare this file using different pathway databases, such as Reactome, STRING and KEGG. Which database
+to choose depends on the question to solve.
+
+========= =========
+ACTR1B    ACVR2B
+--------- ---------
+ZSWIM9    FOXP3
+--------- ---------
+LGALS4    PRKX
+--------- ---------
+NPTX1     CIAO1
+========= =========
+
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btn161</citation>
+    </citations>
+</tool>

diff -r 000000000000 -r e41ec5af7472 heinz_scoring.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/heinz_scoring.py Thu Aug 02 11:57:44 2018 -0400

[

@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+"""Calculate scores for Heinz.
+
+This script transforms a p-value into a score:
+    1. Use alpha and lambda to calculate a threshold P-value.
+    2. Calculate a score based on each P-value by alpha and the threshold.
+
+For more details, please refer to the paper doi:10.1093/bioinformatics/btn161
+
+Input:
+    P-values from DESeq2 result: first column: names, second column P-values
+Output:
+    Scores, which will be used as the input of Heinz.
+    First column: names, second column: scores.
+
+Python 3 is required.
+"""
+# Implemented by: Chao (Cico) Zhang
+# Homepage: https://Hi-IT.org
+# Date: 14 Mar 2017
+# Last modified: 23 May 2018
+
+import argparse
+import sys
+
+import numpy as np
+import pandas as pd
+
+
+parser = argparse.ArgumentParser(description='Transform a P-value into a '
+                                 'score which can be used as the input of '
+                                 'Heinz')
+parser.add_argument('-n', '--node', required=True, dest='nodes',
+                    metavar='nodes_pvalue.txt', type=str,
+                    help='Input file of nodes with P-values')
+parser.add_argument('-f', '--fdr', required=True, dest='fdr',
+                    metavar='0.007', type=float, help='Choose a value of FDR')
+parser.add_argument('-m', '--model', required=False, dest='param_file',
+                    metavar='param.txt', type=str,
+                    help='A txt file contains model params as input')
+parser.add_argument('-a', '--alpha', required=False, dest='alpha',
+                    metavar='0.234', type=float, default=0.5,
+                    help='Single parameter alpha as input if txt input is '
+                    'not provided')
+parser.add_argument('-l', '--lambda', required=False, dest='lam',
+                    metavar='0.345', type=float, default=0.5,
+                    help='Single parameter lambda as input if txt input is '
+                    'not provided')
+parser.add_argument('-o', '--output', required=True, dest='output',
+                    metavar='scores.txt', type=str,
+                    help='The output file to store the calculated scores')
+args = parser.parse_args()
+
+# Check if the parameters are complete
+if args.output is None:
+    sys.exit('Output file is not designated.')
+
+if args.nodes is None:
+    sys.exit('Nodes with p-values must be provided.')
+
+if args.fdr is None:
+    sys.exit('FDR must be provided')
+
+if args.fdr >= 1 or args.fdr <= 0:
+    sys.exit('FDR must greater than 0 and smaller than 1')
+
+# run heinz-print according to the input type
+if args.param_file is not None:  # if BUM output is provided
+    with open(args.param_file) as p:
+        params = p.readlines()
+        lam = float(params[0])  # Maybe this is a bug
+        alpha = float(params[1])  # Maybe this is a bug
+# if BUM output is not provided
+elif args.alpha is not None and args.lam is not None:
+    lam = args.lam
+    alpha = args.alpha
+else:  # The input is not complete
+    sys.exit('The parameters of the model are incomplete.')
+
+# Calculate the threshold P-value
+pie = lam + (1 - lam) * alpha
+p_threshold = np.power((pie - lam * args.fdr) / (args.fdr - lam * args.fdr),
+                       1 / (alpha - 1))
+print(p_threshold)
+# Calculate the scores
+input_pvalues = pd.read_csv(args.nodes, sep='\t', names=['node', 'pvalue'])
+input_pvalues.loc[:, 'score'] = input_pvalues.pvalue.apply(lambda x:
+                                                           (alpha - 1) *
+                                                           (np.log(x) -
+                                                            np.log(
+                                                                p_threshold)))
+# print(input_pvalues.loc[:, ['node', 'score']])
+input_pvalues.loc[:, ['node', 'score']].to_csv(args.output, sep='\t',
+                                               index=False, header=False)

diff -r 000000000000 -r e41ec5af7472 heinz_scoring.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/heinz_scoring.xml Thu Aug 02 11:57:44 2018 -0400

[

@@ -0,0 +1,90 @@
+<tool id="heinz_scoring" name="Calculate a Heinz score" version="1.0">
+    <description>for each node</description>
+    <requirements>
+        <requirement type="package" version="0.19.2">pandas</requirement>
+        <requirement type="package" version="1.12.0">numpy</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/heinz_scoring.py' -n '$node' -f '$FDR' -o '$score'
+#if str( $input_type.input_type_selector ) == "bum_type":
+    -l '$input_type.lambda' -a '$input_type.alpha'
+#else:
+    -m '$input_type.input_bum'
+#end if
+    ]]></command>
+    <inputs>
+        <param type="data" name="node" format="txt" label="A node file with p-values"
+        help="The file should contain two columns: the node identifier (e.g. gene) and the corresponding p-value" />
+        <param type="float" name="FDR" value="0.5" min="0" max="1" label="FDR value"
+        help="False Discovery Rate (FDR), e.g. 0.0001"/>
+        <conditional name="input_type">
+            <param name="input_type_selector" type="select" label="Choose your input type for BUM parameters">
+                <option value="bum_output" selected="True">The output file of BUM model</option>
+                <option value="bum_type">Manually type the parameters of BUM model.</option>
+            </param>
+            <when value="bum_type">
+                <param type="float" name="lambda" value="0.5" min="0" max="1" label="lambda" />
+                <param type="float" name="alpha" value="0.5" min="0" max="1" label="alpha" />
+            </when>
+            <when value="bum_output">
+                <param type="data" name="input_bum" format="txt" label="Output file of BUM model as input:
+                lambda on the first line and alpha, the second" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="score" format="txt" label="${tool.name} on ${on_string}: Heinz scores" />
+    </outputs>
+    <tests>
+        
+        <test>
+            <param name="node" value="genes_with_a_p_value.csv" />
+            <param name="FDR" value="0.001" />
+            <param name="input_type_selector" value="bum_type" />
+            <param name="lambda" value="0.546" />
+            <param name="alpha" value="0.453" />
+            <output name="score" file="Heinz_score_using_manual_BUM_params.txt" />
+        </test>
+        
+        <test>
+            <param name="node" value="genes_with_a_p_value.csv" />
+            <param name="FDR" value="0.001" />
+            <param name="input_type_selector" value="bum_output" />
+            <param name="input_bum" value="BUM_output.txt" />
+            <output name="score" file="Heinz_score_using_BUM_output.txt" />
+        </test>
+    </tests>
+    <help><![CDATA[
+This tool is used to calculate a Heinz score for each node (e.g., a gene). This
+score further serves as the input of the tool "Identify optimal scoring subnetwork".
+
+The input "Node file with p-values" should be like this:
+
+========= ===================
+K10970    0.00278208628672861
+--------- -------------------
+K10780    0.0029566591795884
+--------- -------------------
+K01484    0.0157152504694443
+--------- -------------------
+K09055    0.0188894478579773
+========= ===================
+
+
+The first column is "node identifier" (e.g., a gene name); the second column, "p-value".
+The columns are delimited by a tab; no headers are needed.
+
+The section "Choose your input type for BUM parameters" accepts one of two alternative types of input:
+
+1. "The output file of BUM model" (produced by the tool "Fit a BUM model"): the first line of the file
+   is lambda; the second, alpha.
+
+2. "Manually type the parameters of BUM model": lambda and alpha.
+
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btn161</citation>
+        <citation type="doi">10.1093/bioinformatics/btg148</citation>
+    </citations>
+</tool>

diff -r 000000000000 -r e41ec5af7472 test-data/BUM_input.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/BUM_input.txt Thu Aug 02 11:57:44 2018 -0400

b'@@ -0,0 +1,1000 @@\n+0.49481751513521\n+0.562753719976071\n+0.744133012583812\n+0.5820479466754\n+0.277084259090565\n+0.280005959376492\n+0.341604323677893\n+0.659077451445324\n+0.0430231866782772\n+0.710357423493805\n+0.717040204365618\n+0.369196736655045\n+0.283710170519702\n+0.463477113101571\n+0.941146391172019\n+0.413770914841299\n+0.967537969882414\n+0.519166646814472\n+0.427899034185276\n+0.461227232625961\n+0.222858349291007\n+0.734922697988863\n+0.0456725302598508\n+0.995767048547236\n+0.0915756529362226\n+0.00120685411681\n+0.0388888773605078\n+0.617726611493306\n+0.133321251644567\n+0.340636053697019\n+0.177380444230216\n+0.519984527417586\n+0.105684937409975\n+0.313368648950836\n+0.0115096903747378\n+0.570438880100494\n+0.000738816572187191\n+0.115181723593816\n+0.117727355289624\n+0.453773977765532\n+0.599008069698606\n+0.366714610297756\n+0.865928280276381\n+0.400688439878588\n+0.686393883083386\n+0.740246932407673\n+0.0486511268243364\n+0.887662695899543\n+0.40508463598585\n+0.78435569161742\n+0.662970389774761\n+0.928807915375191\n+0.00369375258213678\n+0.298961555472186\n+0.272606894114562\n+0.546820623366012\n+0.706989923705387\n+0.813316396014418\n+0.0282992911886284\n+0.114986698540584\n+0.816452589210325\n+0.274950800290431\n+0.318057869290295\n+0.913670986044854\n+0.47795013386574\n+0.360785944988818\n+0.307989497298263\n+0.61804501524227\n+0.103922698756938\n+0.350667613950193\n+0.176078589945038\n+0.745902041693146\n+0.576994077008291\n+0.170398155426894\n+0.1541649879633\n+0.0330017239563531\n+0.0293608049078487\n+7.21452342190644e-06\n+0.630569403264635\n+0.145024531215092\n+0.436416429611253\n+0.921961370972599\n+0.825540806920698\n+0.0882918508819074\n+0.875958713997509\n+0.0790585354887591\n+0.254240017811673\n+0.433229466752555\n+0.527445597675559\n+0.71518566406593\n+0.140499969468555\n+0.473917903374763\n+0.66322630983507\n+0.768523685569719\n+0.212725261086578\n+0.0047348819340012\n+0.655266034514027\n+0.140758830097346\n+0.7726240
77145585\n+0.45578365913374\n+0.804811668130559\n+0.581866229472084\n+0.0241337990088429\n+0.67809202044837\n+0.345048005704991\n+0.173998878964601\n+0.628577274816718\n+0.107469341348957\n+0.34572034387723\n+0.764191025138894\n+0.76143595558573\n+0.76350101736562\n+0.989707001273934\n+0.0449899053526661\n+0.246247280173831\n+0.591642206830146\n+0.0896093879631843\n+0.292297806019216\n+0.0631665162037131\n+0.540107781837766\n+0.322265114010064\n+0.597359423273375\n+0.688117527008375\n+0.934689315245155\n+0.52632461594423\n+0.708703724829695\n+0.437356798262351\n+0.275217000320249\n+0.723663876816597\n+0.21816301805726\n+0.0586976705194264\n+0.528343233507166\n+0.348163027237861\n+0.229228887118277\n+0.291292808810915\n+0.489382917859043\n+0.0558177461351703\n+0.959075580583519\n+0.0564280030267507\n+0.00684706763197022\n+0.166539119395623\n+0.0623267647059293\n+0.850165292532509\n+0.0420002036107131\n+0.872891172295804\n+0.65640002552188\n+0.0841154525363908\n+0.0115987570215684\n+0.0866502473058187\n+0.889258645071542\n+0.00606343144327119\n+0.789830938148316\n+0.151573865534411\n+0.848615361230869\n+0.268380019696077\n+0.0562373952671436\n+0.870705565349727\n+0.609301798198624\n+0.688695241719027\n+0.994671933998766\n+0.0258695171412451\n+0.245989125870314\n+0.187081789023063\n+0.976563535973874\n+0.976301290173714\n+0.299717964494201\n+0.330547172633139\n+0.384777166873804\n+0.0868195309689014\n+0.736284879950963\n+0.0147202728959051\n+0.983982519454288\n+0.000130480134148907\n+0.0144245966718382\n+0.240468583450882\n+0.0611488266290817\n+0.00242371536502852\n+0.669111915757483\n+0.312254834231797\n+0.38098047213934\n+0.307995859589542\n+0.662871507852625\n+0.859659541352016\n+0.23695614129232\n+0.683217490884554\n+0.00779918833128529\n+0.426918928224668\n+0.354163996706725\n+0.382836204769902\n+0.870772763366846\n+0.159156204286861\n+0.0525184125956388\n+0.202158066741185\n+0.681455570244015\n+0.256191108370611\n+0.752464432128511\n+0.663168761117561\n+0.8676740
95882039\n+0.428497046474926\n+0.275366414207152\n+0.357019570660332\n+0.118844869338568\n+0.0184286807600588\n+0.116761817795942\n+0.980424233838723\n+0.466070504913689\n+0.141724443812031\n+0.413926379552881'..b'7213923\n+0.873489350370018\n+0.289251866673951\n+0.0350966467432808\n+0.938637332608205\n+0.0232172571964305\n+4.51644774651476e-07\n+0.0897238418283346\n+0.623809160088457\n+4.91486550187763e-07\n+0.311264551659959\n+0.628376874978171\n+0.854436760773822\n+0.00118806629622962\n+0.0834089925447356\n+0.59521734901398\n+0.00939024663496496\n+0.0175299869385936\n+0.723854408292602\n+0.495821684674521\n+0.0518378377215187\n+0.526081804407129\n+0.519457700997639\n+0.905806501223034\n+0.0254511811747993\n+0.950370893370638\n+0.453939930752726\n+0.351127967455516\n+0.565630489588527\n+0.336226567263163\n+0.745270194175886\n+0.19022564072336\n+0.61322683762087\n+0.659268995185285\n+0.399289237242576\n+0.628469750044814\n+0.920918867578836\n+0.852106756459696\n+0.424135482074893\n+0.0713208064710028\n+0.24871346894129\n+0.0158892288450341\n+0.0183479609923709\n+0.247013895228423\n+0.735143691953843\n+0.896966030011141\n+0.0362273182668588\n+0.603563530653737\n+0.960063741706101\n+0.488603224454184\n+0.316800864451958\n+0.164537251066126\n+0.844477662864338\n+0.187679840805534\n+0.146645678835831\n+0.782156028528406\n+0.852413751820361\n+0.109149522242336\n+0.0666954990923861\n+0.417837947869313\n+0.134458382645643\n+0.00010263814404356\n+0.606332345399193\n+0.220520608869126\n+0.902740791254412\n+0.972769549348979\n+0.786059198400105\n+0.000257919193829843\n+0.099728863201009\n+0.48813400811968\n+0.122115874340003\n+0.691314984298683\n+0.704248690968703\n+0.356039379562619\n+0.236340151893964\n+0.21613241342977\n+0.462814816913001\n+0.147630829826267\n+0.614006317396632\n+0.0276123983701355\n+0.0388067496785611\n+0.798372213326457\n+0.0356607587870191\n+0.746565470671474\n+0.179076610104685\n+0.7067660683508\n+0.327207461495618\n+0.0699681593671749\n+0.05898128558
09452\n+0.822298002610464\n+0.0541609182688533\n+0.0014645166208377\n+0.414455507152768\n+0.147278458237102\n+0.855082340806644\n+0.0973647799836709\n+0.000466125250830992\n+0.725913028083264\n+0.418867202232968\n+0.357621934557879\n+0.592145135377933\n+0.874710298556188\n+0.251052174249483\n+0.438160107924926\n+0.445084708981774\n+0.158346045041189\n+0.521346950954502\n+0.681873214968761\n+0.529423633575557\n+0.615822118249103\n+0.838796012373514\n+0.877471162594851\n+0.589874394347028\n+0.494195471524579\n+0.693291252237846\n+0.467819681135473\n+0.81701823358668\n+0.694319999525736\n+0.121962823844595\n+0.151703062046977\n+0.417877280101387\n+0.457444553241744\n+0.400970256713384\n+0.146620637420985\n+0.212734754065417\n+6.46130403127839e-05\n+3.15313299235481e-06\n+0.356138177556596\n+0.910533507420001\n+0.473334155506573\n+0.295526798546519\n+0.215910129059011\n+0.817498688652582\n+0.427745873300515\n+0.589221380726825\n+0.790977061704187\n+0.232250506712447\n+0.0130939697694528\n+0.454884331603569\n+0.0720679033612144\n+0.198986466036537\n+0.371388049910615\n+0.0886681369094951\n+0.943456029196158\n+0.0115962709753868\n+0.169147563289402\n+0.95440970300256\n+0.148186290334766\n+0.79469864959776\n+0.398061188204062\n+0.775885906340456\n+0.534580526179839\n+0.525126819365943\n+0.0636524244086398\n+0.483695842136235\n+0.550013297321542\n+0.803957488923753\n+0.0687732353561399\n+0.0223467066851557\n+0.194705380015316\n+0.561004020062096\n+0.675328722659393\n+0.433019001895956\n+0.705598921589213\n+0.62012802890816\n+0.815613485139828\n+0.633137051349011\n+0.963970390481642\n+0.75867006742631\n+0.822377585232521\n+0.628374113266244\n+0.420657899764137\n+0.569966801742234\n+0.865267703405176\n+0.00337493056633076\n+0.30400350300163\n+0.514564453330559\n+0.150859876840911\n+0.528228602944177\n+0.040236592014929\n+0.140563657143996\n+0.140426371242889\n+0.758223225206527\n+0.184407866286835\n+0.237803827253682\n+0.901539046853114\n+0.333050038387453\n+0.781529363473918
\n+0.194190412648925\n+0.476659487150017\n+0.953836884980725\n+0.784592981944965\n+0.583406195272662\n+0.662890565822532\n+0.83318628321259\n+0.709236399906408\n+0.729917970326033\n+0.854039297563908\n+0.224122140131679\n+0.0892458988605694\n+0.965188985280667\n+0.608957726415198\n+0.1545456981837\n+0.00112303024929184\n+0.493677809317058\n+0.320267370941394\n+0.957479463898128\n+0.663938331651806\n+0.421226891355979\n'

diff -r 000000000000 -r e41ec5af7472 test-data/BUM_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/BUM_output.txt Thu Aug 02 11:57:44 2018 -0400

@@ -0,0 +1,2 @@
+0.546281463286217
+0.453175526745403

diff -r 000000000000 -r e41ec5af7472 test-data/EdgesPCST.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/EdgesPCST.txt Thu Aug 02 11:57:44 2018 -0400

@@ -0,0 +1,10 @@
+a b
+b c
+a c
+c d
+b d
+d f
+c e
+f h
+f g
+e f

diff -r 000000000000 -r e41ec5af7472 test-data/Heinz_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Heinz_output.txt Thu Aug 02 11:57:44 2018 -0400

[

@@ -0,0 +1,22 @@
+// Successfully parsed '/export/galaxy-central/database/files/000/dataset_183.dat': contains 8 nodes, 10 edges and 1 component(s)
+
+// Considering component 1/1: contains 8 nodes
+// Identified 4 dependent node pairs
+// Added 4 analyze constraints
+// Solution with weight 36 and 6 nodes found
+graph G {
+ overlap=scale
+ layout=neato
+ 0 [label="a\n5\n0\n0"]
+ 1 [label="b\n-4\n0\n1",shape=box]
+ 3 [label="d\n30\n0\n3"]
+ 5 [label="f\n-15\n0\n5",shape=box]
+ 6 [label="g\n10\n0\n6"]
+ 7 [label="h\n10\n0\n7"]
+ label="Total weight: 36"
+ 0 -- 1
+ 1 -- 3
+ 3 -- 5
+ 5 -- 6
+ 5 -- 7
+}

diff -r 000000000000 -r e41ec5af7472 test-data/Heinz_score_using_BUM_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Heinz_score_using_BUM_output.txt Thu Aug 02 11:57:44 2018 -0400

@@ -0,0 +1,3 @@
+GRHPR -6.060224431226826
+B4GALT6 -7.230950844434514
+FGF17 -7.312812620557818

diff -r 000000000000 -r e41ec5af7472 test-data/Heinz_score_using_manual_BUM_params.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Heinz_score_using_manual_BUM_params.txt Thu Aug 02 11:57:44 2018 -0400

@@ -0,0 +1,3 @@
+GRHPR -6.058859736554457
+B4GALT6 -7.229961944568557
+FGF17 -7.3118499977365765

diff -r 000000000000 -r e41ec5af7472 test-data/NodesPCST.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/NodesPCST.txt Thu Aug 02 11:57:44 2018 -0400

@@ -0,0 +1,8 @@
+a 5
+b -4
+c -5
+d 30
+e -5
+f -15
+g 10
+h 10

diff -r 000000000000 -r e41ec5af7472 test-data/genes_with_a_p_value.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genes_with_a_p_value.csv Thu Aug 02 11:57:44 2018 -0400

@@ -0,0 +1,3 @@
+GRHPR 0.0843889413906522
+B4GALT6 0.717943620714012
+FGF17 0.833884563412679

diff -r 000000000000 -r e41ec5af7472 test-data/heinz_graph.pdf

Binary file test-data/heinz_graph.pdf has changed

diff -r 000000000000 -r e41ec5af7472 visualization.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/visualization.py Thu Aug 02 11:57:44 2018 -0400

[

@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+"""Visualise the output of Heinz.
+
+This script is used to visualize the output of Heinz, which is in the form of
+DOT language:
+    1. Clear the output of Heinz, extract the DOT source code.
+    2. Visualize the DOT source code and save it into file.
+
+The function of this script is rather simple, for more advanced visualization,
+please adopt other solutions mentioned in the paper
+doi: 10.1093/bioinformatics/btv526
+
+This tool is only designed for visualizing the output of Heinz tool.
+"""
+
+# Author: Cico Zhang
+# Date: 2 Aug 2017
+# Version: 0.2
+
+import argparse
+import sys
+
+from graphviz import Source
+
+
+def get_args():
+    """Collect the inputs."""
+    parser = argparse.ArgumentParser(
+        description='Visualise the output of Heinz')
+    parser.add_argument('-i', '--input', required=True, dest='heinz',
+                        metavar='Heinz_output.txt', type=str,
+                        help='Output file of Heinz as the input')
+    parser.add_argument('-o', '--output', required=True, dest='output',
+                        metavar='graph.pdf', type=str,
+                        help='The output file that saves the visualisation')
+    args = parser.parse_args()
+
+    if args.heinz is None:
+        sys.exit('Input file must be designated.')
+
+    return args
+
+
+def main():
+    """Main function."""
+    args = get_args()
+    # Read the whole output file
+    with open(args.heinz) as r:
+        graph_dot = r.readlines()
+
+    # Remove the redundant lines
+    while not graph_dot[0].startswith('graph G {'):
+        graph_dot.pop(0)
+
+    src = Source(''.join(graph_dot))
+    data_pdf = src.pipe('pdf')
+    # Redirect the output (very important)
+    with open(args.output, 'wb') as w:
+        w.write(data_pdf)
+    print('The visualization is saved as PDF!')
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()

diff -r 000000000000 -r e41ec5af7472 visualization.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/visualization.xml Thu Aug 02 11:57:44 2018 -0400

[

@@ -0,0 +1,33 @@
+<tool id="heinz_visualization" name="Visualize" version="0.1.0">
+    <description>the optimal scoring subnetwork</description>
+    <requirements>
+        <requirement type="package" version="2.38.0">graphviz</requirement>
+        <requirement type="package" version="0.4.10">py-graphviz</requirement>
+    </requirements>
+    <!-- All dataset paths are single-quoted so the shell treats each one as a single argument -->
+    <command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/visualization.py' -i '$subnetwork' -o '$visualization'
+    ]]></command>
+    <inputs>
+        <param type="data" name="subnetwork" format="txt" label="Heinz output file"
+        help='Output file of the tool "Identify optimal scoring subnetwork"'/>
+    </inputs>
+    <outputs>
+        <data name="visualization" format="pdf" label="${tool.name} on ${on_string}: Heinz visualization" />
+    </outputs>
+    <tests>
+      <test>
+        <param name="subnetwork" value="Heinz_output.txt" />
+        <output name="visualization" file="heinz_graph.pdf" compare="sim_size" delta="2000" />
+      </test>
+    </tests>
+    <help><![CDATA[
+This tool provides a simple visualisation of the raw output of the Heinz Galaxy tool
+and saves the output as PDF.
+
+Heinz output file: output file of the tool "Identify optimal scoring subnetwork".
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btn161</citation>
+        <citation type="doi">10.1093/bioinformatics/btg148</citation>
+    </citations>
+</tool>