changeset 1:221d7dca03a5 draft

Uploaded v0.0.4, which adds a unit test
author peterjc
date Tue, 16 Apr 2013 13:01:54 -0400
parents b8be455c27d1
children d1aebb0acee7
files test-data/four_human_proteins.clinod-1.3.tabular test-data/four_human_proteins.fasta tools/protein_analysis/clinod.txt tools/protein_analysis/clinod.xml
diffstat 4 files changed, 97 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.clinod-1.3.tabular	Tue Apr 16 13:01:54 2013 -0400
@@ -0,0 +1,4 @@
+#ID	Start	End	NOLS
+sp|Q9NSY1|BMP2K_HUMAN	965	998	SQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHH
+sp|Q9NSY1|BMP2K_HUMAN	1000	1035	TPTSTKKTLKPTYRTPERARRHKKVGRRDSQSSNEF
+sp|P06213|INSR_HUMAN	286	307	CQDLHHKCKNSRRQGCHQYVIH
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta	Tue Apr 16 13:01:54 2013 -0400
@@ -0,0 +1,61 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
+SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
+REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
+VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
+CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
+HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEG
+GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
+DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
+LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
+DPEHRPDIFQVSYFAFKFAKKDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDT
+IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
+ILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ
+QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQY
+QQAFFQQQMLAQHQPSQQQASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEELLDREFDLLRSNRLEERASSD
+KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPE
+NLGHRPLLMDSEDEEEEEKHSSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
+QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
+APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARR
+HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
+WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
+SQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
+VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
+ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
+TVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETL
+EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
+NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
+DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
+RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAF
+PNTSSTSVPTSPEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
+SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
+SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
+EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
+FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
+AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEME
+FEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
+PS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
+EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES
+ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
+YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
--- a/tools/protein_analysis/clinod.txt	Tue Aug 02 06:51:52 2011 -0400
+++ b/tools/protein_analysis/clinod.txt	Tue Apr 16 13:01:54 2013 -0400
@@ -39,6 +39,9 @@
 =======
 
 v0.0.1 - Initial public release
+v0.0.2 - Treat non-zero return codes as errors
+v0.0.3 - Describe output table in help
+v0.0.4 - Added unit test
 
 
 Developers
@@ -47,16 +50,18 @@
 This script and related tools are being developed on the following hg branch:
 http://bitbucket.org/peterjc/galaxy-central/src/tools
 
-For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
 the following command from the Galaxy root folder:
 
-$ tar -czf clinod.tar.gz tools/protein_analysis/clinod.xml tools/protein_analysis/clinod.txt
+$ tar -czf clinod.tar.gz tools/protein_analysis/clinod.xml tools/protein_analysis/clinod.txt test-data/four_human_proteins.fasta test-data/four_human_proteins.clinod-1.3.tabular
 
 Check this worked:
 
 $ tar -tzf clinod.tar.gz
 tools/protein_analysis/clinod.xml
 tools/protein_analysis/clinod.txt
+test-data/four_human_proteins.fasta
+test-data/four_human_proteins.clinod-1.3.tabular
 
 
 Licence (MIT/BSD style)
--- a/tools/protein_analysis/clinod.xml	Tue Aug 02 06:51:52 2011 -0400
+++ b/tools/protein_analysis/clinod.xml	Tue Apr 16 13:01:54 2013 -0400
@@ -1,10 +1,15 @@
-<tool id="clinod" name="Nucleolar localization sequence Detector (NoD)" version="0.0.1">
+<tool id="clinod" name="Nucleolar localization sequence Detector (NoD)" version="0.0.4">
     <description>Find nucleolar localization signals (NoLSs) in protein sequences</description>
     <command>
       java -jar /opt/clinod/clinod-1.3.jar -in="$fasta_file" -out="$tabular_file" -t=8 -f=MEDIUM_TAB -nonols -clean_sequence
       ##I want the number of threads to be a Galaxy config option...
-      ##TODO - Make the -clean_sequece argument a parameter?
+      ##TODO - Make the -clean_sequence argument a parameter?
     </command>
+    <stdio>
+        <!-- Assume anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
     <inputs>
         <param name="fasta_file" type="data" format="fasta" label="FASTA file of protein sequences"/> 
     </inputs>
@@ -14,8 +19,14 @@
     <requirements>
         <requirement type="binary">java</requirement>
     </requirements>
+    <tests>
+        <test>
+            <param name="fasta_file" value="four_human_proteins.fasta" ftype="fasta" />
+            <output name="tabular_file" file="four_human_proteins.clinod-1.3.tabular" ftype="tabular" />
+        </test>
+    </tests>
     <help>
-    
+
 **What it does**
 
 This calls the command line version of the NoD tool from the Barton Group for
@@ -28,14 +39,19 @@
 The input is a FASTA file of protein sequences, and the output is tabular with
 four columns (multiple rows per protein):
 
- * Sequence identifier
- * Start of NoLS
- * End of NoLS
- * NoLS sequence
+====== ===================
+Column Description
+------ -------------------
+     1 Sequence identifier
+     2 Start of NoLS
+     3 End of NoLS
+     4 NoLS sequence
+====== ===================
 
 If a sequence has no predicted NoLS, then there is no line in the output file
 for it.
 
+
 **References**
 
 M. S. Scott, F. M. Boisvert, M. D. McDowall, A. I. Lamond and G. J. Barton.
@@ -45,7 +61,8 @@
 
 M. S. Scott, P. V. Troshin and G. J. Barton.
 NoD: a Nucleolar localization sequence detector for eukaryotic and viral proteins.
-BMC Bioinformatics, in press, 2011.
+BMC Bioinformatics, 12:317, 2011.
+http://dx.doi.org/10.1186/1471-2105-12-317
 
 http://www.compbio.dundee.ac.uk/www-nod/