changeset 2:5a8e09f115f8 draft

Uploaded v0.0.10, adds unit tests. Includes v0.0.9 which checked error codes.
author peterjc
date Wed, 17 Apr 2013 05:26:26 -0400
parents e607c342312f
children b0b927299aee
files test-data/empty.fasta test-data/empty_effectiveT3.tabular test-data/four_human_proteins.effectiveT3.tabular test-data/four_human_proteins.fasta tools/protein_analysis/effectiveT3.txt tools/protein_analysis/effectiveT3.xml
diffstat 6 files changed, 112 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/empty.fasta	Wed Apr 17 05:26:26 2013 -0400
@@ -0,0 +1,2 @@
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/empty_effectiveT3.tabular	Wed Apr 17 05:26:26 2013 -0400
@@ -0,0 +1,1 @@
+#ID	Description	Score	Effective
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.effectiveT3.tabular	Wed Apr 17 05:26:26 2013 -0400
@@ -0,0 +1,5 @@
+#ID	Description	Score	Effective
+sp|P08100|OPSD_HUMAN	Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1	0.461926109094959	false
+sp|Q9BS26|ERP44_HUMAN	Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1	0.000000100329473	false
+sp|Q9NSY1|BMP2K_HUMAN	BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2	0.000000000000339	false
+sp|P06213|INSR_HUMAN	Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4	0.000000000000000	false
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/four_human_proteins.fasta	Wed Apr 17 05:26:26 2013 -0400
@@ -0,0 +1,61 @@
+>sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
+MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
+SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
+REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
+VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
+CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
+CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
+HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
+>sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
+MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEG
+GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
+DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
+LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
+KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
+DPEHRPDIFQVSYFAFKFAKKDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDT
+IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
+ILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ
+QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQY
+QQAFFQQQMLAQHQPSQQQASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
+ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEELLDREFDLLRSNRLEERASSD
+KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
+QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPE
+NLGHRPLLMDSEDEEEEEKHSSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
+QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
+APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
+EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARR
+HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
+WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
+SQQSQPVELDPFGAAPFPSKQ
+>sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
+MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
+QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
+VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
+ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
+GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
+TVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETL
+EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
+RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
+NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
+DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
+RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
+KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAF
+PNTSSTSVPTSPEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
+SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
+SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
+EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
+FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
+AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
+RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEME
+FEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
+PS
+>sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
+MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
+VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
+GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
+EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES
+ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
+YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
--- a/tools/protein_analysis/effectiveT3.txt	Tue Aug 02 07:06:13 2011 -0400
+++ b/tools/protein_analysis/effectiveT3.txt	Wed Apr 17 05:26:26 2013 -0400
@@ -21,7 +21,7 @@
 
 
 Installation
-===========
+============
 
 You can change the path by editing the definition near the start of the Python
 script effectiveT3.py, but by default it expects the following files to be
@@ -48,6 +48,9 @@
 
 <tool file="protein_analysis/effectiveT3.xml" />
 
+If you wish to run the unit tests, also add the tool line above to
+tool_conf.xml.sample and move/copy the test-data files under Galaxy's test-data folder.
+
 That's it.
 
 
@@ -56,6 +59,8 @@
 
 v0.0.7 - Initial public release
 v0.0.8 - Include effectiveT3.loc.sample in Tool Shed
+v0.0.9 - Check the return code for errors in the XML
+v0.0.10- Added unit tests
 
 
 Developers
@@ -64,10 +69,11 @@
 This script and related tools are being developed on the following hg branch:
 http://bitbucket.org/peterjc/galaxy-central/src/tools
 
-For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
 the following command from the Galaxy root folder:
 
-$ tar -czf effectiveT3.tar.gz tools/protein_analysis/effectiveT3.xml tools/protein_analysis/effectiveT3.py tools/protein_analysis/effectiveT3.txt tool-data/effectiveT3.loc.sample
+$ tar -czf effectiveT3.tar.gz tools/protein_analysis/effectiveT3.xml tools/protein_analysis/effectiveT3.py tools/protein_analysis/effectiveT3.txt tool-data/effectiveT3.loc.sample test-data/four_human_proteins.fasta test-data/four_human_proteins.effectiveT3.tabular test-data/empty.fasta test-data/empty_effectiveT3.tabular
+
 
 Check this worked:
 
@@ -76,6 +82,11 @@
 tools/protein_analysis/effectiveT3.py
 tools/protein_analysis/effectiveT3.txt
 tool-data/effectiveT3.loc.sample
+test-data/four_human_proteins.fasta
+test-data/four_human_proteins.effectiveT3.tabular
+test-data/empty.fasta
+test-data/empty_effectiveT3.tabular
+
 
 Licence (MIT/BSD style)
 =======================
--- a/tools/protein_analysis/effectiveT3.xml	Tue Aug 02 07:06:13 2011 -0400
+++ b/tools/protein_analysis/effectiveT3.xml	Wed Apr 17 05:26:26 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="effectiveT3" name="Effective T3" version="0.0.8">
+<tool id="effectiveT3" name="Effective T3" version="0.0.10">
     <description>Find bacterial effectors in protein sequences</description>
     <command interpreter="python">
 effectiveT3.py $module.fields.path
@@ -8,6 +8,11 @@
   $restrict.type
 #end if
 $fasta_file $tabular_file</command>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
     <inputs>
         <param name="fasta_file" type="data" format="fasta" label="FASTA file of protein sequences"/> 
         <param name="module" type="select" display="radio" label="Classification module">
@@ -33,6 +38,20 @@
     <outputs>
         <data name="tabular_file" format="tabular" label="$module.value_label results" />
     </outputs>
+    <tests>
+        <test>
+            <param name="fasta_file" value="four_human_proteins.fasta" ftype="fasta" />
+            <param name="module" value="animal" />
+            <param name="type" value="selective" />
+            <output name="tabular_file" file="four_human_proteins.effectiveT3.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="fasta_file" value="empty.fasta" ftype="fasta" />
+            <param name="module" value="plant" />
+            <param name="type" value="sensitive" />
+            <output name="tabular_file" file="empty_effectiveT3.tabular" ftype="tabular" />
+        </test>
+    </tests>
     <help>
     
 **What it does**
@@ -41,10 +60,15 @@
 
 The input is a FASTA file of protein sequences, and the output is tabular with four columns (one row per protein):
 
- * Sequence identifier
- * Sequence description (from the FASTA file)
- * Score (between 0 and 1, or negative for an error such as a very short peptide)
- * Predicted effector (true/false)
+====== ==============================================================================
+Column Description
+------ ------------------------------------------------------------------------------
+     1 Sequence identifier
+     2 Sequence description (from the FASTA file)
+     3 Score (between 0 and 1, or negative for an error such as a very short peptide)
+     4 Predicted effector (true/false)
+====== ==============================================================================
+
 
 **References**