Repository 'weeder2'
hg clone https://toolshed.g2.bx.psu.edu/repos/pjbriggs/weeder2

Changeset 0:496bc4eff47e (2014-11-19)
Next changeset 1:571cb77ab9e7 (2014-12-09)
Commit message:
Initial version.
added:
README.markdown
test-data/weeder2_matrix.out
test-data/weeder2_motifs.out
test-data/weeder_in.fa
tool_dependencies.xml
weeder2_wrapper.sh
weeder2_wrapper.xml
b
diff -r 000000000000 -r 496bc4eff47e README.markdown
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.markdown Wed Nov 19 07:56:27 2014 -0500
b
@@ -0,0 +1,20 @@
+weeder2
+=======
+
+Galaxy tool for motif discovery using weeder2.
+
+weeder2_wrapper
+---------------
+
+XML and wrapper script for weeder motif discovery package version 2.0.
+
+`weeder2` can be obtained from <http://159.149.160.51/modtools/downloads/weeder2.html>.
+
+To add the tool to Galaxy, add the following line to tool_conf.xml:
+
+    <tool file="weeder2/weeder2_wrapper.xml" />
+
+### Changes ###
+
+2.0.0.0: initial version
+
b
diff -r 000000000000 -r 496bc4eff47e test-data/weeder2_matrix.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/weeder2_matrix.out Wed Nov 19 07:56:27 2014 -0500
b
@@ -0,0 +1,100 @@
+>MAT1 GTTTCAATTA
+A 0.1739 0.08497 0.1198 0.1538 0.186 0.8184 0.6871 0.09156 0.1002 0.7403
+C 0.06162 0.05256 0.05743 0.07398 0.5592 0.08087 0.0987 0.08757 0.149 0.08036
+G 0.591 0.06033 0.07754 0.07127 0.1033 0.0563 0.08422 0.04673 0.08862 0.06081
+T 0.1735 0.8021 0.7452 0.701 0.1515 0.04446 0.1299 0.7741 0.6622 0.1185
+>MAT2 CATTTTAA
+A 0.216 0.8128 0.1075 0.04678 0.1112 0.1399 0.7346 0.7969
+C 0.5444 0.04193 0.0509 0.03932 0.0429 0.08711 0.1099 0.09147
+G 0.1067 0.03289 0.07156 0.0785 0.0966 0.09202 0.05706 0.0382
+T 0.1328 0.1124 0.77 0.8354 0.7493 0.681 0.09836 0.0734
+>MAT3 GTGAATTA
+A 0.2434 0.0588 0.09038 0.7708 0.7064 0.0692 0.08129 0.7448
+C 0.08343 0.02873 0.1008 0.06039 0.0821 0.07782 0.09324 0.09242
+G 0.5181 0.06202 0.6983 0.08538 0.05523 0.08203 0.1107 0.09517
+T 0.1551 0.8504 0.1106 0.08347 0.1562 0.7709 0.7148 0.0676
+>MAT4 TCAATCAT
+A 0.05132 0.186 0.8183 0.6756 0.1425 0.08545 0.8726 0.1838
+C 0.02852 0.5378 0.07891 0.05957 0.02786 0.6724 0.03639 0.1337
+G 0.0695 0.1008 0.02175 0.09712 0.07326 0.07966 0.0383 0.06208
+T 0.8507 0.1753 0.08099 0.1677 0.7564 0.1625 0.05268 0.6204
+>MAT5 TGTTTAAT
+A 0.1111 0.09112 0.1881 0.1074 0.1057 0.6769 0.7668 0.1993
+C 0.02537 0.03582 0.06521 0.08013 0.08927 0.09377 0.03432 0.08804
+G 0.07262 0.7214 0.06612 0.05896 0.07036 0.1072 0.116 0.09061
+T 0.7909 0.1516 0.6806 0.7536 0.7347 0.1221 0.08288 0.6221
+>MAT6 ATTACT
+A 0.8781 0.0835 0.07038 0.7901 0.03473 0.1075
+C 0.04152 0.05455 0.05837 0.03709 0.7755 0.04605
+G 0.0322 0.01277 0.1088 0.07223 0.04238 0.01572
+T 0.04814 0.8492 0.7624 0.1005 0.1473 0.8308
+>MAT7 TCACAT
+A 0.08233 0.1013 0.9032 0.139 0.8283 0.0876
+C 0.04274 0.7009 0.02867 0.7396 0.0355 0.04803
+G 0.04677 0.1109 0.01307 0.0554 0.0163 0.05845
+T 0.8282 0.08693 0.05509 0.06599 0.1199 0.8059
+>MAT8 TACATT
+A 0.09867 0.7322 0.1194 0.8806 0.101 0.09423
+C 0.1068 0.06715 0.743 0.03825 0.03645 0.02894
+G 0.1046 0.07625 0.0467 0.01049 0.07555 0.07099
+T 0.6899 0.1244 0.09086 0.07061 0.7871 0.8058
+>MAT9 TTGACA
+A 0.1287 0.06713 0.09732 0.8048 0.1278 0.8784
+C 0.07599 0.01615 0.136 0.02754 0.7339 0.02517
+G 0.07764 0.03641 0.6902 0.03821 0.05137 0.01588
+T 0.7176 0.8803 0.0764 0.1295 0.08697 0.0806
+>MAT10 AATAAT
+A 0.821 0.7797 0.1145 0.7009 0.8508 0.1043
+C 0.05975 0.04103 0.05818 0.09164 0.07329 0.07718
+G 0.05042 0.1458 0.06827 0.08449 0.04593 0.05097
+T 0.06887 0.03344 0.759 0.1229 0.02993 0.7675
+>MAT11 ATGACT
+A 0.7923 0.07691 0.08105 0.8363 0.111 0.123
+C 0.07045 0.02479 0.07396 0.02863 0.7081 0.04261
+G 0.06487 0.02256 0.6694 0.03558 0.06369 0.01484
+T 0.07241 0.8757 0.1756 0.0995 0.1172 0.8196
+>MAT12 TTGAAA
+A 0.08972 0.0524 0.2142 0.8734 0.7765 0.8008
+C 0.08565 0.01703 0.08046 0.009563 0.09992 0.05955
+G 0.07526 0.08263 0.6446 0.04169 0.04608 0.03337
+T 0.7494 0.8479 0.06075 0.07533 0.07746 0.1063
+>MAT13 ATTTTA
+A 0.7686 0.09324 0.05315 0.07052 0.1079 0.7442
+C 0.05296 0.05566 0.02564 0.04286 0.09559 0.0825
+G 0.06642 0.0539 0.07598 0.07754 0.08916 0.06427
+T 0.112 0.7972 0.8452 0.8091 0.7073 0.1091
+>MAT14 TAAACA
+A 0.1209 0.7643 0.8407 0.816 0.1373 0.8505
+C 0.06995 0.06399 0.03895 0.04513 0.685 0.05353
+G 0.09693 0.07741 0.05489 0.04938 0.08995 0.02176
+T 0.7122 0.09432 0.06543 0.08946 0.08774 0.07422
+>MAT15 ATGATT
+A 0.7798 0.03895 0.1394 0.8016 0.09996 0.05479
+C 0.04628 0.04674 0.09287 0.04385 0.1069 0.06237
+G 0.07995 0.03341 0.6306 0.0422 0.05873 0.09202
+T 0.09401 0.8809 0.1371 0.1123 0.7344 0.7908
+>MAT16 AGTATT
+A 0.8169 0.03667 0.1017 0.7024 0.1984 0.05968
+C 0.01586 0.0534 0.06469 0.08485 0.01561 0.04079
+G 0.03184 0.739 0.03857 0.072 0.06428 0.04865
+T 0.1354 0.1709 0.7951 0.1407 0.7217 0.8509
+>MAT17 TTGAGT
+A 0.1122 0.08252 0.1535 0.8677 0.167 0.08711
+C 0.05038 0.01835 0.1439 0.01705 0.09193 0.05017
+G 0.1 0.04409 0.647 0.02826 0.5944 0.03802
+T 0.7373 0.855 0.05572 0.08702 0.1467 0.8247
+>MAT18 TAAAAC
+A 0.09976 0.6641 0.8508 0.8037 0.7918 0.1444
+C 0.1021 0.08641 0.05693 0.04963 0.05966 0.5613
+G 0.05432 0.1092 0.04048 0.04923 0.04498 0.06765
+T 0.7438 0.1403 0.05175 0.09743 0.1036 0.2267
+>MAT19 GTGAAT
+A 0.1593 0.04483 0.06949 0.8955 0.7175 0.1105
+C 0.07553 0.01876 0.09003 0.01629 0.08374 0.06563
+G 0.6152 0.0609 0.7443 0.03019 0.08214 0.05749
+T 0.15 0.8755 0.09618 0.05804 0.1166 0.7664
+>MAT20 AATACA
+A 0.8268 0.7796 0.1396 0.6772 0.1349 0.8374
+C 0.05488 0.04317 0.05118 0.06514 0.7076 0.03498
+G 0.04696 0.05148 0.06801 0.08048 0.04932 0.01976
+T 0.0714 0.1257 0.7412 0.1771 0.1081 0.1079
b
diff -r 000000000000 -r 496bc4eff47e test-data/weeder2_motifs.out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/weeder2_motifs.out Wed Nov 19 07:56:27 2014 -0500
b
b'@@ -0,0 +1,1250 @@\n+COMMAND LINE:\n+\n+weeder2 -f weeder_in.fa -chipseq -sim 0.95 -O MM \n+\n+MOTIFS SUMMARY:\n+\n+1)\tGTTTCAATTA\t(TAATTGAAAC)\t2.38\n+2)\tCATTTTAA\t(TTAAAATG)\t2.082\n+3)\tGTGAATTA\t(TAATTCAC)\t1.969\n+4)\tTCAATCAT\t(ATGATTGA)\t1.944\n+5)\tTGTTTAAT\t(ATTAAACA)\t1.874\n+6)\tATTACT\t(AGTAAT)\t1.823\n+7)\tTCACAT\t(ATGTGA)\t1.8\n+8)\tTACATT\t(AATGTA)\t1.773\n+9)\tTTGACA\t(TGTCAA)\t1.733\n+10)\tAATAAT\t(ATTATT)\t1.714\n+11)\tATGACT\t(AGTCAT)\t1.702\n+12)\tTTGAAA\t(TTTCAA)\t1.693\n+13)\tATTTTA\t(TAAAAT)\t1.691\n+14)\tTAAACA\t(TGTTTA)\t1.685\n+15)\tATGATT\t(AATCAT)\t1.678\n+16)\tAGTATT\t(AATACT)\t1.664\n+17)\tTTGAGT\t(ACTCAA)\t1.574\n+18)\tTAAAAC\t(GTTTTA)\t1.572\n+19)\tGTGAAT\t(ATTCAC)\t1.568\n+20)\tAATACA\t(TGTATT)\t1.564\n+\n+\n+DETAILED RESULTS:\n+\n+1)\tGTTTCAATTA\t(TAATTGAAAC)\t2.38\n+\n+Matrix: MAT1\tGTTTCAATTA\n+A\t0.1739\t0.08497\t0.1198\t0.1538\t0.186\t0.8184\t0.6871\t0.09156\t0.1002\t0.7403\t\n+C\t0.06162\t0.05256\t0.05743\t0.07398\t0.5592\t0.08087\t0.0987\t0.08757\t0.149\t0.08036\t\n+G\t0.591\t0.06033\t0.07754\t0.07127\t0.1033\t0.0563\t0.08422\t0.04673\t0.08862\t0.06081\t\n+T\t0.1735\t0.8021\t0.7452\t0.701\t0.1515\t0.04446\t0.1299\t0.7741\t0.6622\t0.1185\t\n+\n+OCCURRENCES:\n+>chr1:8797248-879744\t35\tGTTTCAATTA\t1\t174\t+\n+>chr1:3467418-346761\t1\tTTTTCAATTA\t0.934855\t187\t-\n+>chr1:5072821-507302\t8\tGTTTGAATTA\t0.928879\t153\t-\n+>chr1:9932599-993279\t45\tGTTTCAATCA\t0.919923\t124\t+\n+>chr1:9013525-901372\t37\tGTTTCACTTA\t0.908195\t51\t+\n+>chr1:9956513-995671\t48\tGTTTCCATTA\t0.88494\t114\t-\n+>chr1:9813405-981360\t44\tTTTTAAATTA\t0.876639\t141\t+\n+>chr1:7768736-776893\t34\tATTTTAATTA\t0.871314\t131\t-\n+>chr1:9956513-995671\t48\tATTTCAATCA\t0.854845\t177\t-\n+>chr1:7444756-744495\t33\tGTTTAAATGA\t0.852292\t99\t+\n+>chr1:7386917-738711\t29\tTTTACAATTA\t0.849485\t45\t+\n+>chr1:7303541-730374\t27\tGTTTTAATGA\t0.8469\t8\t-\n+>chr1:5805347-580554\t10\tGTTCAAATTA\t0.843965\t178\t-\n+>chr1:7768736-776893\t34\tGTATTAATTA\t0.83883\t127\t+\n
+>chr1:6588721-658892\t21\tGTTGTAATTA\t0.83815\t138\t+\n+>chr1:7388025-738822\t30\tCTTTCAATCA\t0.83733\t1\t+\n+>chr1:9962925-996312\t49\tGTCTAAATTA\t0.834485\t190\t-\n+>chr1:4562216-456241\t4\tATTTCAACTA\t0.827808\t147\t+\n+>chr1:6878570-687877\t25\tGTTTTGATTA\t0.817499\t91\t-\n+>chr1:6396504-639670\t18\tGTTTCATTTT\t0.816051\t171\t+\n+>chr1:7768736-776893\t34\tGTTTCATCTA\t0.805952\t147\t+\n+>chr1:9460371-946057\t38\tGTATCAATTT\t0.805422\t84\t+\n+>chr1:6721868-672206\t23\tGTTTCAGATA\t0.799444\t28\t-\n+>chr1:6090845-609104\t11\tTTTTTAATCA\t0.79117\t168\t-\n+>chr1:9948240-994844\t47\tGTTTCAACTC\t0.789921\t109\t-\n+>chr1:6266967-626716\t15\tGCTCCAATTA\t0.785236\t145\t-\n+>chr1:5072821-507302\t8\tGTTTAATTCA\t0.774774\t150\t+\n+>chr1:6090845-609104\t11\tATTGTAATTA\t0.773071\t87\t+\n+>chr1:8851545-885174\t36\tTTTTGACTTA\t0.771928\t49\t-\n+>chr1:6878570-687877\t25\tATTACAATCA\t0.769475\t34\t-\n+>chr1:6090845-609104\t11\tGTTAAAATGA\t0.766922\t148\t-\n+>chr1:4662531-466273\t5\tGTTTTACTCA\t0.764511\t34\t-\n+>chr1:4562216-456241\t4\tGTTGAAATCA\t0.763465\t145\t-\n+>chr1:6090845-609104\t11\tATTACAATAA\t0.761859\t85\t-\n+>chr1:9013525-901372\t37\tGTTTAAGTAA\t0.760027\t162\t+\n+>chr1:6205539-620573\t13\tGTTAAAATTT\t0.7594\t156\t+\n+>chr1:7413722-741392\t32\tTTTTCATTGA\t0.758428\t125\t-\n+>chr1:9932599-993279\t45\tGTTTGATTAA\t0.754251\t45\t-\n+>chr1:6588721-658892\t21\tTTATCAATAA\t0.749598\t145\t+\n+>chr1:6205539-620573\t13\tGTATAAATTT\t0.747207\t160\t-\n+>chr1:4774948-477514\t6\tTTTACAATTC\t0.74652\t118\t-\n+>chr1:4833767-483396\t7\tGGTTAAATCA\t0.745976\t183\t+\n+>chr1:6588721-658892\t21\tATTACAACTA\t0.742438\t136\t-\n+>chr1:9948240-994844\t47\tGTTGAAACTA\t0.736427\t111\t+\n+>chr1:4833767-483396\t7\tATTACAAGTA\t0.736066\t103\t-\n+>chr1:9574131-957433\t42\tATTACCATTA\t0.734492\t157\t-\n+>chr1:6721868-672206\t23\tAATTCAGTTA\t0.728969\t72\t+\n+>chr1:4774948-477514\t6\tGATCTAATTA\t0.726685\t11\t-\n+>chr1:6266967-626716\t15\tATTTCAACTC\t0.724842\t175\t+\n+>chr1:4833767-483396\t7\tGTAACG
ATTA\t0.698174\t62\t-\n+**********\n+\n+2)\tCATTTTAA\t(TTAAAATG)\t2.082\n+\n+Matrix: MAT2\tCATTTTAA\n+A\t0.216\t0.8128\t0.1075\t0.04678\t0.1112\t0.1399\t0.7346\t0.7969\t\n+C\t0.5444\t0.04193\t0.0509\t0.03932\t0.0429\t0.08711\t0.1099\t0.09147\t\n+G\t0.1067\t0.03289\t0.07156\t0.0785\t0.0966\t0.09202\t0.05706\t0.0382\t\n+T\t0.1328\t0.1124\t0.77\t0.8354\t0.7493\t0.681\t0.09836\t0.0734\t\n+\n+OCCURRENCES:\n+>chr1:9813405-981360\t44\tCATTTTAA\t1\t131\t-\n+>chr1:7768736-776893\t34\tCATTTTAA\t1\t134\t-\n+>chr1:7388'..b'386\t130\t-\n+>chr1:6183701-618390\t12\tATGAAT\t0.89386\t71\t+\n+>chr1:5072821-507302\t8\tATGAAT\t0.89386\t127\t+\n+>chr1:5072821-507302\t8\tATGAAT\t0.89386\t87\t-\n+>chr1:5072821-507302\t8\tATGAAT\t0.89386\t21\t+\n+>chr1:9942670-994287\t46\tTTGAAT\t0.891676\t162\t+\n+>chr1:7444756-744495\t33\tTTGAAT\t0.891676\t134\t+\n+>chr1:7413722-741392\t32\tTTGAAT\t0.891676\t123\t-\n+>chr1:6721868-672206\t23\tTTGAAT\t0.891676\t85\t+\n+>chr1:6277750-627795\t16\tTTGAAT\t0.891676\t35\t-\n+>chr1:6277750-627795\t16\tTTGAAT\t0.891676\t25\t+\n+>chr1:6183701-618390\t12\tTTGAAT\t0.891676\t87\t+\n+>chr1:6090845-609104\t11\tTTGAAT\t0.891676\t119\t-\n+>chr1:6090845-609104\t11\tTTGAAT\t0.891676\t115\t+\n+>chr1:5072821-507302\t8\tTTGAAT\t0.891676\t155\t-\n+>chr1:9956513-995671\t48\tGTGATT\t0.860082\t176\t+\n+>chr1:9932599-993279\t45\tGTGATT\t0.860082\t129\t-\n+>chr1:9568576-956877\t41\tGTGATT\t0.860082\t22\t+\n+>chr1:9554705-955490\t40\tGTGATT\t0.860082\t126\t+\n+>chr1:6472202-647240\t20\tGTGATT\t0.860082\t76\t+\n+>chr1:4833767-483396\t7\tGTGATT\t0.860082\t188\t-\n+>chr1:3467418-346761\t1\tGTGATT\t0.860082\t51\t-\n+>chr1:9956513-995671\t48\tGTTAAT\t0.849088\t112\t+\n+>chr1:9932599-993279\t45\tGTTAAT\t0.849088\t44\t+\n+>chr1:9574131-957433\t42\tGTTAAT\t0.849088\t106\t+\n+>chr1:9554705-955490\t40\tGTTAAT\t0.849088\t49\t+\n+>chr1:9013525-901372\t37\tGTTAAT\t0.849088\t173\t+\n+>chr1:6721868-672206\t23\tGTTAAT\t0.849088\t172\t-\n+>chr1:4662531-466273\t5\tGTTAAT\t0.849088\t159\t-\n+>chr1:99
42670-994287\t46\tGTCAAT\t0.847655\t91\t-\n+>chr1:7768736-776893\t34\tGTCAAT\t0.847655\t76\t+\n+>chr1:7444756-744495\t33\tGTCAAT\t0.847655\t10\t+\n+>chr1:7209035-720923\t26\tGTCAAT\t0.847655\t123\t+\n+**********\n+\n+20)\tAATACA\t(TGTATT)\t1.564\n+\n+Matrix: MAT20\tAATACA\n+A\t0.8268\t0.7796\t0.1396\t0.6772\t0.1349\t0.8374\t\n+C\t0.05488\t0.04317\t0.05118\t0.06514\t0.7076\t0.03498\t\n+G\t0.04696\t0.05148\t0.06801\t0.08048\t0.04932\t0.01976\t\n+T\t0.0714\t0.1257\t0.7412\t0.1771\t0.1081\t0.1079\t\n+\n+OCCURRENCES:\n+>chr1:9962925-996312\t49\tAATACA\t1\t83\t+\n+>chr1:9956513-995671\t48\tAATACA\t1\t68\t+\n+>chr1:9948240-994844\t47\tAATACA\t1\t84\t-\n+>chr1:9942670-994287\t46\tAATACA\t1\t156\t+\n+>chr1:9574131-957433\t42\tAATACA\t1\t48\t-\n+>chr1:7768736-776893\t34\tAATACA\t1\t126\t-\n+>chr1:5072821-507302\t8\tAATACA\t1\t43\t-\n+>chr1:4024990-402519\t2\tAATACA\t1\t173\t-\n+>chr1:3467418-346761\t1\tAATACA\t1\t114\t-\n+>chr1:3467418-346761\t1\tAATACA\t1\t27\t+\n+>chr1:3467418-346761\t1\tAATACA\t1\t5\t+\n+>chr1:9956513-995671\t48\tAATTCA\t0.883548\t78\t+\n+>chr1:9942670-994287\t46\tAATTCA\t0.883548\t163\t-\n+>chr1:9013525-901372\t37\tAATTCA\t0.883548\t116\t+\n+>chr1:8851545-885174\t36\tAATTCA\t0.883548\t85\t+\n+>chr1:7303541-730374\t27\tAATTCA\t0.883548\t56\t+\n+>chr1:6878570-687877\t25\tAATTCA\t0.883548\t86\t-\n+>chr1:6878570-687877\t25\tAATTCA\t0.883548\t54\t-\n+>chr1:6721868-672206\t23\tAATTCA\t0.883548\t72\t+\n+>chr1:6437830-643803\t19\tAATTCA\t0.883548\t129\t+\n+>chr1:6277750-627795\t16\tAATTCA\t0.883548\t28\t+\n+>chr1:6277750-627795\t16\tAATTCA\t0.883548\t26\t-\n+>chr1:6266967-626716\t15\tAATTCA\t0.883548\t16\t-\n+>chr1:6090845-609104\t11\tAATTCA\t0.883548\t118\t+\n+>chr1:6090845-609104\t11\tAATTCA\t0.883548\t116\t-\n+>chr1:5072821-507302\t8\tAATTCA\t0.883548\t154\t+\n+>chr1:9956513-995671\t48\tAATATA\t0.860396\t155\t-\n+>chr1:9942670-994287\t46\tAATATA\t0.860396\t119\t-\n+>chr1:9813405-981360\t44\tAATATA\t0.860396\t31\t+\n+>chr1:9013525-901372\t37\tAATATA\t0.860396\t
78\t+\n+>chr1:7768736-776893\t34\tAATATA\t0.860396\t2\t+\n+>chr1:7413722-741392\t32\tAATATA\t0.860396\t87\t-\n+>chr1:7413722-741392\t32\tAATATA\t0.860396\t85\t+\n+>chr1:6472202-647240\t20\tAATATA\t0.860396\t2\t-\n+>chr1:6360131-636033\t17\tAATATA\t0.860396\t26\t+\n+>chr1:9813405-981360\t44\tATTACA\t0.84772\t147\t+\n+>chr1:6878570-687877\t25\tATTACA\t0.84772\t38\t-\n+>chr1:6719561-671976\t22\tATTACA\t0.84772\t127\t+\n+>chr1:6588721-658892\t21\tATTACA\t0.84772\t140\t-\n+>chr1:6090845-609104\t11\tATTACA\t0.84772\t89\t-\n+>chr1:5805347-580554\t10\tATTACA\t0.84772\t147\t+\n+>chr1:5805347-580554\t10\tATTACA\t0.84772\t125\t+\n+>chr1:4833767-483396\t7\tATTACA\t0.84772\t107\t-\n+>chr1:4774948-477514\t6\tATTACA\t0.84772\t9\t-\n+>chr1:7388801-738900\t31\tAATACT\t0.830134\t75\t+\n+>chr1:7209035-720923\t26\tAATACT\t0.830134\t10\t+\n+>chr1:6262414-626261\t14\tAATACT\t0.830134\t181\t-\n+>chr1:5805347-580554\t10\tAATACT\t0.830134\t152\t-\n+>chr1:4833767-483396\t7\tAATACT\t0.830134\t18\t-\n+>chr1:4402453-440265\t3\tAATACT\t0.830134\t177\t-\n+**********\n+\n'
b
diff -r 000000000000 -r 496bc4eff47e test-data/weeder_in.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/weeder_in.fa Wed Nov 19 07:56:27 2014 -0500
b
b'@@ -0,0 +1,100 @@\n+>chr1:3467418-3467618\n+CCATAATACACCCTTGTATAGCTAATAATACATTACTTTGATCCTGTGCTAATCACATTGATCCTGGGAAAAGGAAACTCACATACCTCTGTGTATGAGGAAGCTCACTGTACTGTATTCTTCAAGGAGTGGAATTTTATCACCACTTCACACCATTTGGAACAGGCAGAGGAATTCTAGGCATTTTAATTGAAAATTCA\n+>chr1:4024990-4025190\n+CTTACTTTGCAGTGATCCCGTTGACACAAACTGTGTAAAGAAATCAGAGTGAGAACAGGTGGTGAATGTGACTCAGAGTGTTAAAACATGTGCACGTAGTTCACTTGCTGTCTTACCTAAAATGTAGCACAGCAGTGTGCCCAAAGGAATTTGGGGGTCTAGTAGATCTACATGTATTTATTGCTTGATTATTTCTCTTG\n+>chr1:4402453-4402653\n+TAAAACATGTCTCTAGTAGTCACAGTGCCCACACTGCTGATGGAGTCACCTTTTCCAGGGAACGGCTGCTGCTCTCGATATCTGGTGGATCTCTGGAAAGACTTGTGCTGATCTCTCTCTGCCCCTTCCTTGATTCACATCTCAAGGGACCGAGAAGGGAGGGAAAACACCAGTCCAGTATTTCCTATCAGTTCAGCGGG\n+>chr1:4562216-4562416\n+TTGCTGAGCCCTGCTGCCCGCACTGCAACGCTGGGCTCTCTCATCACCTTTGCCAACACCTGCCTCTTCTGTGCAGCCTTCCATCAAGTATGACGGGAGCCCTCACCAAGGACGATTCTAGACATATTTAGCAGCAATATTGTTTGATTTCAACTAGACATTGTTCTAAACACAAAGGAGTTCAAGGCTGGGGCTTTGAT\n+>chr1:4662531-4662731\n+GTCAACTTCAGCCTCCTGGGACAACGCACAATGTGAGTAAAACTGCCTTTCATCATAAATCGTTCTTACCCACCCCGAATCCCAGGGAGAACTGACAAACAGCCTGCCTGTGAGAGGAAAGGCAGATGACTCCTTCAACTTCGAGGTAGGAGCAAAACATTAACCCAGTGCAACCACAACAATCTAGGCAGGATGAAGGG\n+>chr1:4774948-4775148\n+CGTAAATATGTAATTAGATCACATATGGCTAAGGAAAACTCCAATTTTTGCTAGGGACACATTCTAATTTCCTAAATTCCTAAAAGATAAGCTCCTGGCAACTTGTCCCCCTCCACAGAATTGTAAAACTGATGTGAAATGATTAATTGCTTCGGTAAAGTTCCTCATGTAGCTGTAAGTTCCCCAGAAATAAAGTAATT\n+>chr1:4833767-4833967\n+TTTTGAGTCAAGATAGAAGTATTATCAAATGTCAAGAGCGGGAGCAGAGTTCTTGGGAGCTTAATCGTTACTTAGATGAGAAACACCTGGATGGATTTAGGGTACTTGTAATGGCCTAGAATAAAATTTGATTGATGGCTAATCTCTGGGGTATAGTCCACTGTTGTACAACCACTAAGTGAGGTTAAATCACTTGCTTC\n+>chr1:5072821-5073021\n+AAAATGAAGTTAGGTAAACAATGAATAACTTTTTCTGAGACATGTATTACTGGTGGGTCTACATATACTTATACCATGGGTCTATGATTCATTTCTCAGTCTGAGTCTTATGCTTAAAATCGGAGTATGAATGCCTGGCCCGCTGAGATGTTTAATTCAAACAAACCCAAACCACTCAAAGGTCATAAACCAAGCTTCTT\n+>chr1:5168670-5168870\n+AAAGTCAGCACTCCTCAGGAACTTGTGAAAATGATTTCCATCTGCTAAAAGAAAACATTTCCCTTGCCTCTGGCAGAATGGACATATGGGATATGCATGGACCCCTCTCAGCAGATCAGAGCCCATGCAGGCCTCCAG
GCTCCCACAGTCCCTATTCACAAGTACTTAGTCCTTGCTGCCCTCCCCACCTACTTATCCTC\n+>chr1:5805347-5805547\n+TCTATTTCAGAGAAGGTTAAACAAAAAACAGGTCTGGATATTTTCACAACCACTTGAAGTCAGCAATAATTTCTTAGTCTGACAAGGACCATGAGAAATTCTTTCTATACCTTGTCTGTGTCAAATTACAATGTGAAGTTTCCAGAATTACAGTATTTTATTTCTGAACTTGCAAAGTAATTTGAACAGGTTTTTCCTGG\n+>chr1:6090845-6091045\n+GAGCCATTTTCATATGTACAGTAATGCTTTTCACAAAGTTGCATTATTCTAGATGCGTATGTGTGTGTCTGCAGACTGAACCAATTATTGTAATTATACTCTTGAAAAAACTGCTTGAATTCAATTTAAAGTGGCTTCTGCTAGCTGTCATTTTAACATCCTGCTTATGATTAAAAAGACTTGACACTTGGAATAAACTA\n+>chr1:6183701-6183901\n+GTAATTAGTTATACAAGAACACCTGTCACAGTGGCTGATCCATGACAGGTCTTTGTTGAGTGAACTAACCATGAATCAGTAAGACATTGAATGCAGGGCATGCTGAGTGTCATGGTGTGGACACGTGTCAGACGGTGGCTGACATCCTTAGGAACTGCAATGCCTAAGGGAAGTAGTCATAGGTATAGCTGGTGTTCTCC\n+>chr1:6205539-6205739\n+TAATCCTGTCCCTTCCGTCATGAGTCTTTAAAATGTGATCATTCCATGAACTGGTACATAGGACTGATGCCACGGAACACTCGTGGACTCACTTATTGATAGAGTCCCTGCAAGACATTCAGTTGGGTGCGGATGTTGACAAGTCTATTATGAGCGTTAAAATTTATACATTTTGCTAACCATTTTCTGTAAAGAATTGG\n+>chr1:6262414-6262614\n+CTGAGTCCAAGTTGTGTGTGTGTTGCTGCTGCTGTGCTAGGCACTGGTTACAGCTTCCTTCACACACTGACCGACTGCCAGCCACTCAGAAGTCTGGAGGTTGTCATTCGTTCCTTCAGTAGGTGGCAGTGTTGAGTCAGTTATTGGTTCATCACTTAGATTACTAGTGTTCCGTGAATGAGTATTTTAAAAAGCACACA\n+>chr1:6266967-6267167\n+ATTAAAATCTGATAGTGAATTAtagttacctttctgttggctgttaaaaagccaaggtaacctgtagaagaaagggtggtgtttggcatatggtttcagatgtccatgatgatgatggagcagaggtgacaggtggcagacagctaattggagcagcagcagctgagagttcacatttcaacTCTCTTCCAGGTTCAACC\n+>chr1:6277750-6277950\n+CCCGTCTCCTTTCAGCTTGTCTTATTGAATTCACATTCAAAACTCATTGCCACCATCAAGGAGGAAAAATCTGGAAAACAAAAGTGCTCTGCTATCTATCATTCCAAGACCAGCTTACAGCTGCCAGAAACACCCAGAAAAGCAAGCATCTCAGGAAGCTGAAATCAGTCCTCAGAGCTAATGCCAGGAAAGGGCTCTCT\n+>chr1:6360131-6360331\n+gtttttgtttaaggcagagttttgcaatatagtccagggtaaacttaagctccagtcctcctgcttcagcctccggagtgccaagattactggcatgtgccaccctgactagttatgtgggttctTTCTTAAAGCATTTTTATTAAACATGACAGAGGTTCCTAGCCTGATGCTTTGCTGTACACTTACATAGAAAATGA\n+>chr1:6396504-6396704\n+TAAATAAAGATCAAACAACAAACAAAACTGGGAAGTCAACTGATCACTGCGGGAATTATTTTTAATGTAGGGTCATCTTTCTTCCACACGGTGGC
GCCTCGCTGTCAGACTTAAAAACACTGAAACATTGTG'..b'ctaagtgacaggagtcatattcagaagtctttacctatgccagtataatgaagTGTTTTCCATTtgtggtggtttaaatgatgatggcctctataggcttctatgtttgaatactcggATTCTTCCAAAACAGTTTCAAAACTGTACCTGGAAACACTATTTAACTAGTCATA\n+>chr1:7768736-7768936\n+aaatatagtggtggattaccttagggtaaggagctacgtctgttttctagctctcaagctacagttaccttacctgtcaatgttatcagaagtctggggaagaataaattatgatctaagagtcatgtattaattaaaatgacagagtttcatctatggtccattacctaaacagttcccaagtccctgcggtaaccatg\n+>chr1:8797248-8797448\n+catttattgatttatgtaggctgaaccaccttcacatctgtgggatgaagtctactctatcatggtggtgctattttttatgtacttaattccatttcttcataacatctttatgtggtttaagtgtcaacataactatgatttcataaaatgagctgggaaaggtttcttctgtttcaattatgtggaacattattgga\n+>chr1:8851545-8851745\n+gaggtgtagtggcttagaacagctctgctttatcagcacccaattttgtaagtcaaaagtcagacatgggatagatggactcttaattcatagactcaaatgttaaaatagaggGGGAATCAATCATCCATGAAGACAAAAGAATACTGGTGGTCTGTTTGGATTTCAGCCAGAGCAATCTGTTCAAATTTGACAGCAGA\n+>chr1:9013525-9013725\n+AGAAACACCACACACACATTTCATTTTTCTGAAGAAAGAAATGATCTGCTGTTTCACTTACATTCCACACACCAGGGAATATAAAGTCTAAATAGCACTCTAGTCTGCAGTTTAAAATTCATGTGTCACAAGAATAAAATAACAAGTCAGAGTCTCCAGTTGTTTAAGTAAGGTTAATCATAAGTAATCTTTATGACTCT\n+>chr1:9460371-9460571\n+ccaaccaccaagtcaccgggttcaaggcctgtgcttgactgggaaccaggctgtctatgcactgcctcacTGCACCGCAGCTGGTATCAATTtgtcttagtcagagttactattgttatgatgaaacaccatgagcaaagagcaaattggggaggaaatggcttatttagcttacatttccactgttcatcatcaaaaga\n+>chr1:9535189-9535389\n+TTTACCCATAGACACTGTGGTGTAGACGCTCCATTCGGAAGCTACAATGCAGGCACTTCCAAGAGTTTGAGCAGCCCGCGTCCTACTGCACTACCTCTGCCCCACAGCATGCTGGGAAACGTAGTCCCAACCAGGTCCTGAGCTGGTTAGCCAACCCTCAGCGCCAGTCGGGCCAACATCCGGTGACGAATCCAAGTCCC\n+>chr1:9554705-9554905\n+ACACCAGCCCTTTGTGTGCCCCAGGGCTCCAGGTGCTGTGTGGGGAATGTTAATGTCAGAAGCCCGGGACTTGGACCCAAGCCCAGGCTTCAGTTCACAGTATGTACTGTGTGCACACATTGGCAGTGATTCCAGGGGCCTGTATCCCTTCTCCCTTTAGGGAAGGGAATTTCGGCCATGCTGAGACATAGCTCTGGCCT\n+>chr1:9568576-9568776\n+TCTAAGTTGAGCCACGTCACTGTGATTTATTGTCACATGCCACAATAAGCATGTTTTGCTCTCTTGTCCCTTCTGCAGGAAGGGCTGTGTTGAGTTGGGCATTGTTTGTATAGTGCTGGACACAGCTGCAGGGTGTTTTTACGTGTTCAATGAAGTAATTCTGTTCTCA
GAAAGCTCAGACAAGTACAGTATCAAAACAG\n+>chr1:9574131-9574331\n+AGGGTAAACAGTTCCATTTGTGACCATGAAATCCCCCATCTGTTGAGTGTATTCATGACTTAAGAGTTCTCGGCAGAGACAGATGAATGAACCTGCCTCCCCGATGTTAATGACATCCATCATACAACTGAACATGTGGCTTCATAAATCAAGGGCTAATGGTAATACTGTCTGCTCGTGTCATGAACAAGCTATCCGAT\n+>chr1:9738400-9738600\n+CTTCTCACTTCTCCACTGCCTTAGCCGTTGCCCCGAACGTAACGGCCACCACCCCACCCCGCACTCACACTCACTCACTCTCGCTCTCTCCCTCAGACACAGACATACACGCCCTCACTGAGACTGCGCAGGCGTAGCTTCTGCTCTGCCCTCTGGGAACCAGAGTCTTCCGGCTCCTCTCTCGCGAAGGAGTGCTAGGC\n+>chr1:9813405-9813605\n+TTTAGTTTGCTAGCAGCTGTCAGGAAGTACAATATAGGTCGAAGGACCCCATGTTCAAATCTCTAATGTGAGGACAGCCTGGGCCCCTCAGGAAAGTGAAAGCGGTGTGTCTTTCCCGTTCTCTGGTTTTTTAAAATGAGTTTTAAATTACAAgcatgtgccgctggagtccaggtgttggcttctctggggttggggtt\n+>chr1:9932599-9932799\n+CGGCAAAAACTTAAGTCATCATTGTGCCAGCTATTAAGGCCCTGTTAATCAAACCTCAAAGAAAAAAAAAAACCACACACACAATTTGTACCTTGTTATTGGCAAATGTAGTCCTGGCAGCTTGTTTCAATCACTCTCCATGACAAGTGTTTAGAAAATATTTGTTCAAGCACCTTCAAATGAGCAGGTTTTGTTGCACC\n+>chr1:9942670-9942870\n+AAGAGAAACATAAATGACCCTAGTCAGAAATGCATGACTTTCCTATATAAAAGCCTTTCACCCTCCGGCAAGATTATTTTTAAACATAATATTGACATTGAGATAATCGTAGCACTTATATATTTTGTCCAATTTAAAATTCCAAATCTGTGGAAAATACATTGAATTGTCAGAAAATAGGGCATTGATCTAGATGAAAC\n+>chr1:9948240-9948440\n+CAATTCTCCAGCTCACAATTTAAAAGCTTCTGAGAGTTAAATGCTATGGCATAGTTTATAATGGAGCACTCGAGTGACTGATGTGTATTTCAGTGAACTGGTCTCTCGGAGTTGAAACTACGACTTGTATAAACAGGAATGGAAATCCTTCTGGGTTTATGTAGTTAGGCAAGAATATTCCCAACTGTGTGCCGCTCCCA\n+>chr1:9956513-9956713\n+TCCCACTCATGACCACTTCACATATACGTAAAGAAATCATGCAAAATGGTTTCTCTTGGCTAATCTGAATACAGACCAATTCACTCAACATACAGGCAATCTAATATGACAGTTAATGGAAACATGGGTTTTCTTCTAACATTCTTGGTGGCTGTATATTCCACTAGATTTAACTGTGATTGAAATTTTACTCTATGGGG\n+>chr1:9962925-9963125\n+TATTTTTTAAACCACCAAGGTTCAAGCACCCCCTAGCATATTTGAAGAAAGGAAACGTCTGTCAGAAGTGCTCGCTGTGACAAATACAGAGGGAAGTGACAATGTGCTGCCTTTGCTTACTTAACACCATACATTATTTTTAACTGAACAGTGAAGGCATTTGTTCAGAACCAGCCTTTACATAAAAAATAATTTAGACC\n+>chr1:9978791-9978991\n+AAATCCTTTCACTCTCCTGCCCCTCGATAAATTACTCAAGGCACCAGACACTTTTCTGGACAGTCTCTGTTTGATAAATGATCATGTCATGCTATCGCTTAGAGGCGCACTGCAAAATTCTGAGTG
GCCAATTGTCTTTCCCTGCAGGCTGGTGCCCTGCTCCTGCCTGGGTGTTTGTGGCAGGCGGTCTCAGCTTTAAT\n'
b
diff -r 000000000000 -r 496bc4eff47e tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Nov 19 07:56:27 2014 -0500
b
@@ -0,0 +1,39 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="weeder" version="2.0">
+    <install version="1.0">
+      <actions>
+ <action type="download_by_url">http://159.149.160.51/modtools/downloads/weeder2.0.tar.gz</action>
+ <action type="shell_command">
+   g++ weeder2.cpp -o weeder2 -O3
+ </action>
+ <!-- Move the compiled weeder2 executable into $INSTALL_DIR/bin -->
+ <action type="move_file">
+   <source>weeder2</source>
+   <destination>$INSTALL_DIR/bin</destination>
+ </action>
+ <!-- Move the FreqFiles data directory into $INSTALL_DIR/FreqFiles -->
+        <action type="move_directory_files">
+          <source_directory>FreqFiles</source_directory>
+          <destination_directory>$INSTALL_DIR/FreqFiles</destination_directory>
+        </action>
+ <!-- Set environment variables: WEEDER_DIR, WEEDER_FREQFILES_DIR (read by weeder2_wrapper.sh) and PATH -->
+ <action type="set_environment">
+   <environment_variable name="WEEDER_DIR" action="set_to">$INSTALL_DIR</environment_variable>
+ </action>
+ <action type="set_environment">
+   <environment_variable name="WEEDER_FREQFILES_DIR" action="set_to">$INSTALL_DIR/FreqFiles</environment_variable>
+        </action>
+        <action type="set_environment">
+          <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
+        </action>
+      </actions>
+    </install>
+    <readme>Installs Weeder 2.0
+
+      See http://159.149.160.51/modtools/downloads/weeder2.html
+      and http://159.149.160.51/modtools/
+    </readme>
+  </package>
+</tool_dependency>
+
b
diff -r 000000000000 -r 496bc4eff47e weeder2_wrapper.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/weeder2_wrapper.sh Wed Nov 19 07:56:27 2014 -0500
[
@@ -0,0 +1,62 @@
+#!/bin/sh -e
+#
+# Wrapper script to run weeder2 as a Galaxy tool
+#
+# Usage: weeder_wrapper.sh FASTA_IN SPECIES_CODE MOTIFS_OUT MATRIX_OUT [ ARGS... ]
+#
+# ARGS: one or more arguments to supply directly to weeder2
+#
+# Process command line
+FASTA_IN=$1
+SPECIES_CODE=$2
+MOTIFS_OUT=$3
+MATRIX_OUT=$4
+#
+# Other arguments
+ARGS=""
+while [ ! -z "$5" ] ; do
+    ARGS="$ARGS $5"
+    shift
+done
+#
+# Link to input file
+ln -s $FASTA_IN
+#
+# Link to the FreqFiles directory as weeder2 executable
+# expects it to be the same directory
+freqfiles_dir=$WEEDER_FREQFILES_DIR
+if [ -d $freqfiles_dir ] ; then
+    echo "Linking to FreqFiles directory"
+    ln -s $freqfiles_dir FreqFiles
+else
+    echo "ERROR FreqFiles directory not found" >&2
+    exit 1
+fi
+#
+# Construct names of input and output files
+fasta=`basename $FASTA_IN`
+motifs_out=$fasta.w2
+matrix_out=$fasta.matrix.w2
+#
+# Construct and run weeder command
+# NB weeder logs output to stderr so redirect to stdout
+# to prevent the Galaxy tool reporting failure
+weeder_cmd="weeder2 -f $fasta -O $SPECIES_CODE $ARGS"
+echo "Running $weeder_cmd"
+$weeder_cmd 2>&1
+status=$?
+if [ $status -ne 0 ] ; then
+    echo weeder2 command finished with nonzero exit code $status >&2
+    echo Command was: $weeder_cmd
+    exit $status
+fi
+#
+# Move outputs to final destinations
+if [ -e $motifs_out ] ; then
+    /bin/mv $motifs_out $MOTIFS_OUT
+fi
+if [ -e $matrix_out ] ; then
+    /bin/mv $matrix_out $MATRIX_OUT
+fi
+#
+# Done
b
diff -r 000000000000 -r 496bc4eff47e weeder2_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/weeder2_wrapper.xml Wed Nov 19 07:56:27 2014 -0500
b
b'@@ -0,0 +1,181 @@\n+<tool id="motiffinding_weeder2" name="Weeder2" version="2.0.0">\n+  <description>Motif discovery in sequences from coregulated genes of a single species</description>\n+  <command interpreter="bash">weeder2_wrapper.sh\n+  $sequence_file $species_code\n+  $output_motifs_file $output_matrix_file\n+  $strands\n+  #if $chipseq.use_chipseq\n+     -chipseq -top $chipseq.top\n+  #end if\n+  #if str( $advanced_options.advanced_options_selector ) == "on"\n+     -maxm $advanced_options.n_motifs_report\n+     -b $advanced_options.n_motifs_build\n+     -sim $advanced_options.sim_threshold\n+     -em $advanced_options.em_cycles\n+  #end if\n+</command>\n+  <requirements>\n+    <requirement type="package" version="2.0">weeder</requirement>\n+  </requirements>\n+  <inputs>\n+    <param name="sequence_file" type="data" format="fasta" label="Input sequence" />\n+    <param name="species_code" type="select" label="Species to use for background comparison">\n+      <!-- Hard code options for now\n+\t   See weeder\'s "organisms.txt" for full list\n+      -->\n+      <option value="HS">Homo sapiens (HS)</option>\n+      <option value="MM">Mus musculus (MM)</option>\n+      <option value="DM">Drosophila melanogaster (DM)</option>\n+      <option value="SC">Saccharomyces cerevisiae (SC)</option>\n+      <option value="AT">Arabidopsis thaliana (AT)</option>\n+    </param>\n+    <param name="strands" label="Use both strands of sequence" type="boolean"\n+\t   truevalue="" falsevalue="-ss" checked="True"\n+\t   help="If not checked then use -ss option" />\n+    <conditional name="chipseq">\n+      <param name="use_chipseq" type="boolean"\n+\t     label="Use the ChIP-seq heuristic"\n+\t     help="Speeds up the computation (-chipseq)"\n+\t     truevalue="yes" falsevalue="no" checked="on" />\n+      <when value="yes">\n+\t<param name="top" type="integer" value="100"\n+\t       label="Number of top input sequences with oligos to scan for"\n+\t       help="Increase this 
value to improve the chance of finding motifs enriched only in a subset of your input sequences (-top)" />\n+      </when>\n+      <when value="no"></when>\n+    </conditional>\n+    <conditional name="advanced_options">\n+      <param name="advanced_options_selector" type="select"\n+\t     label="Display advanced options">\n+\t<option value="off">Hide</option>\n+\t<option value="on">Display</option>\n+      </param>\n+      <when value="on">\n+\t<param name="n_motifs_report" type="integer" value="25"\n+\t       label="Number of discovered motifs to report" help="(-maxm)" />\n+\t<param name="n_motifs_build" type="integer" value="50"\n+\t       label="Number of top scoring motifs to build occurrences matrix profiles and outputs for"\n+\t       help="(-b)" />\n+\t<param name="sim_threshold" type="float" min="0.0" max="1.0" value="0.95"\n+\t       label="Similarity threshold for the redundancy filter"\n+\t       help="Remove motifs that are too similar, with lower values imposing a stricter filter. Must be between 0.0 and 1.0 (-sim)" />\n+\t<param name="em_cycles" type="integer" min="0" max="100" value="1"\n+\t       label="Number of expectation maximization (EM) cycles to perform"\n+\t       help="Number of cycles must be between 0 and 100 (-em)" />\n+      </when>\n+      <when value="off">\n+      </when>\n+    </conditional>\n+  </inputs>\n+  <outputs>\n+    <data name="output_motifs_file" format="txt" label="Weeder2 on ${on_string} (motifs)" />\n+    <data name="output_matrix_file" format="txt" label="Weeder2 on ${on_string} (matrix)" />\n+  </outputs>\n+  <tests>\n+    <test>\n+      <param name="sequence_file" value="weeder_in.fa" ftype="fasta" />\n+      <param name="species_code" value="MM" />\n+      <output name="output_motifs_file" file="weeder2_motifs.out" lines_diff="2" />\n+      <output name="output_matrix_file" file="weeder2_matrix.out" />\n+    </test>\n+  </tests>\n+  <help>\n+\n+.. 
class:: infomark\n+\n+**What it does**\n+\n+Weeder2 is a program for finding novel motifs (transcription factor binding sites)\n+conserved in a set of regulatory regions of related genes.'..b'gos** are specific sequences found within the input sequences or genomic\n+background.\n+\n+**Input sequence** (in FASTA format) should be short (100-200bp) and be reasonably\n+expected to contain an enriched motif(s).  This is not generally an issue with\n+transcription factor ChIP-seq derived sequences centred on the summit of binding\n+regions that are expected to contain a dominant motif and possibly secondary motifs.\n+\n+There is **no need to mask sequence for repetitive sequence** as factors may\n+legitimately bind repetitive sequence.\n+\n+**Use both strands of sequence** by default, unless there is a specific reason not\n+to do so.\n+\n+**Species to use for background comparison** should match the genome used to\n+generate the **input sequence**. The background genome motif frequencies are\n+generated from within the promoter regions of annotated genes and are shown to be a\n+good background for both promoter and other regulatory regions. \n+\n+**Use the ChIP-seq heuristic** (-chipseq) when there are a large number of\n+input sequences (hundreds or thousands). When -chipseq is used Weeder will use\n+only oligos from the first 100 sequences to build motifs with which it scans\n+all of the input sequences. This speeds up the computational time without too much\n+risk of losing important motifs. Even if not strictly necessary it\'s advisable to\n+order input sequences by their significance, e.g. fold enrichment or Pvalue. For\n+large data sets (-top) should be set to a number equating at least 10 to 20% of\n+input sequences (as recommended by the authors).\n+\n+**Number of discovered motifs to report** (-maxm) limits the number of reported\n+motifs even if there are more than -maxm. 
**Number of top scoring motifs to build\n+occurrences matrix profiles and outputs for** (-b) changes the number of top\n+scoring motifs of length 6, 8 and 10 for which the occurrence matrix is built.\n+Increasing -b may result in a larger number of reported motifs, but with potentially\n+more of low significance and increases the computational time. If increasing -b does\n+not result in more motifs in your results it means that the additional motifs are\n+filtered out by the redundancy filter or that the maximum number of reported motifs\n+set by -maxm has been reached.\n+\n+**Similarity threshold for the redundancy filter** (-sim) default setting is\n+recommended.\n+\n+**Number of expectation maximization (EM) cycles to perform** (-em) default is\n+recommended.  The option is included to help "clean up" the resulting motif matrices.\n+In this version the number of EM steps can be increased, which can be useful for\n+motifs with highly redundant stretches of sequence.\n+\n+-------------\n+\n+.. class:: infomark\n+\n+**A note on the results**\n+\n+The resulting matrices are the result of scanning (by default both strands) for\n+oligos of length 6, 8 and 10, allowing 1, 2 and 3 substitutions respectively. The\n+matrices within the matrix.w2 file can be input into other tools. The recommended\n+next step is to use **STAMP** (http://www.benoslab.pitt.edu/stamp/), which displays\n+the motifs as logos and identifies matches with libraries of known DNA binding\n+motifs, such as TRANSFAC or JASPAR.\n+\n+-------------\n+\n+.. class:: infomark\n+\n+**Credits**\n+\n+This Galaxy tool has been developed by Peter Briggs and Ian Donaldson within the\n+Bioinformatics Core Facility at the University of Manchester, and runs the Weeder2\n+motif discovery package:\n+\n+ * Zambelli, F., Pesole, G. and Pavesi, G. 2014. Using Weeder, Pscan, and PscanChIP\n+   for the Discovery of Enriched Transcription Factor Binding Site Motifs in\n+   Nucleotide Sequences. 
Current Protocols in Bioinformatics. 47:2.11:2.11.1\xe2\x80\x932.11.31.\n+ * http://onlinelibrary.wiley.com/doi/10.1002/0471250953.bi0211s47/full\n+\n+This tool is compatible with Weeder 2.0:\n+\n+ * http://159.149.160.51/modtools/downloads/weeder2.html\n+\n+Please kindly acknowledge both this Galaxy tool, the Weeder package and the utility\n+scripts if you use it in your work.\n+  </help>\n+</tool>\n'