Repository 'w4mkmeans'
hg clone https://toolshed.g2.bx.psu.edu/repos/eschen42/w4mkmeans

Changeset 0:6ccbe18131a6 (2017-08-08)
Next changeset 1:02cafb660b72 (2017-08-09)
Commit message:
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
added:
LICENSE
README
test-data/input_dataMatrix.tsv
test-data/input_sampleMetadata.tsv
test-data/input_variableMetadata.tsv
w4m_general_purpose_routines.R
w4mkmeans.xml
w4mkmeans_routines.R
w4mkmeans_wrapper.R
b
diff -r 000000000000 -r 6ccbe18131a6 LICENSE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE Tue Aug 08 15:30:38 2017 -0400
b
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 Hegeman Lab
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
b
diff -r 000000000000 -r 6ccbe18131a6 README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Tue Aug 08 15:30:38 2017 -0400
b
@@ -0,0 +1,2 @@
+# w4mkmeans_galaxy_wrapper
+Planemo-based Galaxy tool wrapper for the kmeans function of the R stats package (stats::kmeans), applied to the W4M dataMatrix
b
diff -r 000000000000 -r 6ccbe18131a6 test-data/input_dataMatrix.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_dataMatrix.tsv Tue Aug 08 15:30:38 2017 -0400
b
b'@@ -0,0 +1,50 @@\n+\tY11_1_RA5_01_213\tY2_1_RB1_01_218\tY4_1_RB3_01_220\tY12_1_RB4_01_221\tY1_1_RC1_01_228\tY14_1_RC6_01_234\tY1_2_RD1_01_239\tY14_2_RD2_01_240\tY4_2_RD3_01_241\tY11_2_RD7_01_246\tY2_2_RE4_01_253\tY12_2_RE6_01_255\tY14_3_GA2_01_260\tY2_3_GA4_01_264\tY1_3_GA6_01_266\tY4_3_GA7_01_267\tY12_3_GB1_01_270\tY11_3_GC3_01_283\tY14_4_GC7_01_287\tY11_4_GD8_01_299\tY2_4_GE1_01_300\tY12_4_GE2_01_304\tY1_4_GE3_01_305\tY4_4_GE7_01_309\n+M118T229\t95180.7747000001\t283910.279545455\t172325.198333334\t174004.4176\t163525.775666667\t194233.618999999\t142895.435454546\t201401.5926\t170930.6395\t156553.24775\t306528.603090909\t254326.335272728\t205081.407083333\t276873.723636364\t172071.209999999\t275012.5056\t211128.511\t168167.790333333\t209328.198222222\t207513.570083333\t258813.932083334\t229502.711454545\t149345.9256\t314364.292\n+M144T249\t326771.492625001\t512639.358421052\t353343.595999999\t237878.822999999\t339952.473666668\t283992.739200001\t452071.323999999\t315416.134166667\t375718.035999999\t239514.176\t223116.3613\t325831.656181819\t397814.869333331\t247409.932\t413466.492\t269627.942181817\t234804.7824\t251907.6144\t339409.032\t220480.5408\t397906.873411764\t264368.2988\t318008.1856\t495033.681090908\n+M146T229\t320441.164249999\t651210.312000002\t429015.22876923\t417925.811454544\t423543.081153847\t506829.407999998\t487752.597818182\t514688.158400001\t678459.668923076\t396639.590750001\t687614.607272727\t579226.515333335\t468291.098181819\t569996.119583333\t454626.169846155\t622650.143272727\t478350.217636364\t318360.932923077\t499228.386583333\t403412.987666666\t688496.831769231\t496628.723000001\t429311.235818182\t838101.268454545\n+M162T214\t133545.175\t32892.6798\t104879.959\t17067.3888000001\t120440.1933\t84904.1830000002\t316369.630727272\t96002.5383333335\t96790.2144000002\t116384.262555555\t71055.4319999999\t74141.2103999997\t87130.2437142859\t140641.192222222\t208159.7504\t163545.61125\t60130.1777777777\t89822.8736000002\t42445.1565999999\t711
59.8343999999\t48624.3143999998\t40807.9287000001\t133222.676\t131059.152\n+M163T243\t3320290.99999999\t3108104.81549999\t3448471.301\t2649747.70925\t3185779.57349999\t4484308.74208335\t3190403.006\t3461277.36441667\t4739358.17723077\t4988993.7978\t4326959.19833332\t4229307.74861538\t4462765.79775\t3372438.61476924\t2588784.26284211\t3698171.4\t3981525.46875\t5212799.26566667\t3029175.68083334\t4289629.53125\t3360903.98691667\t4298080.61900001\t4601251.09542858\t4758480.73153845\n+M163T227\t81353.0119999997\t277375.471090909\t177928.7208\t209416.268\t336317.096571428\t313326.249599999\t285323.6838\t277747.251000001\t220211.9752\t412879.704545456\t351173.200800001\t268427.1216\t347385.8924\t222852.162727273\t210297.4016\t462226.94\t243806.3265\t458488.170555556\t251712.213600001\t447141.623636363\t314488.332250001\t324182.184700001\t404536.579230769\t192976.228727273\n+M165T338\t724868.499090913\t877319.328000006\t759359.831833331\t705376.317666666\t899233.227846154\t800838.637500001\t1185786.86941666\t511026.113076926\t770710.490692306\t639682.233333337\t880641.546416661\t626728.389999999\t774059.748583339\t900734.519384618\t1045032.61569231\t893720.37375\t746035.217769229\t681933.343461539\t642841.190916669\t761710.223\t716082.415999997\t939827.908000004\t725854.212461543\t896462.216000001\n+M165T317\t1130886.05307692\t1812034.41157895\t1452469.27115385\t1516359.36084616\t1558264.69400001\t1580774.47499999\t2004662.84615385\t986370.671999996\t1315054.05833333\t981890.410666671\t1628334.48492307\t1547053.44276924\t1404010.065\t1901171.5945\t1782603.46833333\t1720287.20733333\t1345033.98538462\t1169690.92984615\t1239343.35369231\t1370985.05716666\t1245982.68483333\t1579861.11230769\t1297590.04075001\t1816625.20433332\n+M165T256\t445597.162363634\t752569.200000003\t778348.672666668\t421562.802000002\t654155.689090909\t752461.291363638\t587120.120363634\t374952.432888887\t766559.366153846\t308674.929090909\t280340.442272728\t638563.938833331\t679017.778181819\t662184.4
75666663\t725624.464714288\t671300.992250002\t509622.759272728\t312362.860222223\t394442.381818181\t520020.93\t655205.484727275\t437002.535999998\t284057.6\t744655.189923079\n+M166T317\t727736.923058826\t1400721.315\t1119526.94750001\t1131558.16892308\t1288'..b'\t271358.777666667\t503074.115083331\t44970.0660833335\t131609.623090909\t280617.921818184\t773743.02866666\t2334917.91500002\t99518.4412499995\t1016073.89891667\t338413.903833333\t55441.1083333332\t220399.192666668\t188045.522166668\t310482.079249999\t1530415.97333333\t60800.495\t1328460.313\t1203955.012\t778120.18475\t119439.517666666\t84655.04375\t452362.827999998\t1983764.26125001\n+M246T512\t146788.109999999\t407066.762249999\t633426.109\t533463.833615383\t1956593.89342856\t165702.49815\t120181.589833332\t353320.577749999\t642874.945142854\t119179.247615384\t99590.2682500005\t758379.92076923\t52436.3834166666\t147546.816500001\t294954.356923078\t1609162.86685714\t584555.170230768\t170359.68353846\t51917.8174615381\t229662.194400003\t211933.249999999\t60445.8974166666\t634458.219230768\t100737.631500001\n+M246T490\t75256.4203333334\t34590.6509090912\t150163.741636363\t34411.6830909094\t74755.9112727271\t103711.803166667\t45954.4614545453\t35152.0549999998\t24719.0138181817\t71617.0521818181\t11311.7707272727\t63841.0036363632\t27595.3401818182\t31920.2162500003\t98890.9431818186\t135013.109083333\t47742.2570909091\t27862.1563636363\t36125.5670909093\t62861.4490833336\t20261.5554545455\t22206.8672727271\t73136.961545455\t235336.321846153\n+M247T433\t4836161.03384619\t1044492.01435715\t951798.914285713\t1424537.325\t3787406.10666667\t9887497.97830775\t30488070.5421429\t3441749.28\t8499038.62799999\t17388611.7989999\t20526331.7456\t1106400.24907692\t3016821.11076925\t1076241.15692308\t35494666.1914286\t2461932.47007691\t1529940.06230769\t38365027.5439287\t18163699.0110001\t9636879.56399995\t10746013.236\t18833821.2581429\t4966775.00357139\t1639208.92369231\n+M247T452\t5195507.70825003\t369661.240285716\t4906
99.764615384\t436494.736999998\t7412698.44276928\t12170776.00775\t34440641.0083078\t5016978.85769229\t8803133.89476923\t27844873.7553571\t36488147.0250003\t506057.533076924\t3027637.58846154\t653070.954888888\t18741495.8692308\t552111.216461536\t608777.786\t42140646.1521431\t16718584.3028572\t10782086.7869286\t551145.999000004\t1022681.34276923\t5037210.42115387\t1667076.72061537\n+M248T433\t1132061.80500001\t198989.691727273\t242631.120000001\t385296.71625\t533583.706545452\t1700043.58833333\t7857412.81114289\t725243.771999994\t876190.803333335\t4407848.9363846\t2654510.973\t270592.230999999\t537803.304363637\t294857.264307691\t3564818.11400002\t422974.383666665\t327743.053846153\t4390963.79250004\t3575371.16159998\t1580044.82999999\t1915246.6695\t3058478.66324999\t981666.862307692\t318243.068\n+M251T497\t1063427.48939999\t865129.956142858\t495022.085833334\t488440.844615385\t1129829.22\t729926.57776923\t1940279.044\t1221215.83823077\t736401.295384619\t760975.140166666\t273042.048615386\t561850.154166666\t1121087.55323077\t721610.410153842\t2108837.44992856\t530356.117499997\t934878.418692311\t1242937.12242857\t328026.640384617\t1036373.51330769\t718361.729800002\t536136.268499999\t1419632.77730769\t2391210.31999999\n+M257T1014\t1566972.91999999\t1893127.35857143\t2151729.80857143\t1146142.15333334\t1970881.65571426\t1402985.23499999\t2812863.02500003\t1880269.89428571\t2527579.56705883\t1461081.88615383\t1791326.51\t1478730.04615386\t1823267.80461538\t2234492.50153846\t2529937.4047059\t3148326.66461539\t1868762.54266667\t1421995.43307692\t1517500.28\t1805290.20999998\t1642292.79999997\t1683944.028\t2379338.33999998\t2487090.41230771\n+M261T332\t1930046.88\t1414066.78508333\t3490063.85953848\t1047626.07700001\t4522749.77961537\t1593664.43775\t5263343.87100001\t2824216.94399999\t1630093.88518182\t4023852.75425\t1510231.9848\t1225828.25025001\t2182490.55266666\t1439822.48483333\t8412854.24699993\t3388651.75384613\t991437.89583333\t3719176.7\t1188956.48025\t2188517.84
249999\t1310827.23100001\t1540731.44238462\t1604828.60533333\t2495122.53646153\n+M263T323\t1171838.28333332\t1092287.00399999\t1784798.51050001\t230456.097454544\t3162077.20061541\t901831.286555555\t2870354.68692308\t3141344.14099999\t795229.416727274\t6039038.19407691\t3336169.10769232\t134580.004545455\t1114891.535\t919091.57784615\t2457832.58563637\t5292105.64400001\t263116.985833333\t6938595.35746151\t4367916.80500001\t3698151.2689231\t268597.518\t276733.936000001\t1068827.01066667\t9589520.52215389\n'
b
diff -r 000000000000 -r 6ccbe18131a6 test-data/input_sampleMetadata.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_sampleMetadata.tsv Tue Aug 08 15:30:38 2017 -0400
b
@@ -0,0 +1,25 @@
+sampleMetadata class polarity sampleType injectionOrder batch tissue hotelling_pval missing_pval decile_pval PCA_XSCOR.p1 PCA_XSCOR.p2 class_PLSDA_XSCOR.p1 class_PLSDA_XSCOR.p2 class_PLSDA_predictions
+Y11_1_RA5_01_213 y1 positive sample 213 1 2 0.0955561581467602 1 0.0306775551319138 -2.26882060894901 1.94958116765736 -3.05527623038242 2.32594165405491 y1
+Y2_1_RB1_01_218 y2 positive sample 218 1 1 0.090775969547078 1 0.0334308308237932 -5.43790231069006 3.28509884002914 -5.09396184849057 3.13632363820691 y2
+Y4_1_RB3_01_220 y4 positive sample 220 1 1 0.0922380872343134 1 0.0343065627030201 -5.96519534532645 2.76065569212045 -5.61271725241037 2.60836215684335 y4
+Y12_1_RB4_01_221 y2 positive sample 221 1 2 0.0731025791938841 1 0.0335814402688999 -3.90074024447077 2.32567583717618 -4.06427508572245 2.47867307479093 y2
+Y1_1_RC1_01_228 y1 positive sample 228 1 1 0.948646526283138 1 0.0150153992414774 -5.79172889541087 3.18442356006801 -5.71894211121787 3.14075608795096 y1
+Y14_1_RC6_01_234 y4 positive sample 234 1 2 0.961424772615561 1 0.0889762542416943 -3.50543091786719 1.90332246047248 -3.60257250798236 1.95341548985651 y4
+Y1_2_RD1_01_239 y1 positive sample 239 1 1 0.391486624975171 1 0.419632697534464 -10.0290510611276 3.46916578350898 -9.30301404180818 3.22425994348737 y1
+Y14_2_RD2_01_240 y4 positive sample 240 1 2 0.334478686842038 1 0.471265236704114 -0.955577667004931 1.62643379077323 -1.17127923245195 1.73770179646644 y4
+Y4_2_RD3_01_241 y4 positive sample 241 1 1 0.243979127543208 1 0.115904447650611 -6.29053214398527 3.48768497975009 -5.86080882473825 3.19393908077519 y4
+Y11_2_RD7_01_246 y1 positive sample 246 1 2 0.639085015503201 1 0.496291025606805 -0.737703114796199 1.89206669195622 -1.33734677265265 2.19119862732508 y1
+Y2_2_RE4_01_253 y2 positive sample 253 1 1 0.681339414372971 1 0.713644697663014 -4.43122798643441 2.59136016132011 -4.21959323228049 2.4942150403311 y2
+Y12_2_RE6_01_255 y2 positive sample 255 1 2 0.581861317126264 1 0.446040669279691 -3.42333388673909 2.19844489197916 -3.40077262161465 2.21882800112511 y2
+Y14_3_GA2_01_260 y4 positive sample 260 1 2 0.792323381194401 1 0.812319191661791 -2.61403564986014 1.9025507158402 -2.90132077481451 2.05744453719897 y4
+Y2_3_GA4_01_264 y2 positive sample 264 1 1 0.278347988263537 1 0.668405316795454 -6.19672954480257 4.11371745717593 -5.72942704887795 3.94423530839635 y2
+Y1_3_GA6_01_266 y1 positive sample 266 1 1 0.303133108610158 1 0.521065147801524 -5.91283168480956 2.57721868167528 -5.8128281040434 2.56623732055011 y1
+Y4_3_GA7_01_267 y4 positive sample 267 1 1 0.204620420161485 1 0.53551459376182 -5.81862869528986 3.42191037440281 -5.37442797934098 3.22465790741629 y4
+Y12_3_GB1_01_270 y2 positive sample 270 1 2 0.7747649633382 1 0.698966513767803 -3.0854085700971 1.67899209632345 -3.14572560562774 1.75648836353689 y2
+Y11_3_GC3_01_283 y1 positive sample 283 1 2 0.918803505111851 1 0.396638581468035 -1.16946485386388 1.66851916844539 -1.7032576378218 1.94860768310552 y1
+Y14_4_GC7_01_287 y4 positive sample 287 1 2 0.577273975934045 1 0.14919566995266 -1.24666389579168 2.84891525888206 -1.58652468539139 2.90364189714377 y4
+Y11_4_GD8_01_299 y1 positive sample 299 1 2 0.31302025978985 1 0.426766355892969 -2.15936901108787 1.66989335813642 -2.66042240568943 1.95509478589954 y1
+Y2_4_GE1_01_300 y2 positive sample 300 1 1 0.0338929937565918 1 0.419149865807458 -5.76080121045973 3.47845733452933 -5.39212267305567 3.34452446158071 y2
+Y12_4_GE2_01_304 y2 positive sample 304 1 2 0.130905883509031 1 0.59698349195307 -4.15585900988913 3.22702525356271 -4.28382960930699 3.34874792551519 y2
+Y1_4_GE3_01_305 y1 positive sample 305 1 1 0.129479197101219 1 0.618449187175638 -2.9921645121991 3.14523577730793 -2.85639638001866 3.08197032598192 y1
+Y4_4_GE7_01_309 y4 positive sample 309 1 1 0.758837157578886 1 0.339564008612217 -5.95949084754462 3.20317151028856 -5.46675286145534 3.04585537553442 y4
b
diff -r 000000000000 -r 6ccbe18131a6 test-data/input_variableMetadata.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_variableMetadata.tsv Tue Aug 08 15:30:38 2017 -0400
[
b'@@ -0,0 +1,50 @@\n+variableMetadata\tnamecustom\tmz\tmzmin\tmzmax\trt\trtmin\trtmax\tnpeaks\tmy_blank\tpool\ty0\ty1\ty2\ty3\ty4\ty5\ty6\ty7\ty8\ty9\tisotopes\tadduct\tpcgroup\tCV.samp\tCV.pool\tCV.ind\tblank_mean\tblank_sd\tblank_CV\tsample_mean\tsample_sd\tsample_CV\tblankMean_over_sampleMean\tpool_mean\tpool_sd\tpool_CV\tpoolCV_over_sampleCV\tclass_kruskal_fdr\tclass_kruskal_sig\tclass_kruskal_y1.y0_dif\tclass_kruskal_y2.y0_dif\tclass_kruskal_y3.y0_dif\tclass_kruskal_y4.y0_dif\tclass_kruskal_y5.y0_dif\tclass_kruskal_y6.y0_dif\tclass_kruskal_y7.y0_dif\tclass_kruskal_y8.y0_dif\tclass_kruskal_y9.y0_dif\tclass_kruskal_y2.y1_dif\tclass_kruskal_y3.y1_dif\tclass_kruskal_y4.y1_dif\tclass_kruskal_y5.y1_dif\tclass_kruskal_y6.y1_dif\tclass_kruskal_y7.y1_dif\tclass_kruskal_y8.y1_dif\tclass_kruskal_y9.y1_dif\tclass_kruskal_y3.y2_dif\tclass_kruskal_y4.y2_dif\tclass_kruskal_y5.y2_dif\tclass_kruskal_y6.y2_dif\tclass_kruskal_y7.y2_dif\tclass_kruskal_y8.y2_dif\tclass_kruskal_y9.y2_dif\tclass_kruskal_y4.y3_dif\tclass_kruskal_y5.y3_dif\tclass_kruskal_y6.y3_dif\tclass_kruskal_y7.y3_dif\tclass_kruskal_y8.y3_dif\tclass_kruskal_y9.y3_dif\tclass_kruskal_y5.y4_dif\tclass_kruskal_y6.y4_dif\tclass_kruskal_y7.y4_dif\tclass_kruskal_y8.y4_dif\tclass_kruskal_y9.y4_dif\tclass_kruskal_y6.y5_dif\tclass_kruskal_y7.y5_dif\tclass_kruskal_y8.y5_dif\tclass_kruskal_y9.y5_dif\tclass_kruskal_y7.y6_dif\tclass_kruskal_y8.y6_dif\tclass_kruskal_y9.y6_dif\tclass_kruskal_y8.y7_dif\tclass_kruskal_y9.y7_dif\tclass_kruskal_y9.y8_dif\tclass_kruskal_y1.y0_fdr\tclass_kruskal_y2.y0_fdr\tclass_kruskal_y3.y0_fdr\tclass_kruskal_y4.y0_fdr\tclass_kruskal_y5.y0_fdr\tclass_kruskal_y6.y0_fdr\tclass_kruskal_y7.y0_fdr\tclass_kruskal_y8.y0_fdr\tclass_kruskal_y9.y0_fdr\tclass_kruskal_y2.y1_fdr\tclass_kruskal_y3.y1_fdr\tclass_kruskal_y4.y1_fdr\tclass_kruskal_y5.y1_fdr\tclass_kruskal_y6.y1_fdr\tclass_kruskal_y7.y1_fdr\tclass_kruskal_y8.y1_fdr\tclass_kruskal_y9.y1_fdr\tclass_kruskal_y3.y2_fdr\tclass_kruskal_y4.y2_fdr\tclass_kruskal
_y5.y2_fdr\tclass_kruskal_y6.y2_fdr\tclass_kruskal_y7.y2_fdr\tclass_kruskal_y8.y2_fdr\tclass_kruskal_y9.y2_fdr\tclass_kruskal_y4.y3_fdr\tclass_kruskal_y5.y3_fdr\tclass_kruskal_y6.y3_fdr\tclass_kruskal_y7.y3_fdr\tclass_kruskal_y8.y3_fdr\tclass_kruskal_y9.y3_fdr\tclass_kruskal_y5.y4_fdr\tclass_kruskal_y6.y4_fdr\tclass_kruskal_y7.y4_fdr\tclass_kruskal_y8.y4_fdr\tclass_kruskal_y9.y4_fdr\tclass_kruskal_y6.y5_fdr\tclass_kruskal_y7.y5_fdr\tclass_kruskal_y8.y5_fdr\tclass_kruskal_y9.y5_fdr\tclass_kruskal_y7.y6_fdr\tclass_kruskal_y8.y6_fdr\tclass_kruskal_y9.y6_fdr\tclass_kruskal_y8.y7_fdr\tclass_kruskal_y9.y7_fdr\tclass_kruskal_y9.y8_fdr\tclass_kruskal_y1.y0_sig\tclass_kruskal_y2.y0_sig\tclass_kruskal_y3.y0_sig\tclass_kruskal_y4.y0_sig\tclass_kruskal_y5.y0_sig\tclass_kruskal_y6.y0_sig\tclass_kruskal_y7.y0_sig\tclass_kruskal_y8.y0_sig\tclass_kruskal_y9.y0_sig\tclass_kruskal_y2.y1_sig\tclass_kruskal_y3.y1_sig\tclass_kruskal_y4.y1_sig\tclass_kruskal_y5.y1_sig\tclass_kruskal_y6.y1_sig\tclass_kruskal_y7.y1_sig\tclass_kruskal_y8.y1_sig\tclass_kruskal_y9.y1_sig\tclass_kruskal_y3.y2_sig\tclass_kruskal_y4.y2_sig\tclass_kruskal_y5.y2_sig\tclass_kruskal_y6.y2_sig\tclass_kruskal_y7.y2_sig\tclass_kruskal_y8.y2_sig\tclass_kruskal_y9.y2_sig\tclass_kruskal_y4.y3_sig\tclass_kruskal_y5.y3_sig\tclass_kruskal_y6.y3_sig\tclass_kruskal_y7.y3_sig\tclass_kruskal_y8.y3_sig\tclass_kruskal_y9.y3_sig\tclass_kruskal_y5.y4_sig\tclass_kruskal_y6.y4_sig\tclass_kruskal_y7.y4_sig\tclass_kruskal_y8.y4_sig\tclass_kruskal_y9.y4_sig\tclass_kruskal_y6.y5_sig\tclass_kruskal_y7.y5_sig\tclass_kruskal_y8.y5_sig\tclass_kruskal_y9.y5_sig\tclass_kruskal_y7.y6_sig\tclass_kruskal_y8.y6_sig\tclass_kruskal_y9.y6_sig\tclass_kruskal_y8.y7_sig\tclass_kruskal_y9.y7_sig\tclass_kruskal_y9.y8_sig\tPCA_XLOAD.p1\tPCA_XLOAD.p2\tclass_PLSDA_XLOAD.p1\tclass_PLSDA_XLOAD.p2\tclass_PLSDA_VIP\tclass_PLSDA_y0.COEFF\tclass_PLSDA_y1.COEFF\tclass_PLSDA_y2.COEFF\tclass_PLSDA_y3.COEFF\tclass_PLSDA_y4.COEFF\tclass_PLSDA_y5.COEFF\tclass_PLSDA_y6.CO
EFF\tclass_PLSDA_y7.COEFF\tclass_PLSDA_y8.COEFF\tclass_PLSDA_y9.COEFF\n+M118T229\tM118T229.46\t118\t118\t118\t229.455291748047\t228.736724853516\t230.594131469727\t55\t0\t8\t7\t5\t7\t6\t5\t2\t3\t7\t3\t2\t\t\t118\t0.5'..b'0648\t-0.039765504390462\t1.72565015970653\t-0.00477543679721769\t-0.0281135207826401\t-0.0119711230496188\t-0.0122760173542411\t0.0145888092804331\t-0.0294872643686666\t-0.0529647825012226\t-0.00758914454936816\t0.00100245807777303\t0.180847689248479\n+M261T332\tM261T331.57\t261\t261\t261\t331.569473266602\t330.826965332031\t332.320007324219\t46\t0\t9\t0\t6\t5\t5\t5\t5\t7\t0\t0\t4\t[7][M]+\t\t57\t1.04326999694138\t0.221252808937921\t1\t1146683.03090926\t1186248.76846245\t1.03450451126133\t1145403.0063442\t1327072.12823314\t1.1586071634898\t1.0011175320459\t919208.931999451\t1174249.82975397\t1.27745694028425\t1.10257987395527\t4.40385233462652e-09\t1\t3708696.83879167\t1199629.11970834\t1182375.57066666\t2175988.65623076\t1469656.86694872\t622503.69551282\t292955.338295453\t-112722.763935607\t558652.956707152\t-2509067.71908333\t-2526321.26812501\t-1532708.18256091\t-2239039.97184295\t-3086193.14327885\t-3415741.50049621\t-3821419.60272727\t-3150043.88208451\t-17253.5490416749\t976359.536522425\t270027.747240385\t-577125.424195516\t-906673.781412883\t-1312351.88364394\t-640976.163001184\t993613.0855641\t287281.29628206\t-559871.875153841\t-889420.232371208\t-1295098.33460227\t-623722.613959509\t-706331.78928204\t-1553484.96071794\t-1883033.31793531\t-2288711.42016637\t-1617335.69952361\t-847153.171435901\t-1176701.52865327\t-1582379.63088433\t-911003.910241569\t-329548.357217367\t-735226.459448427\t-63850.7388056681\t-405678.10223106\t265697.618411698\t671375.720642759\t0.00139002475692429\t0.218861820635036\t0.193929048539156\t0.00482795212523277\t0.0509098162949605\t1\t1\t1\t1\t1\t1\t1\t1\t1\t0.0191097224190392\t2.03460175168813e-05\t1\t1\t1\t1\t1\t0.887240474852222\t0.0308625758419715\t1\t1\t1\t1\t0.942236742973925\t0.0420005984433424\t1\t1\t1\t
0.0713925658780487\t0.000728869250554339\t1\t1\t0.350219990140754\t0.0109198413823081\t1\t1\t0.951550886136829\t1\t1\t1\t1\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t-0.0955479004664773\t0.0474159124863094\t-0.0956108691815066\t0.0479267590250545\t1.1753217493328\t-0.0082863984982916\t0.0640358876530409\t-0.0618603905583846\t-0.00919670186957342\t0.0539591279777716\t0.045494114757647\t-0.0516806945814743\t-0.0130716890731868\t-0.00514638420130288\t-0.0195804521557219\n+M263T323\tM263T323.29\t263\t263\t263\t323.286376953125\t317.427062988281\t324.498107910156\t95\t0\t10\t8\t7\t3\t1\t8\t7\t5\t8\t2\t3\t[8][M]+\t\t25\t1.24549325872202\t0.169688974744568\t1\t4856126.41100817\t6417229.02375924\t1.32147075274076\t3269419.74893288\t4306448.22482372\t1.31719037490653\t1.48531751317437\t2850327.25225239\t3833675.62917697\t1.34499490405795\t1.02110896775524\t1.30827348411226e-07\t1\t-10747041.5308823\t-13490591.7476515\t-13452461.0366136\t-11300186.1489015\t-11332369.7322848\t-12211498.8419752\t-4048990.23979574\t-13581671.4958788\t-12655268.6170606\t-2743550.21676924\t-2705419.50573137\t-553144.618019246\t-585328.201402571\t-1464457.31109296\t6698051.29108652\t-2834629.96499652\t-1908227.08617834\t38130.711037879\t2190405.59875\t2158222.01536667\t1279092.90567629\t9441601.50785576\t-91079.748227274\t835323.13059091\t2152274.88771212\t2120091.3043288\t1240962.19463841\t9403470.79681788\t-129210.459265153\t797192.419553031\t-32183.583383325\t-911312.69307371\t7251195.90910576\t-2281485.34697727\t-1355082.46815909\t-879129.109690385\t7283379.49248909\t-2249301.76359395\t-1322898.88477576\t8162508.60217947\t-1370172.65390356\t-443769.77508538\t-9532681.25608304\t-8606278.37726485\t926402.878818184\t1\t0.00765410902822879\t0.00267847637147507\t0.838782008701031\t0.660394577658549\t0.19497635500955\t1\t0.00012469466674357\t0.627859985085531\t1\t1\t1\t1\t1\t1\t0.182190863683119\t1\t1\t1\t1\t
1\t0.0192386196102813\t1\t1\t1\t1\t1\t0.00908419374209192\t1\t1\t1\t1\t1\t0.367815979832692\t1\t1\t1\t0.632284650795659\t1\t0.753705194106446\t1\t1\t0.000950448517736164\t1\t1\t0\t1\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0.10766040080656\t0.100365831043394\t0.108802051106214\t0.0987892780891889\t0.861169551654294\t0.0236849274997751\t0.00167571590625499\t-0.0522278718910634\t0.00931335493080596\t0.0263424077901622\t0.0245391324631854\t-0.0190290011300382\t0.0116801484011658\t-0.0161456190570957\t-0.0135144337547376\n'
b
diff -r 000000000000 -r 6ccbe18131a6 w4m_general_purpose_routines.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/w4m_general_purpose_routines.R Tue Aug 08 15:30:38 2017 -0400
[
b'@@ -0,0 +1,283 @@\n+# prepare.data.matrix - Prepare x.datamatrix for multivariate statistical analysis (MVA)\n+#   - Motivation:\n+#     - Selection:\n+#       - You may want to exclude several samples from your analysis:\n+#         - If so, set the argument \'exclude.samples\' to a vector of sample names\n+#       - You may want to exclude several features from your analysis:\n+#         - If so, set the argument \'exclude.features\' to a vector of feature names\n+#     - Renaming samples:\n+#       - You may want to rename several samples from your analysis:\n+#         - If so, set the argument \'sample.rename.function\' to a function accepting a vector \n+#           of sample names and producing a vector of strings of equivalent length\n+#     - MVA is confounded by missing values.\n+#       - By default, this function imputes missing values as zero.\n+#       - For a different imputation, set the \'data.imputation\' argument to a function\n+#         accepting a single matrix argument and returning a matrix of the same\n+#         dimensions as the argument.\n+#     - Transformation\n+#       - It may be desirable to transform the intensity data to reduce the range.\n+#       - By default, this function performs an eighth-root transformation:\n+#         - Any root-transformation has the advantage of never being negative.\n+#         - Calculation of the eighth-root is four times faster in my hands than log10.\n+#         - However, it has the disadvantage that calculation of fold-differences \n+#           is not additive as with log-transformation.\n+#           - Rather, you must divide the values and raise to the eighth power.\n+#       - For a different transformation, set the \'data.transformation\' argument\n+#           to a function accepting a single matrix argument.\n+#         - The function should be written to return a matrix of the same dimensions\n+#           as the argument.\n+# arguments:\n+#   - x.matrix - matrix of 
intensities (or data.frame of sample metadata)\n+#     - one row per sample\n+#     - one column per feature or metadata attribute\n+#   - exclude.samples - vector of labels of matrix rows (samples) to omit from analysis\n+#   - exclude.features - vector of labels of matrix columns (features) to omit from analysis\n+#   - sample.rename.function - function to be used to rename rows if necessary, or NULL\n+#     - e.g., sample.rename.function = function(x) {\n+#               sub("(.*)_.*","\\\\1", row.names(x))\n+#             }\n+#   - data.imputation - function applied to matrix to impute missing values\n+#     - e.g., data.imputation = function(m) {\n+#               m[is.na(m)] <- min(m, na.rm = TRUE) / 100\n+#               return (m)\n+#             }\n+#   - data.transformation - function applied to matrix cells\n+#     - e.g., data.transformation = function(x) { return( log10(x) ) }\n+#         or, data.transformation = log10\n+# result value:\n+#   transformed, imputed x.datamatrix with renamed rows and with neither excluded samples nor excluded features\n+#\n+################################\n+##\n+##  Notes regarding the effectiveness and performance of the data transformation method.\n+##\n+##  The two transformations that I tried (log10 and 8th root) required different imputation methods.\n+##\n+##  For the LCMS resin data set that I was working with, separation in MVA was nearly equivalent for:\n+##    data.imputation <- function(x.matrix) {\n+##      x.matrix[is.na(x.matrix)] <- 0\n+##      return (x.matrix)\n+##    }\n+##    data.transformation <- function(x) {\n+##      sqrt( sqrt( sqrt(x) ) )\n+##    }\n+##  and\n+##    data.imputation <- function(x.matrix) {\n+##      x.matrix[is.na(x.matrix)] <- min(x.matrix, na.rm = TRUE) / 100\n+##      return (x.matrix)\n+##    }\n+##    data.transformation <- function(x) {\n+##      log10(x)\n+##    }\n+##\n+##  Note further that triple application of the square root:\n+##  - may be four times faster than log10:\n+##  
- may be three times faster than log2:\n+##\n+##      system.time( junk <- sqrt( sqrt( sqrt(1:100000'..b'  if (nrow(x) == 0) {\n+      print(str(x))\n+      stop("matrix has no rows")\n+    }\n+    if (ncol(x) == 0) {\n+      print(str(x))\n+      stop("matrix has no columns")\n+    }\n+    if ( is.numeric(x) ) {\n+      # exclude any rows with zero variance\n+      row.vars <- MatVar(x, dim = 1)\n+      nonzero.row.vars <- row.vars > 0\n+      nonzero.rows <- row.vars[nonzero.row.vars]\n+      if ( length(rownames(x)) != length(rownames(nonzero.rows)) ) {\n+        row.names <- attr(nonzero.rows,"names")\n+        x <- x[ row.names, , drop = FALSE ]\n+      }\n+      \n+      # exclude any columns with zero variance\n+      column.vars <- MatVar(x, dim = 2)\n+      nonzero.column.vars <- column.vars > 0\n+      nonzero.columns <- column.vars[nonzero.column.vars]\n+      if ( length(colnames(x)) != length(colnames(nonzero.columns)) ) {\n+        column.names <- attr(nonzero.columns,"names")\n+        x <- x[ , column.names, drop = FALSE ]\n+      }\n+    }\n+    return (x)\n+  }\n+\n+  if (is.null(x.matrix)) {\n+    stop("FATAL ERROR - prepare.data.matrix was called with null x.matrix")\n+  }\n+\n+  en$xpre <- x <- x.matrix\n+\n+  # exclude any features as indicated\n+  if ( !is.null(exclude.features) ) {\n+    my.colnames <- colnames(x)\n+    my.col.diff <- setdiff(my.colnames, exclude.features)\n+    x <- x[ , my.col.diff , drop = FALSE ]\n+  }\n+\n+  # exclude any samples as indicated\n+  if ( !is.null(exclude.samples) ) {\n+    my.rownames <- rownames(x)\n+    my.row.diff <- setdiff(my.rownames, exclude.samples)\n+    x <- x[ my.row.diff, , drop = FALSE ]\n+  }\n+\n+  # rename rows if desired\n+  if ( !is.null(sample.rename.function) ) {\n+    renamed <- sample.rename.function(x)\n+    rownames(x) <- renamed\n+  }\n+\n+  # save redacted x.datamatrix to environment\n+  en$redacted.data.matrix <- x\n+\n+  # impute values missing from the x.datamatrix\n+  if ( 
!is.null(data.imputation) ) {\n+    x <- data.imputation(x)\n+  }\n+\n+  # perform transformation if desired\n+  if ( !is.null(data.transformation) ) {\n+    x <- data.transformation(x)\n+  } else {\n+    x <- x\n+  }\n+\n+  # purge rows and columns that have zero variance\n+  if ( is.numeric(x) ) {\n+    x <- nonzero.var(x)\n+  }\n+\n+  # save imputed, transformed x.datamatrix to environment\n+  en$imputed.transformed.data.matrix <- x\n+\n+  return(x)\n+}\n+\n+\n+##-----------------------------------------------\n+## helper functions for error detection/reporting\n+##-----------------------------------------------\n+\n+# log-printing to stderr\n+log_print <- function(x, ...) { \n+  cat(\n+    format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")\n+  , " "\n+  , c(x, ...)\n+  , "\\n"\n+  , sep=""\n+  , file=stderr()\n+  )\n+}\n+\n+# tryCatchFunc produces a list\n+#   On success of expr(), tryCatchFunc produces\n+#     list(success = TRUE, value = expr(), msg = "")\n+#   On failure of expr(), tryCatchFunc produces\n+#     list(success = FALSE, value = NA, msg = "the error message")\n+tryCatchFunc <- function(expr) {\n+  # format error for logging\n+  format_error <- function(e) {\n+    paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ")\n+  }\n+  my_expr <- expr\n+  retval <- NULL\n+  tryCatch(\n+    expr = {\n+      retval <- ( list( success = TRUE, value = my_expr(), msg = "" ) )\n+    }\n+  , error = function(e) {\n+      retval <<- list( success = FALSE, value = NA, msg = format_error(e) )\n+    }\n+  )\n+  return (retval)\n+}\n+\n+# tryCatchProc produces a list\n+#   On success of expr(), tryCatchProc produces\n+#     list(success = TRUE, msg = "")\n+#   On failure of expr(), tryCatchProc produces\n+#     list(success = FALSE, msg = "the error message")\n+tryCatchProc <- function(expr) {\n+  # format error for logging\n+  format_error <- function(e) {\n+    paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ")\n+  }\n+  
retval <- NULL\n+  tryCatch(\n+    expr = {\n+      expr()\n+      retval <- ( list( success = TRUE, msg = "" ) )\n+    }\n+  , error = function(e) {\n+      retval <<- list( success = FALSE, msg = format_error(e) )\n+    }\n+  )\n+  return (retval)\n+}\n+\n'
b
diff -r 000000000000 -r 6ccbe18131a6 w4mkmeans.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/w4mkmeans.xml Tue Aug 08 15:30:38 2017 -0400
[
b'@@ -0,0 +1,319 @@\n+\xef\xbb\xbf<tool id="w4mkmeans" name="Kmeans_for_W4M" version="0.98.1">\n+  <description>Calculate K-means for dataMatrix features or samples</description>\n+\n+  <requirements>\n+    <requirement type="package" version="3.3.2">r-base</requirement>\n+    <requirement type="package" version="1.1_4">r-batch</requirement>\n+  </requirements>\n+\n+  <stdio>\n+    <exit_code range="1:" level="fatal" />\n+  </stdio>\n+\n+\n+  <command detect_errors="aggressive"><![CDATA[\n+    Rscript $__tool_directory__/w4mkmeans_wrapper.R\n+      tool_directory $__tool_directory__\n+      data_matrix_path \'$dataMatrix_in\'\n+      variable_metadata_path \'$variableMetadata_in\'\n+      sample_metadata_path \'$sampleMetadata_in\'\n+      ksamples \'$ksamples\'\n+      kfeatures \'$kfeatures\'\n+      iter_max \'$iter_max\'\n+      nstart \'$nstart\'\n+      algorithm \'$algorithm\'\n+      scores_out \'$scores_out\'\n+      sampleMetadata_out \'$sampleMetadata_out\'\n+      variableMetadata_out \'$variableMetadata_out\'\n+      slots "\\${GALAXY_SLOTS:-1}"\n+    ; echo exit code $?\n+  ]]></command>\n+\n+  <inputs>\n+    <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: \'.\', missing: NA, mode: numerical, separator: tab" />\n+    <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata columns, separator: tab" />\n+    <param name="variableMetadata_in" label="Variable metadata file" type="data" format="tabular" help="variable x metadata columns, separator: tab" />\n+    <param name="ksamples" label="K value(s) for samples" type="text" value = "0" help="[ksamples] Single K or comma-separated Ks for samples, or 0 for none." />\n+    <param name="kfeatures" label="K value(s) for features" type="text" value = "0" help="[kfeatures] Single K or comma-separated Ks for features (variables), or 0 for none." 
/>\n+    <param name="iter_max" label="Max number of iterations" type="text" value = "10" help="[iter_max] The maximum number of iterations allowed; default 10." />\n+    <param name="nstart" label="Number of random sets" type="text" value = "1" help="[nstart] How many random sets should be chosen; default 1." />\n+    <param name="algorithm" label="Algorithm for clustering" type="select" value = "Hartigan-Wong" help="[algorithm] K-means clustering algorithm, default \'Hartigan-Wong\'; alternatives \'Lloyd\', \'MacQueen\'; \'Forgy\' is a synonym for \'Lloyd\', see references for further info.">\n+      <option value="Forgy">Forgy</option>\n+      <option value="Hartigan-Wong" selected="True">Hartigan-Wong</option>\n+      <option value="Lloyd">Lloyd</option>\n+      <option value="MacQueen">MacQueen</option>\n+    </param>\n+  </inputs>\n+\n+  <outputs>\n+    <data name="sampleMetadata_out" label="${tool.name}_${sampleMetadata_in.name}" format="tabular" ></data>\n+    <data name="variableMetadata_out" label="${tool.name}_${variableMetadata_in.name}" format="tabular" ></data>\n+    <data name="scores_out" label="${tool.name}_${dataMatrix_in.name}.kmeans" format="tabular" ></data>\n+  </outputs>\n+\n+  <tests>\n+        <test>\n+      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>\n+      <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>\n+      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>\n+      <param name="ksamples" value="3,4"/>\n+      <param name="kfeatures" value="5,6,7"/>\n+      <param name="iter_max" value="10"/>\n+      <param name="nstart" value="1"/>\n+      <param name="algorithm" value="Hartigan-Wong"/>\n+      <output name="scores_out">\n+        <assert_contents>\n+          <has_text     text="proportion" />\n+          <has_text     text="0.87482" />\n+          <has_text     text="0.89248" />\n+          <has_text     text="0.95355" />\n+          <has_text     text="0.95673" />\n+          
<has_text     text="0.95963" />\n+        </assert_contents>\n+      </output>\n+    </test>\n+  </tests>\n+\n+  <help>\n+    <![CDATA[\n+\n+**Author** - Arthur Eschenlaue'..b'+-------------------+-------------------------------------------------------------------------------------------------------------------+\n+| Feature metadata  | https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/input_variableMetadata.tsv |\n++-------------------+-------------------------------------------------------------------------------------------------------------------+\n+\n+**Other input parameters**\n+\n++-----------------+---------------+\n+| Input Parameter | Value         |\n++=================+===============+\n+| ksamples        | 3,4           |\n++-----------------+---------------+\n+| kfeatures       | 5,6,7         |\n++-----------------+---------------+\n+| iter_max        | 10            |\n++-----------------+---------------+\n+| nstart          | 1             |\n++-----------------+---------------+\n+| algorithm       | Hartigan-Wong |\n++-----------------+---------------+\n+\n+----\n+NEWS\n+----\n+\n+August 2017, Version 0.98.1 - First release\n+\n+---------\n+Citations\n+---------\n+\n+    ]]>\n+  </help>\n+  <citations>\n+    <citation type="bibtex"><![CDATA[\n+@incollection{RCoreTeam2017,\n+  title = {stats::kmeans - K-Means Clustering},\n+  booktitle = {R: A Language and Environment for Statistical Computing},\n+  author = {{R Core Team}},\n+  publisher = {R Foundation for Statistical Computing},\n+  address = {Vienna, Austria},\n+  year = {2017},\n+  url = {https://stat.ethz.ch/R-manual/R-devel/library/stats/html/kmeans.html},\n+}\n+    ]]></citation>\n+    <!-- Forgy algorithm -->\n+    <citation type="bibtex"><![CDATA[\n+@article{forgy65,\n+  added-at = {2006-03-23T12:22:43.000+0100},\n+  author = {Forgy, E.},\n+  biburl = {https://www.bibsonomy.org/bibtex/21e31409932ce91df646c4731350e1207/hotho},\n+  interhash = 
{c86383cba8cfe00d5e6ef200016aca3f},\n+  intrahash = {1e31409932ce91df646c4731350e1207},\n+  journal = {Biometrics},\n+  keywords = {clustering kmeans},\n+  number = 3,\n+  pages = {768-769},\n+  timestamp = {2006-03-23T12:22:43.000+0100},\n+  title = {Cluster Analysis of Multivariate Data: Efficiency versus Interpretability of Classification},\n+  volume = 21,\n+  year = 1965\n+}\n+    ]]></citation>\n+    <!-- W4M 3.0 - Guitton et al. 2017-->\n+    <citation type="doi">10.1016/j.biocel.2017.07.002</citation>\n+    <!-- W4M 2.5 - Giacomini et al. 2014 -->\n+    <citation type="doi">10.1093/bioinformatics/btu813</citation>\n+    <!-- Hartigan and Wong algorithm -->\n+    <citation type="bibtex"><![CDATA[\n+@article{Hartigan79,\n+  added-at = {2007-02-27T16:22:09.000+0100},\n+  author = {Hartigan, J. and Wong, M.},\n+  biburl = {https://www.bibsonomy.org/bibtex/23d8bfc440c5725783876929c022f67ce/pierpaolo.pk81},\n+  description = {WSD},\n+  interhash = {10d6d33920d9af578a4d0a556dc1477d},\n+  intrahash = {3d8bfc440c5725783876929c022f67ce},\n+  journal = {Applied Statistics},\n+  keywords = {imported},\n+  pages = {100-108},\n+  timestamp = {2007-02-27T16:22:11.000+0100},\n+  title = {Algorithm AS136: A k-means clustering algorithm},\n+  volume = 28,\n+  year = 1979\n+}\n+    ]]></citation>\n+    <!-- Lloyd algorithm -->\n+    <citation type="doi">10.1109/TIT.1982.1056489</citation>\n+    <!-- MacQueen algorithm -->\n+    <citation type="bibtex"><![CDATA[\n+@inproceedings{MacQueen1967,\n+  added-at = {2011-01-11T13:35:01.000+0100},\n+  author = {MacQueen, J. B.},\n+  biburl = {https://www.bibsonomy.org/bibtex/25dcdb8cd9fba78e0e791af619d61d66d/enitsirhc},\n+  booktitle = {Proc. of the fifth Berkeley Symposium on Mathematical Statistics and Probability},\n+  editor = {Cam, L. M. 
Le and Neyman, J.},\n+  interhash = {8d7d4dfe7d3a06b8c9c3c2bb7aa91e28},\n+  intrahash = {5dcdb8cd9fba78e0e791af619d61d66d},\n+  keywords = {kmeans clustering},\n+  pages = {281-297},\n+  publisher = {University of California Press},\n+  timestamp = {2011-01-11T13:35:01.000+0100},\n+  title = {Some Methods for Classification and Analysis of MultiVariate Observations},\n+  volume = 1,\n+  year = 1967\n+}\n+    ]]></citation>\n+  </citations>\n+  <!--\n+     vim:et:sw=2:ts=2:\n+--> </tool>\n'
b
diff -r 000000000000 -r 6ccbe18131a6 w4mkmeans_routines.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/w4mkmeans_routines.R Tue Aug 08 15:30:38 2017 -0400
[
b'@@ -0,0 +1,216 @@\n+##------------------------------------------------------------------------------------------------------\n+## these are the batch-independent and file-structure-independent routines to support the w4mkmeans tool\n+##------------------------------------------------------------------------------------------------------\n+\n+library(parallel)\n+\n+w4kmeans_usage <- function() {\n+  return ( \n+    c(\n+     "w4mkmeans: bad input.",\n+     "# contract:",\n+     "    required - caller will provide an environment comprising:",\n+     "      log_print        - a logging function with the signature function(x, ...) expecting strings as x and ...",\n+     "      variableMetadata - the corresponding W4M data.frame having feature metadata",\n+     "      sampleMetdata    - the corresponding W4M data.frame having sample metadata",\n+     "      dataMatrix       - the corresponding W4M matrix",\n+     "      slots            - the number of parallel slots for calculating kmeans",\n+     "    optional - environment may comprise:",\n+     "      kfeatures        - an array of integers, the k\'s to apply for clustering by feature (default, empty array)",\n+     "      ksamples         - an array of integers, the k\'s to apply for clustering by sample (default, empty array)",\n+     "      iter.max         - the maximum number of iterations when calculating a cluster (default = 10)",\n+     "      nstart           - how many random sets of centers should be chosen (default = 1)",\n+     "      algorithm        - string from c(\'Hartigan-Wong\', \'Lloyd\', \'Forgy\', \'MacQueen\') (default = Hartigan-Wong)",\n+     "      ",\n+     "    this routine will return a list comprising:",\n+     "      variableMetadata - the input variableMetadata data.frame with updates, if any",\n+     "      sampleMetadata   - the input sampleMetadata data.frame with updates, if any",\n+     "      scores           - an array of strings, each representing a line of a tsv having the 
following header:",\n+     "                           clusterOn TAB k TAB totalSS TAB betweenSS TAB proportion"\n+    )\n+  )\n+}\n+\n+w4mkmeans <- function(env) {\n+  # abort if \'env\' is null or is not an environment\n+  if ( is.null(env) || ! is.environment(env) ) {\n+    lapply(w4kmeans_usage(),print)\n+  } \n+  # supply default arguments\n+  if ( ! exists("iter.max" , env) ) env$iter.max  <- 10\n+  if ( ! exists("nstart"   , env) ) env$nstart    <- 1\n+  if ( ! exists("algorithm", env) ) env$algorithm <- \'Hartigan-Wong\'\n+  if ( ! exists("ksamples" , env) ) env$ksamples  <- c()\n+  if ( ! exists("kfeatures", env) ) env$kfeatures <- c()\n+  # check mandatory arguments\n+  expected <- c(\n+    "log_print"\n+  , "variableMetadata"\n+  , "sampleMetadata"\n+  , "dataMatrix"\n+  , "slots"\n+  )\n+  missing_from_env <- setdiff(expected, (ls(env)))\n+  if ( length(missing_from_env) > 0 ) {\n+    print(paste(c(\'expected environment members not found: \', as.character(missing_from_env)), collapse = ", "))\n+    lapply(w4kmeans_usage(),print)\n+    stop("w4mkmeans: contract has been broken")\n+  } \n+  # extract parameters from \'env\'\n+  failure_action  <- env$log_print\n+  scores          <- c( "clusterOn\\tk\\ttotalSS\\tbetweenSS\\tproportion" )\n+  sampleMetadata  <- env$sampleMetadata\n+  featureMetadata <- env$variableMetadata\n+  ksamples        <- as.numeric(env$ksamples)\n+  kfeatures       <- as.numeric(env$kfeatures)\n+  slots           <- env$slots\n+\n+  myLapply <- parLapply\n+  # uncomment the next line to mimic parLapply, but without parallelization (for testing/experimentation)\n+  # myLapply <- function(cl, ...) 
lapply(...)\n+  cl <- NULL\n+  if ( identical(myLapply, parLapply) ) {\n+    failure_action(sprintf("w4mkmeans: using parallel evaluation with %d slots", slots))\n+    failure_action(names(cl))\n+    cl <- makePSOCKcluster(names = slots)\n+    # from ?makePSOCKcluster: "It is good practice to shut down the workers by calling stopCluster."\n+    clusterExport(\n+      cl = cl\n+    , varlist = c(\n+        "tryCatchFunc"\n+      , "calc_kmeans'..b'st[[i]]\n+          if (result$success) {\n+            sampleMetadata[sprintf("k%d",ksamples[i])] <- result$value$clusters\n+            scores <- c(scores, result$value$scores)\n+          }\n+        }\n+      }\n+\n+      # for each $i in kfeatures, append column \'k$i\' to data frame featureMetadata\n+      kfeatures_length <- length(kfeatures)\n+      if ( kfeatures_length > 0 ) {\n+        feat_result_list <- myLapply( \n+            cl = cl\n+          , kfeatures\n+          , calc_kmeans_one_dimension_one_k\n+          , env = env\n+          , dimension = "features"\n+          )\n+        for ( i in 1:kfeatures_length ) {\n+          result <- feat_result_list[[i]]\n+          if (result$success) {\n+            featureMetadata[sprintf("k%d",kfeatures[i])] <- result$value$clusters\n+            scores <- c(scores, result$value$scores)\n+          }\n+        }\n+      }\n+\n+      return ( \n+        list(\n+          variableMetadata = featureMetadata\n+        , sampleMetadata   = sampleMetadata  \n+        , scores           = scores          \n+        )\n+      )\n+    }\n+  , finally = final(cl)\n+  )\n+}\n+\n+# calculate k-means for features or samples\n+#   - recall that the dataMatrix has features in rows and samples in columns\n+# return value:\n+#   list(clusters = km$cluster, scores = scores) \n+# arguments:\n+#   env:\n+#     environment having dataMatrix\n+#   dimension:\n+#   - "samples":  produce clusters column to add to the sampleMetadata table\n+#     - this is the default case\n+#   - 
"variables":  produce clusters column to add to the variableMetadata table\n+#   k:\n+#     integer, the number of clusters to make\n+calc_kmeans_one_dimension_one_k <- function(k, env, dimension = "samples") {\n+  # abort if environment is not as expected\n+  if ( is.null(env) || ! is.environment(env) ) {\n+    stop("calc_kmeans_one_dimension_one_k - argument \'env\' is not an environment")\n+  } \n+  if ( ! exists("log_print", env) || ! is.function(env$log_print) ) {\n+    stop("calc_kmeans_one_dimension_one_k - argument \'env\' - environment does not include log_print or it is not a function")\n+  } \n+  # abort if k is not as expected\n+  if ( ! is.numeric(k) ) {\n+    stop(sprintf("calc_kmeans_one_dimension_one_k - expected numeric argument \'k\' but type is %s", typeof(k)))\n+  } \n+  k <- as.integer(k)\n+  # abort if dimension is not as expected\n+  if (   ! is.character(dimension) \n+      || ! Reduce( f =`|`, x = sapply(X = c("features","samples"), FUN = `==`, dimension), init = FALSE) ) {\n+    stop("calc_kmeans_one_dimension_one_k - argument \'dimension\' is neither \'features\' nor \'samples\'")\n+  } \n+  dm           <- env$dataMatrix\n+  iter.max     <- env$iter.max\n+  nstart       <- env$nstart\n+  algorithm    <- env$algorithm\n+  dim_features <- dimension == "features"\n+  # tryCatchFunc produces a list\n+  #   On success of expr(), tryCatchFunc produces\n+  #     list(success TRUE, value = expr(), msg = "")\n+  #   On failure of expr(), tryCatchFunc produces\n+  #     list(success = FALSE, value = NA, msg = "the error message")\n+  result_list <- tryCatchFunc( expr = function() {\n+    # kmeans clusters the rows; features are the columns of args_env$dataMatrix; samples, the rows\n+    # - to calculate sample-clusters, no transposition is needed because samples are rows\n+    # - to calculate feature-clusters, transposition is needed so that features will be the rows\n+    if ( ! 
dim_features ) dm <- t(dm)\n+    dm <- prepare.data.matrix( x.matrix = dm, data.transformation = function(x) { x } )\n+    # need to set.seed to get reproducible results from kmeans\n+    set.seed(4567)\n+    # do the k-means clustering\n+    km <- kmeans( x = dm, centers = k, iter.max, nstart = nstart, algorithm = algorithm )\n+    scores <-\n+      sprintf("%s\\t%d\\t%0.5e\\t%0.5e\\t%0.5f"\n+             , dimension\n+             , k\n+             , km$totss\n+             , km$betweenss\n+             , km$betweenss/km$totss\n+             )\n+    list(clusters = km$cluster, scores = scores)\n+  })\n+  return ( result_list )\n+}\n+\n'
b
diff -r 000000000000 -r 6ccbe18131a6 w4mkmeans_wrapper.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/w4mkmeans_wrapper.R Tue Aug 08 15:30:38 2017 -0400
[
b'@@ -0,0 +1,370 @@\n+#!/usr/bin/env Rscript\n+\n+# references:\n+#   what this does:\n+#   - [stats::kmeans](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/kmeans.html)\n+#   - [stats::p.adjust](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html)\n+#   how this does what it does:\n+#   - [parallel::clusterApply](https://stat.ethz.ch/R-manual/R-devel/library/parallel/html/clusterApply.html)\n+\n+# invocation:\n+#   Rscript $__tool_directory__/w4mkmeans_wrapper.R \\\n+#     tool_directory $__tool_directory__\n+#     data_matrix_path \'$dataMatrix_in\' \\\n+#     variable_metadata_path \'$variableMetadata_in\' \\\n+#     sample_metadata_path \'$sampleMetadata_in\' \\\n+#     kfeatures \'$kfeatures\' \\\n+#     ksamples \'$ksamples\' \\\n+#     iter_max \'$iter_max\' \\\n+#     nstart \'$nstart\' \\\n+#     algorithm \'$algorithm\' \\\n+#     scores \'$scores\' \\\n+#     sampleMetadata_out \'$sampleMetadata_out\' \\\n+#     variableMetadata_out \'$variableMetadata_out\' \\\n+#     slots "\\${GALAXY_SLOTS:-1}" \\\n+# \n+# <inputs>\n+#   <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: \'.\', missing: NA, mode: numerical, separator: tab" />\n+#   <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata columns, separator: tab" />\n+#   <param name="variableMetadata_in" label="Variable metadata file" type="data" format="tabular" help="variable x metadata columns, separator: tab" />\n+#   <param name="kfeatures" label="K value(s) for features" type="text" value="0" help="Single or min,max value(s) for K for features (variables), or 0 for none." />\n+#   <param name="ksamples" label="K value(s) for samples" type="text" value="0" help="Single or min,max value(s) for K for samples, or 0 for none." 
/>\n+#   <param name="iter_max" label="Max number of iterations" type="text" value="10" help="The maximum number of iterations allowed; default 10." />\n+#   <param name="nstart" label="Number of random sets" type="text" value="1" help="How many random sets should be chosen; default 1." />\n+# \t<param name="algorithm" label="Algorithm for clustering" type="select" value = "Hartigan-Wong" help="K-means clustering algorithm, default \'Hartigan-Wong\'; alternatives \'Lloyd\', \'MacQueen\'; \'Forgy\' is a synonym for \'Lloyd\', see stats::kmeans reference for further info and references.">\n+# \t  <option value="Hartigan-Wong" selected="TRUE">Hartigan-Wong</option>\n+# \t  <option value="Lloyd">Lloyd</option>\n+# \t  <option value="MacQueen">MacQueen</option>\n+# \t  <option value="Forgy">Forgy</option>\n+# \t</param>\n+# </inputs>\n+# <outputs>\n+#   <data name="sampleMetadata_out" label="${tool.name}_${sampleMetadata_in.name}" format="tabular" ></data>\n+#   <data name="variableMetadata_out" label="${tool.name}_${variableMetadata_in.name}" format="tabular" ></data>\n+# </outputs>\n+\n+##------------------------\n+## libraries for this file\n+##------------------------\n+\n+library(batch) ## for \'parseCommandArgs\'\n+\n+##-------------------\n+## Pre-initialization\n+##-------------------\n+\n+argVc <- unlist(parseCommandArgs(evaluate=FALSE))\n+if ( Reduce( `|`, grepl("tool_directory",names(argVc)) ) ) {\n+  tool_directory <- as.character(argVc["tool_directory"])\n+} else {\n+  tool_directory <- "."\n+}\n+r_path <- function(f) paste( tool_directory, f, sep = "/" )\n+\n+##----------------------------------------------------------\n+## Computation - source general and module-specific routines\n+##----------------------------------------------------------\n+\n+log_print <- function(x, ...) 
{ \n+  cat(\n+    format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")\n+  , " "\n+  , c(x, ...)\n+  , "\\n"\n+  , sep=""\n+  , file=stderr()\n+  )\n+}\n+\n+# log_print(sprintf("tool_directory is %s", tool_directory))\n+\n+w4m_general_purpose_routines_path <- r_path("w4m_general_purpose_routines.R")\n+# log_print(sprintf("w4m_general_purpose_routines_path is %s", w4m_general_purpose_routines_path))\n+if ( ! file.exists(w4m_general_purpose_routines_path) ) {\n+'..b'=F, sep=\'\\t\')\n+\n+## output files\n+sampleMetadata_out              <- as.character(argVc["sampleMetadata_out"])\n+variableMetadata_out            <- as.character(argVc["variableMetadata_out"])\n+scores_out                      <- as.character(argVc["scores_out"])\n+## input files\n+args_env$data_matrix_path       <- as.character(argVc["data_matrix_path"])\n+args_env$variable_metadata_path <- as.character(argVc["variable_metadata_path"])\n+args_env$sample_metadata_path   <- as.character(argVc["sample_metadata_path"])\n+  \n+# other parameters\n+\n+# multi-string args - split csv: "1,2,3" -> c("1","2","3")\n+args_env$kfeatures <- strsplit(x = as.character(argVc[\'kfeatures\']), split = ",", fixed = TRUE)[[1]]\n+args_env$ksamples  <- strsplit(x = as.character(argVc[\'ksamples\' ]), split = ",", fixed = TRUE)[[1]]\n+# numeric args\n+args_env$iter_max  <- as.numeric(               argVc[\'iter_max\'  ])\n+args_env$nstart    <- as.numeric(               argVc[\'nstart\'   ])\n+args_env$slots     <- as.numeric(               argVc[\'slots\'    ])\n+# string args\n+args_env$algorithm <- as.character(             argVc[\'algorithm\'])\n+args_env$log_print <- log_print\n+\n+log_print("PARAMETERS (parsed):")\n+for (member in ls(args_env)) {\n+  value <- get(member, args_env)\n+  value <- ifelse(length(value) == 1, value, sprintf("c(%s)", paste(value, collapse=", ")))\n+  \n+  log_print(sprintf("  - %s: %s", member, ifelse( !is.function(value) , value, "function" 
)))\n+}\n+log_print("")\n+\n+##---------------------------------------------------------\n+## Computation - attempt to read input data\n+##---------------------------------------------------------\n+if ( ! read_input_data(args_env, failure_action = read_input_failure_action) ) {\n+  result <- -1\n+} else {\n+  log_print("Input data was read successfully.")\n+  result <- w4mkmeans(env = args_env)\n+  log_print("returned from call to w4mkmeans.")\n+}\n+\n+if ( length(result) == 0 ) {\n+  log_print("no results were produced")\n+  # exit with status code non-zero to indicate error\n+  q(save = "no", status = 1, runLast = FALSE)\n+} else if ( ! setequal(names(result),c("variableMetadata","sampleMetadata","scores")) ) {\n+  log_print(sprintf("unexpected result keys %s", names(result)))\n+  # exit with status code non-zero to indicate error\n+  q(save = "no", status = 1, runLast = FALSE)\n+} else if ( ! write_result(result = result$variableMetadata, file_path = variableMetadata_out, kind_string = "clustered variableMetadata")$success ) {\n+  log_print("failed to write output file for clustered variableMetadata")\n+  # exit with status code non-zero to indicate error\n+  q(save = "no", status = 1, runLast = FALSE)\n+} else if ( ! 
write_result(result = result$sampleMetadata, file_path = sampleMetadata_out, kind_string = "clustered sampleMetadata")$success ) {\n+  log_print("failed to write output file for clustered sampleMetadata")\n+  # exit with status code non-zero to indicate error\n+  q(save = "no", status = 1, runLast = FALSE)\n+} else {\n+  tryCatch(\n+    expr = {\n+      fileConn<-file(scores_out)\n+      writeLines(result$scores, fileConn)\n+      close(fileConn)\n+    }\n+  , error = function(e) {\n+      log_print(sprintf("failed to write output file for cluster scores - %s", format_error(e)))\n+      # exit with status code non-zero to indicate error\n+      q(save = "no", status = 1, runLast = FALSE)\n+    }\n+  )\n+}\n+\n+##--------\n+## Closing\n+##--------\n+\n+\n+if (!file.exists(sampleMetadata_out)) {\n+  log_print(sprintf("ERROR %s::w4m_kmeans_wrapper - file \'%s\' was not created", modNamC, sampleMetadata_out))\n+}\n+\n+if (!file.exists(variableMetadata_out)) {\n+  log_print(sprintf("ERROR %s::w4m_kmeans_wrapper - file \'%s\' was not created", modNamC, variableMetadata_out))\n+}\n+\n+if (!file.exists(scores_out)) {\n+  log_print(sprintf("ERROR %s::w4m_kmeans_wrapper - file \'%s\' was not created", modNamC, scores_out))\n+}\n+\n+log_print("Normal termination of \'", modNamC, "\' Galaxy module call")\n+\n+# exit with status code zero\n+q(save = "no", status = 0, runLast = FALSE)\n'