Next changeset 1:02cafb660b72 (2017-08-09) |
Commit message:
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d |
added:
LICENSE README test-data/input_dataMatrix.tsv test-data/input_sampleMetadata.tsv test-data/input_variableMetadata.tsv w4m_general_purpose_routines.R w4mkmeans.xml w4mkmeans_routines.R w4mkmeans_wrapper.R |
b |
diff -r 000000000000 -r 6ccbe18131a6 LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE Tue Aug 08 15:30:38 2017 -0400 |
b |
@@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Hegeman Lab + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. |
b |
diff -r 000000000000 -r 6ccbe18131a6 README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Tue Aug 08 15:30:38 2017 -0400 |
b |
@@ -0,0 +1,2 @@ +# w4mkmeans_galaxy_wrapper +Planemo-based galaxy-tool-wrapper to wrap the stats::kmeans R package for the W4M dataMatrix |
b |
diff -r 000000000000 -r 6ccbe18131a6 test-data/input_dataMatrix.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input_dataMatrix.tsv Tue Aug 08 15:30:38 2017 -0400 |
b |
b'@@ -0,0 +1,50 @@\n+\tY11_1_RA5_01_213\tY2_1_RB1_01_218\tY4_1_RB3_01_220\tY12_1_RB4_01_221\tY1_1_RC1_01_228\tY14_1_RC6_01_234\tY1_2_RD1_01_239\tY14_2_RD2_01_240\tY4_2_RD3_01_241\tY11_2_RD7_01_246\tY2_2_RE4_01_253\tY12_2_RE6_01_255\tY14_3_GA2_01_260\tY2_3_GA4_01_264\tY1_3_GA6_01_266\tY4_3_GA7_01_267\tY12_3_GB1_01_270\tY11_3_GC3_01_283\tY14_4_GC7_01_287\tY11_4_GD8_01_299\tY2_4_GE1_01_300\tY12_4_GE2_01_304\tY1_4_GE3_01_305\tY4_4_GE7_01_309\n+M118T229\t95180.7747000001\t283910.279545455\t172325.198333334\t174004.4176\t163525.775666667\t194233.618999999\t142895.435454546\t201401.5926\t170930.6395\t156553.24775\t306528.603090909\t254326.335272728\t205081.407083333\t276873.723636364\t172071.209999999\t275012.5056\t211128.511\t168167.790333333\t209328.198222222\t207513.570083333\t258813.932083334\t229502.711454545\t149345.9256\t314364.292\n+M144T249\t326771.492625001\t512639.358421052\t353343.595999999\t237878.822999999\t339952.473666668\t283992.739200001\t452071.323999999\t315416.134166667\t375718.035999999\t239514.176\t223116.3613\t325831.656181819\t397814.869333331\t247409.932\t413466.492\t269627.942181817\t234804.7824\t251907.6144\t339409.032\t220480.5408\t397906.873411764\t264368.2988\t318008.1856\t495033.681090908\n+M146T229\t320441.164249999\t651210.312000002\t429015.22876923\t417925.811454544\t423543.081153847\t506829.407999998\t487752.597818182\t514688.158400001\t678459.668923076\t396639.590750001\t687614.607272727\t579226.515333335\t468291.098181819\t569996.119583333\t454626.169846155\t622650.143272727\t478350.217636364\t318360.932923077\t499228.386583333\t403412.987666666\t688496.831769231\t496628.723000001\t429311.235818182\t838101.268454545\n+M162T214\t133545.175\t32892.6798\t104879.959\t17067.3888000001\t120440.1933\t84904.1830000002\t316369.630727272\t96002.5383333335\t96790.2144000002\t116384.262555555\t71055.4319999999\t74141.2103999997\t87130.2437142859\t140641.192222222\t208159.7504\t163545.61125\t60130.1777777777\t89822.8736000002\t42445.1565999999\t71159.8343999999\t48624.3143999998\t40807.9287000001\t133222.676\t131059.152\n+M163T243\t3320290.99999999\t3108104.81549999\t3448471.301\t2649747.70925\t3185779.57349999\t4484308.74208335\t3190403.006\t3461277.36441667\t4739358.17723077\t4988993.7978\t4326959.19833332\t4229307.74861538\t4462765.79775\t3372438.61476924\t2588784.26284211\t3698171.4\t3981525.46875\t5212799.26566667\t3029175.68083334\t4289629.53125\t3360903.98691667\t4298080.61900001\t4601251.09542858\t4758480.73153845\n+M163T227\t81353.0119999997\t277375.471090909\t177928.7208\t209416.268\t336317.096571428\t313326.249599999\t285323.6838\t277747.251000001\t220211.9752\t412879.704545456\t351173.200800001\t268427.1216\t347385.8924\t222852.162727273\t210297.4016\t462226.94\t243806.3265\t458488.170555556\t251712.213600001\t447141.623636363\t314488.332250001\t324182.184700001\t404536.579230769\t192976.228727273\n+M165T338\t724868.499090913\t877319.328000006\t759359.831833331\t705376.317666666\t899233.227846154\t800838.637500001\t1185786.86941666\t511026.113076926\t770710.490692306\t639682.233333337\t880641.546416661\t626728.389999999\t774059.748583339\t900734.519384618\t1045032.61569231\t893720.37375\t746035.217769229\t681933.343461539\t642841.190916669\t761710.223\t716082.415999997\t939827.908000004\t725854.212461543\t896462.216000001\n+M165T317\t1130886.05307692\t1812034.41157895\t1452469.27115385\t1516359.36084616\t1558264.69400001\t1580774.47499999\t2004662.84615385\t986370.671999996\t1315054.05833333\t981890.410666671\t1628334.48492307\t1547053.44276924\t1404010.065\t1901171.5945\t1782603.46833333\t1720287.20733333\t1345033.98538462\t1169690.92984615\t1239343.35369231\t1370985.05716666\t1245982.68483333\t1579861.11230769\t1297590.04075001\t1816625.20433332\n+M165T256\t445597.162363634\t752569.200000003\t778348.672666668\t421562.802000002\t654155.689090909\t752461.291363638\t587120.120363634\t374952.432888887\t766559.366153846\t308674.929090909\t280340.442272728\t638563.938833331\t679017.778181819\t662184.475666663\t725624.464714288\t671300.992250002\t509622.759272728\t312362.860222223\t394442.381818181\t520020.93\t655205.484727275\t437002.535999998\t284057.6\t744655.189923079\n+M166T317\t727736.923058826\t1400721.315\t1119526.94750001\t1131558.16892308\t1288'..b'\t271358.777666667\t503074.115083331\t44970.0660833335\t131609.623090909\t280617.921818184\t773743.02866666\t2334917.91500002\t99518.4412499995\t1016073.89891667\t338413.903833333\t55441.1083333332\t220399.192666668\t188045.522166668\t310482.079249999\t1530415.97333333\t60800.495\t1328460.313\t1203955.012\t778120.18475\t119439.517666666\t84655.04375\t452362.827999998\t1983764.26125001\n+M246T512\t146788.109999999\t407066.762249999\t633426.109\t533463.833615383\t1956593.89342856\t165702.49815\t120181.589833332\t353320.577749999\t642874.945142854\t119179.247615384\t99590.2682500005\t758379.92076923\t52436.3834166666\t147546.816500001\t294954.356923078\t1609162.86685714\t584555.170230768\t170359.68353846\t51917.8174615381\t229662.194400003\t211933.249999999\t60445.8974166666\t634458.219230768\t100737.631500001\n+M246T490\t75256.4203333334\t34590.6509090912\t150163.741636363\t34411.6830909094\t74755.9112727271\t103711.803166667\t45954.4614545453\t35152.0549999998\t24719.0138181817\t71617.0521818181\t11311.7707272727\t63841.0036363632\t27595.3401818182\t31920.2162500003\t98890.9431818186\t135013.109083333\t47742.2570909091\t27862.1563636363\t36125.5670909093\t62861.4490833336\t20261.5554545455\t22206.8672727271\t73136.961545455\t235336.321846153\n+M247T433\t4836161.03384619\t1044492.01435715\t951798.914285713\t1424537.325\t3787406.10666667\t9887497.97830775\t30488070.5421429\t3441749.28\t8499038.62799999\t17388611.7989999\t20526331.7456\t1106400.24907692\t3016821.11076925\t1076241.15692308\t35494666.1914286\t2461932.47007691\t1529940.06230769\t38365027.5439287\t18163699.0110001\t9636879.56399995\t10746013.236\t18833821.2581429\t4966775.00357139\t1639208.92369231\n+M247T452\t5195507.70825003\t369661.240285716\t490699.764615384\t436494.736999998\t7412698.44276928\t12170776.00775\t34440641.0083078\t5016978.85769229\t8803133.89476923\t27844873.7553571\t36488147.0250003\t506057.533076924\t3027637.58846154\t653070.954888888\t18741495.8692308\t552111.216461536\t608777.786\t42140646.1521431\t16718584.3028572\t10782086.7869286\t551145.999000004\t1022681.34276923\t5037210.42115387\t1667076.72061537\n+M248T433\t1132061.80500001\t198989.691727273\t242631.120000001\t385296.71625\t533583.706545452\t1700043.58833333\t7857412.81114289\t725243.771999994\t876190.803333335\t4407848.9363846\t2654510.973\t270592.230999999\t537803.304363637\t294857.264307691\t3564818.11400002\t422974.383666665\t327743.053846153\t4390963.79250004\t3575371.16159998\t1580044.82999999\t1915246.6695\t3058478.66324999\t981666.862307692\t318243.068\n+M251T497\t1063427.48939999\t865129.956142858\t495022.085833334\t488440.844615385\t1129829.22\t729926.57776923\t1940279.044\t1221215.83823077\t736401.295384619\t760975.140166666\t273042.048615386\t561850.154166666\t1121087.55323077\t721610.410153842\t2108837.44992856\t530356.117499997\t934878.418692311\t1242937.12242857\t328026.640384617\t1036373.51330769\t718361.729800002\t536136.268499999\t1419632.77730769\t2391210.31999999\n+M257T1014\t1566972.91999999\t1893127.35857143\t2151729.80857143\t1146142.15333334\t1970881.65571426\t1402985.23499999\t2812863.02500003\t1880269.89428571\t2527579.56705883\t1461081.88615383\t1791326.51\t1478730.04615386\t1823267.80461538\t2234492.50153846\t2529937.4047059\t3148326.66461539\t1868762.54266667\t1421995.43307692\t1517500.28\t1805290.20999998\t1642292.79999997\t1683944.028\t2379338.33999998\t2487090.41230771\n+M261T332\t1930046.88\t1414066.78508333\t3490063.85953848\t1047626.07700001\t4522749.77961537\t1593664.43775\t5263343.87100001\t2824216.94399999\t1630093.88518182\t4023852.75425\t1510231.9848\t1225828.25025001\t2182490.55266666\t1439822.48483333\t8412854.24699993\t3388651.75384613\t991437.89583333\t3719176.7\t1188956.48025\t2188517.84249999\t1310827.23100001\t1540731.44238462\t1604828.60533333\t2495122.53646153\n+M263T323\t1171838.28333332\t1092287.00399999\t1784798.51050001\t230456.097454544\t3162077.20061541\t901831.286555555\t2870354.68692308\t3141344.14099999\t795229.416727274\t6039038.19407691\t3336169.10769232\t134580.004545455\t1114891.535\t919091.57784615\t2457832.58563637\t5292105.64400001\t263116.985833333\t6938595.35746151\t4367916.80500001\t3698151.2689231\t268597.518\t276733.936000001\t1068827.01066667\t9589520.52215389\n' |
b |
diff -r 000000000000 -r 6ccbe18131a6 test-data/input_sampleMetadata.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input_sampleMetadata.tsv Tue Aug 08 15:30:38 2017 -0400 |
b |
@@ -0,0 +1,25 @@ +sampleMetadata class polarity sampleType injectionOrder batch tissue hotelling_pval missing_pval decile_pval PCA_XSCOR.p1 PCA_XSCOR.p2 class_PLSDA_XSCOR.p1 class_PLSDA_XSCOR.p2 class_PLSDA_predictions +Y11_1_RA5_01_213 y1 positive sample 213 1 2 0.0955561581467602 1 0.0306775551319138 -2.26882060894901 1.94958116765736 -3.05527623038242 2.32594165405491 y1 +Y2_1_RB1_01_218 y2 positive sample 218 1 1 0.090775969547078 1 0.0334308308237932 -5.43790231069006 3.28509884002914 -5.09396184849057 3.13632363820691 y2 +Y4_1_RB3_01_220 y4 positive sample 220 1 1 0.0922380872343134 1 0.0343065627030201 -5.96519534532645 2.76065569212045 -5.61271725241037 2.60836215684335 y4 +Y12_1_RB4_01_221 y2 positive sample 221 1 2 0.0731025791938841 1 0.0335814402688999 -3.90074024447077 2.32567583717618 -4.06427508572245 2.47867307479093 y2 +Y1_1_RC1_01_228 y1 positive sample 228 1 1 0.948646526283138 1 0.0150153992414774 -5.79172889541087 3.18442356006801 -5.71894211121787 3.14075608795096 y1 +Y14_1_RC6_01_234 y4 positive sample 234 1 2 0.961424772615561 1 0.0889762542416943 -3.50543091786719 1.90332246047248 -3.60257250798236 1.95341548985651 y4 +Y1_2_RD1_01_239 y1 positive sample 239 1 1 0.391486624975171 1 0.419632697534464 -10.0290510611276 3.46916578350898 -9.30301404180818 3.22425994348737 y1 +Y14_2_RD2_01_240 y4 positive sample 240 1 2 0.334478686842038 1 0.471265236704114 -0.955577667004931 1.62643379077323 -1.17127923245195 1.73770179646644 y4 +Y4_2_RD3_01_241 y4 positive sample 241 1 1 0.243979127543208 1 0.115904447650611 -6.29053214398527 3.48768497975009 -5.86080882473825 3.19393908077519 y4 +Y11_2_RD7_01_246 y1 positive sample 246 1 2 0.639085015503201 1 0.496291025606805 -0.737703114796199 1.89206669195622 -1.33734677265265 2.19119862732508 y1 +Y2_2_RE4_01_253 y2 positive sample 253 1 1 0.681339414372971 1 0.713644697663014 -4.43122798643441 2.59136016132011 -4.21959323228049 2.4942150403311 y2 +Y12_2_RE6_01_255 y2 positive sample 255 1 2 0.581861317126264 1 0.446040669279691 -3.42333388673909 2.19844489197916 -3.40077262161465 2.21882800112511 y2 +Y14_3_GA2_01_260 y4 positive sample 260 1 2 0.792323381194401 1 0.812319191661791 -2.61403564986014 1.9025507158402 -2.90132077481451 2.05744453719897 y4 +Y2_3_GA4_01_264 y2 positive sample 264 1 1 0.278347988263537 1 0.668405316795454 -6.19672954480257 4.11371745717593 -5.72942704887795 3.94423530839635 y2 +Y1_3_GA6_01_266 y1 positive sample 266 1 1 0.303133108610158 1 0.521065147801524 -5.91283168480956 2.57721868167528 -5.8128281040434 2.56623732055011 y1 +Y4_3_GA7_01_267 y4 positive sample 267 1 1 0.204620420161485 1 0.53551459376182 -5.81862869528986 3.42191037440281 -5.37442797934098 3.22465790741629 y4 +Y12_3_GB1_01_270 y2 positive sample 270 1 2 0.7747649633382 1 0.698966513767803 -3.0854085700971 1.67899209632345 -3.14572560562774 1.75648836353689 y2 +Y11_3_GC3_01_283 y1 positive sample 283 1 2 0.918803505111851 1 0.396638581468035 -1.16946485386388 1.66851916844539 -1.7032576378218 1.94860768310552 y1 +Y14_4_GC7_01_287 y4 positive sample 287 1 2 0.577273975934045 1 0.14919566995266 -1.24666389579168 2.84891525888206 -1.58652468539139 2.90364189714377 y4 +Y11_4_GD8_01_299 y1 positive sample 299 1 2 0.31302025978985 1 0.426766355892969 -2.15936901108787 1.66989335813642 -2.66042240568943 1.95509478589954 y1 +Y2_4_GE1_01_300 y2 positive sample 300 1 1 0.0338929937565918 1 0.419149865807458 -5.76080121045973 3.47845733452933 -5.39212267305567 3.34452446158071 y2 +Y12_4_GE2_01_304 y2 positive sample 304 1 2 0.130905883509031 1 0.59698349195307 -4.15585900988913 3.22702525356271 -4.28382960930699 3.34874792551519 y2 +Y1_4_GE3_01_305 y1 positive sample 305 1 1 0.129479197101219 1 0.618449187175638 -2.9921645121991 3.14523577730793 -2.85639638001866 3.08197032598192 y1 +Y4_4_GE7_01_309 y4 positive sample 309 1 1 0.758837157578886 1 0.339564008612217 -5.95949084754462 3.20317151028856 -5.46675286145534 3.04585537553442 y4 |
b |
diff -r 000000000000 -r 6ccbe18131a6 test-data/input_variableMetadata.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input_variableMetadata.tsv Tue Aug 08 15:30:38 2017 -0400 |
[ |
b'@@ -0,0 +1,50 @@\n+variableMetadata\tnamecustom\tmz\tmzmin\tmzmax\trt\trtmin\trtmax\tnpeaks\tmy_blank\tpool\ty0\ty1\ty2\ty3\ty4\ty5\ty6\ty7\ty8\ty9\tisotopes\tadduct\tpcgroup\tCV.samp\tCV.pool\tCV.ind\tblank_mean\tblank_sd\tblank_CV\tsample_mean\tsample_sd\tsample_CV\tblankMean_over_sampleMean\tpool_mean\tpool_sd\tpool_CV\tpoolCV_over_sampleCV\tclass_kruskal_fdr\tclass_kruskal_sig\tclass_kruskal_y1.y0_dif\tclass_kruskal_y2.y0_dif\tclass_kruskal_y3.y0_dif\tclass_kruskal_y4.y0_dif\tclass_kruskal_y5.y0_dif\tclass_kruskal_y6.y0_dif\tclass_kruskal_y7.y0_dif\tclass_kruskal_y8.y0_dif\tclass_kruskal_y9.y0_dif\tclass_kruskal_y2.y1_dif\tclass_kruskal_y3.y1_dif\tclass_kruskal_y4.y1_dif\tclass_kruskal_y5.y1_dif\tclass_kruskal_y6.y1_dif\tclass_kruskal_y7.y1_dif\tclass_kruskal_y8.y1_dif\tclass_kruskal_y9.y1_dif\tclass_kruskal_y3.y2_dif\tclass_kruskal_y4.y2_dif\tclass_kruskal_y5.y2_dif\tclass_kruskal_y6.y2_dif\tclass_kruskal_y7.y2_dif\tclass_kruskal_y8.y2_dif\tclass_kruskal_y9.y2_dif\tclass_kruskal_y4.y3_dif\tclass_kruskal_y5.y3_dif\tclass_kruskal_y6.y3_dif\tclass_kruskal_y7.y3_dif\tclass_kruskal_y8.y3_dif\tclass_kruskal_y9.y3_dif\tclass_kruskal_y5.y4_dif\tclass_kruskal_y6.y4_dif\tclass_kruskal_y7.y4_dif\tclass_kruskal_y8.y4_dif\tclass_kruskal_y9.y4_dif\tclass_kruskal_y6.y5_dif\tclass_kruskal_y7.y5_dif\tclass_kruskal_y8.y5_dif\tclass_kruskal_y9.y5_dif\tclass_kruskal_y7.y6_dif\tclass_kruskal_y8.y6_dif\tclass_kruskal_y9.y6_dif\tclass_kruskal_y8.y7_dif\tclass_kruskal_y9.y7_dif\tclass_kruskal_y9.y8_dif\tclass_kruskal_y1.y0_fdr\tclass_kruskal_y2.y0_fdr\tclass_kruskal_y3.y0_fdr\tclass_kruskal_y4.y0_fdr\tclass_kruskal_y5.y0_fdr\tclass_kruskal_y6.y0_fdr\tclass_kruskal_y7.y0_fdr\tclass_kruskal_y8.y0_fdr\tclass_kruskal_y9.y0_fdr\tclass_kruskal_y2.y1_fdr\tclass_kruskal_y3.y1_fdr\tclass_kruskal_y4.y1_fdr\tclass_kruskal_y5.y1_fdr\tclass_kruskal_y6.y1_fdr\tclass_kruskal_y7.y1_fdr\tclass_kruskal_y8.y1_fdr\tclass_kruskal_y9.y1_fdr\tclass_kruskal_y3.y2_fdr\tclass_kruskal_y4.y2_fdr\tclass_kruskal_y5.y2_fdr\tclass_kruskal_y6.y2_fdr\tclass_kruskal_y7.y2_fdr\tclass_kruskal_y8.y2_fdr\tclass_kruskal_y9.y2_fdr\tclass_kruskal_y4.y3_fdr\tclass_kruskal_y5.y3_fdr\tclass_kruskal_y6.y3_fdr\tclass_kruskal_y7.y3_fdr\tclass_kruskal_y8.y3_fdr\tclass_kruskal_y9.y3_fdr\tclass_kruskal_y5.y4_fdr\tclass_kruskal_y6.y4_fdr\tclass_kruskal_y7.y4_fdr\tclass_kruskal_y8.y4_fdr\tclass_kruskal_y9.y4_fdr\tclass_kruskal_y6.y5_fdr\tclass_kruskal_y7.y5_fdr\tclass_kruskal_y8.y5_fdr\tclass_kruskal_y9.y5_fdr\tclass_kruskal_y7.y6_fdr\tclass_kruskal_y8.y6_fdr\tclass_kruskal_y9.y6_fdr\tclass_kruskal_y8.y7_fdr\tclass_kruskal_y9.y7_fdr\tclass_kruskal_y9.y8_fdr\tclass_kruskal_y1.y0_sig\tclass_kruskal_y2.y0_sig\tclass_kruskal_y3.y0_sig\tclass_kruskal_y4.y0_sig\tclass_kruskal_y5.y0_sig\tclass_kruskal_y6.y0_sig\tclass_kruskal_y7.y0_sig\tclass_kruskal_y8.y0_sig\tclass_kruskal_y9.y0_sig\tclass_kruskal_y2.y1_sig\tclass_kruskal_y3.y1_sig\tclass_kruskal_y4.y1_sig\tclass_kruskal_y5.y1_sig\tclass_kruskal_y6.y1_sig\tclass_kruskal_y7.y1_sig\tclass_kruskal_y8.y1_sig\tclass_kruskal_y9.y1_sig\tclass_kruskal_y3.y2_sig\tclass_kruskal_y4.y2_sig\tclass_kruskal_y5.y2_sig\tclass_kruskal_y6.y2_sig\tclass_kruskal_y7.y2_sig\tclass_kruskal_y8.y2_sig\tclass_kruskal_y9.y2_sig\tclass_kruskal_y4.y3_sig\tclass_kruskal_y5.y3_sig\tclass_kruskal_y6.y3_sig\tclass_kruskal_y7.y3_sig\tclass_kruskal_y8.y3_sig\tclass_kruskal_y9.y3_sig\tclass_kruskal_y5.y4_sig\tclass_kruskal_y6.y4_sig\tclass_kruskal_y7.y4_sig\tclass_kruskal_y8.y4_sig\tclass_kruskal_y9.y4_sig\tclass_kruskal_y6.y5_sig\tclass_kruskal_y7.y5_sig\tclass_kruskal_y8.y5_sig\tclass_kruskal_y9.y5_sig\tclass_kruskal_y7.y6_sig\tclass_kruskal_y8.y6_sig\tclass_kruskal_y9.y6_sig\tclass_kruskal_y8.y7_sig\tclass_kruskal_y9.y7_sig\tclass_kruskal_y9.y8_sig\tPCA_XLOAD.p1\tPCA_XLOAD.p2\tclass_PLSDA_XLOAD.p1\tclass_PLSDA_XLOAD.p2\tclass_PLSDA_VIP\tclass_PLSDA_y0.COEFF\tclass_PLSDA_y1.COEFF\tclass_PLSDA_y2.COEFF\tclass_PLSDA_y3.COEFF\tclass_PLSDA_y4.COEFF\tclass_PLSDA_y5.COEFF\tclass_PLSDA_y6.COEFF\tclass_PLSDA_y7.COEFF\tclass_PLSDA_y8.COEFF\tclass_PLSDA_y9.COEFF\n+M118T229\tM118T229.46\t118\t118\t118\t229.455291748047\t228.736724853516\t230.594131469727\t55\t0\t8\t7\t5\t7\t6\t5\t2\t3\t7\t3\t2\t\t\t118\t0.5'..b'0648\t-0.039765504390462\t1.72565015970653\t-0.00477543679721769\t-0.0281135207826401\t-0.0119711230496188\t-0.0122760173542411\t0.0145888092804331\t-0.0294872643686666\t-0.0529647825012226\t-0.00758914454936816\t0.00100245807777303\t0.180847689248479\n+M261T332\tM261T331.57\t261\t261\t261\t331.569473266602\t330.826965332031\t332.320007324219\t46\t0\t9\t0\t6\t5\t5\t5\t5\t7\t0\t0\t4\t[7][M]+\t\t57\t1.04326999694138\t0.221252808937921\t1\t1146683.03090926\t1186248.76846245\t1.03450451126133\t1145403.0063442\t1327072.12823314\t1.1586071634898\t1.0011175320459\t919208.931999451\t1174249.82975397\t1.27745694028425\t1.10257987395527\t4.40385233462652e-09\t1\t3708696.83879167\t1199629.11970834\t1182375.57066666\t2175988.65623076\t1469656.86694872\t622503.69551282\t292955.338295453\t-112722.763935607\t558652.956707152\t-2509067.71908333\t-2526321.26812501\t-1532708.18256091\t-2239039.97184295\t-3086193.14327885\t-3415741.50049621\t-3821419.60272727\t-3150043.88208451\t-17253.5490416749\t976359.536522425\t270027.747240385\t-577125.424195516\t-906673.781412883\t-1312351.88364394\t-640976.163001184\t993613.0855641\t287281.29628206\t-559871.875153841\t-889420.232371208\t-1295098.33460227\t-623722.613959509\t-706331.78928204\t-1553484.96071794\t-1883033.31793531\t-2288711.42016637\t-1617335.69952361\t-847153.171435901\t-1176701.52865327\t-1582379.63088433\t-911003.910241569\t-329548.357217367\t-735226.459448427\t-63850.7388056681\t-405678.10223106\t265697.618411698\t671375.720642759\t0.00139002475692429\t0.218861820635036\t0.193929048539156\t0.00482795212523277\t0.0509098162949605\t1\t1\t1\t1\t1\t1\t1\t1\t1\t0.0191097224190392\t2.03460175168813e-05\t1\t1\t1\t1\t1\t0.887240474852222\t0.0308625758419715\t1\t1\t1\t1\t0.942236742973925\t0.0420005984433424\t1\t1\t1\t0.0713925658780487\t0.000728869250554339\t1\t1\t0.350219990140754\t0.0109198413823081\t1\t1\t0.951550886136829\t1\t1\t1\t1\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t-0.0955479004664773\t0.0474159124863094\t-0.0956108691815066\t0.0479267590250545\t1.1753217493328\t-0.0082863984982916\t0.0640358876530409\t-0.0618603905583846\t-0.00919670186957342\t0.0539591279777716\t0.045494114757647\t-0.0516806945814743\t-0.0130716890731868\t-0.00514638420130288\t-0.0195804521557219\n+M263T323\tM263T323.29\t263\t263\t263\t323.286376953125\t317.427062988281\t324.498107910156\t95\t0\t10\t8\t7\t3\t1\t8\t7\t5\t8\t2\t3\t[8][M]+\t\t25\t1.24549325872202\t0.169688974744568\t1\t4856126.41100817\t6417229.02375924\t1.32147075274076\t3269419.74893288\t4306448.22482372\t1.31719037490653\t1.48531751317437\t2850327.25225239\t3833675.62917697\t1.34499490405795\t1.02110896775524\t1.30827348411226e-07\t1\t-10747041.5308823\t-13490591.7476515\t-13452461.0366136\t-11300186.1489015\t-11332369.7322848\t-12211498.8419752\t-4048990.23979574\t-13581671.4958788\t-12655268.6170606\t-2743550.21676924\t-2705419.50573137\t-553144.618019246\t-585328.201402571\t-1464457.31109296\t6698051.29108652\t-2834629.96499652\t-1908227.08617834\t38130.711037879\t2190405.59875\t2158222.01536667\t1279092.90567629\t9441601.50785576\t-91079.748227274\t835323.13059091\t2152274.88771212\t2120091.3043288\t1240962.19463841\t9403470.79681788\t-129210.459265153\t797192.419553031\t-32183.583383325\t-911312.69307371\t7251195.90910576\t-2281485.34697727\t-1355082.46815909\t-879129.109690385\t7283379.49248909\t-2249301.76359395\t-1322898.88477576\t8162508.60217947\t-1370172.65390356\t-443769.77508538\t-9532681.25608304\t-8606278.37726485\t926402.878818184\t1\t0.00765410902822879\t0.00267847637147507\t0.838782008701031\t0.660394577658549\t0.19497635500955\t1\t0.00012469466674357\t0.627859985085531\t1\t1\t1\t1\t1\t1\t0.182190863683119\t1\t1\t1\t1\t1\t0.0192386196102813\t1\t1\t1\t1\t1\t0.00908419374209192\t1\t1\t1\t1\t1\t0.367815979832692\t1\t1\t1\t0.632284650795659\t1\t0.753705194106446\t1\t1\t0.000950448517736164\t1\t1\t0\t1\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0.10766040080656\t0.100365831043394\t0.108802051106214\t0.0987892780891889\t0.861169551654294\t0.0236849274997751\t0.00167571590625499\t-0.0522278718910634\t0.00931335493080596\t0.0263424077901622\t0.0245391324631854\t-0.0190290011300382\t0.0116801484011658\t-0.0161456190570957\t-0.0135144337547376\n' |
b |
diff -r 000000000000 -r 6ccbe18131a6 w4m_general_purpose_routines.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/w4m_general_purpose_routines.R Tue Aug 08 15:30:38 2017 -0400 |
[ |
b'@@ -0,0 +1,283 @@\n+# prepare.data.matrix - Prepare x.datamatrix for multivariate statistical analaysis (MVA)\n+# - Motivation:\n+# - Selection:\n+# - You may want to exclude several samples from your analysis:\n+# - If so, set the argument \'exclude.samples\' to a vector of sample names\n+# - You may want to exclude several features or features from your analysis:\n+# - If so, set the argument \'exclude.features\' to a vector of feature names\n+# - Renaming samples:\n+# - You may want to rename several samples from your analysis:\n+# - If so, set the argument \'sample.rename.function\' to a function accepting a vector \n+# of sample names and producing a vector of strings of equivalent length\n+# - MVA is confounded by missing values.\n+# - By default, this function imputes missing values as zero.\n+# - For a different imputation, set the \'data.imputation\' argument to a function\n+# accepting a single matrix argument and returning a matrix of the same\n+# dimensions as the argument.\n+# - Transformation\n+# - It may be desirable to transform the intensity data to reduce the range.\n+# - By default, this function performs an eigth-root transformation:\n+# - Any root-tranformation has the advantage of never being negative.\n+# - Calculation of the eight-root is four times faster in my hands than log10.\n+# - However, it has the disadvantage that calculation of fold-differences \n+# is not additive as with log-transformation.\n+# - Rather, you must divide the values and raise to the eighth power.\n+# - For a different transformation, set the \'data.transformation\' argument\n+# to a function accepting a single matrix argument.\n+# - The function should be written to return a matrix of the same dimensions\n+# as the argument.\n+# arguments:\n+# - x.matrix - matrix of intensities (or data.frame of sample metadata)\n+# - one row per sample\n+# - one column per feature or metadata attribute\n+# - exclude.samples - vector of labels of matrix rows (samples) to omit from analysis\n+# - exclude.features - vector of labels of matrix columnss (features) to omit from analysis\n+# - sample.rename.function - function to be used to rename rows if necessary, or NULL\n+# - e.g., sample.rename.function = function(x) {\n+# sub("(.*)_.*","\\\\1", row.names(x))\n+# }\n+# - data.imputation - function applied to matrix to impute missing values\n+# - e.g., data.imputation = function(m) {\n+# m[is.na(m)] <- min(m, na.rm = TRUE) / 100\n+# return (m)\n+# }\n+# - data.transformation - function applied to matrix cells\n+# - e.g., data.transformation = function(x) { return( log10(x) ) }\n+# or, data.transformation = log10\n+# result value:\n+# transformed, imputed x.datamatrix with renamed rows and with neither excluded values nor features\n+#\n+################################\n+##\n+## Notes regarding the effectiveness and performance of the data transformation method.\n+##\n+## The two transformations that I tried (log10 and 8th root) required different imputation methods.\n+##\n+## For the LCMS resin data set that I was working with, separation in MVA was nearly equivalent for:\n+## data.imputation <- function(x.matrix) {\n+## x.matrix[is.na(x.matrix)] <- 0\n+## return (x.matrix)\n+## }\n+## data.transformation <- function(x) {\n+## sqrt( sqrt( sqrt(x) ) )\n+## }\n+## and\n+## data.imputation <- function(x.matrix) {\n+## x.matrix[is.na(x.matrix)] <- min(x.matrix, na.rm = TRUE) / 100\n+## return (x.matrix)\n+## }\n+## data.transformation <- function(x) {\n+## log10(x)\n+## }\n+##\n+## Note further that triple application of the square root:\n+## - may be four times faster than log10:\n+## - may be three times faster than log2:\n+##\n+## system.time( junk <- sqrt( sqrt( sqrt(1:100000'..b' if (nrow(x) == 0) {\n+ print(str(x))\n+ stop("matrix has no rows")\n+ }\n+ if (ncol(x) == 0) {\n+ print(str(x))\n+ stop("matrix has no columns")\n+ }\n+ if ( is.numeric(x) ) {\n+ # exclude any rows with zero variance\n+ row.vars <- MatVar(x, dim = 1)\n+ nonzero.row.vars <- row.vars > 0\n+ nonzero.rows <- row.vars[nonzero.row.vars]\n+ if ( length(rownames(x)) != length(rownames(nonzero.rows)) ) {\n+ row.names <- attr(nonzero.rows,"names")\n+ x <- x[ row.names, , drop = FALSE ]\n+ }\n+ \n+ # exclude any columns with zero variance\n+ column.vars <- MatVar(x, dim = 2)\n+ nonzero.column.vars <- column.vars > 0\n+ nonzero.columns <- column.vars[nonzero.column.vars]\n+ if ( length(colnames(x)) != length(colnames(nonzero.columns)) ) {\n+ column.names <- attr(nonzero.columns,"names")\n+ x <- x[ , column.names, drop = FALSE ]\n+ }\n+ }\n+ return (x)\n+ }\n+\n+ if (is.null(x.matrix)) {\n+ stop("FATAL ERROR - prepare.data.matrix was called with null x.matrix")\n+ }\n+\n+ en$xpre <- x <- x.matrix\n+\n+ # exclude any samples as indicated\n+ if ( !is.null(exclude.features) ) {\n+ my.colnames <- colnames(x)\n+ my.col.diff <- setdiff(my.colnames, exclude.features)\n+ x <- x[ , my.col.diff , drop = FALSE ]\n+ }\n+\n+ # exclude any features as indicated\n+ if ( !is.null(exclude.samples) ) {\n+ my.rownames <- rownames(x)\n+ my.row.diff <- setdiff(my.rownames, exclude.samples)\n+ x <- x[ my.row.diff, , drop = FALSE ]\n+ }\n+\n+ # rename rows if desired\n+ if ( !is.null(sample.rename.function) ) {\n+ renamed <- sample.rename.function(x)\n+ rownames(x) <- renamed\n+ }\n+\n+ # save redacted x.datamatrix to environment\n+ en$redacted.data.matrix <- x\n+\n+ # impute values missing from the x.datamatrix\n+ if ( !is.null(data.imputation) ) {\n+ x <- data.imputation(x)\n+ }\n+\n+ # perform transformation if desired\n+ if ( !is.null(data.transformation) ) {\n+ x <- data.transformation(x)\n+ } else {\n+ x <- x\n+ }\n+\n+ # purge rows and columns that have zero variance\n+ if ( is.numeric(x) ) {\n+ x <- nonzero.var(x)\n+ }\n+\n+ # save imputed, transformed x.datamatrix to environment\n+ en$imputed.transformed.data.matrix <- x\n+\n+ return(x)\n+}\n+\n+\n+##-----------------------------------------------\n+## helper functions for error detection/reporting\n+##-----------------------------------------------\n+\n+# log-printing to stderr\n+log_print <- function(x, ...) { \n+ cat(\n+ format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")\n+ , " "\n+ , c(x, ...)\n+ , "\\n"\n+ , sep=""\n+ , file=stderr()\n+ )\n+}\n+\n+# tryCatchFunc produces a list\n+# On success of expr(), tryCatchFunc produces\n+# list(success TRUE, value = expr(), msg = "")\n+# On failure of expr(), tryCatchFunc produces\n+# list(success = FALSE, value = NA, msg = "the error message")\n+tryCatchFunc <- function(expr) {\n+ # format error for logging\n+ format_error <- function(e) {\n+ paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ")\n+ }\n+ my_expr <- expr\n+ retval <- NULL\n+ tryCatch(\n+ expr = {\n+ retval <- ( list( success = TRUE, value = my_expr(), msg = "" ) )\n+ }\n+ , error = function(e) {\n+ retval <<- list( success = FALSE, value = NA, msg = format_error(e) )\n+ }\n+ )\n+ return (retval)\n+}\n+\n+# tryCatchProc produces a list\n+# On success of expr(), tryCatchProc produces\n+# list(success TRUE, msg = "")\n+# On failure of expr(), tryCatchProc produces\n+# list(success = FALSE, msg = "the error message")\n+tryCatchProc <- function(expr) {\n+ # format error for logging\n+ format_error <- function(e) {\n+ paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ")\n+ }\n+ retval <- NULL\n+ tryCatch(\n+ expr = {\n+ expr()\n+ retval <- ( list( success = TRUE, msg = "" ) )\n+ }\n+ , error = function(e) {\n+ retval <<- list( success = FALSE, msg = format_error(e) )\n+ }\n+ )\n+ return (retval)\n+}\n+\n' |
b |
diff -r 000000000000 -r 6ccbe18131a6 w4mkmeans.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/w4mkmeans.xml Tue Aug 08 15:30:38 2017 -0400 |
[ |
b'@@ -0,0 +1,319 @@\n+\xef\xbb\xbf<tool id="w4mkmeans" name="Kmeans_for_W4M" version="0.98.1">\n+ <description>Calculate K-means for dataMatrix features or samples</description>\n+\n+ <requirements>\n+ <requirement type="package" version="3.3.2">r-base</requirement>\n+ <requirement type="package" version="1.1_4">r-batch</requirement>\n+ </requirements>\n+\n+ <stdio>\n+ <exit_code range="1:" level="fatal" />\n+ </stdio>\n+\n+\n+ <command detect_errors="aggressive"><![CDATA[\n+ Rscript $__tool_directory__/w4mkmeans_wrapper.R\n+ tool_directory $__tool_directory__\n+ data_matrix_path \'$dataMatrix_in\'\n+ variable_metadata_path \'$variableMetadata_in\'\n+ sample_metadata_path \'$sampleMetadata_in\'\n+ ksamples \'$ksamples\'\n+ kfeatures \'$kfeatures\'\n+ iter_max \'$iter_max\'\n+ nstart \'$nstart\'\n+ algorithm \'$algorithm\'\n+ scores_out \'$scores_out\'\n+ sampleMetadata_out \'$sampleMetadata_out\'\n+ variableMetadata_out \'$variableMetadata_out\'\n+ slots "\\${GALAXY_SLOTS:-1}"\n+ ; echo exit code $?\n+ ]]></command>\n+\n+ <inputs>\n+ <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: \'.\', missing: NA, mode: numerical, separator: tab" />\n+ <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata columns, separator: tab" />\n+ <param name="variableMetadata_in" label="Variable metadata file" type="data" format="tabular" help="variable x metadata columns, separator: tab" />\n+ <param name="ksamples" label="K value(s) for samples" type="text" value = "0" help="[ksamples] Single K or comma-separated Ks for samples, or 0 for none." />\n+ <param name="kfeatures" label="K value(s) for features" type="text" value = "0" help="[kfeatures] Single K or comma-separated Ks for features (variables), or 0 for none." />\n+ <param name="iter_max" label="Max number of iterations" type="text" value = "10" help="[iter_max] The maximum number of iterations allowed; default 10." />\n+ <param name="nstart" label="Number of random sets" type="text" value = "1" help="[nstart] How many random sets should be chosen; default 1." />\n+ <param name="algorithm" label="Algorithm for clustering" type="select" value = "Hartigan-Wong" help="[algorithm] K-means clustering algorithm, default \'Hartigan-Wong\'; alternatives \'Lloyd\', \'MacQueen\'; \'Forgy\' is a synonym for \'Lloyd\', see references for further info.">\n+ <option value="Forgy">Forgy</option>\n+ <option value="Hartigan-Wong" selected="True">Hartigan-Wong</option>\n+ <option value="Lloyd">Lloyd</option>\n+ <option value="MacQueen">MacQueen</option>\n+ </param>\n+ </inputs>\n+\n+ <outputs>\n+ <data name="sampleMetadata_out" label="${tool.name}_${sampleMetadata_in.name}" format="tabular" ></data>\n+ <data name="variableMetadata_out" label="${tool.name}_${variableMetadata_in.name}" format="tabular" ></data>\n+ <data name="scores_out" label="${tool.name}_${dataMatrix_in.name}.kmeans" format="tabular" ></data>\n+ </outputs>\n+\n+ <tests>\n+ <test>\n+ <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>\n+ <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>\n+ <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>\n+ <param name="ksamples" value="3,4"/>\n+ <param name="kfeatures" value="5,6,7"/>\n+ <param name="iter_max" value="10"/>\n+ <param name="nstart" value="1"/>\n+ <param name="algorithm" value="Hartigan-Wong"/>\n+ <output name="scores_out">\n+ <assert_contents>\n+ <has_text text="proportion" />\n+ <has_text text="0.87482" />\n+ <has_text text="0.89248" />\n+ <has_text text="0.95355" />\n+ <has_text text="0.95673" />\n+ <has_text text="0.95963" />\n+ </assert_contents>\n+ </output>\n+ </test>\n+ </tests>\n+\n+ <help>\n+ <![CDATA[\n+\n+**Author** - Arthur Eschenlaue'..b'+-------------------+-------------------------------------------------------------------------------------------------------------------+\n+| Feature metadata | https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/input_variableMetadata.tsv |\n++-------------------+-------------------------------------------------------------------------------------------------------------------+\n+\n+**Other input parameters**\n+\n++-----------------+---------------+\n+| Input Parameter | Value |\n++=================+===============+\n+| ksamples | 3,4 |\n++-----------------+---------------+\n+| kfeatures | 5,6,7 |\n++-----------------+---------------+\n+| iter_max | 10 |\n++-----------------+---------------+\n+| nstart | 1 |\n++-----------------+---------------+\n+| algorithm | Hartigan-Wong |\n++-----------------+---------------+\n+\n+----\n+NEWS\n+----\n+\n+August 2017, Version 0.98.1 - First release\n+\n+---------\n+Citations\n+---------\n+\n+ ]]>\n+ </help>\n+ <citations>\n+ <citation type="bibtex"><![CDATA[\n+@incollection{RCoreTeam2017,\n+ title = {stats::kmeans - K-Means Clustering},\n+ booktitle = {R: A Language and Environment for Statistical Computing},\n+ author = {{R Core Team}},\n+ publisher = {R Foundation for Statistical Computing},\n+ address = {Vienna, Austria},\n+ year = {2017},\n+ url = {https://stat.ethz.ch/R-manual/R-devel/library/stats/html/kmeans.html},\n+}\n+ ]]></citation>\n+ <!-- Forgy algorithm -->\n+ <citation type="bibtex"><![CDATA[\n+@article{forgy65,\n+ added-at = {2006-03-23T12:22:43.000+0100},\n+ author = {Forgy, E.},\n+ biburl = {https://www.bibsonomy.org/bibtex/21e31409932ce91df646c4731350e1207/hotho},\n+ interhash = {c86383cba8cfe00d5e6ef200016aca3f},\n+ intrahash = {1e31409932ce91df646c4731350e1207},\n+ journal = {Biometrics},\n+ keywords = {clustering kmeans},\n+ number = 3,\n+ pages = {768-769},\n+ timestamp = {2006-03-23T12:22:43.000+0100},\n+ title = {Cluster Analysis of Multivariate Data: Efficiency versus Interpretability of Classification},\n+ volume = 21,\n+ year = 1965\n+}\n+ ]]></citation>\n+ <!-- W4M 3.0 - Guitton et al. 2017-->\n+ <citation type="doi">10.1016/j.biocel.2017.07.002</citation>\n+ <!-- W4M 2.5 - Giacomini et al. 2014 -->\n+ <citation type="doi">10.1093/bioinformatics/btu813</citation>\n+ <!-- Hartigan and Wong algorithm -->\n+ <citation type="bibtex"><![CDATA[\n+@article{Hartigan79,\n+ added-at = {2007-02-27T16:22:09.000+0100},\n+ author = {Hartigan, J. and Wong, M.},\n+ biburl = {https://www.bibsonomy.org/bibtex/23d8bfc440c5725783876929c022f67ce/pierpaolo.pk81},\n+ description = {WSD},\n+ interhash = {10d6d33920d9af578a4d0a556dc1477d},\n+ intrahash = {3d8bfc440c5725783876929c022f67ce},\n+ journal = {Applied Statistics},\n+ keywords = {imported},\n+ pages = {100-108},\n+ timestamp = {2007-02-27T16:22:11.000+0100},\n+ title = {Algorithm AS136: A k-means clustering algorithm},\n+ volume = 28,\n+ year = 1979\n+}\n+ ]]></citation>\n+ <!-- Lloyd algorithm -->\n+ <citation type="doi">10.1109/TIT.1982.1056489</citation>\n+ <!-- MacQueen algorithm -->\n+ <citation type="bibtex"><![CDATA[\n+@inproceedings{MacQueen1967,\n+ added-at = {2011-01-11T13:35:01.000+0100},\n+ author = {MacQueen, J. B.},\n+ biburl = {https://www.bibsonomy.org/bibtex/25dcdb8cd9fba78e0e791af619d61d66d/enitsirhc},\n+ booktitle = {Proc. of the fifth Berkeley Symposium on Mathematical Statistics and Probability},\n+ editor = {Cam, L. M. Le and Neyman, J.},\n+ interhash = {8d7d4dfe7d3a06b8c9c3c2bb7aa91e28},\n+ intrahash = {5dcdb8cd9fba78e0e791af619d61d66d},\n+ keywords = {kmeans clustering},\n+ pages = {281-297},\n+ publisher = {University of California Press},\n+ timestamp = {2011-01-11T13:35:01.000+0100},\n+ title = {Some Methods for Classification and Analysis of MultiVariate Observations},\n+ volume = 1,\n+ year = 1967\n+}\n+ ]]></citation>\n+ </citations>\n+ <!--\n+ vim:et:sw=2:ts=2:\n+--> </tool>\n' |
b |
diff -r 000000000000 -r 6ccbe18131a6 w4mkmeans_routines.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/w4mkmeans_routines.R Tue Aug 08 15:30:38 2017 -0400 |
[ |
b'@@ -0,0 +1,216 @@\n+##------------------------------------------------------------------------------------------------------\n+## these are the batch-independent and file-structure-independent routines to support the w4mkmeans tool\n+##------------------------------------------------------------------------------------------------------\n+\n+library(parallel)\n+\n+w4kmeans_usage <- function() {\n+ return ( \n+ c(\n+ "w4mkmeans: bad input.",\n+ "# contract:",\n+ " required - caller will provide an environment comprising:",\n+ " log_print - a logging function with the signature function(x, ...) expecting strings as x and ...",\n+ " variableMetadata - the corresponding W4M data.frame having feature metadata",\n+ " sampleMetdata - the corresponding W4M data.frame having sample metadata",\n+ " dataMatrix - the corresponding W4M matrix",\n+ " slots - the number of parallel slots for calculating kmeans",\n+ " optional - environment may comprise:",\n+ " kfeatures - an array of integers, the k\'s to apply for clustering by feature (default, empty array)",\n+ " ksamples - an array of integers, the k\'s to apply for clustering by sample (default, empty array)",\n+ " iter.max - the maximum number of iterations when calculating a cluster (default = 10)",\n+ " nstart - how many random sets of centers should be chosen (default = 1)",\n+ " algorithm - string from c(\'Hartigan-Wong\', \'Lloyd\', \'Forgy\', \'MacQueen\') (default = Hartigan-Wong)",\n+ " ",\n+ " this routine will return a list comprising:",\n+ " variableMetadata - the input variableMetadata data.frame with updates, if any",\n+ " sampleMetadata - the input sampleMetadata data.frame with updates, if any",\n+ " scores - an array of strings, each representing a line of a tsv having the following header:",\n+ " clusterOn TAB k TAB totalSS TAB betweenSS TAB proportion"\n+ )\n+ )\n+}\n+\n+w4mkmeans <- function(env) {\n+ # abort if \'env\' is null or is not an environment\n+ if ( is.null(env) || ! is.environment(env) ) {\n+ lapply(w4kmeans_usage(),print)\n+ } \n+ # supply default arguments\n+ if ( ! exists("iter.max" , env) ) env$iter.max <- 10\n+ if ( ! exists("nstart" , env) ) env$nstart <- 1\n+ if ( ! exists("algorithm", env) ) env$algorithm <- \'Hartigan-Wong\'\n+ if ( ! exists("ksamples" , env) ) env$ksamples <- c()\n+ if ( ! exists("kfeatures", env) ) env$kfeatures <- c()\n+ # check mandatory arguments\n+ expected <- c(\n+ "log_print"\n+ , "variableMetadata"\n+ , "sampleMetadata"\n+ , "dataMatrix"\n+ , "slots"\n+ )\n+ missing_from_env <- setdiff(expected, (ls(env)))\n+ if ( length(missing_from_env) > 0 ) {\n+ print(paste(c(\'expected environment members not found: \', as.character(missing_from_env)), collapse = ", "))\n+ lapply(w4kmeans_usage(),print)\n+ stop("w4mkmeans: contract has been broken")\n+ } \n+ # extract parameters from \'env\'\n+ failure_action <- env$log_print\n+ scores <- c( "clusterOn\\tk\\ttotalSS\\tbetweenSS\\tproportion" )\n+ sampleMetadata <- env$sampleMetadata\n+ featureMetadata <- env$variableMetadata\n+ ksamples <- as.numeric(env$ksamples)\n+ kfeatures <- as.numeric(env$kfeatures)\n+ slots <- env$slots\n+\n+ myLapply <- parLapply\n+ # uncomment the next line to mimic parLapply, but without parallelization (for testing/experimentation)\n+ # myLapply <- function(cl, ...) lapply(...)\n+ cl <- NULL\n+ if ( identical(myLapply, parLapply) ) {\n+ failure_action(sprintf("w4mkmeans: using parallel evaluation with %d slots", slots))\n+ failure_action(names(cl))\n+ cl <- makePSOCKcluster(names = slots)\n+ # from ?makePSOCKcluster: "It is good practice to shut down the workers by calling stopCluster."\n+ clusterExport(\n+ cl = cl\n+ , varlist = c(\n+ "tryCatchFunc"\n+ , "calc_kmeans'..b'st[[i]]\n+ if (result$success) {\n+ sampleMetadata[sprintf("k%d",ksamples[i])] <- result$value$clusters\n+ scores <- c(scores, result$value$scores)\n+ }\n+ }\n+ }\n+\n+ # for each $i in kfeatures, append column \'k$i\' to data frame featureMetadata\n+ kfeatures_length <- length(kfeatures)\n+ if ( kfeatures_length > 0 ) {\n+ feat_result_list <- myLapply( \n+ cl = cl\n+ , kfeatures\n+ , calc_kmeans_one_dimension_one_k\n+ , env = env\n+ , dimension = "features"\n+ )\n+ for ( i in 1:kfeatures_length ) {\n+ result <- feat_result_list[[i]]\n+ if (result$success) {\n+ featureMetadata[sprintf("k%d",kfeatures[i])] <- result$value$clusters\n+ scores <- c(scores, result$value$scores)\n+ }\n+ }\n+ }\n+\n+ return ( \n+ list(\n+ variableMetadata = featureMetadata\n+ , sampleMetadata = sampleMetadata \n+ , scores = scores \n+ )\n+ )\n+ }\n+ , finally = final(cl)\n+ )\n+}\n+\n+# calculate k-means for features or samples\n+# - recall that the dataMatrix has features in rows and samples in columns\n+# return value:\n+# list(clusters = km$cluster, scores = scores) \n+# arguments:\n+# env:\n+# environment having dataMatrix\n+# dimension:\n+# - "samples": produce clusters column to add to the sampleMetadata table\n+# - this is the default case\n+# - "variables": produce clusters column to add to the variableMetadata table\n+# k:\n+# integer, the number of clusters to make\n+calc_kmeans_one_dimension_one_k <- function(k, env, dimension = "samples") {\n+ # abort if environment is not as expected\n+ if ( is.null(env) || ! is.environment(env) ) {\n+ stop("calc_kmeans_one_dimension_one_k - argument \'env\' is not an environment")\n+ } \n+ if ( ! exists("log_print", env) || ! is.function(env$log_print) ) {\n+ stop("calc_kmeans_one_dimension_one_k - argument \'env\' - environment does not include log_print or it is not a function")\n+ } \n+ # abort if k is not as expected\n+ if ( ! is.numeric(k) ) {\n+ stop(sprintf("calc_kmeans_one_dimension_one_k - expected numeric argument \'k\' but type is %s", typeof(k)))\n+ } \n+ k <- as.integer(k)\n+ # abort if dimension is not as expected\n+ if ( ! is.character(dimension) \n+ || ! Reduce( f =`|`, x = sapply(X = c("features","samples"), FUN = `==`, dimension), init = FALSE) ) {\n+ stop("calc_kmeans_one_dimension_one_k - argument \'dimension\' is neither \'features\' nor \'samples\'")\n+ } \n+ dm <- env$dataMatrix\n+ iter.max <- env$iter.max\n+ nstart <- env$nstart\n+ algorithm <- env$algorithm\n+ dim_features <- dimension == "features"\n+ # tryCatchFunc produces a list\n+ # On success of expr(), tryCatchFunc produces\n+ # list(success TRUE, value = expr(), msg = "")\n+ # On failure of expr(), tryCatchFunc produces\n+ # list(success = FALSE, value = NA, msg = "the error message")\n+ result_list <- tryCatchFunc( expr = function() {\n+ # kmeans clusters the rows; features are the columns of args_env$dataMatrix; samples, the rows\n+ # - to calculate sample-clusters, no transposition is needed because samples are rows\n+ # - to calculate feature-clusters, transposition is needed so that features will be the rows\n+ if ( ! dim_features ) dm <- t(dm)\n+ dm <- prepare.data.matrix( x.matrix = dm, data.transformation = function(x) { x } )\n+ # need to set.seed to get reproducible results from kmeans\n+ set.seed(4567)\n+ # do the k-means clustering\n+ km <- kmeans( x = dm, centers = k, iter.max, nstart = nstart, algorithm = algorithm )\n+ scores <-\n+ sprintf("%s\\t%d\\t%0.5e\\t%0.5e\\t%0.5f"\n+ , dimension\n+ , k\n+ , km$totss\n+ , km$betweenss\n+ , km$betweenss/km$totss\n+ )\n+ list(clusters = km$cluster, scores = scores)\n+ })\n+ return ( result_list )\n+}\n+\n' |
b |
diff -r 000000000000 -r 6ccbe18131a6 w4mkmeans_wrapper.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/w4mkmeans_wrapper.R Tue Aug 08 15:30:38 2017 -0400 |
[ |
b'@@ -0,0 +1,370 @@\n+#!/usr/bin/env Rscript\n+\n+# references:\n+# what this does:\n+# - [stats::kmeans](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/kmeans.html)\n+# - [stats::p.adjust](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html)\n+# how this does what it does:\n+# - [parallel::clusterApply](https://stat.ethz.ch/R-manual/R-devel/library/parallel/html/clusterApply.html)\n+\n+# invocation:\n+# Rscript $__tool_directory__/w4mkmeans_wrapper.R \\\n+# tool_directory $__tool_directory__\n+# data_matrix_path \'$dataMatrix_in\' \\\n+# variable_metadata_path \'$variableMetadata_in\' \\\n+# sample_metadata_path \'$sampleMetadata_in\' \\\n+# kfeatures \'$kfeatures\' \\\n+# ksamples \'$ksamples\' \\\n+# iter_max \'$iter_max\' \\\n+# nstart \'$nstart\' \\\n+# algorithm \'$algorithm\' \\\n+# scores \'$scores\' \\\n+# sampleMetadata_out \'$sampleMetadata_out\' \\\n+# variableMetadata_out \'$variableMetadata_out\' \\\n+# slots "\\${GALAXY_SLOTS:-1}" \\\n+# \n+# <inputs>\n+# <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: \'.\', missing: NA, mode: numerical, separator: tab" />\n+# <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata columns, separator: tab" />\n+# <param name="variableMetadata_in" label="Variable metadata file" type="data" format="tabular" help="variable x metadata columns, separator: tab" />\n+# <param name="kfeatures" label="K value(s) for features" type="text" value="0" help="Single or min,max value(s) for K for features (variables), or 0 for none." />\n+# <param name="ksamples" label="K value(s) for samples" type="text" value="0" help="Single or min,max value(s) for K for samples, or 0 for none." />\n+# <param name="iter_max" label="Max number of iterations" type="text" value="10" help="The maximum number of iterations allowed; default 10." />\n+# <param name="nstart" label="Number of random sets" type="text" value="1" help="How many random sets should be chosen; default 1." />\n+# \t<param name="algorithm" label="Algorithm for clustering" type="select" value = "Hartigan-Wong" help="K-means clustering algorithm, default \'Hartigan-Wong\'; alternatives \'Lloyd\', \'MacQueen\'; \'Forgy\' is a synonym for \'Lloyd\', see stats::kmeans reference for further info and references.">\n+# \t <option value="Hartigan-Wong" selected="TRUE">Hartigan-Wong</option>\n+# \t <option value="Lloyd">Lloyd</option>\n+# \t <option value="MacQueen">MacQueen</option>\n+# \t <option value="Forgy">Forgy</option>\n+# \t</param>\n+# </inputs>\n+# <outputs>\n+# <data name="sampleMetadata_out" label="${tool.name}_${sampleMetadata_in.name}" format="tabular" ></data>\n+# <data name="variableMetadata_out" label="${tool.name}_${variableMetadata_in.name}" format="tabular" ></data>\n+# </outputs>\n+\n+##------------------------\n+## libraries for this file\n+##------------------------\n+\n+library(batch) ## for \'parseCommandArgs\'\n+\n+##-------------------\n+## Pre-initialization\n+##-------------------\n+\n+argVc <- unlist(parseCommandArgs(evaluate=FALSE))\n+if ( Reduce( `|`, grepl("tool_directory",names(argVc)) ) ) {\n+ tool_directory <- as.character(argVc["tool_directory"])\n+} else {\n+ tool_directory <- "."\n+}\n+r_path <- function(f) paste( tool_directory, f, sep = "/" )\n+\n+##----------------------------------------------------------\n+## Computation - source general and module-specific routines\n+##----------------------------------------------------------\n+\n+log_print <- function(x, ...) { \n+ cat(\n+ format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")\n+ , " "\n+ , c(x, ...)\n+ , "\\n"\n+ , sep=""\n+ , file=stderr()\n+ )\n+}\n+\n+# log_print(sprintf("tool_directory is %s", tool_directory))\n+\n+w4m_general_purpose_routines_path <- r_path("w4m_general_purpose_routines.R")\n+# log_print(sprintf("w4m_general_purpose_routines_path is %s", w4m_general_purpose_routines_path))\n+if ( ! file.exists(w4m_general_purpose_routines_path) ) {\n+'..b'=F, sep=\'\\t\')\n+\n+## output files\n+sampleMetadata_out <- as.character(argVc["sampleMetadata_out"])\n+variableMetadata_out <- as.character(argVc["variableMetadata_out"])\n+scores_out <- as.character(argVc["scores_out"])\n+## input files\n+args_env$data_matrix_path <- as.character(argVc["data_matrix_path"])\n+args_env$variable_metadata_path <- as.character(argVc["variable_metadata_path"])\n+args_env$sample_metadata_path <- as.character(argVc["sample_metadata_path"])\n+ \n+# other parameters\n+\n+# multi-string args - split csv: "1,2,3" -> c("1","2","3")\n+args_env$kfeatures <- strsplit(x = as.character(argVc[\'kfeatures\']), split = ",", fixed = TRUE)[[1]]\n+args_env$ksamples <- strsplit(x = as.character(argVc[\'ksamples\' ]), split = ",", fixed = TRUE)[[1]]\n+# numeric args\n+args_env$iter_max <- as.numeric( argVc[\'iter_max\' ])\n+args_env$nstart <- as.numeric( argVc[\'nstart\' ])\n+args_env$slots <- as.numeric( argVc[\'slots\' ])\n+# string args\n+args_env$algorithm <- as.character( argVc[\'algorithm\'])\n+args_env$log_print <- log_print\n+\n+log_print("PARAMETERS (parsed):")\n+for (member in ls(args_env)) {\n+ value <- get(member, args_env)\n+ value <- ifelse(length(value) == 1, value, sprintf("c(%s)", paste(value, collapse=", ")))\n+ \n+ log_print(sprintf(" - %s: %s", member, ifelse( !is.function(value) , value, "function" )))\n+}\n+log_print("")\n+\n+##---------------------------------------------------------\n+## Computation - attempt to read input data\n+##---------------------------------------------------------\n+if ( ! read_input_data(args_env, failure_action = read_input_failure_action) ) {\n+ result <- -1\n+} else {\n+ log_print("Input data was read successfully.")\n+ result <- w4mkmeans(env = args_env)\n+ log_print("returned from call to w4mkmeans.")\n+}\n+\n+if ( length(result) == 0 ) {\n+ log_print("no results were produced")\n+ # exit with status code non-zero to indicate error\n+ q(save = "no", status = 1, runLast = FALSE)\n+} else if ( ! setequal(names(result),c("variableMetadata","sampleMetadata","scores")) ) {\n+ log_print(sprintf("unexpected result keys %s", names(result)))\n+ # exit with status code non-zero to indicate error\n+ q(save = "no", status = 1, runLast = FALSE)\n+} else if ( ! write_result(result = result$variableMetadata, file_path = variableMetadata_out, kind_string = "clustered variableMetadata")$success ) {\n+ log_print("failed to write output file for clustered variableMetadata")\n+ # exit with status code non-zero to indicate error\n+ q(save = "no", status = 1, runLast = FALSE)\n+} else if ( ! write_result(result = result$sampleMetadata, file_path = sampleMetadata_out, kind_string = "clustered sampleMetadata")$success ) {\n+ log_print("failed to write output file for clustered sampleMetadata")\n+ # exit with status code non-zero to indicate error\n+ q(save = "no", status = 1, runLast = FALSE)\n+} else {\n+ tryCatch(\n+ expr = {\n+ fileConn<-file(scores_out)\n+ writeLines(result$scores, fileConn)\n+ close(fileConn)\n+ }\n+ , error = function(e) {\n+ log_print(sprintf("failed to write output file for cluster scores - %s", format_error(e)))\n+ # exit with status code non-zero to indicate error\n+ q(save = "no", status = 1, runLast = FALSE)\n+ }\n+ )\n+}\n+\n+##--------\n+## Closing\n+##--------\n+\n+\n+if (!file.exists(sampleMetadata_out)) {\n+ log_print(sprintf("ERROR %s::w4m_kmeans_wrapper - file \'%s\' was not created", modNamC, sampleMetadata_out))\n+}\n+\n+if (!file.exists(variableMetadata_out)) {\n+ log_print(sprintf("ERROR %s::w4m_kmeans_wrapper - file \'%s\' was not created", modNamC, variableMetadata_out))\n+}\n+\n+if (!file.exists(scores_out)) {\n+ log_print(sprintf("ERROR %s::w4m_kmeans_wrapper - file \'%s\' was not created", modNamC, scores_out))\n+}\n+\n+log_print("Normal termination of \'", modNamC, "\' Galaxy module call")\n+\n+# exit with status code zero\n+q(save = "no", status = 0, runLast = FALSE)\n' |