Galaxy |

Changeset 2:c415b7dc6f37 (2018-03-05)

Previous changeset 1:02cafb660b72 (2017-08-09)

Commit message:
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 3e916537da6bb37e6f3927d7a11e98e0ab6ef5ec

modified:
README
test-data/input_sampleMetadata.tsv
test-data/input_variableMetadata.tsv
w4m_general_purpose_routines.R
w4mkmeans.xml
w4mkmeans_routines.R
w4mkmeans_wrapper.R

diff -r 02cafb660b72 -r c415b7dc6f37 README
--- a/README Wed Aug 09 18:06:55 2017 -0400
+++ b/README Mon Mar 05 12:40:17 2018 -0500

@@ -1,2 +1,5 @@
-# w4mkmeans_galaxy_wrapper
-Planemo-based galaxy-tool-wrapper to wrap the stats::kmeans R package for the W4M dataMatrix
+# Kmeans for W4m
+
+Kmeans for W4m is a Galaxy tool-wrapper that wraps the R stats::kmeans function
+for use with the Workflow4Metabolomics flavor of Galaxy.
+This tool is built with Planemo.

diff -r 02cafb660b72 -r c415b7dc6f37 test-data/input_sampleMetadata.tsv
--- a/test-data/input_sampleMetadata.tsv Wed Aug 09 18:06:55 2017 -0400
+++ b/test-data/input_sampleMetadata.tsv Mon Mar 05 12:40:17 2018 -0500

@@ -1,25 +1,25 @@
-sampleMetadata class polarity sampleType injectionOrder batch tissue hotelling_pval missing_pval decile_pval PCA_XSCOR.p1 PCA_XSCOR.p2 class_PLSDA_XSCOR.p1 class_PLSDA_XSCOR.p2 class_PLSDA_predictions
-Y11_1_RA5_01_213 y1 positive sample 213 1 2 0.0955561581467602 1 0.0306775551319138 -2.26882060894901 1.94958116765736 -3.05527623038242 2.32594165405491 y1
-Y2_1_RB1_01_218 y2 positive sample 218 1 1 0.090775969547078 1 0.0334308308237932 -5.43790231069006 3.28509884002914 -5.09396184849057 3.13632363820691 y2
-Y4_1_RB3_01_220 y4 positive sample 220 1 1 0.0922380872343134 1 0.0343065627030201 -5.96519534532645 2.76065569212045 -5.61271725241037 2.60836215684335 y4
-Y12_1_RB4_01_221 y2 positive sample 221 1 2 0.0731025791938841 1 0.0335814402688999 -3.90074024447077 2.32567583717618 -4.06427508572245 2.47867307479093 y2
-Y1_1_RC1_01_228 y1 positive sample 228 1 1 0.948646526283138 1 0.0150153992414774 -5.79172889541087 3.18442356006801 -5.71894211121787 3.14075608795096 y1
-Y14_1_RC6_01_234 y4 positive sample 234 1 2 0.961424772615561 1 0.0889762542416943 -3.50543091786719 1.90332246047248 -3.60257250798236 1.95341548985651 y4
-Y1_2_RD1_01_239 y1 positive sample 239 1 1 0.391486624975171 1 0.419632697534464 -10.0290510611276 3.46916578350898 -9.30301404180818 3.22425994348737 y1
-Y14_2_RD2_01_240 y4 positive sample 240 1 2 0.334478686842038 1 0.471265236704114 -0.955577667004931 1.62643379077323 -1.17127923245195 1.73770179646644 y4
-Y4_2_RD3_01_241 y4 positive sample 241 1 1 0.243979127543208 1 0.115904447650611 -6.29053214398527 3.48768497975009 -5.86080882473825 3.19393908077519 y4
-Y11_2_RD7_01_246 y1 positive sample 246 1 2 0.639085015503201 1 0.496291025606805 -0.737703114796199 1.89206669195622 -1.33734677265265 2.19119862732508 y1
-Y2_2_RE4_01_253 y2 positive sample 253 1 1 0.681339414372971 1 0.713644697663014 -4.43122798643441 2.59136016132011 -4.21959323228049 2.4942150403311 y2
-Y12_2_RE6_01_255 y2 positive sample 255 1 2 0.581861317126264 1 0.446040669279691 -3.42333388673909 2.19844489197916 -3.40077262161465 2.21882800112511 y2
-Y14_3_GA2_01_260 y4 positive sample 260 1 2 0.792323381194401 1 0.812319191661791 -2.61403564986014 1.9025507158402 -2.90132077481451 2.05744453719897 y4
-Y2_3_GA4_01_264 y2 positive sample 264 1 1 0.278347988263537 1 0.668405316795454 -6.19672954480257 4.11371745717593 -5.72942704887795 3.94423530839635 y2
-Y1_3_GA6_01_266 y1 positive sample 266 1 1 0.303133108610158 1 0.521065147801524 -5.91283168480956 2.57721868167528 -5.8128281040434 2.56623732055011 y1
-Y4_3_GA7_01_267 y4 positive sample 267 1 1 0.204620420161485 1 0.53551459376182 -5.81862869528986 3.42191037440281 -5.37442797934098 3.22465790741629 y4
-Y12_3_GB1_01_270 y2 positive sample 270 1 2 0.7747649633382 1 0.698966513767803 -3.0854085700971 1.67899209632345 -3.14572560562774 1.75648836353689 y2
-Y11_3_GC3_01_283 y1 positive sample 283 1 2 0.918803505111851 1 0.396638581468035 -1.16946485386388 1.66851916844539 -1.7032576378218 1.94860768310552 y1
-Y14_4_GC7_01_287 y4 positive sample 287 1 2 0.577273975934045 1 0.14919566995266 -1.24666389579168 2.84891525888206 -1.58652468539139 2.90364189714377 y4
-Y11_4_GD8_01_299 y1 positive sample 299 1 2 0.31302025978985 1 0.426766355892969 -2.15936901108787 1.66989335813642 -2.66042240568943 1.95509478589954 y1
-Y2_4_GE1_01_300 y2 positive sample 300 1 1 0.0338929937565918 1 0.419149865807458 -5.76080121045973 3.47845733452933 -5.39212267305567 3.34452446158071 y2
-Y12_4_GE2_01_304 y2 positive sample 304 1 2 0.130905883509031 1 0.59698349195307 -4.15585900988913 3.22702525356271 -4.28382960930699 3.34874792551519 y2
-Y1_4_GE3_01_305 y1 positive sample 305 1 1 0.129479197101219 1 0.618449187175638 -2.9921645121991 3.14523577730793 -2.85639638001866 3.08197032598192 y1
-Y4_4_GE7_01_309 y4 positive sample 309 1 1 0.758837157578886 1 0.339564008612217 -5.95949084754462 3.20317151028856 -5.46675286145534 3.04585537553442 y4
+sampleMetadata class polarity sampleType injectionOrder batch tissue hotelling_pval missing_pval decile_pval
+Y11_1_RA5_01_213 y1 positive sample 213 1 2 0.0955561581467602 1 0.0306775551319138
+Y2_1_RB1_01_218 y2 positive sample 218 1 1 0.090775969547078 1 0.0334308308237932
+Y4_1_RB3_01_220 y4 positive sample 220 1 1 0.0922380872343134 1 0.0343065627030201
+Y12_1_RB4_01_221 y2 positive sample 221 1 2 0.0731025791938841 1 0.0335814402688999
+Y1_1_RC1_01_228 y1 positive sample 228 1 1 0.948646526283138 1 0.0150153992414774
+Y14_1_RC6_01_234 y4 positive sample 234 1 2 0.961424772615561 1 0.0889762542416943
+Y1_2_RD1_01_239 y1 positive sample 239 1 1 0.391486624975171 1 0.419632697534464
+Y14_2_RD2_01_240 y4 positive sample 240 1 2 0.334478686842038 1 0.471265236704114
+Y4_2_RD3_01_241 y4 positive sample 241 1 1 0.243979127543208 1 0.115904447650611
+Y11_2_RD7_01_246 y1 positive sample 246 1 2 0.639085015503201 1 0.496291025606805
+Y2_2_RE4_01_253 y2 positive sample 253 1 1 0.681339414372971 1 0.713644697663014
+Y12_2_RE6_01_255 y2 positive sample 255 1 2 0.581861317126264 1 0.446040669279691
+Y14_3_GA2_01_260 y4 positive sample 260 1 2 0.792323381194401 1 0.812319191661791
+Y2_3_GA4_01_264 y2 positive sample 264 1 1 0.278347988263537 1 0.668405316795454
+Y1_3_GA6_01_266 y1 positive sample 266 1 1 0.303133108610158 1 0.521065147801524
+Y4_3_GA7_01_267 y4 positive sample 267 1 1 0.204620420161485 1 0.53551459376182
+Y12_3_GB1_01_270 y2 positive sample 270 1 2 0.7747649633382 1 0.698966513767803
+Y11_3_GC3_01_283 y1 positive sample 283 1 2 0.918803505111851 1 0.396638581468035
+Y14_4_GC7_01_287 y4 positive sample 287 1 2 0.577273975934045 1 0.14919566995266
+Y11_4_GD8_01_299 y1 positive sample 299 1 2 0.31302025978985 1 0.426766355892969
+Y2_4_GE1_01_300 y2 positive sample 300 1 1 0.0338929937565918 1 0.419149865807458
+Y12_4_GE2_01_304 y2 positive sample 304 1 2 0.130905883509031 1 0.59698349195307
+Y1_4_GE3_01_305 y1 positive sample 305 1 1 0.129479197101219 1 0.618449187175638
+Y4_4_GE7_01_309 y4 positive sample 309 1 1 0.758837157578886 1 0.339564008612217

diff -r 02cafb660b72 -r c415b7dc6f37 test-data/input_variableMetadata.tsv
--- a/test-data/input_variableMetadata.tsv Wed Aug 09 18:06:55 2017 -0400
+++ b/test-data/input_variableMetadata.tsv Mon Mar 05 12:40:17 2018 -0500

[

b'@@ -1,50 +1,50 @@\n-variableMetadata\tnamecustom\tmz\tmzmin\tmzmax\trt\trtmin\trtmax\tnpeaks\tmy_blank\tpool\ty0\ty1\ty2\ty3\ty4\ty5\ty6\ty7\ty8\ty9\tisotopes\tadduct\tpcgroup\tCV.samp\tCV.pool\tCV.ind\tblank_mean\tblank_sd\tblank_CV\tsample_mean\tsample_sd\tsample_CV\tblankMean_over_sampleMean\tpool_mean\tpool_sd\tpool_CV\tpoolCV_over_sampleCV\tclass_kruskal_fdr\tclass_kruskal_sig\tclass_kruskal_y1.y0_dif\tclass_kruskal_y2.y0_dif\tclass_kruskal_y3.y0_dif\tclass_kruskal_y4.y0_dif\tclass_kruskal_y5.y0_dif\tclass_kruskal_y6.y0_dif\tclass_kruskal_y7.y0_dif\tclass_kruskal_y8.y0_dif\tclass_kruskal_y9.y0_dif\tclass_kruskal_y2.y1_dif\tclass_kruskal_y3.y1_dif\tclass_kruskal_y4.y1_dif\tclass_kruskal_y5.y1_dif\tclass_kruskal_y6.y1_dif\tclass_kruskal_y7.y1_dif\tclass_kruskal_y8.y1_dif\tclass_kruskal_y9.y1_dif\tclass_kruskal_y3.y2_dif\tclass_kruskal_y4.y2_dif\tclass_kruskal_y5.y2_dif\tclass_kruskal_y6.y2_dif\tclass_kruskal_y7.y2_dif\tclass_kruskal_y8.y2_dif\tclass_kruskal_y9.y2_dif\tclass_kruskal_y4.y3_dif\tclass_kruskal_y5.y3_dif\tclass_kruskal_y6.y3_dif\tclass_kruskal_y7.y3_dif\tclass_kruskal_y8.y3_dif\tclass_kruskal_y9.y3_dif\tclass_kruskal_y5.y4_dif\tclass_kruskal_y6.y4_dif\tclass_kruskal_y7.y4_dif\tclass_kruskal_y8.y4_dif\tclass_kruskal_y9.y4_dif\tclass_kruskal_y6.y5_dif\tclass_kruskal_y7.y5_dif\tclass_kruskal_y8.y5_dif\tclass_kruskal_y9.y5_dif\tclass_kruskal_y7.y6_dif\tclass_kruskal_y8.y6_dif\tclass_kruskal_y9.y6_dif\tclass_kruskal_y8.y7_dif\tclass_kruskal_y9.y7_dif\tclass_kruskal_y9.y8_dif\tclass_kruskal_y1.y0_fdr\tclass_kruskal_y2.y0_fdr\tclass_kruskal_y3.y0_fdr\tclass_kruskal_y4.y0_fdr\tclass_kruskal_y5.y0_fdr\tclass_kruskal_y6.y0_fdr\tclass_kruskal_y7.y0_fdr\tclass_kruskal_y8.y0_fdr\tclass_kruskal_y9.y0_fdr\tclass_kruskal_y2.y1_fdr\tclass_kruskal_y3.y1_fdr\tclass_kruskal_y4.y1_fdr\tclass_kruskal_y5.y1_fdr\tclass_kruskal_y6.y1_fdr\tclass_kruskal_y7.y1_fdr\tclass_kruskal_y8.y1_fdr\tclass_kruskal_y9.y1_fdr\tclass_kruskal_y3.y2_fdr\tclass_kruskal_y4.y2_fdr\tclass_kruska
l_y5.y2_fdr\tclass_kruskal_y6.y2_fdr\tclass_kruskal_y7.y2_fdr\tclass_kruskal_y8.y2_fdr\tclass_kruskal_y9.y2_fdr\tclass_kruskal_y4.y3_fdr\tclass_kruskal_y5.y3_fdr\tclass_kruskal_y6.y3_fdr\tclass_kruskal_y7.y3_fdr\tclass_kruskal_y8.y3_fdr\tclass_kruskal_y9.y3_fdr\tclass_kruskal_y5.y4_fdr\tclass_kruskal_y6.y4_fdr\tclass_kruskal_y7.y4_fdr\tclass_kruskal_y8.y4_fdr\tclass_kruskal_y9.y4_fdr\tclass_kruskal_y6.y5_fdr\tclass_kruskal_y7.y5_fdr\tclass_kruskal_y8.y5_fdr\tclass_kruskal_y9.y5_fdr\tclass_kruskal_y7.y6_fdr\tclass_kruskal_y8.y6_fdr\tclass_kruskal_y9.y6_fdr\tclass_kruskal_y8.y7_fdr\tclass_kruskal_y9.y7_fdr\tclass_kruskal_y9.y8_fdr\tclass_kruskal_y1.y0_sig\tclass_kruskal_y2.y0_sig\tclass_kruskal_y3.y0_sig\tclass_kruskal_y4.y0_sig\tclass_kruskal_y5.y0_sig\tclass_kruskal_y6.y0_sig\tclass_kruskal_y7.y0_sig\tclass_kruskal_y8.y0_sig\tclass_kruskal_y9.y0_sig\tclass_kruskal_y2.y1_sig\tclass_kruskal_y3.y1_sig\tclass_kruskal_y4.y1_sig\tclass_kruskal_y5.y1_sig\tclass_kruskal_y6.y1_sig\tclass_kruskal_y7.y1_sig\tclass_kruskal_y8.y1_sig\tclass_kruskal_y9.y1_sig\tclass_kruskal_y3.y2_sig\tclass_kruskal_y4.y2_sig\tclass_kruskal_y5.y2_sig\tclass_kruskal_y6.y2_sig\tclass_kruskal_y7.y2_sig\tclass_kruskal_y8.y2_sig\tclass_kruskal_y9.y2_sig\tclass_kruskal_y4.y3_sig\tclass_kruskal_y5.y3_sig\tclass_kruskal_y6.y3_sig\tclass_kruskal_y7.y3_sig\tclass_kruskal_y8.y3_sig\tclass_kruskal_y9.y3_sig\tclass_kruskal_y5.y4_sig\tclass_kruskal_y6.y4_sig\tclass_kruskal_y7.y4_sig\tclass_kruskal_y8.y4_sig\tclass_kruskal_y9.y4_sig\tclass_kruskal_y6.y5_sig\tclass_kruskal_y7.y5_sig\tclass_kruskal_y8.y5_sig\tclass_kruskal_y9.y5_sig\tclass_kruskal_y7.y6_sig\tclass_kruskal_y8.y6_sig\tclass_kruskal_y9.y6_sig\tclass_kruskal_y8.y7_sig\tclass_kruskal_y9.y7_sig\tclass_kruskal_y9.y8_sig\tPCA_XLOAD.p1\tPCA_XLOAD.p2\tclass_PLSDA_XLOAD.p1\tclass_PLSDA_XLOAD.p2\tclass_PLSDA_VIP\tclass_PLSDA_y0.COEFF\tclass_PLSDA_y1.COEFF\tclass_PLSDA_y2.COEFF\tclass_PLSDA_y3.COEFF\tclass_PLSDA_y4.COEFF\tclass_PLSDA_y5.COEFF\tclass_PLSDA_y6.C
OEFF\tclass_PLSDA_y7.COEFF\tclass_PLSDA_y8.COEFF\tclass_PLSDA_y9.COEFF\n-M118T229\tM118T229.46\t118\t118\t118\t229.455291748047\t228.736724853516\t230.594131469727\t55\t0\t8\t7\t5\t7\t6\t5\t2\t3\t7\t3\t2\t\t\t118\t0.'..b'951\t700450.027146888\t1.48571473567732\t516530.140711984\t811624.468540371\t1.57130127473613\t0.912737842570226\t984267.219310835\t2418480.91825938\t2.45713854003261\t1.56376029189262\n+M237T507\tM237T506.53\t237\t237\t237\t506.528350830078\t505.624542236328\t507.116790771484\t25\t0\t4\t0\t4\t8\t1\t5\t0\t3\t0\t0\t0\t\t\t127\t1.01865281763957\t0.235510690078879\t1\t266747.756433983\t246188.472324805\t0.922926121726291\t395195.698150977\t426118.430841148\t1.07824663283242\t0.674976366600218\t171228.554719805\t159284.30294444\t0.930243808955165\t0.862737504230857\n+M245T458\tM245T457.72\t245\t245\t245\t457.721374511719\t457.333526611328\t458.681579589844\t17\t0\t5\t2\t1\t0\t0\t1\t2\t1\t4\t0\t1\t\t\t58\t1.52447574918459\t0.334348661020358\t1\t1430940.98768608\t2258010.24516098\t1.57798977357712\t846819.034346843\t1253911.39993487\t1.48073124135905\t1.68978368417259\t883292.908953789\t1776285.13300152\t2.01098085923211\t1.35809983814915\n+M246T512\tM246T512.25\t246\t246\t246\t512.251037597656\t511.492340087891\t513.263793945312\t48\t0\t7\t4\t3\t4\t0\t5\t2\t6\t8\t8\t1\t\t\t225\t1.36086854087834\t0.161038610753495\t1\t602860.498952085\t811567.686370073\t1.34619482912012\t759852.758985677\t1101614.20629598\t1.44977325313199\t0.793391208787396\t390439.831645514\t565254.077160012\t1.4477367095917\t0.998595267545533\n+M246T490\tM246T490.33\t246\t246\t246\t490.332672119141\t489.325378417969\t491.331573486328\t20\t0\t1\t6\t1\t0\t0\t1\t1\t1\t5\t4\t0\t\t\t104\t1.45774245205707\t0.248906664746694\t1\t193966.462671027\t323651.087764569\t1.66859303050492\t135031.53956809\t187343.420868419\t1.38740490901351\t1.43645301898687\t62694.2149545455\t56646.7427714119\t0.903540188077669\t0.651244767989265\n+M247T433\tM247T433.32\t247\t247\t247\t433.317993164062\t432.23721313
4766\t438.034820556641\t53\t0\t7\t5\t5\t6\t1\t7\t7\t4\t7\t0\t4\t[3][M]+\t\t3\t1.59824448907224\t0.193410302605114\t1\t16684736.0538948\t21487152.7308117\t1.28783294272107\t22804654.5731282\t38293603.669807\t1.67920121512957\t0.731637306778377\t10984823.9642482\t16518883.6528089\t1.50379138587674\t0.895539719914213\n+M247T452\tM247T451.94\t247\t247\t247\t451.937652587891\t450.494750976562\t452.778442382812\t42\t0\t7\t5\t4\t2\t0\t4\t7\t2\t7\t1\t3\t\t\t2\t1.8377037387191\t0.244212966980515\t1\t19304952.0532017\t33519245.0029629\t1.73630293981507\t26880324.4295464\t50698463.1885802\t1.88608077709261\t0.718181512421852\t11819323.2613698\t17126211.9235465\t1.44900105909801\t0.768260340011335\n+M248T433\tM248T433.34\t248\t248\t248\t433.344421386719\t432.868927001953\t434.076232910156\t21\t0\t2\t4\t4\t2\t1\t2\t3\t1\t1\t0\t1\t[3][M+1]+\t\t3\t1.59105092227126\t0.195371307889308\t1\t2480421.37630339\t3152657.65535375\t1.27101696730747\t3650972.15175104\t6060161.14033005\t1.65987602436889\t0.679386550542094\t1475786.50491227\t2195945.58877151\t1.4879832424691\t0.896442397277746\n+M251T497\tM251T497.27\t251\t251\t251\t497.265563964844\t496.463073730469\t498.130004882812\t40\t0\t10\t0\t7\t5\t8\t6\t4\t0\t0\t0\t0\t[4][M]+\t\t48\t1.96330612768638\t0.192700519895792\t1\t594224.888258475\t760181.506434167\t1.27928250979536\t830581.94802374\t1732410.35548682\t2.08577896450659\t0.715431980760423\t470765.000605745\t589833.527563223\t1.25292561427521\t0.600699132360655\n+M257T1014\tM257.125T1013.9\t257.125\t257.125\t257.125\t1013.90087890625\t1012.74542236328\t1014.60961914062\t73\t0\t7\t7\t6\t6\t7\t5\t8\t7\t8\t8\t4\t[6][M]+\t\t249\t0.881053983255579\t0.186470621075041\t1\t2202927.69616102\t2148769.26942137\t0.975415249972102\t2007043.93637984\t1762758.42680569\t0.878285918336807\t1.09759814233788\t2962754.71167807\t3892125.71725248\t1.31368476165481\t1.49573701937805\n+M261T332\tM261T331.57\t261\t261\t261\t331.569473266602\t330.826965332031\t332.320007324219\t46\t0\t9\t0\t6\t5\t5\t5\t5\t
7\t0\t0\t4\t[7][M]+\t\t57\t1.04326999694138\t0.221252808937921\t1\t1146683.03090926\t1186248.76846245\t1.03450451126133\t1145403.0063442\t1327072.12823314\t1.1586071634898\t1.0011175320459\t919208.931999451\t1174249.82975397\t1.27745694028425\t1.10257987395527\n+M263T323\tM263T323.29\t263\t263\t263\t323.286376953125\t317.427062988281\t324.498107910156\t95\t0\t10\t8\t7\t3\t1\t8\t7\t5\t8\t2\t3\t[8][M]+\t\t25\t1.24549325872202\t0.169688974744568\t1\t4856126.41100817\t6417229.02375924\t1.32147075274076\t3269419.74893288\t4306448.22482372\t1.31719037490653\t1.48531751317437\t2850327.25225239\t3833675.62917697\t1.34499490405795\t1.02110896775524\n'

diff -r 02cafb660b72 -r c415b7dc6f37 w4m_general_purpose_routines.R
--- a/w4m_general_purpose_routines.R Wed Aug 09 18:06:55 2017 -0400
+++ b/w4m_general_purpose_routines.R Mon Mar 05 12:40:17 2018 -0500

[

@@ -1,3 +1,48 @@
+##-----------------------------------------------
+## helper functions for error detection/reporting
+##-----------------------------------------------
+
+# ISO 8601 date ref: https://en.wikipedia.org/wiki/ISO_8601
+iso_date <- function() {
+  format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")
+}
+
+# log-printing to stderr
+log_print <- function(x, ...) {
+  cat(
+    sep=""
+  , file=stderr()
+  , iso_date()
+  , " "
+  , c(x, ...)
+  , "\n"
+  )
+}
+
+# format error for logging
+format_error <- function(e) {
+  paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ")
+}
+
+# tryCatchFunc produces a list
+#   func - a function that takes no arguments
+#   On success of func(), tryCatchFunc produces
+#     list(success = TRUE, value = func(), msg = "")
+#   On failure of func(), tryCatchFunc produces
+#     list(success = FALSE, value = NA, msg = "the error message")
+tryCatchFunc <- function(func) {
+  retval <- NULL
+  tryCatch(
+    expr = {
+      retval <- ( list( success = TRUE, value = func(), msg = "" ) )
+    }
+  , error = function(e) {
+      retval <<- list( success = FALSE, value = NA, msg = format_error(e) )
+    }
+  )
+  return (retval)
+}
+
# prepare.data.matrix - Prepare x.datamatrix for multivariate statistical analysis (MVA)
#   - Motivation:
#     - Selection:
@@ -7,7 +52,7 @@
#         - If so, set the argument 'exclude.features' to a vector of feature names
#     - Renaming samples:
#       - You may want to rename several samples from your analysis:
-#         - If so, set the argument 'sample.rename.function' to a function accepting a vector
+#         - If so, set the argument 'sample.rename.function' to a function accepting a vector
#           of sample names and producing a vector of strings of equivalent length
#     - MVA is confounded by missing values.
#       - By default, this function imputes missing values as zero.
@@ -19,7 +64,7 @@
#       - By default, this function performs an eighth-root transformation:
#         - Any root-transformation has the advantage of never being negative.
#         - Calculation of the eighth root is four times faster in my hands than log10.
-#         - However, it has the disadvantage that calculation of fold-differences
+#         - However, it has the disadvantage that calculation of fold-differences
#           is not additive as with log-transformation.
#           - Rather, you must divide the values and raise to the eighth power.
#       - For a different transformation, set the 'data.transformation' argument
@@ -107,6 +152,13 @@
   }
, en = new.env()
) {
+  # log to environment
+  if ( !exists("log", envir = en) ) {
+    en$log <- c()
+  }
+  enlog <- function(s) { en$log <- c(en$log, s); s }
+  #enlog("foo")
+
   # MatVar - Compute variance of rows or columns of a matrix
   # ref: http://stackoverflow.com/a/25100036
   # For row variance, dim == 1, for col variance, dim == 2
@@ -137,11 +189,9 @@

   nonzero.var <- function(x) {
     if (nrow(x) == 0) {
-      print(str(x))
       stop("matrix has no rows")
     }
     if (ncol(x) == 0) {
-      print(str(x))
       stop("matrix has no columns")
     }
     if ( is.numeric(x) ) {
@@ -153,7 +203,7 @@
         row.names <- attr(nonzero.rows,"names")
         x <- x[ row.names, , drop = FALSE ]
       }
-
+
       # exclude any columns with zero variance
       column.vars <- MatVar(x, dim = 2)
       nonzero.column.vars <- column.vars > 0
@@ -170,10 +220,13 @@
     stop("FATAL ERROR - prepare.data.matrix was called with null x.matrix")
   }

+  enlog("prepare.data.matrix - get matrix")
+
   en$xpre <- x <- x.matrix

  # exclude any features (columns) as indicated
   if ( !is.null(exclude.features) ) {
+    enlog("prepare.data.matrix - exclude any samples as indicated")
     my.colnames <- colnames(x)
     my.col.diff <- setdiff(my.colnames, exclude.features)
     x <- x[ , my.col.diff , drop = FALSE ]
@@ -181,6 +234,7 @@

   # exclude any features as indicated
   if ( !is.null(exclude.samples) ) {
+    enlog("prepare.data.matrix - exclude any features as indicated")
     my.rownames <- rownames(x)
     my.row.diff <- setdiff(my.rownames, exclude.samples)
     x <- x[ my.row.diff, , drop = FALSE ]
@@ -188,20 +242,25 @@

   # rename rows if desired
   if ( !is.null(sample.rename.function) ) {
+    enlog("prepare.data.matrix - rename rows if desired")
     renamed <- sample.rename.function(x)
     rownames(x) <- renamed
   }

+  enlog("prepare.data.matrix - save redacted x.datamatrix to environment")
+
   # save redacted x.datamatrix to environment
   en$redacted.data.matrix <- x

   # impute values missing from the x.datamatrix
   if ( !is.null(data.imputation) ) {
+    enlog("prepare.data.matrix - impute values missing from the x.datamatrix")
     x <- data.imputation(x)
   }

   # perform transformation if desired
   if ( !is.null(data.transformation) ) {
+    enlog("prepare.data.matrix - perform transformation")
     x <- data.transformation(x)
   } else {
     x <- x
@@ -209,6 +268,7 @@

   # purge rows and columns that have zero variance
   if ( is.numeric(x) ) {
+    enlog("prepare.data.matrix - purge rows and columns that have zero variance")
     x <- nonzero.var(x)
   }

@@ -218,66 +278,4 @@
   return(x)
}

-
-##-----------------------------------------------
-## helper functions for error detection/reporting
-##-----------------------------------------------
-
-# log-printing to stderr
-log_print <- function(x, ...) {
-  cat(
-    format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")
-  , " "
-  , c(x, ...)
-  , "\n"
-  , sep=""
-  , file=stderr()
-  )
-}
-
-# tryCatchFunc produces a list
-#   On success of expr(), tryCatchFunc produces
-#     list(success TRUE, value = expr(), msg = "")
-#   On failure of expr(), tryCatchFunc produces
-#     list(success = FALSE, value = NA, msg = "the error message")
-tryCatchFunc <- function(expr) {
-  # format error for logging
-  format_error <- function(e) {
-    paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ")
-  }
-  my_expr <- expr
-  retval <- NULL
-  tryCatch(
-    expr = {
-      retval <- ( list( success = TRUE, value = my_expr(), msg = "" ) )
-    }
-  , error = function(e) {
-      retval <<- list( success = FALSE, value = NA, msg = format_error(e) )
-    }
-  )
-  return (retval)
-}
-
-# tryCatchProc produces a list
-#   On success of expr(), tryCatchProc produces
-#     list(success TRUE, msg = "")
-#   On failure of expr(), tryCatchProc produces
-#     list(success = FALSE, msg = "the error message")
-tryCatchProc <- function(expr) {
-  # format error for logging
-  format_error <- function(e) {
-    paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ")
-  }
-  retval <- NULL
-  tryCatch(
-    expr = {
-      expr()
-      retval <- ( list( success = TRUE, msg = "" ) )
-    }
-  , error = function(e) {
-      retval <<- list( success = FALSE, msg = format_error(e) )
-    }
-  )
-  return (retval)
-}
-
+# vim: sw=2 ts=2 et :

diff -r 02cafb660b72 -r c415b7dc6f37 w4mkmeans.xml
--- a/w4mkmeans.xml Wed Aug 09 18:06:55 2017 -0400
+++ b/w4mkmeans.xml Mon Mar 05 12:40:17 2018 -0500

[

b'@@ -1,9 +1,11 @@\n-\xef\xbb\xbf<tool id="w4mkmeans" name="w4mKmeans" version="0.98.3">\n- <description>Calculate K-means for W4M dataMatrix features or samples</description>\n+\xef\xbb\xbf<tool id="w4mkmeans" name="Kmeans for W4m" version="0.98.4">\n+ <description>Calculate K-means for W4m dataMatrix features or samples</description>\n \n <requirements>\n- <requirement type="package" version="3.3.2">r-base</requirement>\n+ <requirement type="package" version="3.4.1">r-base</requirement>\n <requirement type="package" version="1.1_4">r-batch</requirement>\n+ <requirement type="package" version="1.8.0">libssh2</requirement>\n+ <requirement type="package" version="1.13.2">krb5</requirement>\n </requirements>\n \n <stdio>\n@@ -27,19 +29,18 @@\n slots "\\${GALAXY_SLOTS:-1}"\n variableMetadata_out \'$variableMetadata_out\'\n variable_metadata_path \'$variableMetadata_in\'\n- ; echo exit code $?\n ]]></command>\n \n <inputs>\n- <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: \'.\', missing: NA, mode: numerical, separator: tab" />\n- <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata columns, separator: tab" />\n- <param name="variableMetadata_in" label="Variable metadata file" type="data" format="tabular" help="variable x metadata columns, separator: tab" />\n- <param name="categoricalPrefix" label="prefix for cluster names " type="text" value="k" help="[categorical_prefix] Some tools require non-numeric values to discern categorical data; e.g., enter \'k\' here to prepend \'k\' to cluster numbers in the output; default \'k\'." 
/>\n+ <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="Feature (variable) x sample; decimal point: \'.\'; missing: NA; mode: numerical; separator: tab" />\n+ <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="Sample x metadata columns; separator: tab" />\n+ <param name="variableMetadata_in" label="Variable (feature) metadata file" type="data" format="tabular" help="Feature (variable) x metadata columns; separator: tab" />\n+ <param name="categoricalPrefix" label="Prefix for cluster names " type="text" value="c" help="String prepended to cluster numbers in output; default \'c\'; leave blank for no prefix." />\n <param name="ksamples" label="K value(s) for samples" type="text" value = "0" help="[ksamples] Single K or comma-separated Ks for samples, or 0 for none." />\n <param name="kfeatures" label="K value(s) for features" type="text" value = "0" help="[kfeatures] Single K or comma-separated Ks for features (variables), or 0 for none." />\n- <param name="iter_max" label="Max number of iterations" type="text" value = "10" help="[iter_max] The maximum number of iterations allowed; default 10." />\n- <param name="nstart" label="Number of random sets" type="text" value = "1" help="[nstart] How many random sets should be chosen; default 1." />\n- <param name="algorithm" label="Algorithm for clustering" type="select" value = "Hartigan-Wong" help="[algorithm] K-means clustering algorithm, default \'Hartigan-Wong\'; alternatives \'Lloyd\', \'MacQueen\'; \'Forgy\' is a synonym for \'Lloyd\', see stats::kmeans reference for further info.">\n+ <param name="iter_max" label="Maximum number of iterations" type="text" value = "20" help="[iter_max] The maximum number of iterations allowed; default 20." />\n+ <param name="nstart" label="Number of random sets of clusters" type="text" value = "20" help="[nstart] How many random sets of clusters should be chosen initially; default 20." 
/>\n+ <param name="algorithm" label="Algorithm for clustering" type="select" value = "Hartigan-Wong" help="[algorithm] K-means clustering algorithm, default \'Hartigan-Wong\'; <br />alternatives \'Lloyd\', \'MacQueen\'; \'Forgy\' (synonym for \'Lloyd\'); see references.">\n <option value="Forgy">Forgy</option>\n <opt'..b'v |\n++-------------------------------------------------------------------------------------------------------------------+\n+| https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/input_sampleMetadata.tsv |\n++-------------------------------------------------------------------------------------------------------------------+\n+| https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/input_variableMetadata.tsv |\n++-------------------------------------------------------------------------------------------------------------------+\n+\n+.. class:: infomark\n \n **Other input parameters**\n \n +-----------------+---------------+\n | Input Parameter | Value |\n +=================+===============+\n+| prefix | c |\n++-----------------+---------------+\n | ksamples | 3,4 |\n +-----------------+---------------+\n | kfeatures | 5,6,7 |\n +-----------------+---------------+\n-| iter_max | 10 |\n+| iter_max | 20 |\n +-----------------+---------------+\n-| nstart | 1 |\n+| nstart | 20 |\n +-----------------+---------------+\n | algorithm | Hartigan-Wong |\n +-----------------+---------------+\n \n+.. 
class:: infomark\n+\n+**Expected output files**\n+\n++-------------------------------------------------------------------------------------------------------------------+\n+| URL |\n++===================================================================================================================+\n+| https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/output_kmeans-score.tsv |\n++-------------------------------------------------------------------------------------------------------------------+\n+| https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/output_kmeans-vrbl.tsv |\n++-------------------------------------------------------------------------------------------------------------------+\n+| https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/output_kmeans-smpl.tsv |\n++-------------------------------------------------------------------------------------------------------------------+\n+\n ----\n NEWS\n ----\n \n+- February 2018, Version 0.98.4 - Renamed output datasets to append \'``.kmeans-smpl``\', \'``.kmeans-vrbl``\', or \'``.kmeans-score``\'; refactored multi-threading.\n - August 2017, Version 0.98.3 - Add (optional) prefix to category numbers for downstream tools that treat only non-numeric data as categorical.\n - August 2017, Version 0.98.1 - First release\n \n@@ -277,28 +284,12 @@\n year = 1965\n }\n ]]></citation>\n- \n+ \n <citation type="doi">10.1016/j.biocel.2017.07.002</citation>\n- \n+ \n <citation type="doi">10.1093/bioinformatics/btu813</citation>\n \n- <citation type="bibtex"><![CDATA[\n-@article{Hartigan79,\n- added-at = {2007-02-27T16:22:09.000+0100},\n- author = {Hartigan, J. 
and Wong, M.},\n- biburl = {https://www.bibsonomy.org/bibtex/23d8bfc440c5725783876929c022f67ce/pierpaolo.pk81},\n- description = {WSD},\n- interhash = {10d6d33920d9af578a4d0a556dc1477d},\n- intrahash = {3d8bfc440c5725783876929c022f67ce},\n- journal = {Applied Statistics},\n- keywords = {imported},\n- pages = {100-108},\n- timestamp = {2007-02-27T16:22:11.000+0100},\n- title = {Algorithm AS136: A k-means clustering algorithm},\n- volume = 28,\n- year = 1979\n-}\n- ]]></citation>\n+ <citation type="doi">10.2307/2346830</citation>\n \n <citation type="doi">10.1109/TIT.1982.1056489</citation>\n \n'

diff -r 02cafb660b72 -r c415b7dc6f37 w4mkmeans_routines.R
--- a/w4mkmeans_routines.R Wed Aug 09 18:06:55 2017 -0400
+++ b/w4mkmeans_routines.R Mon Mar 05 12:40:17 2018 -0500

[

b'@@ -4,11 +4,11 @@\n \n library(parallel)\n \n-w4kmeans_usage <- function() {\n- return ( \n+w4mkmeans_usage <- function() {\n+ return (\n c(\n "w4mkmeans: bad input.",\n- "# contract:",\n+ " contract:",\n " required - caller will provide an environment comprising:",\n " log_print - a logging function with the signature function(x, ...) expecting strings as x and ...",\n " variableMetadata - the corresponding W4M data.frame having feature metadata",\n@@ -18,8 +18,8 @@\n " optional - environment may comprise:",\n " kfeatures - an array of integers, the k\'s to apply for clustering by feature (default, empty array)",\n " ksamples - an array of integers, the k\'s to apply for clustering by sample (default, empty array)",\n- " iter.max - the maximum number of iterations when calculating a cluster (default = 10)",\n- " nstart - how many random sets of centers should be chosen (default = 1)",\n+ " iter_max - the maximum number of iterations when calculating a cluster (default = 20)",\n+ " nstart - how many random sets of centers should be chosen (default = 20)",\n " algorithm - string from c(\'Hartigan-Wong\', \'Lloyd\', \'Forgy\', \'MacQueen\') (default = Hartigan-Wong)",\n " categorical_prefix - string from c(\'Hartigan-Wong\', \'Lloyd\', \'Forgy\', \'MacQueen\') (default = Hartigan-Wong)",\n " ",\n@@ -35,13 +35,15 @@\n w4mkmeans <- function(env) {\n # abort if \'env\' is null or is not an environment\n if ( is.null(env) || ! is.environment(env) ) {\n- lapply(w4kmeans_usage(),print)\n- } \n+ lapply(w4mkmeans_usage(),print)\n+ }\n+ # extract parameters from \'env\'\n+ log_action <- env$log_print\n # supply default arguments\n- if ( ! exists("iter.max" , env) ) env$iter.max <- 10\n- if ( ! exists("nstart" , env) ) env$nstart <- 1\n+ if ( ! exists("iter_max" , env) ) env$iter_max <- 20\n+ if ( ! exists("nstart" , env) ) env$nstart <- 20\n if ( ! exists("algorithm" , env) ) env$algorithm <- \'Hartigan-Wong\'\n- if ( ! 
exists("categorical_prefix", env) ) env$categorical_prefix <- \'k\'\n+ if ( ! exists("categorical_prefix", env) ) env$categorical_prefix <- \'c\'\n if ( ! exists("ksamples" , env) ) env$ksamples <- c()\n if ( ! exists("kfeatures" , env) ) env$kfeatures <- c()\n # check mandatory arguments\n@@ -55,11 +57,11 @@\n missing_from_env <- setdiff(expected, (ls(env)))\n if ( length(missing_from_env) > 0 ) {\n print(paste(c(\'expected environment members not found: \', as.character(missing_from_env)), collapse = ", "))\n- lapply(w4kmeans_usage(),print)\n+ lapply(w4mkmeans_usage(),log_action)\n stop("w4mkmeans: contract has been broken")\n- } \n+ }\n # extract parameters from \'env\'\n- failure_action <- env$log_print\n+ log_action <- env$log_print\n scores <- c( "clusterOn\\tk\\ttotalSS\\tbetweenSS\\tproportion" )\n sampleMetadata <- env$sampleMetadata\n featureMetadata <- env$variableMetadata\n@@ -70,39 +72,79 @@\n i <- i[i > 0] # eliminate non-positive integers\n i <- unique(sort(i)) # eliminate redundancy and disorder\n if (length(a)!=length(i)) {\n- failure_action("Some values for \'", what, "\' were skipped where not unique, not positive, or not convertible to an integer.")\n+ log_action("Some values for \'", what, "\' were skipped where not unique, not positive, or not convertible to an integer.")\n }\n return (i) # return results, if any\n }\n ksamples <- positive_ints(env$ksamples , "ksamples")\n kfeatures <- positive_ints(env$kfeatures, "kfeatures")\n \n+ log_action("w4mkmeans: preparing data matrix")\n+ # prepare data matrix (normalize, eliminate zero-variance rows, etc.; no transformation)\n+ dm_en <- new.env()\n+ dm_en$log <- c()\n+ preparation_result <- tryCatchFunc(function(){\n+ dm <- prepare'..b'cores \n+ , sampleMetadata = sampleMetadata\n+ , scores = scores\n )\n )\n }\n- , finally = final(cl)\n+ , finally = {\n+ final(cl)\n+ }\n )\n }\n \n # calculate k-means for features or samples\n # - recall that the dataMatrix has features in rows and samples in 
columns\n # return value:\n-# list(clusters = km$cluster, scores = scores) \n+# list(clusters = km$cluster, scores = scores)\n # arguments:\n # env:\n # environment having dataMatrix\n@@ -179,40 +223,64 @@\n # abort if environment is not as expected\n if ( is.null(env) || ! is.environment(env) ) {\n stop("calc_kmeans_one_dimension_one_k - argument \'env\' is not an environment")\n- } \n+ }\n if ( ! exists("log_print", env) || ! is.function(env$log_print) ) {\n stop("calc_kmeans_one_dimension_one_k - argument \'env\' - environment does not include log_print or it is not a function")\n- } \n+ }\n+ log_action <- env$log_print\n # abort if k is not as expected\n if ( ! is.numeric(k) ) {\n stop(sprintf("calc_kmeans_one_dimension_one_k - expected numeric argument \'k\' but type is %s", typeof(k)))\n- } \n+ }\n k <- as.integer(k)\n # abort if dimension is not as expected\n- if ( ! is.character(dimension) \n+ if ( ! is.character(dimension)\n || ! Reduce( f =`|`, x = sapply(X = c("features","samples"), FUN = `==`, dimension), init = FALSE) ) {\n stop("calc_kmeans_one_dimension_one_k - argument \'dimension\' is neither \'features\' nor \'samples\'")\n- } \n- dm <- env$dataMatrix\n- iter.max <- env$iter.max\n+ }\n+ dm <- env$preparedDataMatrix\n+ iter_max <- env$iter_max\n nstart <- env$nstart\n algorithm <- env$algorithm\n dim_features <- dimension == "features"\n+\n # tryCatchFunc produces a list\n- # On success of expr(), tryCatchFunc produces\n- # list(success TRUE, value = expr(), msg = "")\n- # On failure of expr(), tryCatchFunc produces\n+ # On success of func(), tryCatchFunc produces\n+ # list(success = TRUE, value = func(), msg = "")\n+ # On failure of func(), tryCatchFunc produces\n # list(success = FALSE, value = NA, msg = "the error message")\n- result_list <- tryCatchFunc( expr = function() {\n+ result_list <- tryCatchFunc( func = function() {\n # kmeans clusters the rows; features are the columns of args_env$dataMatrix; samples, the rows\n # - to calculate 
sample-clusters, no transposition is needed because samples are rows\n # - to calculate feature-clusters, transposition is needed so that features will be the rows\n- if ( ! dim_features ) dm <- t(dm)\n- dm <- prepare.data.matrix( x.matrix = dm, data.transformation = function(x) { x } )\n+ if ( ! dim_features ) {\n+ dm <- t(dm)\n+ }\n+\n # need to set.seed to get reproducible results from kmeans\n set.seed(4567)\n+\n # do the k-means clustering\n- km <- kmeans( x = dm, centers = k, iter.max, nstart = nstart, algorithm = algorithm )\n+ withCallingHandlers(\n+ {\n+ km <<- kmeans( x = dm, centers = k, iter.max = iter_max, nstart = nstart, algorithm = algorithm )\n+ }\n+ , warning = function(w) {\n+ lw <- list(w)\n+ smplwrn <- as.character(w[[1]])\n+ log_print(\n+ sprintf( "Warning for %s: center = %d, nstart = %d, iter_max = %d: %s"\n+ , if (dim_features) "features" else "samples"\n+ , k\n+ , nstart\n+ , iter_max\n+ , smplwrn\n+ )\n+ )\n+ }\n+ )\n+\n+ # collect the scores\n scores <-\n sprintf("%s\\t%d\\t%0.5e\\t%0.5e\\t%0.5f"\n , dimension\n@@ -221,8 +289,16 @@\n , km$betweenss\n , km$betweenss/km$totss\n )\n+\n+ # return list of results\n list(clusters = km$cluster, scores = scores)\n })\n+\n+ # return either\n+ # list(success = TRUE, value = func(), msg = "")\n+ # or\n+ # list(success = FALSE, value = NA, msg = "the error message")\n return ( result_list )\n }\n \n+# vim: sw=2 ts=2 et :\n'

diff -r 02cafb660b72 -r c415b7dc6f37 w4mkmeans_wrapper.R
--- a/w4mkmeans_wrapper.R Wed Aug 09 18:06:55 2017 -0400
+++ b/w4mkmeans_wrapper.R Mon Mar 05 12:40:17 2018 -0500

[

@@ -22,7 +22,7 @@
#     slots "${GALAXY_SLOTS:-1}" \
#     variableMetadata_out "$variableMetadata_out" \
#     variable_metadata_path "$variableMetadata_in"
-#
+#
# <inputs>
#   <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: '.', missing: NA, mode: numerical, separator: tab" />
#   <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata columns, separator: tab" />
@@ -30,8 +30,8 @@
#   <param name="categoricalPrefix" label="prefix for cluster names " type="text" value="k" help="Some tools require non-numeric values to discern categorical; e.g., enter 'k' here to prepend 'k' to cluster numbers in the output; default 'k'." />
#   <param name="kfeatures" label="K value(s) for features" type="text" value="0" help="Single or min,max value(s) for K for features (variables), or 0 for none." />
#   <param name="ksamples" label="K value(s) for samples" type="text" value="0" help="Single or min,max value(s) for K for samples, or 0 for none." />
-#   <param name="iter_max" label="Max number of iterations" type="text" value="10" help="The maximum number of iterations allowed; default 10." />
-#   <param name="nstart" label="Number of random sets" type="text" value="1" help="How many random sets should be chosen; default 1." />
+#   <param name="iter_max" label="Max number of iterations" type="text" value="20" help="The maximum number of iterations allowed; default 20." />
+#   <param name="nstart" label="Number of random sets" type="text" value="20" help="How many random sets should be chosen; default 20." />
# <param name="algorithm" label="Algorithm for clustering" type="select" value = "Hartigan-Wong" help="K-means clustering algorithm, default 'Hartigan-Wong'; alternatives 'Lloyd', 'MacQueen'; 'Forgy' is a synonym for 'Lloyd', see stats::kmeans reference for further info and references.">
#   <option value="Hartigan-Wong" selected="TRUE">Hartigan-Wong</option>
#   <option value="Lloyd">Lloyd</option>
@@ -66,7 +66,7 @@
## Computation - source general and module-specific routines
##----------------------------------------------------------

-log_print <- function(x, ...) {
+log_print <- function(x, ...) {
   cat(
     format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")
   , " "
@@ -77,6 +77,15 @@
   )
}

+log_cat <- function(x, ...) {  # write x and any further arguments to stderr, followed by a newline; no timestamp prefix is added
+  cat(
+    c(x, ...)  # combine x and the additional arguments into one vector
+  , "\n"  # terminate the message with a newline
+  , sep=""  # pieces are emitted with no separator between them
+  , file=stderr()  # output goes to standard error, not standard output
+  )
+}
+
# log_print(sprintf("tool_directory is %s", tool_directory))

w4m_general_purpose_routines_path <- r_path("w4m_general_purpose_routines.R")
@@ -85,7 +94,7 @@
   log_print("cannot find file w4m_general_purpose_routines.R")
   q(save = "no", status = 1, runLast = TRUE)
}
-# log_print("sourcing ",w4m_general_purpose_routines_path)
+log_print("sourcing ",w4m_general_purpose_routines_path)
source(w4m_general_purpose_routines_path)
if ( ! exists("prepare.data.matrix") ) {
   log_print("'prepare.data.matrix' was not read from file w4m_general_purpose_routines.R")
@@ -164,7 +173,7 @@
     expr = {
       # read in the sample metadata
       kind_string <- "sample metadata input"
-      smpl_metadata_input_env <-
+      smpl_metadata_input_env <-
         read_data_frame(
                          file_path = env$sample_metadata_path
                        , kind_string = kind_string
@@ -178,7 +187,7 @@

       # read in the variable metadata
       kind_string <- "variable metadata input"
-      vrbl_metadata_input_env <-
+      vrbl_metadata_input_env <-
         read_data_frame(
                          file_path = env$variable_metadata_path
                        , kind_string = kind_string
@@ -218,7 +227,7 @@
}

-read_input_failure_action <- function(x, ...) {
+read_input_failure_action <- function(x, ...) {
   log_print("Failure reading input for '", modNamC, "' Galaxy module call")
   log_print(x, ...)
}
@@ -238,7 +247,7 @@

# Set the handler for R error-handling
options( show.error.messages = F
-       , error = function () {
+       , error = function () {
                    log_print( "Fatal error in '", modNamC, "': ", geterrmessage() )
                    q( "no", 1, F )
                  }
@@ -283,7 +292,7 @@
args_env$data_matrix_path       <- as.character(argVc["data_matrix_path"])
args_env$variable_metadata_path <- as.character(argVc["variable_metadata_path"])
args_env$sample_metadata_path   <- as.character(argVc["sample_metadata_path"])
-
+
# other parameters

# multi-string args - split csv: "1,2,3" -> c("1","2","3")
@@ -305,20 +314,21 @@
for (member in ls(args_env)) {
   value <- get(member, args_env)
   value <- ifelse(length(value) == 1, value, sprintf("c(%s)", paste(value, collapse=", ")))
-
+
   log_print(sprintf("  - %s: %s", member, ifelse( !is.function(value) , value, "function" )))
}
log_print("")

##---------------------------------------------------------
-## Computation - attempt to read input data
+## Computation - attempt to read input data and process
##---------------------------------------------------------
if ( ! read_input_data(args_env, failure_action = read_input_failure_action) ) {
   result <- -1
} else {
-  log_print("Input data was read successfully.")
+  log_print("Input data was read.")
+  # attempt to process the data
   result <- w4mkmeans(env = args_env)
-  log_print("returned from call to w4mkmeans.")
+  log_print("Returned from call to w4mkmeans.")
}

if ( length(result) == 0 ) {
@@ -356,7 +366,6 @@
## Closing
##--------

-
if (!file.exists(sampleMetadata_out)) {
   log_print(sprintf("ERROR %s::w4m_kmeans_wrapper - file '%s' was not created", modNamC, sampleMetadata_out))
}