Previous changeset 6:46a10309dfe2 (2016-06-28) |
Commit message:
Uploaded |
modified:
R/compareSignature_Galaxy.r R/estimateSign_Galaxy.r R/mutationSpectra_Galaxy.r R/somaticSignature_Galaxy.r R/transciptionalStrandBias.r README.txt hg19_listAVDB.txt mm9_listAVDB.txt mutspecAnnot.pl mutspecAnnot.xml mutspecAnnot_wrapper.sh mutspecCompare.xml mutspecCompare_wrapper.sh mutspecFilter.pl mutspecFilter.xml mutspecNmf.xml mutspecSplit.pl mutspecSplit.xml mutspecStat.pl mutspecStat.xml mutspecStat_wrapper.sh tool-data/annovar_index.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml |
added:
Frequency-COSMIC30-Hupki-Others.txt Frequency-COSMIC30-Hupki.txt R/.Rhistory R/chi2test_MutSpecStat_Galaxy.r R/figs_MutSpecStat_Galaxy.r hg18_listAVDB.txt hg38_listAVDB.txt mm10_listAVDB.txt rn6_listAVDB.txt |
removed:
Frequency-COSMICv72-Hupki.txt mutspecNmf_wrapper.sh |
b |
diff -r 46a10309dfe2 -r eda59b985b1c Frequency-COSMIC30-Hupki-Others.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Frequency-COSMIC30-Hupki-Others.txt Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -0,0 +1,97 @@\n+Substitution Type\tTrinucleotide\tSomatic Mutation Type\tSignature 1\tSignature 2\tSignature 3\tSignature 4\tSignature 5\tSignature 6\tSignature 7\tSignature 8\tSignature 9\tSignature 10\tSignature 11\tSignature 12\tSignature 13\tSignature 14\tSignature 15\tSignature 16\tSignature 17\tSignature 18\tSignature 19\tSignature 20\tSignature 21\tSignature 22\tSignature 23\tSignature 24\tSignature 25\tSignature 26\tSignature 27\tSignature 28\tSignature 29\tSignature 30\tSignature 1 MEF\tSignature 2 MEF\tSignature 3 MEF\tSignature 5 MEF\tSignature DMBA\tSignature MNU\tSignature Urethane\n+C>A\tACA\tA[C>A]A\t0.0110983262\t0.0006827082\t0.0221723068\t0.0365\t0.0149415477\t0.0017\t0.0004\t0.0367180038\t0.012\t0.0007\t0.0002\t0.0077\t0.0003347572\t0.0001\t0.0013\t0.0161\t0.0018320192\t0.0505364186\t0.0107\t0.0011799616\t0.0001\t0.0015040704\t0.0004533607\t0.0286459925\t0.009896768\t0.0020397729\t0.0052056269\t0.0013974388\t0.0699819873\t0\t0.000781083\t0.0037229109\t0.0283533537\t0.0003710632\t0.0121138158\t0.0010972115\t0.0044388346\n+C>A\tACC\tA[C>A]C\t0.0091493407\t0.0006191072\t0.0178716754\t0.0309\t0.008960918\t0.0028\t0.0005\t0.0332457222\t0.0067\t0.001\t0.001\t0.0047\t0.0006487361\t0.0042\t0.004\t0.0097\t0.0003422356\t0.0109398248\t0.0074\t0.0022115051\t0.0007\t0.002451011\t0.0003668005\t0.0202146384\t0.0069989288\t0.0014871623\t0.0047382274\t0.0009171877\t0.0551523572\t0\t0.0022972224\t0.0070460466\t0.015676074\t0.001672691\t0.0046539767\t0.0009508214\t0.0004001609\n+C>A\tACG\tA[C>A]G\t0.0014900705\t0.000099279\t0.0021383396\t0.0183\t0.002207846\t0.0005\t0\t0.0025253113\t0.0005\t0.0003\t0\t0.0017\t3.8144594E-005\t0.0005\t0\t0.0022\t1.576225E-006\t0.0022880727\t0.0005\t1.61691E-007\t0\t0\t0\t0.0204789965\t0.001448443\t0.0002839456\t0.0007826979\t0\t0.017846984\t0.0019673\t0.0031701397\t0.0025537924\t0.0272331284\t0.0007812591\t0.0071227909\t0.0003972991\t1.63618104265282E-020\n+C>A\tACT\tA[C>A]T\t0.0062338852\t0.0003238914\t0.0162651456\t0.0243\t0.0092069053\t0.0019\t0.0004\t0.0335985495\t0.0068\t0.0092\t0.0002\t0.0046\t0.0008466585\t0.0296\t0.0057\t0.0088\t0.0031796648\t0.0194240914\t0.0074\t0.00300801\t0.0006\t0.0009224525\t0\t0.0246001454\t0.004966565\t0.0005978656\t0.0027182425\t0.00051341\t0.026804716\t0\t0.0015620621\t0.0104484061\t0.0079498813\t0.0004287024\t0.0083172408\t0.0002539234\t0.0015287933\n+C>A\tCCA\tC[C>A]A\t0.0065958701\t0.000677445\t0.0187817256\t0.0461\t0.0096749043\t0.0101\t0.0012\t0.0317237566\t0.0098\t0.0031\t0.0007\t0.0135\t0.0017100896\t0.0056\t0.0106\t0.0159\t0.0010324302\t0.0887681088\t0.0112\t0.0173771106\t0.002\t0.0045496929\t0.0001647394\t0.0635592838\t0.0148329479\t0.0037058501\t0.0050650733\t0.0011685156\t0.0514102117\t0\t0.0094052338\t0.0035222831\t0.0414403498\t0.0050521059\t0.0224513894\t0.0008540231\t0.0031599431\n+C>A\tCCC\tC[C>A]C\t0.0073423678\t0.000213681\t0.0157604578\t0.0614\t0.0049523006\t0.0241\t0.0006\t0.0255054071\t0.0057\t0.0009\t0.0017\t0.0112\t0.0011592566\t0.0102\t0.0084\t0.01\t0.0004218801\t0.0206413906\t0.0159\t0.036502463\t0.0014\t0.0037644739\t0.0007368748\t0.0337570047\t0.0078221753\t0.0039807234\t0.0022341533\t0.0003342918\t0.0258256508\t0\t0.0031118726\t0.0059886212\t0.0390237536\t0.0007556355\t0.0096511373\t0.0007989301\t0.0042099735\n+C>A\tCCG\tC[C>A]G\t0.0008928404\t6.77046E-006\t0.0019633898\t0.0088\t0.0028006273\t0.0091\t0\t0.0011596243\t0\t0.0007\t0.001\t0.0028\t0.0002441665\t0.0009\t0.0015\t0.0022\t0.0002974628\t0.0171784025\t0.0018\t0.0124825875\t0.0027\t0.0009001633\t0.0001639537\t0.0224289858\t0.0012769767\t0.000811742\t0.0002663122\t0.000053652\t0.0144961833\t0.0022624\t0.0031056722\t0.0027351313\t0.0444175322\t0.000426758\t0.0104292165\t0.0004936949\t0.001053447\n+C>A\tCCT\tC[C>A]T\t0.0071865816\t0.0004163329\t0.0147228611\t0.0432\t0.0110134658\t0.0571\t0.0013\t0.028791173\t0.0091\t0.016\t0.0014\t0.0071\t0.0012567682\t0.1257\t0.0228\t0.0084\t3.1479429E-005\t0.0376769589\t0.0096\t0.1034012262\t0.0056\t0.0044398462\t0.0007227318\t0.0200865154\t0.0125636547\t0.0190384313\t0.00310057\t0.0001866719\t0.0403550741\t0\t0.0069708534\t0.0135089174\t0.0262144919\t0.0012449334\t0.0241632334\t0.0004829446\t0.0011918687\n+C>A\tGCA\tG[C>A]A\t0.008232604\t0.0003520134\t0.0096965397\t0.0376\t0.011892169\t0.0024\t0.0003\t0.0236823289\t0.0118\t0.0014\t0.0004\t0.0062\t0.0001321096\t0.0018\t0.0024\t0.0096\t0.0065354049\t0.1287241581\t0.00'..b'376549\t0.0060513622\t2.28346914246428E-020\t0.0088482202\t0.0009775125\t0.0090219017\n+T>G\tCTG\tC[T>G]G\t0.0015995485\t0.0002282459\t0.0104646545\t0.0046\t0.0071474562\t0.005\t0.0009\t0.0066040463\t0.0126\t0.0037\t0.0011\t0.0045\t4.59873E-005\t0.0005\t0.0007\t0.0067\t0.0134498376\t0.0053119673\t0.0013\t0.0153837761\t0.0004\t0.0041885681\t0\t0.0001139097\t0.0117694925\t0.0054973573\t0.0004032572\t0.0185164816\t0.0021696933\t0.0073775\t0.0039200627\t0.0028967736\t0.007564699\t0.0003827374\t0.0207298099\t1.06168434038267E-020\t0.003023422\n+T>G\tCTT\tC[T>G]T\t0.0027585376\t6.711134E-005\t0.0087243873\t0.0012\t0.0114868115\t0.0086\t0.0013\t0.0048667139\t0.0509\t0.0182\t0.0009\t0.0063\t0.0004637147\t0.0063\t0.0045\t0.0186\t0.2614566141\t0.0023416292\t0.0019\t0.0021853947\t0.0032\t0.0016294096\t0.000210835\t0.0025337183\t0.0085508075\t0.0067887187\t0.0014390961\t0.118967103\t0.0011436633\t0.0091481\t0.0157841091\t0.012870271\t0.0262293556\t0.0019670653\t0.0047092177\t0.0002687511\t0.003570495\n+T>G\tGTA\tG[T>G]A\t0.000099045\t9.5552392E-005\t0.004144488\t0\t0.0016276645\t0\t0\t0.0009745787\t0.0072\t0\t0\t0\t1.8489857E-005\t0\t0\t0\t3.9193821E-005\t0.0013415325\t0\t3.579149E-006\t0\t0\t0\t0\t0.0055462269\t3.7393232E-005\t0\t0\t0\t0.0032461\t0.0015356557\t0.0019915953\t0.0015778632\t0.0008546281\t0.0021439553\t0.0003157141\t0.0037470482\n+T>G\tGTC\tG[T>G]C\t0.0002023656\t4.7002381E-005\t0.0045019853\t0\t0.0003277349\t0.0016\t0\t0.0005248216\t0.0006\t0.002\t0\t0.0004\t9.3373513E-005\t0.0019\t0.0022\t0.0032\t0.0090784615\t9.20431E-007\t0\t0.0028305751\t0.0006\t0\t0\t0.0010736323\t0\t0.002460705\t5.0790565E-005\t0\t0\t0.001869\t0.0007606153\t0.0041116159\t0.0032012845\t0.0008454968\t0.0013465618\t0.0003570074\t0.0027277577\n+T>G\tGTG\tG[T>G]G\t0.0011883532\t0.0001099257\t0.0163914526\t0.0018\t0.0059488798\t0.001\t0.0017\t0.0060877535\t0.005\t0.0009\t0.001\t0.0011\t1.0579194E-005\t0.0012\t0.0037\t0.0008\t0.0047827755\t0.009695\t0.0043\t0.0027332194\t0.0004\t0.0018775892\t0.0001871324\t0.002924697\t0.0086852444\t0.0008172016\t0.0047429186\t0.0042178083\t0.0036405938\t0.0033445\t0.0054079152\t3.70860800590008E-020\t0.002512037\t0.0012687784\t0.0077472793\t0.0015192874\t0.0082395121\n+T>G\tGTT\tG[T>G]T\t0.0008007233\t8.647718E-005\t0.0070672366\t0.0002\t0.0033074666\t0.0035\t0.0009\t0.0054274338\t0.0185\t0.003\t0\t0.0032\t5.3741207E-005\t0.0038\t0.019\t0.0044\t0.0634977566\t0.0028905535\t0.0025\t0.0035585586\t0.0008\t0\t0\t0.0008250983\t0.0027685717\t0.0078335613\t0.002298476\t0.0316489973\t0.0061810238\t0.0056069\t0.0101885638\t0.0066566288\t0.0079578565\t0.0029254388\t0.0018168896\t0.0004726412\t0.0032734591\n+T>G\tTTA\tT[T>G]A\t0.0013975537\t0.000071737\t0.0054271842\t0\t0.0052028744\t0.0009\t0\t0.0017432214\t0.0502\t0.005\t0\t0.0019\t0.0005465818\t0\t0\t0.0068\t0.0001334106\t3.6318404E-005\t0.0032\t2.69147E-007\t0\t0.0016516988\t0\t0\t0.0020814799\t8.534935E-006\t0.0011890225\t0.0093896587\t0\t0.0086563\t0.0008187477\t0.000948451\t0.0005293442\t2.28346914246428E-020\t0.0002407249\t7.25392033429029E-005\t0.0017254818\n+T>G\tTTC\tT[T>G]C\t0.001291737\t1.4281456E-005\t0.0061602504\t0.0003\t0.0051316079\t0.0019\t0.001\t0.0025498383\t0.0081\t0.0092\t0\t0.0027\t0.0002353471\t0.0015\t0.0004\t0.0069\t0.0096133452\t0.003233838\t0.0018\t0.0003772344\t0.0018\t0\t0\t0\t0.0005789788\t0.0027185098\t0.0002802954\t0.030117077\t0\t0.0043282\t0.0015336834\t0.0010831046\t0.0010211012\t2.28346914246428E-020\t0.0013929045\t0.0003332825\t0.0037135711\n+T>G\tTTG\tT[T>G]G\t0.0020310769\t0.0002066152\t0.0110765263\t0.003\t0.0060552541\t0.0011\t0.001\t0.0060303952\t0.0088\t0.0022\t0.0003\t0.0011\t0.000000479\t0.0002\t0.0009\t0.0049\t0.0045224623\t0.0007546018\t0.0011\t0.0005154216\t0.0003\t0.002572752\t0.0002475019\t0.0013605049\t0.0094291959\t0.0013691612\t0.0023530556\t0.0126987508\t0.000353696\t0.0082628\t0.0015835907\t3.70860800590008E-020\t0.003717572\t2.28346914246428E-020\t0.0039677318\t0.0002233266\t0.0022664664\n+T>G\tTTT\tT[T>G]T\t0.0040301282\t2.3598204E-005\t0.0130009842\t0.0011\t0.0133699358\t0.0072\t0.0014\t0.0072239989\t0.0545\t0.0633\t0.0003\t0.0032\t0.0006705883\t0.0025\t0.0033\t0.0163\t0.0580404078\t0.0021264415\t0.0013\t0.0006156567\t0.0003\t0\t0\t6.9515778E-005\t0.0078696716\t0.0025680767\t0.0001395613\t0.2336597833\t0.0061048341\t0\t0.0031734006\t0.001871395\t0.0032018465\t2.28346914246428E-020\t0.0014171235\t0.0003757336\t0.0024231362\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c Frequency-COSMIC30-Hupki.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Frequency-COSMIC30-Hupki.txt Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -0,0 +1,97 @@\n+Substitution Type\tTrinucleotide\tSomatic Mutation Type\tSignature 1\tSignature 2\tSignature 3\tSignature 4\tSignature 5\tSignature 6\tSignature 7\tSignature 8\tSignature 9\tSignature 10\tSignature 11\tSignature 12\tSignature 13\tSignature 14\tSignature 15\tSignature 16\tSignature 17\tSignature 18\tSignature 19\tSignature 20\tSignature 21\tSignature 22\tSignature 23\tSignature 24\tSignature 25\tSignature 26\tSignature 27\tSignature 28\tSignature 29\tSignature 30\tSignature 1 MEF\tSignature 2 MEF\tSignature 3 MEF\tSignature 5 MEF\n+C>A\tACA\tA[C>A]A\t0.0110983262\t0.0006827082\t0.0221723068\t0.0365\t0.0149415477\t0.0017\t0.0004\t0.0367180038\t0.012\t0.0007\t0.0002\t0.0077\t0.0003347572\t0.0001\t0.0013\t0.0161\t0.0018320192\t0.0505364186\t0.0107\t0.0011799616\t0.0001\t0.0015040704\t0.0004533607\t0.0286459925\t0.009896768\t0.0020397729\t0.0052056269\t0.0013974388\t0.0699819873\t0\t0.000781083\t0.0037229109\t0.0283533537\t0.0003710632\n+C>A\tACC\tA[C>A]C\t0.0091493407\t0.0006191072\t0.0178716754\t0.0309\t0.008960918\t0.0028\t0.0005\t0.0332457222\t0.0067\t0.001\t0.001\t0.0047\t0.0006487361\t0.0042\t0.004\t0.0097\t0.0003422356\t0.0109398248\t0.0074\t0.0022115051\t0.0007\t0.002451011\t0.0003668005\t0.0202146384\t0.0069989288\t0.0014871623\t0.0047382274\t0.0009171877\t0.0551523572\t0\t0.0022972224\t0.0070460466\t0.015676074\t0.001672691\n+C>A\tACG\tA[C>A]G\t0.0014900705\t0.000099279\t0.0021383396\t0.0183\t0.002207846\t0.0005\t0\t0.0025253113\t0.0005\t0.0003\t0\t0.0017\t3.8144594E-005\t0.0005\t0\t0.0022\t1.576225E-006\t0.0022880727\t0.0005\t1.61691E-007\t0\t0\t0\t0.0204789965\t0.001448443\t0.0002839456\t0.0007826979\t0\t0.017846984\t0.0019673\t0.0031701397\t0.0025537924\t0.0272331284\t0.0007812591\n+C>A\tACT\tA[C>A]T\t0.0062338852\t0.0003238914\t0.0162651456\t0.0243\t0.0092069053\t0.0019\t0.0004\t0.0335985495\t0.0068\t0.0092\t0.0002\t0.0046\t0.0008466585\t0.0296\t0.0057\t0.0088\t0.0031796648\t0.0194240914\t0.0074\t0.00300801\t0.0006\t0.0009224525\t0\t0.0246001454\t0.004966565\t0.0005978656\t0.0027182425\t0.00051341\t0.026804716\t0\t0.0015620621\t0.0104484061\t0.0079498813\t0.0004287024\n+C>A\tCCA\tC[C>A]A\t0.0065958701\t0.000677445\t0.0187817256\t0.0461\t0.0096749043\t0.0101\t0.0012\t0.0317237566\t0.0098\t0.0031\t0.0007\t0.0135\t0.0017100896\t0.0056\t0.0106\t0.0159\t0.0010324302\t0.0887681088\t0.0112\t0.0173771106\t0.002\t0.0045496929\t0.0001647394\t0.0635592838\t0.0148329479\t0.0037058501\t0.0050650733\t0.0011685156\t0.0514102117\t0\t0.0094052338\t0.0035222831\t0.0414403498\t0.0050521059\n+C>A\tCCC\tC[C>A]C\t0.0073423678\t0.000213681\t0.0157604578\t0.0614\t0.0049523006\t0.0241\t0.0006\t0.0255054071\t0.0057\t0.0009\t0.0017\t0.0112\t0.0011592566\t0.0102\t0.0084\t0.01\t0.0004218801\t0.0206413906\t0.0159\t0.036502463\t0.0014\t0.0037644739\t0.0007368748\t0.0337570047\t0.0078221753\t0.0039807234\t0.0022341533\t0.0003342918\t0.0258256508\t0\t0.0031118726\t0.0059886212\t0.0390237536\t0.0007556355\n+C>A\tCCG\tC[C>A]G\t0.0008928404\t6.77046E-006\t0.0019633898\t0.0088\t0.0028006273\t0.0091\t0\t0.0011596243\t0\t0.0007\t0.001\t0.0028\t0.0002441665\t0.0009\t0.0015\t0.0022\t0.0002974628\t0.0171784025\t0.0018\t0.0124825875\t0.0027\t0.0009001633\t0.0001639537\t0.0224289858\t0.0012769767\t0.000811742\t0.0002663122\t0.000053652\t0.0144961833\t0.0022624\t0.0031056722\t0.0027351313\t0.0444175322\t0.000426758\n+C>A\tCCT\tC[C>A]T\t0.0071865816\t0.0004163329\t0.0147228611\t0.0432\t0.0110134658\t0.0571\t0.0013\t0.028791173\t0.0091\t0.016\t0.0014\t0.0071\t0.0012567682\t0.1257\t0.0228\t0.0084\t3.1479429E-005\t0.0376769589\t0.0096\t0.1034012262\t0.0056\t0.0044398462\t0.0007227318\t0.0200865154\t0.0125636547\t0.0190384313\t0.00310057\t0.0001866719\t0.0403550741\t0\t0.0069708534\t0.0135089174\t0.0262144919\t0.0012449334\n+C>A\tGCA\tG[C>A]A\t0.008232604\t0.0003520134\t0.0096965397\t0.0376\t0.011892169\t0.0024\t0.0003\t0.0236823289\t0.0118\t0.0014\t0.0004\t0.0062\t0.0001321096\t0.0018\t0.0024\t0.0096\t0.0065354049\t0.1287241581\t0.0032\t0.0011161238\t0.0001\t0.0012983702\t0.0003499075\t0.0546764487\t0.0134652951\t0.0013753118\t0.0107558719\t0.0021366291\t0.0780466101\t0.008853\t0.0038823793\t0.0123398664\t0.0263977944\t0.0008119624\n+C>A\tGCC\tG[C>A]C\t0.0057580214\t0.0001338169\t0.0108433411\t0.0399\t0.0092478575\t0.0058\t0.0001\t0.0158218964\t0.0092\t0.0022\t0.001\t0.0056\t0.000754244\t0.0114\t0.0099\t0.0094\t0.001293804\t0.016'..b'0.0095898419\t0.0011168548\t0.0046233\t3.27595444415171E-020\t3.70860800590008E-020\t0.0005262194\t0.0004210208\n+T>G\tCTC\tC[T>G]C\t0.0020985024\t2.2095087E-005\t0.0058242955\t0.0013\t0.0034785021\t0.004\t0.0008\t0.0020962106\t0.0064\t0.0018\t0.0001\t0.0049\t0.0001476248\t0.002\t0.0019\t0.0082\t0.0198142936\t0.0020156969\t0.0011\t0.0084764945\t0.0005\t0.0010645836\t0.0001901443\t0.0021854373\t0.0028864543\t0.0039194551\t0.000381245\t0.0339132237\t0\t0.0060004\t0.0039803758\t0.0025376549\t0.0060513622\t2.28346914246428E-020\n+T>G\tCTG\tC[T>G]G\t0.0015995485\t0.0002282459\t0.0104646545\t0.0046\t0.0071474562\t0.005\t0.0009\t0.0066040463\t0.0126\t0.0037\t0.0011\t0.0045\t4.59873E-005\t0.0005\t0.0007\t0.0067\t0.0134498376\t0.0053119673\t0.0013\t0.0153837761\t0.0004\t0.0041885681\t0\t0.0001139097\t0.0117694925\t0.0054973573\t0.0004032572\t0.0185164816\t0.0021696933\t0.0073775\t0.0039200627\t0.0028967736\t0.007564699\t0.0003827374\n+T>G\tCTT\tC[T>G]T\t0.0027585376\t6.711134E-005\t0.0087243873\t0.0012\t0.0114868115\t0.0086\t0.0013\t0.0048667139\t0.0509\t0.0182\t0.0009\t0.0063\t0.0004637147\t0.0063\t0.0045\t0.0186\t0.2614566141\t0.0023416292\t0.0019\t0.0021853947\t0.0032\t0.0016294096\t0.000210835\t0.0025337183\t0.0085508075\t0.0067887187\t0.0014390961\t0.118967103\t0.0011436633\t0.0091481\t0.0157841091\t0.012870271\t0.0262293556\t0.0019670653\n+T>G\tGTA\tG[T>G]A\t0.000099045\t9.5552392E-005\t0.004144488\t0\t0.0016276645\t0\t0\t0.0009745787\t0.0072\t0\t0\t0\t1.8489857E-005\t0\t0\t0\t3.9193821E-005\t0.0013415325\t0\t3.579149E-006\t0\t0\t0\t0\t0.0055462269\t3.7393232E-005\t0\t0\t0\t0.0032461\t0.0015356557\t0.0019915953\t0.0015778632\t0.0008546281\n+T>G\tGTC\tG[T>G]C\t0.0002023656\t4.7002381E-005\t0.0045019853\t0\t0.0003277349\t0.0016\t0\t0.0005248216\t0.0006\t0.002\t0\t0.0004\t9.3373513E-005\t0.0019\t0.0022\t0.0032\t0.0090784615\t9.20431E-007\t0\t0.0028305751\t0.0006\t0\t0\t0.0010736323\t0\t0.002460705\t5.0790565E-005\t0\t0\t0.001869\t0.0007606153\t0.0041116159\t0.0032012845\t0.0008454968\n+T>G\tGTG\tG[T>G]G\t0.0011883532\t0.0001099257\t0.0163914526\t0.0018\t0.0059488798\t0.001\t0.0017\t0.0060877535\t0.005\t0.0009\t0.001\t0.0011\t1.0579194E-005\t0.0012\t0.0037\t0.0008\t0.0047827755\t0.009695\t0.0043\t0.0027332194\t0.0004\t0.0018775892\t0.0001871324\t0.002924697\t0.0086852444\t0.0008172016\t0.0047429186\t0.0042178083\t0.0036405938\t0.0033445\t0.0054079152\t3.70860800590008E-020\t0.002512037\t0.0012687784\n+T>G\tGTT\tG[T>G]T\t0.0008007233\t8.647718E-005\t0.0070672366\t0.0002\t0.0033074666\t0.0035\t0.0009\t0.0054274338\t0.0185\t0.003\t0\t0.0032\t5.3741207E-005\t0.0038\t0.019\t0.0044\t0.0634977566\t0.0028905535\t0.0025\t0.0035585586\t0.0008\t0\t0\t0.0008250983\t0.0027685717\t0.0078335613\t0.002298476\t0.0316489973\t0.0061810238\t0.0056069\t0.0101885638\t0.0066566288\t0.0079578565\t0.0029254388\n+T>G\tTTA\tT[T>G]A\t0.0013975537\t0.000071737\t0.0054271842\t0\t0.0052028744\t0.0009\t0\t0.0017432214\t0.0502\t0.005\t0\t0.0019\t0.0005465818\t0\t0\t0.0068\t0.0001334106\t3.6318404E-005\t0.0032\t2.69147E-007\t0\t0.0016516988\t0\t0\t0.0020814799\t8.534935E-006\t0.0011890225\t0.0093896587\t0\t0.0086563\t0.0008187477\t0.000948451\t0.0005293442\t2.28346914246428E-020\n+T>G\tTTC\tT[T>G]C\t0.001291737\t1.4281456E-005\t0.0061602504\t0.0003\t0.0051316079\t0.0019\t0.001\t0.0025498383\t0.0081\t0.0092\t0\t0.0027\t0.0002353471\t0.0015\t0.0004\t0.0069\t0.0096133452\t0.003233838\t0.0018\t0.0003772344\t0.0018\t0\t0\t0\t0.0005789788\t0.0027185098\t0.0002802954\t0.030117077\t0\t0.0043282\t0.0015336834\t0.0010831046\t0.0010211012\t2.28346914246428E-020\n+T>G\tTTG\tT[T>G]G\t0.0020310769\t0.0002066152\t0.0110765263\t0.003\t0.0060552541\t0.0011\t0.001\t0.0060303952\t0.0088\t0.0022\t0.0003\t0.0011\t0.000000479\t0.0002\t0.0009\t0.0049\t0.0045224623\t0.0007546018\t0.0011\t0.0005154216\t0.0003\t0.002572752\t0.0002475019\t0.0013605049\t0.0094291959\t0.0013691612\t0.0023530556\t0.0126987508\t0.000353696\t0.0082628\t0.0015835907\t3.70860800590008E-020\t0.003717572\t2.28346914246428E-020\n+T>G\tTTT\tT[T>G]T\t0.0040301282\t2.3598204E-005\t0.0130009842\t0.0011\t0.0133699358\t0.0072\t0.0014\t0.0072239989\t0.0545\t0.0633\t0.0003\t0.0032\t0.0006705883\t0.0025\t0.0033\t0.0163\t0.0580404078\t0.0021264415\t0.0013\t0.0006156567\t0.0003\t0\t0\t6.9515778E-005\t0.0078696716\t0.0025680767\t0.0001395613\t0.2336597833\t0.0061048341\t0\t0.0031734006\t0.001871395\t0.0032018465\t2.28346914246428E-020\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c Frequency-COSMICv72-Hupki.txt --- a/Frequency-COSMICv72-Hupki.txt Tue Jun 28 02:59:32 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,97 +0,0 @@\n-Substitution Type\tTrinucleotide\tSomatic Mutation Type\tSignature 1\tSignature 2\tSignature 3\tSignature 4\tSignature 5\tSignature 6\tSignature 7\tSignature 8\tSignature 9\tSignature 10\tSignature 11\tSignature 12\tSignature 13\tSignature 14\tSignature 15\tSignature 16\tSignature 17\tSignature 18\tSignature 19\tSignature 20\tSignature 21\tSignature 22\tSignature 23\tSignature 24\tSignature 25\tSignature 26\tSignature 27\tSignature 28\tSignature 29\tSignature 30\tSignature 1 MEF\tSignature 2 MEF\tSignature 3 MEF\tSignature 5 MEF\r\n-C>A\tACA\tA[C>A]A\t0.0110983262\t0.0006827082\t0.0221723068\t0.0365\t0.0149415477\t0.0017\t0.0004\t0.0367180038\t0.012\t0.0007\t0.0002\t0.0077\t0.0003347572\t0.0001\t0.0013\t0.0161\t0.0018320192\t0.0505364186\t0.0107\t0.0011799616\t0.0001\t0.0015040704\t0.0004533607\t0.0286459925\t0.009896768\t0.0020397729\t0.0052056269\t0.0013974388\t0.0699819873\t0\t0.000781083\t0.0037229109\t0.0283533537\t0.0003710632\r\n-C>A\tACC\tA[C>A]C\t0.0091493407\t0.0006191072\t0.0178716754\t0.0309\t0.008960918\t0.0028\t0.0005\t0.0332457222\t0.0067\t0.001\t0.001\t0.0047\t0.0006487361\t0.0042\t0.004\t0.0097\t0.0003422356\t0.0109398248\t0.0074\t0.0022115051\t0.0007\t0.002451011\t0.0003668005\t0.0202146384\t0.0069989288\t0.0014871623\t0.0047382274\t0.0009171877\t0.0551523572\t0\t0.0022972224\t0.0070460466\t0.015676074\t0.001672691\r\n-C>A\tACG\tA[C>A]G\t0.0014900705\t0.000099279\t0.0021383396\t0.0183\t0.002207846\t0.0005\t0\t0.0025253113\t0.0005\t0.0003\t0\t0.0017\t3.8144594E-005\t0.0005\t0\t0.0022\t1.576225E-006\t0.0022880727\t0.0005\t1.61691E-007\t0\t0\t0\t0.0204789965\t0.001448443\t0.0002839456\t0.0007826979\t0\t0.017846984\t0.0019673\t0.0031701397\t0.0025537924\t0.0272331284\t0.0007812591\r\n-C>A\tACT\tA[C>A]T\t0.0062338852\t0.0003238914\t0.0162651456\t0.0243\t0.0092069053\t0.0019\t0.0004\t0.0335985495\t0.0068\t0.0092\t0.0002\t0.0046\t0.0008466585\t0.0296\t0.0057\t0.0088\t0.0031796648\t0.0194240914\t0.0074\t0.00300801\t0.0006\t0.0009224525\t0\t0.0246001454\t0.004966565\t0.0005978656\t0.0027182425\t0.00051341\t0.026804716\t0\t0.0015620621\t0.0104484061\t0.0079498813\t0.0004287024\r\n-C>A\tCCA\tC[C>A]A\t0.0065958701\t0.000677445\t0.0187817256\t0.0461\t0.0096749043\t0.0101\t0.0012\t0.0317237566\t0.0098\t0.0031\t0.0007\t0.0135\t0.0017100896\t0.0056\t0.0106\t0.0159\t0.0010324302\t0.0887681088\t0.0112\t0.0173771106\t0.002\t0.0045496929\t0.0001647394\t0.0635592838\t0.0148329479\t0.0037058501\t0.0050650733\t0.0011685156\t0.0514102117\t0\t0.0094052338\t0.0035222831\t0.0414403498\t0.0050521059\r\n-C>A\tCCC\tC[C>A]C\t0.0073423678\t0.000213681\t0.0157604578\t0.0614\t0.0049523006\t0.0241\t0.0006\t0.0255054071\t0.0057\t0.0009\t0.0017\t0.0112\t0.0011592566\t0.0102\t0.0084\t0.01\t0.0004218801\t0.0206413906\t0.0159\t0.036502463\t0.0014\t0.0037644739\t0.0007368748\t0.0337570047\t0.0078221753\t0.0039807234\t0.0022341533\t0.0003342918\t0.0258256508\t0\t0.0031118726\t0.0059886212\t0.0390237536\t0.0007556355\r\n-C>A\tCCG\tC[C>A]G\t0.0008928404\t6.77046E-006\t0.0019633898\t0.0088\t0.0028006273\t0.0091\t0\t0.0011596243\t0\t0.0007\t0.001\t0.0028\t0.0002441665\t0.0009\t0.0015\t0.0022\t0.0002974628\t0.0171784025\t0.0018\t0.0124825875\t0.0027\t0.0009001633\t0.0001639537\t0.0224289858\t0.0012769767\t0.000811742\t0.0002663122\t0.000053652\t0.0144961833\t0.0022624\t0.0031056722\t0.0027351313\t0.0444175322\t0.000426758\r\n-C>A\tCCT\tC[C>A]T\t0.0071865816\t0.0004163329\t0.0147228611\t0.0432\t0.0110134658\t0.0571\t0.0013\t0.028791173\t0.0091\t0.016\t0.0014\t0.0071\t0.0012567682\t0.1257\t0.0228\t0.0084\t3.1479429E-005\t0.0376769589\t0.0096\t0.1034012262\t0.0056\t0.0044398462\t0.0007227318\t0.0200865154\t0.0125636547\t0.0190384313\t0.00310057\t0.0001866719\t0.0403550741\t0\t0.0069708534\t0.0135089174\t0.0262144919\t0.0012449334\r\n-C>A\tGCA\tG[C>A]A\t0.008232604\t0.0003520134\t0.0096965397\t0.0376\t0.011892169\t0.0024\t0.0003\t0.0236823289\t0.0118\t0.0014\t0.0004\t0.0062\t0.0001321096\t0.0018\t0.0024\t0.0096\t0.0065354049\t0.1287241581\t0.0032\t0.0011161238\t0.0001\t0.0012983702\t0.0003499075\t0.0546764487\t0.0134652951\t0.0013753118\t0.0107558719\t0.0021366291\t0.0780466101\t0.008853\t0.0038823793\t0.0123398664\t0.0263977944\t0.0008119624\r\n-C>A\tGCC\tG[C>A]C\t0.0057580214\t0.0001338169\t0.0108433411\t0.0399\t0.0092478575\t0.0058\t0.0001\t0.0158218964\t0.0092\t0.0022\t0.001\t0.0056\t0.000754244\t0.0114\t0.0099\t0.0094\t0.00129'..b'\t0.0011168548\t0.0046233\t3.27595444415171E-020\t3.70860800590008E-020\t0.0005262194\t0.0004210208\r\n-T>G\tCTC\tC[T>G]C\t0.0020985024\t2.2095087E-005\t0.0058242955\t0.0013\t0.0034785021\t0.004\t0.0008\t0.0020962106\t0.0064\t0.0018\t0.0001\t0.0049\t0.0001476248\t0.002\t0.0019\t0.0082\t0.0198142936\t0.0020156969\t0.0011\t0.0084764945\t0.0005\t0.0010645836\t0.0001901443\t0.0021854373\t0.0028864543\t0.0039194551\t0.000381245\t0.0339132237\t0\t0.0060004\t0.0039803758\t0.0025376549\t0.0060513622\t2.28346914246428E-020\r\n-T>G\tCTG\tC[T>G]G\t0.0015995485\t0.0002282459\t0.0104646545\t0.0046\t0.0071474562\t0.005\t0.0009\t0.0066040463\t0.0126\t0.0037\t0.0011\t0.0045\t4.59873E-005\t0.0005\t0.0007\t0.0067\t0.0134498376\t0.0053119673\t0.0013\t0.0153837761\t0.0004\t0.0041885681\t0\t0.0001139097\t0.0117694925\t0.0054973573\t0.0004032572\t0.0185164816\t0.0021696933\t0.0073775\t0.0039200627\t0.0028967736\t0.007564699\t0.0003827374\r\n-T>G\tCTT\tC[T>G]T\t0.0027585376\t6.711134E-005\t0.0087243873\t0.0012\t0.0114868115\t0.0086\t0.0013\t0.0048667139\t0.0509\t0.0182\t0.0009\t0.0063\t0.0004637147\t0.0063\t0.0045\t0.0186\t0.2614566141\t0.0023416292\t0.0019\t0.0021853947\t0.0032\t0.0016294096\t0.000210835\t0.0025337183\t0.0085508075\t0.0067887187\t0.0014390961\t0.118967103\t0.0011436633\t0.0091481\t0.0157841091\t0.012870271\t0.0262293556\t0.0019670653\r\n-T>G\tGTA\tG[T>G]A\t0.000099045\t9.5552392E-005\t0.004144488\t0\t0.0016276645\t0\t0\t0.0009745787\t0.0072\t0\t0\t0\t1.8489857E-005\t0\t0\t0\t3.9193821E-005\t0.0013415325\t0\t3.579149E-006\t0\t0\t0\t0\t0.0055462269\t3.7393232E-005\t0\t0\t0\t0.0032461\t0.0015356557\t0.0019915953\t0.0015778632\t0.0008546281\r\n-T>G\tGTC\tG[T>G]C\t0.0002023656\t4.7002381E-005\t0.0045019853\t0\t0.0003277349\t0.0016\t0\t0.0005248216\t0.0006\t0.002\t0\t0.0004\t9.3373513E-005\t0.0019\t0.0022\t0.0032\t0.0090784615\t9.20431E-007\t0\t0.0028305751\t0.0006\t0\t0\t0.0010736323\t0\t0.002460705\t5.0790565E-005\t0\t0\t0.001869\t0.0007606153\t0.0041116159\t0.0032012845\t0.0008454968\r\n-T>G\tGTG\tG[T>G]G\t0.0011883532\t0.0001099257\t0.0163914526\t0.0018\t0.0059488798\t0.001\t0.0017\t0.0060877535\t0.005\t0.0009\t0.001\t0.0011\t1.0579194E-005\t0.0012\t0.0037\t0.0008\t0.0047827755\t0.009695\t0.0043\t0.0027332194\t0.0004\t0.0018775892\t0.0001871324\t0.002924697\t0.0086852444\t0.0008172016\t0.0047429186\t0.0042178083\t0.0036405938\t0.0033445\t0.0054079152\t3.70860800590008E-020\t0.002512037\t0.0012687784\r\n-T>G\tGTT\tG[T>G]T\t0.0008007233\t8.647718E-005\t0.0070672366\t0.0002\t0.0033074666\t0.0035\t0.0009\t0.0054274338\t0.0185\t0.003\t0\t0.0032\t5.3741207E-005\t0.0038\t0.019\t0.0044\t0.0634977566\t0.0028905535\t0.0025\t0.0035585586\t0.0008\t0\t0\t0.0008250983\t0.0027685717\t0.0078335613\t0.002298476\t0.0316489973\t0.0061810238\t0.0056069\t0.0101885638\t0.0066566288\t0.0079578565\t0.0029254388\r\n-T>G\tTTA\tT[T>G]A\t0.0013975537\t0.000071737\t0.0054271842\t0\t0.0052028744\t0.0009\t0\t0.0017432214\t0.0502\t0.005\t0\t0.0019\t0.0005465818\t0\t0\t0.0068\t0.0001334106\t3.6318404E-005\t0.0032\t2.69147E-007\t0\t0.0016516988\t0\t0\t0.0020814799\t8.534935E-006\t0.0011890225\t0.0093896587\t0\t0.0086563\t0.0008187477\t0.000948451\t0.0005293442\t2.28346914246428E-020\r\n-T>G\tTTC\tT[T>G]C\t0.001291737\t1.4281456E-005\t0.0061602504\t0.0003\t0.0051316079\t0.0019\t0.001\t0.0025498383\t0.0081\t0.0092\t0\t0.0027\t0.0002353471\t0.0015\t0.0004\t0.0069\t0.0096133452\t0.003233838\t0.0018\t0.0003772344\t0.0018\t0\t0\t0\t0.0005789788\t0.0027185098\t0.0002802954\t0.030117077\t0\t0.0043282\t0.0015336834\t0.0010831046\t0.0010211012\t2.28346914246428E-020\r\n-T>G\tTTG\tT[T>G]G\t0.0020310769\t0.0002066152\t0.0110765263\t0.003\t0.0060552541\t0.0011\t0.001\t0.0060303952\t0.0088\t0.0022\t0.0003\t0.0011\t0.000000479\t0.0002\t0.0009\t0.0049\t0.0045224623\t0.0007546018\t0.0011\t0.0005154216\t0.0003\t0.002572752\t0.0002475019\t0.0013605049\t0.0094291959\t0.0013691612\t0.0023530556\t0.0126987508\t0.000353696\t0.0082628\t0.0015835907\t3.70860800590008E-020\t0.003717572\t2.28346914246428E-020\r\n-T>G\tTTT\tT[T>G]T\t0.0040301282\t2.3598204E-005\t0.0130009842\t0.0011\t0.0133699358\t0.0072\t0.0014\t0.0072239989\t0.0545\t0.0633\t0.0003\t0.0032\t0.0006705883\t0.0025\t0.0033\t0.0163\t0.0580404078\t0.0021264415\t0.0013\t0.0006156567\t0.0003\t0\t0\t6.9515778E-005\t0.0078696716\t0.0025680767\t0.0001395613\t0.2336597833\t0.0061048341\t0\t0.0031734006\t0.001871395\t0.0032018465\t2.28346914246428E-020\r\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c R/chi2test_MutSpecStat_Galaxy.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/R/chi2test_MutSpecStat_Galaxy.r Mon Mar 13 08:21:19 2017 -0400 |
[ |
@@ -0,0 +1,173 @@ +#!/usr/bin/Rscript + +#---------------------------------------# +# Author: Maude # +# Script: chi2test_MutSpecStat_Galaxy.r # +# Last update: 18/10/16 # +#---------------------------------------# + + +######################################################################################################################################### +# Calculate the chi2 test for the strand bias # +######################################################################################################################################### + +#------------------------------------------------------------------------------- +# Load library for recovering the arguments +#------------------------------------------------------------------------------- +suppressMessages(suppressWarnings(require("getopt"))) + + +#------------------------------------------------------------------------------- +# Recover the arguments +#------------------------------------------------------------------------------- +spec = matrix(c( + "folderChi2", "folderChi2", 1, "character", + "help", "h", 0, "logical" + ), + byrow=TRUE, ncol=4 + ) + +opt = getopt(spec) + +# No argument is pass to the command line +if(length(opt) == 1) +{ + cat(paste("Usage:\n chi2test_MutSpecStat_Galaxy.r --folderChi2 <path_to_folder> \n",sep="")) + q(status=1) +} + +# Help was asked for. +if ( !is.null(opt$help) ) +{ + # print a friendly message and exit with a non-zero error code + cat(paste("Usage:\n chi2test_MutSpecStat_Galaxy.r --folderChi2 <path_to_folder> \n",sep="")) + q(status=1) +} + + + + + +## Load the data. There is one column with the mutation type and the sample name but it's just for knowing what is corresponding to each line. The two columns with the number of variant per strand would be sufficient. +inputChi2 <- paste0(opt$folderChi2, "/Input_chi2_strandBias.txt") +strBias<-read.delim(inputChi2, dec=".") + +# Chi2 +pValChi2 <- c() # First I create an empty vector and then I apply a for on the data load +pValChi2_round <- c() # Empty vector with the rounded p-values +confInt <- c() # Empty vector for the confident interval +proportion <- c() # Empty vector for the proportion of NonTr compared to the (NonTr+Tr) +sampleSize <- c() # Empty vector for the count of samples in NonTr and Tr +# For Pool_Data save the p-values in a different vector for not having them for the FDR +pValChi2_PoolData <- c() +pValChi2_PoolData_Round <- c() + +j = 1 # Timer for pValChi2_PoolData vector +k = 1 # Timer for pValChi2 + +for(i in 1:nrow(strBias)) +{ + if(! sum(strBias[i,2:3]) == 0) + { + # For Pool_Data + if(strBias[i,1] == "Pool_Data") + { + pValChi2_PoolData[j] <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$p.value + j <- j+1 + } + # For the other sample(s) + else + { + # Calculate the p-value + pValChi2[k] <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$p.value + k <- k+1 + } + + # Calculate the confidence interval + temp <- prop.test(x=strBias[i,2],n=sum(strBias[i,2:3]),p=0.5)$conf.int + confInt[i] <- paste0("[", round(temp[1],2), "-", round(temp[2],2), "]") # Same as paste(sep="") + + # Save the proportion + proportion[i] <- strBias[i,2] / sum(strBias[i,2:3]) + + # Save the sample size (count on NonTr and Tr) + sampleSize[i] <- paste(strBias[i,2], strBias[i,3], sep="-") + } else + { + if(strBias[i,1] == "Pool_Data") + { + pValChi2_PoolData[j] <- NA + pValChi2_PoolData_Round[j] <- NA + j <- j+1 + } + else + { + # Not enough effective for the test + pValChi2[k] <- NA + pValChi2_round[k] <- NA + k <- k+1 + } + + confInt[i] <- NA + proportion[i] <- NA + sampleSize[i] <- NA + } +} + +# Adjust with FDR +FDR<-p.adjust(pValChi2, method="BH") + +# Rount the p-value +for(i in 1:nrow(strBias)) +{ + pValChi2_round[i] <- format(pValChi2[i], scientific=T, digits=3) +} + +# The option for the pool is specified +if(!is.null(pValChi2_PoolData)) +{ + # Round the p-value for Pool_Data + for(i in 1:6) + { + pValChi2_PoolData_Round[i] <- format(pValChi2_PoolData[i], scientific=T, digits=3) + } +} + + +# I create a dataframe for add what I want +outputChi2 <- data.frame(round(strBias[,2]/strBias[,3], digits=2), sampleSize, round(proportion, 3), confInt) +outputChi2$Mut.type <- strBias$Alteration +outputChi2$SampleName <- strBias$SampleName +colnames(outputChi2)[1:6]<-c("Strand_Bias", "NonTr-Tr", "Proportion", "Confidence Interval", "Mutation_Type", "SampleName") + +# Transform the data frame into a matrix for adding the p-value for the samples and Pool_Data +matrix <- as.matrix(outputChi2) +tempColPValFDR <- matrix(, nrow=length(sampleSize), ncol = 2) # Create an empty matrix with 2 columns for adding the p-value and the FDR +matrix <- cbind(matrix, tempColPValFDR) +j = 1 # Timer for all the sample +k = 1 # Timer for Pool_Data +for(i in 1:nrow(matrix)) +{ + if(matrix[i,6] == "Pool_Data") + { + matrix[i,7] <- pValChi2_PoolData_Round[k] + matrix[i,8] <- "NA" # No FDR for Pool_Data + k = k+1 + } + else + { + matrix[i,7] <- pValChi2_round[j] + matrix[i,8] <- round(FDR[j], 3) + j = j+1 + } +} + +# Reorder the columns +matrix <- cbind(matrix[,1:3], matrix[,7], matrix[,8], matrix[,4:6]) +colnames(matrix)[4] <- "P-val-Chi2" +colnames(matrix)[5] <- "FDR" + +# Export the file +# dec=".": Set the separator for the decimal by "." +outputFileChi2 <- paste0(opt$folderChi2, "/Output_chi2_strandBias.txt") +write.table(matrix,file=outputFileChi2,quote = FALSE,sep="\t",row.names = FALSE,dec=".") |
b |
diff -r 46a10309dfe2 -r eda59b985b1c R/compareSignature_Galaxy.r --- a/R/compareSignature_Galaxy.r Tue Jun 28 02:59:32 2016 -0400 +++ b/R/compareSignature_Galaxy.r Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -1,125 +1,125 @@\n-#!/usr/bin/Rscript\r\n-\r\n-#-----------------------------------#\r\n-# Author: Maude #\r\n-# Script: compareSignature_Galaxy.r #\r\n-# Last update: 29/10/15 #\r\n-#-----------------------------------#\r\n-\r\n-\r\n-#########################################################################################################################################\r\n-# Compare new sigantures with published one using the cosine similarity method #\r\n-#########################################################################################################################################\r\n-\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Print a usage message if there is no argument pass to the command line\r\n-#-------------------------------------------------------------------------------\r\n-args <- commandArgs(TRUE)\r\n-usage <- function() \r\n-{\r\n- msg <- paste0(\'Usage:\\n\',\r\n- \' compareSignature_Galaxy.r Published_Signature New_Signature Output_Folder\\n\'\r\n- )\r\n- cat(msg, \'\\n\', file="/dev/stderr")\r\n- quit(status=1)\r\n-}\r\n-\r\n-input = args[length(args)]\r\n-\r\n-if (length(args) == 0) { usage() }\r\n-\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Load library\r\n-#-------------------------------------------------------------------------------\r\n-suppressMessages(suppressWarnings(library(lsa)))\r\n-suppressMessages(suppressWarnings(library(ggplot2)))\r\n-suppressMessages(suppressWarnings(library(reshape)))\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Recover the arguments\r\n-#-------------------------------------------------------------------------------\r\n-published_signature_file <- args[1] # The matrix with the published signatures\r\n-unknown_signature_file <- args[2] # The matrix W from NMF from which we want to compare the signatures\r\n-dir\t\t\t\t\t\t\t\t <- args[3] # html directory\r\n-\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Set the variables\r\n-#-------------------------------------------------------------------------------\r\n-# Create the outputs\r\n-output_cosineRes <- paste0(dir, "/Similarity_Matrix.txt")\r\n-output_png <- paste0(dir, "/Similarity_Matrix.png")\r\n-\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Calculate the cosine similarity and represent it with a heatmap\r\n-#-------------------------------------------------------------------------------\r\n-# Published signatures\r\n-dataFrame1 <- read.table(published_signature_file, header=T, sep="\\t")\r\n-# Remove the first three colmumns (Substitution Type, Trinucleotide Somatic, Mutation Type)\r\n-dataFrame1 <- dataFrame1[,4:ncol(dataFrame1)]\r\n-matrix1 <- as.matrix(dataFrame1)\r\n- \r\n-# Unkown signatures\r\n-dataFrame2 <- read.table(unknown_signature_file, header=T, sep="\\t")\r\n-# Remove the first two columns (alteration, context)\r\n-dataFrame2 <- dataFrame2[,3:ncol(dataFrame2)]\r\n-matrix2 <- as.matrix(dataFrame2)\r\n-# Recover the number of new signatures\r\n-NbNewSignature <- ncol(dataFrame2) - 1\r\n-\r\n-# Combined the two matrices (published and unknown signatures)\r\n-input_matrix_cos <- cbind(matrix1, matrix2)\r\n-# Calculate the cosine similarity\r\n-cosine_res <- cosine(input_matrix_cos)\r\n-\r\n-# Keep only the comparison between the two matrices\r\n-nbSign <- ncol(matrix1)+1 # +1 for havng the first signature of the matrix1\r\n-cosine_res_subset <- cosine_res[nbSign:nrow(cosine_res), 1:ncol(matrix1)]\r\n- \r\n-# Save the matrix\r\n-write.table(cosine_res_subset, file=output_cosineRes, quote=F, sep="\\t", col.names=T, row.names=T)\r\n-\r\n-# Transform the matrix in a suitable format for ggplot2\r\n-cosineRes_subset_melt <- melt(cosine_res_subset)\r\n-# Rename the columns\r\n-colnames(cosineRes_subset_melt) <- c("Unknown_Signatures", "Published_Signatures", "Similarity")\r\n-# Reorder the S'..b'first two columns (alteration, context)\n+dataFrame2 <- dataFrame2[,3:ncol(dataFrame2)]\n+matrix2 <- as.matrix(dataFrame2)\n+# Recover the number of new signatures\n+NbNewSignature <- ncol(dataFrame2) - 1\n+\n+# Combined the two matrices (published and unknown signatures)\n+input_matrix_cos <- cbind(matrix1, matrix2)\n+# Calculate the cosine similarity\n+cosine_res <- cosine(input_matrix_cos)\n+\n+# Keep only the comparison between the two matrices\n+nbSign <- ncol(matrix1)+1 # +1 for havng the first signature of the matrix1\n+cosine_res_subset <- cosine_res[nbSign:nrow(cosine_res), 1:ncol(matrix1)]\n+\n+# Save the matrix\n+write.table(cosine_res_subset, file=output_cosineRes, quote=F, sep="\\t", col.names=T, row.names=T)\n+\n+# Transform the matrix in a suitable format for ggplot2\n+cosineRes_subset_melt <- melt(cosine_res_subset)\n+# Rename the columns\n+colnames(cosineRes_subset_melt) <- c("Unknown_Signatures", "Published_Signatures", "Similarity")\n+# Reorder the Signature for having the same order as in the matrix. Turn your \'signature\' column into a character vector\n+cosineRes_subset_melt$Published_Signatures <- as.character(cosineRes_subset_melt$Published_Signatures)\n+#Then turn it back into an ordered factor\n+cosineRes_subset_melt$Published_Signatures <- factor(cosineRes_subset_melt$Published_Signatures, levels=rev(unique(cosineRes_subset_melt$Published_Signature)))\n+\n+# Base plot: heatmap\n+p1 <- ggplot(cosineRes_subset_melt, aes(x=Published_Signatures, y=Unknown_Signatures, fill=Similarity)) + geom_tile(colour="yellow") +scale_fill_gradientn(colours=c("yellow", "red")) + theme_classic()\n+\n+# Rename the signatures\n+if(basename(published_signature_file) == "Frequency-COSMIC30-Hupki.txt")\n+{\n+ p1 <- p1 + scale_x_discrete(breaks = c("Signature.1", "Signature.2", "Signature.3", "Signature.4", "Signature.5", "Signature.6", "Signature.7", "Signature.8", "Signature.9",\n+ "Signature.10", "Signature.11", "Signature.12", "Signature.13", "Signature.14", "Signature.15", "Signature.16", "Signature.17",\n+ "Signature.18", "Signature.19", "Signature.20", "Signature.21", "Signature.22", "Signature.23", "Signature.24", "Signature.25",\n+ "Signature.26", "Signature.27", "Signature.28", "Signature.29", "Signature.30",\n+ "Signature.1.MEF", "Signature.2.MEF", "Signature.3.MEF", "Signature.5.MEF"),\n+ labels = c("(Age) Sig 1", "(AID/APOBEC) Sig 2", "(BRCA1/2) Sig 3", "(Smoking) Sig 4", "Sig 5", "(DNA MMR deficiency) Sig 6", "(UV) Sig 7",\n+ "Sig 8", "(IgG) Sig 9", "(pol e) Sig 10", "(temozolomide) Sig 11", "Sig 12", "(AID/APOBEC) Sig 13", "Sig 14",\n+ "(DNA MMR deficiency) Sig 15", "Sig 16", "Sig 17", "Sig 18", "Sig 19", "(DNA MMR deficiency) Sig 20", "Sig 21", "(AA) Sig 22",\n+ "Sig 23", "(Aflatoxin) Sig 24", "Sig 25", "(DNA MMR deficiency) Sig 26", "Sig 27", "Sig 28", "(Tobacco chewing) Sig 29", "Sig 30",\n+ "(AA) Sig 1 MEF", "(AID) Sig 2 MEF", "(BaP) Sig 3 MEF", "(MNNG) Sig 5 MEF")\n+ )\n+}\n+\n+# Flipped cartesian coordinates so that horizontal becomes vertical, and vertical, horizontal\n+p1 <- p1 + coord_flip()\n+# Remove the x axis line\n+p1 <- p1 + theme(axis.line.x=element_blank(), axis.line.y=element_blank())\n+# Add the cosine value only if >= 0.9\n+cosResLabel <- subset(cosineRes_subset_melt, round(cosineRes_subset_melt$Similarity, digits=2) >= 0.9) # Subset the data for keeping only the values greater thant 0.9\n+p1 <- p1 + geom_text(data = cosResLabel, aes(x = Published_Signatures, y = Unknown_Signatures, label = round(cosResLabel$Similarity, 2)))\n+\n+graphics.off()\n+options(bitmapType=\'cairo\')\n+png(output_png, width=3000, height=2000, res=300)\n+plot(p1)\n+invisible( dev.off() )\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c R/estimateSign_Galaxy.r --- a/R/estimateSign_Galaxy.r Tue Jun 28 02:59:32 2016 -0400 +++ b/R/estimateSign_Galaxy.r Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -1,105 +1,105 @@\n-#!/usr/bin/Rscript\r\n-\r\n-#-----------------------------------#\r\n-# Author: Maude #\r\n-# Script: estimateSign_Galaxy.r #\r\n-# Last update: 22/07/15 #\r\n-#-----------------------------------#\r\n-\r\n-#########################################################################################################################################\r\n-# Estimate the number of signatures for NMF #\r\n-#########################################################################################################################################\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Load library for recovering the arguments\r\n-#-------------------------------------------------------------------------------\r\n-suppressMessages(suppressWarnings(require("getopt")))\r\n-\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Recover the arguments\r\n-#-------------------------------------------------------------------------------\r\n-spec = matrix(c(\r\n- "input" , "i", 1, "character",\r\n- "stop", "stop", 1, "numeric",\r\n- "cpu", "cpu", 1, "integer",\r\n- "output", "o", 1, "character",\r\n- "help", "h", 0, "logical"\r\n- ),\r\n- byrow=TRUE, ncol=4\r\n- )\r\n-\r\n-opt = getopt(spec);\r\n-\r\n-# No argument is pass to the command line\r\n-if(length(opt) == 1)\r\n-{\r\n- cat(paste("Usage:\\n estimateSign_Galaxy.r --input <matrix> --stop <maxNbSign> --cpu <cpu> --output <output_filename.png>\\n",sep=""))\r\n- q(status=1)\r\n-}\r\n-\r\n-# Help was asked for.\r\n-if ( !is.null(opt$help) )\r\n-{\r\n- # print a friendly message and exit with a non-zero error code\r\n- cat(paste("Usage:\\n estimateSign_Galaxy.r --input <matrix> --stop <maxNbSign> --cpu <cpu> --output <output_filename.png>\\n",sep=""))\r\n- q(status=1)\r\n-}\r\n-\r\n-\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Load library\r\n-#-------------------------------------------------------------------------------\r\n-suppressMessages(suppressWarnings(library(NMF)))\r\n-\r\n-\r\n- ###############################################################################\r\n- # Load the functions #\r\n- ###############################################################################\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Check the file doesn\'t have lines equal to zero\r\n-#-------------------------------------------------------------------------------\r\n-CheckFile <- function(rowsum, dataFrame, x)\r\n-{\r\n- if(rowsum == 0)\r\n- {\r\n- write("\\n\\nERROR: There is not enough mutations for running NMF!!!", stderr())\r\n- write(paste0("Input matrix contains at least one null row ", rownames(dataFrame)[x], "\\n\\n"), stderr())\r\n- stop()\r\n- }\r\n-}\r\n-\r\n-\r\n- ###############################################################################\r\n- # Check file #\r\n- ###############################################################################\r\n-\r\n-# The input musn\'t contains lines equal to zero !!!\r\n-matrixNMF <- read.table(opt$input, header=T)\r\n-# suppresses the return of sapply function\r\n-invisible( sapply(1:nrow(matrixNMF), function(x) { CheckFile(rowSums(matrixNMF)[x], matrixNMF, x) } ) )\r\n-\r\n-\r\n-\r\n- ###############################################################################\r\n- # '..b'-\n+# Load library for recovering the arguments\n+#-------------------------------------------------------------------------------\n+suppressMessages(suppressWarnings(require("getopt")))\n+\n+\n+#-------------------------------------------------------------------------------\n+# Recover the arguments\n+#-------------------------------------------------------------------------------\n+spec = matrix(c(\n+ "input" , "i", 1, "character",\n+ "stop", "stop", 1, "numeric",\n+ "cpu", "cpu", 1, "integer",\n+ "output", "o", 1, "character",\n+ "help", "h", 0, "logical"\n+ ),\n+ byrow=TRUE, ncol=4\n+ )\n+\n+opt = getopt(spec);\n+\n+# No argument is pass to the command line\n+if(length(opt) == 1)\n+{\n+ cat(paste("Usage:\\n estimateSign_Galaxy.r --input <matrix> --stop <maxNbSign> --cpu <cpu> --output <output_filename.png>\\n",sep=""))\n+ q(status=1)\n+}\n+\n+# Help was asked for.\n+if ( !is.null(opt$help) )\n+{\n+ # print a friendly message and exit with a non-zero error code\n+ cat(paste("Usage:\\n estimateSign_Galaxy.r --input <matrix> --stop <maxNbSign> --cpu <cpu> --output <output_filename.png>\\n",sep=""))\n+ q(status=1)\n+}\n+\n+\n+\n+#-------------------------------------------------------------------------------\n+# Load library\n+#-------------------------------------------------------------------------------\n+suppressMessages(suppressWarnings(library(NMF)))\n+\n+\n+ ###############################################################################\n+ # Load the functions #\n+ ###############################################################################\n+\n+#-------------------------------------------------------------------------------\n+# Check the file doesn\'t have lines equal to zero\n+#-------------------------------------------------------------------------------\n+CheckFile <- function(rowsum, dataFrame, x)\n+{\n+ if(rowsum == 0)\n+ {\n+ write("\\n\\nERROR: There is not enough mutations for running NMF!!!", stderr())\n+ write(paste0("Input matrix contains at least one null row ", rownames(dataFrame)[x], "\\n\\n"), stderr())\n+ stop()\n+ }\n+}\n+\n+\n+ ###############################################################################\n+ # Check file #\n+ ###############################################################################\n+\n+# The input musn\'t contains lines equal to zero !!!\n+matrixNMF <- read.table(opt$input, header=T)\n+# suppresses the return of sapply function\n+invisible( sapply(1:nrow(matrixNMF), function(x) { CheckFile(rowSums(matrixNMF)[x], matrixNMF, x) } ) )\n+\n+\n+\n+ ###############################################################################\n+ # Estimate the number of signatures #\n+ ###############################################################################\n+# Estimate the number of signatures with our data\n+nbCPU <- paste0("vP", opt$cpu)\n+nbSign <- 2:opt$stop # The minum number of signatures can\'t be lower than 2\n+\n+estim_r <- nmf(matrixNMF, method="brunet", nbSign, nrun=50, .opt=nbCPU)\n+\n+# Shuffle original data\n+v_random <- randomize(matrixNMF)\n+# Estimate quality measures from the shuffled data\n+estim_r_random <- nmf(v_random, method="brunet", nbSign, nrun=50, .opt=nbCPU)\n+\n+# Plot the estimation for our data and the random ones\n+graphics.off()\n+options(bitmapType=\'cairo\')\n+png(opt$output, width=3000, height=2000, res=300)\n+plot(estim_r, estim_r_random)\n+invisible( dev.off() )\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c R/figs_MutSpecStat_Galaxy.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/R/figs_MutSpecStat_Galaxy.r Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -0,0 +1,218 @@\n+#!/usr/bin/Rscript\n+\n+#-----------------------------------#\n+# Author: Maude #\n+# Script: figs_MutSpecStat_Galaxy.r #\n+# Last update: 18/10/16 #\n+#-----------------------------------#\n+\n+\n+#########################################################################################################################################\n+# Create the figures for the report and the HTML page #\n+#########################################################################################################################################\n+\n+\n+#-------------------------------------------------------------------------------\n+# Load library for recovering the arguments\n+#-------------------------------------------------------------------------------\n+suppressMessages(suppressWarnings(require("getopt")))\n+\n+\n+\n+#-------------------------------------------------------------------------------\n+# Recover the arguments\n+#-------------------------------------------------------------------------------\n+spec = matrix(c(\n+ "folderFigure", "folderFigure", 1, "character",\n+ "folderTemp", "folderTemp", 1, "character",\n+ "filename", "filename", 1, "character",\n+ "help", "h", 0, "logical"\n+),\n+byrow=TRUE, ncol=4\n+)\n+\n+opt = getopt(spec)\n+\n+# No argument is pass to the command line\n+if(length(opt) == 1)\n+{\n+ cat(paste("Usage:\\n figs_MutSpecStat_Galaxy.r --folderFigure <path_to_folder> --folderTemp <path_to_tempFolder> --filename <filename> \\n",sep=""))\n+ q(status=1)\n+}\n+\n+# Help was asked for.\n+if ( !is.null(opt$help) )\n+{\n+ # print a friendly message and exit with a non-zero error code\n+ cat(paste("Usage:\\n figs_MutSpecStat_Galaxy.r --folderFigure <path_to_folder> --filename <filename> \\n",sep=""))\n+ q(status=1)\n+}\n+\n+\n+\n+#-------------------------------------------------------------------------------\n+# Load library\n+#-------------------------------------------------------------------------------\n+suppressMessages(suppressWarnings(library(ggplot2)))\n+suppressMessages(suppressWarnings(library(gplots)))\n+suppressMessages(suppressWarnings(library(gtable)))\n+suppressMessages(suppressWarnings(library(grid)))\n+\n+\n+\n+\n+#-------------------------------------------------------------------------------\n+# OVERALL MUTATION DISTRIBUTION\n+#-------------------------------------------------------------------------------\n+inputDistrMut <- paste0(opt$folderFigure, "/Overall_mutation_distribution/", opt$filename, "/", opt$filename, "-OverallMutationDistribution.txt")\n+outputDistrMut <- paste0(opt$folderFigure, "/Overall_mutation_distribution/", opt$filename, "/", opt$filename, "-OverallMutationDistribution.png")\n+\n+# Load the input file\n+distrMut <- read.table(inputDistrMut, header=T)\n+\n+# Add the count of each category in the legend\n+distrMut$Legend[[1]] <- paste0(distrMut$Variant_type[[1]], " (", distrMut$Count[[1]], ")")\n+distrMut$Legend[[2]] <- paste0(distrMut$Variant_type[[2]], " (", distrMut$Count[[2]], ")")\n+distrMut$Legend[[3]] <- paste0(distrMut$Variant_type[[3]], " (", distrMut$Count[[3]], ")")\n+\n+# Base plot\n+pie <- ggplot(distrMut, aes(x=factor(""), fill=Legend, weight=Count)) + geom_bar(width=1) + coord_polar(theta="y") + scale_x_discrete("", breaks=NULL) + scale_y_continuous("", breaks=NULL) + labs(fill="")\n+# Background of the plot entire white\n+pie <- pie + theme(panel.grid.major = element_line(colour="white"), panel.grid.minor = element_line(colour="white"), panel.background = element_rect(fill="white"))\n+# Legend on right in 3 rows\n+pie <- pie + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=3))\n+# Change the color and the title of the legend\n+pie <- pie + scale_fill_brewer("Variant type", palette="Set1")\n+# Remove all the margins\n+pie <- pie + theme(plot.margin=unit(c(-1, 0, -1.5, 0), "cm"))\n+# Save the pie chart for the HTML page (higher resolution)\n+options(bitmapType=\'cairo\') # Use cai'..b'-------------------------------------------------------------------\n+inputSB <- paste0(opt$folderFigure, "/Stranded_Analysis/", opt$filename, "/", opt$filename, "-StrandBias.txt")\n+outputSB <- paste0(opt$folderFigure, "/Stranded_Analysis/", opt$filename, "/", opt$filename, "-StrandBias.png")\n+outputSBReport <- paste0(opt$folderTemp, "/", opt$filename, "-StrandBias-Report.png")\n+\n+# Load the input file\n+file_sb <- read.table(inputSB, header=T)\n+# Custom palette (blue, red)\n+cb_palette_SB <- c("#0072B2", "#CC0000")\n+# Base plot\n+p_sb <- ggplot(file_sb, aes(x=Alteration, y=Count, fill=Strand)) + theme_classic() + geom_bar(stat="identity", position="dodge") + scale_fill_manual(values=cb_palette_SB) + theme(axis.text.x = element_text(angle=60, hjust=1)) + xlab("") + theme(legend.position="bottom")\n+# Save the plot for the HTML page (higher resolution)\n+png(outputSB, width=1000, height=1200, res=300)\n+print(p_sb)\n+dev.off()\n+# Save the plot for the report\n+p_sb\n+ggsave(outputSBReport)\n+\n+\n+\n+\n+#-------------------------------------------------------------------------------\n+# HEATMAP SEQUENCE CONTEXT - GENOMIC STRAND\n+#-------------------------------------------------------------------------------\n+inputHeatmapGenomic <- paste0(opt$folderFigure, "/Trinucleotide_Sequence_Context/", opt$filename, "/", opt$filename, "-HeatmapCount-Genomic.txt")\n+outputHeatmapGenomic <- paste0(opt$folderFigure, "/Trinucleotide_Sequence_Context/", opt$filename, "/", opt$filename, "-HeatmapCount-Genomic.png")\n+outputHeatmapGenomicReport <- paste0(opt$folderTemp, "/", opt$filename, "-HeatmapCount-Genomic-Report.png")\n+\n+inputHeatmapGenomicPercent <- paste0(opt$folderFigure, "/Trinucleotide_Sequence_Context/", opt$filename, "/", opt$filename, "-HeatmapPercent-Genomic.txt")\n+outputHeatmapGenomicPercent <- paste0(opt$folderFigure, "/Trinucleotide_Sequence_Context/", opt$filename, "/", opt$filename, "-HeatmapPercent-Genomic.png")\n+outputHeatmapGenomicPercentReport <- paste0(opt$folderTemp, "/", opt$filename, "-HeatmapPercent-Genomic-Report.png")\n+\n+\n+## COUNT\n+heatmap_C <- read.table(inputHeatmapGenomic, header=T)\n+# Save the plot for the report\n+png(filename=outputHeatmapGenomicReport, bg="transparent", width=240, height=360)\n+# Heatmap with an absolute scale\n+heatmap.2(as.matrix(heatmap_C),Rowv=F,Colv=F,col=colorpanel(384,low="yellow",high="red"),dendrogram="none",scale="none",trace="none",key=F,labRow=rownames(as.matrix(heatmap_C)),labCol=colnames(as.matrix(heatmap_C)),lmat=rbind(c(5,1,4),c(3,1,2)), lhei=c(0.75,0.75),lwid=c(0.5,1.5,0.5))\n+dev.off()\n+# Save the plot for the HTML page (higher resolution)\n+png(filename=outputHeatmapGenomic, width=1100, height=1600, res=300)\n+heatmap.2(as.matrix(heatmap_C),Rowv=F,Colv=F,col=colorpanel(384,low="yellow",high="red"),dendrogram="none",scale="none",trace="none",key=F,labRow=rownames(as.matrix(heatmap_C)),labCol=colnames(as.matrix(heatmap_C)),lmat=rbind(c(5,1,4),c(3,1,2)), lhei=c(0.75,0.75),lwid=c(0.5,1.5,0.5))\n+dev.off()\n+\n+## PERCENT\n+heatmap_P <- read.table(inputHeatmapGenomicPercent, header=T)\n+# Save the plot for the report\n+png(filename=outputHeatmapGenomicPercentReport,bg="transparent", width=240, height=360)\n+# Heatmap with an absolute scale\n+heatmap.2(as.matrix(heatmap_P),Rowv=F,Colv=F,col=colorpanel(384,low="yellow",high="red"),dendrogram="none",scale="none",trace="none",key=F,labRow=rownames(as.matrix(heatmap_P)),labCol=colnames(as.matrix(heatmap_P)),lmat=rbind(c(5,1,4),c(3,1,2)), lhei=c(0.75,0.75),lwid=c(0.5,1.5,0.5))\n+dev.off()\n+# Save the plot for the HTML page (higher resolution)\n+png(filename=outputHeatmapGenomicPercent, width=1100, height=1600, res=300)\n+heatmap.2(as.matrix(heatmap_P),Rowv=F,Colv=F,col=colorpanel(384,low="yellow",high="red"),dendrogram="none",scale="none",trace="none",key=F,labRow=rownames(as.matrix(heatmap_P)),labCol=colnames(as.matrix(heatmap_P)),lmat=rbind(c(5,1,4),c(3,1,2)), lhei=c(0.75,0.75),lwid=c(0.5,1.5,0.5))\n+dev.off()\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c R/mutationSpectra_Galaxy.r --- a/R/mutationSpectra_Galaxy.r Tue Jun 28 02:59:32 2016 -0400 +++ b/R/mutationSpectra_Galaxy.r Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -1,203 +1,203 @@\n-#!/usr/bin/Rscript\r\n-\r\n-#-----------------------------------#\r\n-# Author: Maude #\r\n-# Script: mutationSpectra_Galaxy.r #\r\n-# Last update: 23/07/15 #\r\n-#-----------------------------------#\r\n-\r\n-#########################################################################################################################################\r\n-# Represent the mutation spectra with a bar graph #\r\n-#########################################################################################################################################\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Print a usage message if there is no argument pass to the command line\r\n-#-------------------------------------------------------------------------------\r\n-args <- commandArgs(TRUE)\r\n-usage <- function() \r\n-{\r\n- msg <- paste0(\'Usage:\\n\',\r\n- \' mutationSpectra_Galaxy.r input_Mutation_Spectra Sample_Name Output_Folder_High_Resolution Output_Folder_Low_Resolution Count_ca Count_cg Count_ta Count_tc Count_tg\\n\',\r\n- \'\\ninput_Mutation_Spectra should be tab-separated: alteration context value\\n\',\r\n- \'\\nOutput_Folder_High_Resolution: Folder for saving the high resolution image (display on the HTML page)\\n\',\r\n- \'\\nOutput_Folder_Low_Resolution: Folder for saving the low resolution image (display on the Excel report)\\n\'\r\n- )\r\n- cat(msg, \'\\n\', file="/dev/stderr")\r\n- quit(status=1)\r\n-}\r\n-\r\n-input = args[length(args)]\r\n-\r\n-if (length(args) == 0) { usage() }\r\n-\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Load library\r\n-#-------------------------------------------------------------------------------\r\n-suppressMessages(suppressWarnings(library(ggplot2)))\r\n-suppressMessages(suppressWarnings(library(reshape)))\r\n-suppressMessages(suppressWarnings(library(grid)))\r\n-suppressMessages(suppressWarnings(library(scales)))\r\n-suppressMessages(suppressWarnings(library(gridExtra)))\r\n-\r\n-\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Recover the arguments\r\n-#-------------------------------------------------------------------------------\r\n-input <- args[1]\r\n-sampleName <- args[2]\r\n-output_html <- args[3]\r\n-output_report <- args[4]\r\n-count_ca <- as.numeric(args[5])\r\n-count_cg <- as.numeric(args[6])\r\n-count_ct <- as.numeric(args[7])\r\n-count_ta <- as.numeric(args[8])\r\n-count_tc <- as.numeric(args[9])\r\n-count_tg <- as.numeric(args[10])\r\n-\r\n-count_ca <- paste("C>A (", count_ca, ")")\r\n-count_cg <- paste("C>G (", count_cg, ")")\r\n-count_ct <- paste("C>T (", count_ct, ")")\r\n-count_ta <- paste("T>A (", count_ta, ")")\r\n-count_tc <- paste("T>C (", count_tc, ")")\r\n-count_tg <- paste("T>G (", count_tg, ")")\r\n-\r\n-\r\n-\r\n- ###############################################################################\r\n- # Load the functions #\r\n- ###############################################################################\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Set the font depending on X11 availability\r\n-#-------------------------------------------------------------------------------\r\n-font <- ""\r\n-# Check the device available\r\n-device <- capabilities()\r\n-# X11 is available\r\n-if(device[5]) { font <- "Helvetica" } else { font <- "mono" }\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# My own thme\r\n-#-------------------------------------------'..b'<- ggplot(matrixW_melt, aes(x=context, y=value, fill=alteration)) + geom_bar(stat="identity", width=0.5) + facet_grid(variable ~ alteration, scales="free_y")\n+# Color the mutation like Alexandrov et al.\n+p <- p + scale_fill_manual(values=c("blue", "black", "red", "#828282", "#00CC33", "pink"))\n+# Remove the legend\n+p <- p + guides(fill=FALSE)\n+# customized theme (no background, no facet grid and strip, y axis only)\n+p <- p + mytheme\n+# Remove the title of the x facet strip\n+p <- p + theme(strip.text.x=element_blank(), strip.text.y=element_blank())\n+# Remove the x axis label, thicks and title\n+p <- p + theme(axis.title.x=element_blank(), axis.ticks.x=element_blank(), axis.title.y=element_text(size=15))\n+# Scale the y axis to the maximum value\n+p <- p + scale_y_continuous(limits=c(0,max_matrixW), oob=squish, breaks=c(0,max_matrixW), labels=fmt())\n+# Rename the y axis\n+p <- p + ylab("percent")\n+# Add a title to the plot\n+p <- p + ggtitle(sampleName) + theme(plot.title = element_text(vjust = 3.4, family=font))\n+# Add a top margin for writing the title of the plot\n+p <- p + theme(plot.margin=unit(c(.7,0,0,0), "cm"))\n+p <- p + scale_x_discrete(breaks = c("A_A","A_C","A_G","A_T", "C_A","C_C","C_G","C_T", "G_A","G_C","G_G","G_T", "T_A","T_C","T_G","T_T"),\n+\t\t labels =c(\'A\\nA\',"\\nC","\\nG","\\nT", \'C\\nA\',"\\nC","\\nG","\\nT",\n+\t\t\t \t \'G\\nA\',"\\nC","\\nG","\\nT", \'T\\nA\',"\\nC","\\nG","\\nT"\n+ )\n+ )\n+\n+#------------------------------------------------------------------------------------------------------------------------------\n+# Change the color of the facets for the mutation type\n+#------------------------------------------------------------------------------------------------------------------------------\n+cols <- rep( c("blue", "black", "red", "#828282", "#00CC33", "pink")) # Facet strip colours\n+\n+# Make a grob object\n+Pg <- ggplotGrob(p)\n+# To keep track of strip.background grobs\n+idx <- 0\n+# Find each strip.background and alter its backround colour\n+for( g in 1:length(Pg$grobs) )\n+{\n+\tif( grepl( "strip.absoluteGrob" , Pg$grobs[[g]]$name ) )\n+\t{\n+\t\tidx <- idx + 1\n+\t\tsb <- which( grepl( "strip\\\\.background" , names( Pg$grobs[[g]]$children ) ) )\n+\t\tPg$grobs[[g]]$children[[sb]][]$gp$fill <- cols[idx]\n+\t}\n+}\n+\n+# Reduce the size of the facet strip\n+Pg$heights[[3]] = unit(.1,"cm")\n+\n+\n+# Save the plot for the HTML page (higher resolution)\n+graphics.off() # close graphics windows\n+# Use cairo device as isn\'t possible to install X11 on the server...\n+png(paste0(output_html, "/", sampleName, "-MutationSpectraPercent-Genomic.png"), width=3500, heigh=500, res=300, type=c("cairo-png"))\n+plot(Pg)\n+## Add label for the mutation type above the strip facet\n+grid.text(0.13, unit(0.90,"npc") - unit(1,"line"), label=count_ca)\n+grid.text(0.29, unit(0.90,"npc") - unit(1,"line"), label=count_cg)\n+grid.text(0.45, unit(0.90,"npc") - unit(1,"line"), label=count_ct)\n+grid.text(0.6, unit(0.90,"npc") - unit(1,"line"), label=count_ta)\n+grid.text(0.76, unit(0.90,"npc") - unit(1,"line"), label=count_tc)\n+grid.text(0.92, unit(0.90,"npc") - unit(1,"line"), label=count_tg)\n+invisible( dev.off() )\n+\n+# Save the plot for the report\n+png(paste0(output_report, "/", sampleName, "-MutationSpectraPercent-Genomic-Report.png"), width=1000, heigh=150, type=c("cairo-png"))\n+plot(Pg)\n+## Add label for the mutation type above the strip facet\n+grid.text(0.13, unit(0.90,"npc") - unit(1,"line"), label=count_ca)\n+grid.text(0.29, unit(0.90,"npc") - unit(1,"line"), label=count_cg)\n+grid.text(0.45, unit(0.90,"npc") - unit(1,"line"), label=count_ct)\n+grid.text(0.6, unit(0.90,"npc") - unit(1,"line"), label=count_ta)\n+grid.text(0.76, unit(0.90,"npc") - unit(1,"line"), label=count_tc)\n+grid.text(0.92, unit(0.90,"npc") - unit(1,"line"), label=count_tg)\n+invisible( dev.off() )\n+\n+# Delete the empty plot created by the script\n+if (file.exists("Rplots.pdf")) invisible( file.remove("Rplots.pdf") )\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c R/somaticSignature_Galaxy.r --- a/R/somaticSignature_Galaxy.r Tue Jun 28 02:59:32 2016 -0400 +++ b/R/somaticSignature_Galaxy.r Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -1,467 +1,577 @@\n-#!/usr/bin/Rscript\r\n-\r\n-#-----------------------------------#\r\n-# Author: Maude #\r\n-# Script: somaticSignature_Galaxy.r #\r\n-# Last update: 29/07/15 #\r\n-#-----------------------------------#\r\n-\r\n-\r\n-#########################################################################################################################################\r\n-# Run NMF algorithm and represent the composition of somatic signatures and the contribution in each samples #\r\n-#########################################################################################################################################\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Load library for recovering the arguments\r\n-#-------------------------------------------------------------------------------\r\n-suppressMessages(suppressWarnings(require("getopt")))\r\n-\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Recover the arguments\r\n-#-------------------------------------------------------------------------------\r\n-spec = matrix(c(\r\n- "input" , "i", 1, "character",\r\n- "nbSignature", "nbSign", 1, "integer",\r\n- "cpu", "cpu", 1, "integer",\r\n- "output", "o", 1, "character",\r\n- "help", "h", 0, "logical"\r\n- ),\r\n- byrow=TRUE, ncol=4\r\n- )\r\n-\r\n-opt = getopt(spec);\r\n-\r\n-# No argument is pass to the command line\r\n-if(length(opt) == 1)\r\n-{\r\n- cat(paste("Usage:\\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir>\\n",sep=""))\r\n- q(status=1)\r\n-}\r\n-\r\n-# Help was asked for.\r\n-if ( !is.null(opt$help) )\r\n-{\r\n- # print a friendly message and exit with a non-zero error code\r\n- cat(paste("Usage:\\n somaticSignature_Galaxy.r --input <matrix> --nbSignature <nbSign> --cpu <cpu> --output <outputdir>\\n",sep=""))\r\n- q(status=1)\r\n-}\r\n-\r\n-\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Load library\r\n-#-------------------------------------------------------------------------------\r\n-suppressMessages(suppressWarnings(library(NMF)))\r\n-suppressMessages(suppressWarnings(library(ggplot2)))\r\n-suppressMessages(suppressWarnings(library(reshape)))\r\n-suppressMessages(suppressWarnings(library(grid)))\r\n-suppressMessages(suppressWarnings(library(scales))) # Set the maximum value to the y axis (graph composition somatic signature)\r\n-suppressMessages(suppressWarnings(library(gridExtra))) # function "unit"\r\n-\r\n-\r\n-\r\n- ###############################################################################\r\n- # Load the functions #\r\n- ###############################################################################\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Set the font depending on X11 availability\r\n-#-------------------------------------------------------------------------------\r\n-font <- ""\r\n-# Check the device available\r\n-device <- capabilities()\r\n-# X11 is available\r\n-if(device[5]) { font <- "Helvetica" } else { font <- "Helvetica-Narrow" }\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# My own theme\r\n-#-------------------------------------------------------------------------------\r\n-theme_custom <- function(base_size = 4, base_family = "")\r\n-{\r\n- # Starts with theme_grey and then modify some parts\r\n- theme_grey(base_size = base_size, base_family = base_family) %+replace%\r\n- theme(\r\n- axis.text = element_text(size = rel(0.8), family=font),\r\n- axis.ticks = element_line(colour = "black", size=.2),\r\n- axis.line = element_line('..b'F")\n+\n+ write("<html><body>", file=opt$html)\n+ write("<center> <h2> NMF Mutational signatures analysis </h2> </center>", file=opt$html, append=TRUE)\n+\n+ write("<br/> Download the results", file=opt$html, append=TRUE)\n+ write("<br/><a href=NMF.zip>NMF.zip</a><br/>", file=opt$html, append=TRUE)\n+\n+ #### Heatmap\n+ write("<table>", file=opt$html, append=TRUE)\n+ write("<tr> <br/> <th><h3>Heatmap of the mixture coefficient matrix</h3></th> </tr>", file=opt$html, append=TRUE)\n+ write(paste0("<tr> <td> <center> <br/> <a href=", output_cluster_html, ">Cluster_MixtureCoeff.txt</a> </center> </td> </tr>"), file=opt$html, append=TRUE)\n+ write("<tr>", file=opt$html, append=TRUE)\n+\n+ if(!file.exists(figure_cluster))\n+ {\n+ write("WARNING: NMF package can\'t plot the heatmap when the samples size is above 300. <br/>", file=opt$html, append=TRUE)\n+ }else{\n+ write(paste0("<td> <center> <a href=", figure_cluster_html, ">"), file=opt$html, append=TRUE)\n+ write(paste0("<img src=", figure_cluster_html, "/></a> <center> </td>"), file=opt$html, append=TRUE)\n+ }\n+ write("</tr>", file=opt$html, append=TRUE)\n+ write("</table>", file=opt$html, append=TRUE)\n+\n+ ### Signature composition\n+ write("<br/><br/>", file=opt$html, append=TRUE)\n+ write("<table>", file=opt$html, append=TRUE)\n+ write("<tr>", file=opt$html, append=TRUE)\n+ write("<th><h3>Signature composition</h3></th>", file=opt$html, append=TRUE)\n+ write("</tr>", file=opt$html, append=TRUE)\n+ write(paste0("<tr><td>", evar_round, "% of the variance is explained with ", opt$nbSignature, " signatures", "</td></tr>"), file=opt$html, append=TRUE)\n+ write("<tr height=15></tr>", file=opt$html, append=TRUE)\n+ write(paste0("<tr><td> <center> <a href=", output_matrixW_html ,">Composition somatic mutation (input matrix for the tool MutSpec-Compare)</a><center></td></tr>"), file=opt$html, append=TRUE)\n+ write("<tr>", file=opt$html, append=TRUE)\n+ write(paste0("<td><a href=", figure_matrixW_png_html, ">"), file=opt$html, append=TRUE)\n+ write(paste0("<img width=1000 src=", figure_matrixW_png_html, "/></a></td>"), file=opt$html, append=TRUE)\n+ write("</tr>\t", file=opt$html, append=TRUE)\n+ write("</table>", file=opt$html, append=TRUE)\n+ write("<br/><br/>", file=opt$html, append=TRUE)\n+\n+ ### Sample contribution to signatures\n+ write("<table>", file=opt$html, append=TRUE)\n+ write("<tr>", file=opt$html, append=TRUE)\n+ write("<th><h3>Sample contribution to signatures</h3></th>", file=opt$html, append=TRUE)\n+ write("</tr>", file=opt$html, append=TRUE)\n+ write(paste0("<tr><td> <center> <a href=", output_matrixH_ggplot2_html, ">Contribution mutation signature matrix</a></center></td></tr>"), file=opt$html, append=TRUE)\n+ write("<tr>", file=opt$html, append=TRUE)\n+ write(paste0("<td><a href=", figure_matrixH_png_html, ">"), file=opt$html, append=TRUE)\n+ write(paste0("<img width=700 src=", figure_matrixH_png_html, "/></a></td>"), file=opt$html, append=TRUE)\n+ write("</tr>", file=opt$html, append=TRUE)\n+ write("</table>", file=opt$html, append=TRUE)\n+ write("<br/><br/>", file=opt$html, append=TRUE)\n+\n+ ### Average contributions of each signatures in each cluster\n+ write("<table>", file=opt$html, append=TRUE)\n+ write("<tr>", file=opt$html, append=TRUE)\n+ write("<th><h3>Average contributions of each signatures in each cluster</h3></th>", file=opt$html, append=TRUE)\n+ write(paste0("<tr><td> <center> <a href=", output_matrixH_cluster_html, ">Average contributions</a></center></td></tr>"), file=opt$html, append=TRUE)\n+ write("<tr>", file=opt$html, append=TRUE)\n+ write(paste0("<td><a href=", figure_matrixH_cluster_html, ">"), file=opt$html, append=TRUE)\n+ write(paste0("<img width=700 src=", figure_matrixH_cluster_html, "/></a></td>"), file=opt$html, append=TRUE)\n+ write("</tr>\t", file=opt$html, append=TRUE)\n+ write("</table>", file=opt$html, append=TRUE)\n+ write("<br/><br/>", file=opt$html, append=TRUE)\n+\n+ write("<br/><br/><br/><br/>", file=opt$html, append=TRUE)\n+}\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c R/transciptionalStrandBias.r --- a/R/transciptionalStrandBias.r Tue Jun 28 02:59:32 2016 -0400 +++ b/R/transciptionalStrandBias.r Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -1,144 +1,145 @@\n-#!/usr/bin/Rscript\r\n-\r\n-#---------------------------------------------#\r\n-# Author: Maude #\r\n-# Script: transcriptionalStrandBias_Galaxy.r #\r\n-# Last update: 03/07/15 #\r\n-#---------------------------------------------#\r\n-\r\n-#########################################################################################################################################\r\n-# Transcriptional strand bias #\r\n-#########################################################################################################################################\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Print a usage message if there is no argument pass to the command line\r\n-#-------------------------------------------------------------------------------\r\n-args <- commandArgs(TRUE)\r\n-usage <- function() \r\n-{\r\n- msg <- paste0(\'Usage:\\n\',\r\n- \' transcriptionalStrandBias_Galaxy.r input Output_Folder_High_Resolution Output_Folder_Low_Resolution Label_Y_axis\\n\',\r\n- \'\\ninput should be tab-separated: MutationTypeContext Strand Value Sample\\n\',\r\n- \'\\nOutput_Folder_High_Resolution: Folder for saving the high resolution image (display on the HTML page)\\n\',\r\n- \'\\nOutput_Folder_Low_Resolution: Folder for saving the low resolution image (display on the Excel report)\\n\',\r\n- \'\\nLabel_Y_axis: can be Count or Percent\'\r\n- )\r\n- cat(msg, \'\\n\', file="/dev/stderr")\r\n- quit(status=1)\r\n-}\r\n-\r\n-input = args[length(args)]\r\n-\r\n-if (length(args) == 0) { usage() }\r\n-\r\n-\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Load library\r\n-#-------------------------------------------------------------------------------\r\n-suppressMessages(suppressWarnings(library(ggplot2)))\r\n-suppressMessages(suppressWarnings(library(gridExtra)))\r\n-\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Recover the argument pass in the command line\r\n-#-------------------------------------------------------------------------------\r\n-input <- args[1]\r\n-output <- args[2]\r\n-output_temp <- args[3] # Temp folder for the plot present in the Excel report\r\n-legend_y_axis <- args[4]\r\n-\r\n-\r\n-#-------------------------------------------------------------------------------\r\n-# Create the plot\r\n-#-------------------------------------------------------------------------------\r\n-## Load the data\r\n-txnSB <- read.table(input, header=T)\r\n-## Define the color for the transcribed (blue) and non-transcribed strand(red)\r\n-cb_palette_SB <- c("#0072B2", "#CC0000")\r\n-## Reorder the mutation on the x axis (same order as NMF)\r\n-txnSB$MutationTypeContext <- factor(txnSB$MutationTypeContext,\r\n- levels=c(\r\n- "C>A:A_A","C>A:A_C","C>A:A_G","C>A:A_T","C>A:C_A","C>A:C_C","C>A:C_G","C>A:C_T","C>A:G_A","C>A:G_C","C>A:G_G","C>A:G_T","C>A:T_A","C>A:T_C","C>A:T_G","C>A:T_T",\r\n- "C>G:A_A","C>G:A_C","C>G:A_G","C>G:A_T","C>G:C_A","C>G:C_C","C>G:C_G","C>G:C_T","C>G:G_A","C>G:G_C","C>G:G_G","C>G:G_T","C>G:T_A","C>G:T_C","C>G:T_G","C>G:T_T",\r\n- "C>T:A_A","C>T:A_C","C>T:A_G","C>T:A_T","C>T:C_A","C>T:C_C","C>T:C_G","C>T:C_T","C>T:G_A","C>T:G_C","C>T:G_G","C>T:G_T","C>T:T_A","C>T:T_C","C>T:T_G","C>T:T_T",\r\n- "T>A:A_A","T>A:A_C","T>A:A_G","T>A:A_T","T>A:C_A","T>A:C_C","T>A:C_G","T>A:C_T","T>A:G_A","T>A:G_C","T>A:G_G","T>A:G_T","T>A:T_A","T>A:T_C","T>A:T_G","T>A:T_T",\r\n- "T>C:A_A","T>C:A_C","T>C:A_G","T>C:A_T","T>C:C_A","T>C:C_C","T>C:C_G","T>C:C_T","T>C:G_A","T>C:G_C","T>C:G_G","T>C:G_T","T>C:T_A","T>C:T_C","T>C:T_G","T>C:T_T",\r\n- "T>G:A_A","T>G:A_C","T>G:A_G","T>G:A_T","T>G:C_A","T>G:C_C","T>G:C_G","T>G:C_T","T>G:G_A","T>G:G_C","T>G:G_G","T>G:G_T","T>G:T_A","T>G:T_C",'..b'"G>A:C_G","G>A:C_T","G>A:G_A","G>A:G_C","G>A:G_G","G>A:G_T","G>A:T_A","G>A:T_C","G>A:T_G","G>A:T_T",\n+ "G>C:A_A","G>C:A_C","G>C:A_G","G>C:A_T","G>C:C_A","G>C:C_C","G>C:C_G","G>C:C_T","G>C:G_A","G>C:G_C","G>C:G_G","G>C:G_T","G>C:T_A","G>C:T_C","G>C:T_G","G>C:T_T",\n+ "G>T:A_A","G>T:A_C","G>T:A_G","G>T:A_T","G>T:C_A","G>T:C_C","G>T:C_G","G>T:C_T","G>T:G_A","G>T:G_C","G>T:G_G","G>T:G_T","G>T:T_A","G>T:T_C","G>T:T_G","G>T:T_T",\n+ "T>A:A_A","T>A:A_C","T>A:A_G","T>A:A_T","T>A:C_A","T>A:C_C","T>A:C_G","T>A:C_T","T>A:G_A","T>A:G_C","T>A:G_G","T>A:G_T","T>A:T_A","T>A:T_C","T>A:T_G","T>A:T_T",\n+ "T>C:A_A","T>C:A_C","T>C:A_G","T>C:A_T","T>C:C_A","T>C:C_C","T>C:C_G","T>C:C_T","T>C:G_A","T>C:G_C","T>C:G_G","T>C:G_T","T>C:T_A","T>C:T_C","T>C:T_G","T>C:T_T",\n+ "T>G:A_A","T>G:A_C","T>G:A_G","T>G:A_T","T>G:C_A","T>G:C_C","T>G:C_G","T>G:C_T","T>G:G_A","T>G:G_C","T>G:G_G","T>G:G_T","T>G:T_A","T>G:T_C","T>G:T_G","T>G:T_T"\n+ \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t),\n+ \t\t\t\t\t\t\t\t\t\tlabels=c(\n+ "A_A","A_C","A_G","A_T","C_A","C_C","C_G","C_T","G_A","G_C","G_G","G_T","T_A","T_C","T_G","T_T",\n+ "A_A","A_C","A_G","A_T","C_A","C_C","C_G","C_T","G_A","G_C","G_G","G_T","T_A","T_C","T_G","T_T",\n+ "A_A","A_C","A_G","A_T","C_A","C_C","C_G","C_T","G_A","G_C","G_G","G_T","T_A","T_C","T_G","T_T",\n+ "A_A","A_C","A_G","A_T","C_A","C_C","C_G","C_T","G_A","G_C","G_G","G_T","T_A","T_C","T_G","T_T",\n+ "A_A","A_C","A_G","A_T","C_A","C_C","C_G","C_T","G_A","G_C","G_G","G_T","T_A","T_C","T_G","T_T",\n+ "A_A","A_C","A_G","A_T","C_A","C_C","C_G","C_T","G_A","G_C","G_G","G_T","T_A","T_C","T_G","T_T",\n+ "A_A","A_C","A_G","A_T","C_A","C_C","C_G","C_T","G_A","G_C","G_G","G_T","T_A","T_C","T_G","T_T",\n+ "A_A","A_C","A_G","A_T","C_A","C_C","C_G","C_T","G_A","G_C","G_G","G_T","T_A","T_C","T_G","T_T",\n+ "A_A","A_C","A_G","A_T","C_A","C_C","C_G","C_T","G_A","G_C","G_G","G_T","T_A","T_C","T_G","T_T",\n+ "A_A","A_C","A_G","A_T","C_A","C_C","C_G","C_T","G_A","G_C","G_G","G_T","T_A","T_C","T_G","T_T",\n+ "A_A","A_C","A_G","A_T","C_A","C_C","C_G","C_T","G_A","G_C","G_G","G_T","T_A","T_C","T_G","T_T",\n+ "A_A","A_C","A_G","A_T","C_A","C_C","C_G","C_T","G_A","G_C","G_G","G_T","T_A","T_C","T_G","T_T"\n+ \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t)\n+ \t\t\t\t\t\t\t\t\t )\n+## Changing the appearance of x axis thicks\n+p_txnSB <- p_txnSB + theme(axis.text.x = element_text(angle=60, hjust=1, vjust=1))\n+## Close graphics windows\n+graphics.off()\n+## Save the plot for the HTML page (higher resolution)\n+options(bitmapType=\'cairo\') # # Use cairo device as isn\'t possible to install X11 on the server...\n+png(paste0(output, ".png"), width=4000, height=1000, res=300)\n+plot(p_txnSB)\n+# Add a label bellow the bar graph for indicating the mutation type\n+grid.text(paste("C>A", sep=""), x=unit(.14, "npc"), y=unit(.7, "npc"), just=c("left", "bottom"), gp=gpar(fontface="bold",fontsize=10))\n+grid.text(paste("C>G", sep=""), x=unit(.29, "npc"), y=unit(.7, "npc"), just=c("left", "bottom"), gp=gpar(fontface="bold",fontsize=10))\n+grid.text(paste("C>T", sep=""), x=unit(.45, "npc"), y=unit(.7, "npc"), just=c("left", "bottom"), gp=gpar(fontface="bold",fontsize=10))\n+grid.text(paste("T>A", sep=""), x=unit(.58, "npc"), y=unit(.7, "npc"), just=c("left", "bottom"), gp=gpar(fontface="bold",fontsize=10))\n+grid.text(paste("T>C", sep=""), x=unit(.74, "npc"), y=unit(.7, "npc"), just=c("left", "bottom"), gp=gpar(fontface="bold",fontsize=10))\n+grid.text(paste("T>G", sep=""), x=unit(.9, "npc"), y=unit(.7, "npc"), just=c("left", "bottom"), gp=gpar(fontface="bold",fontsize=10))\n+invisible( dev.off() )\n+\n+\n+\n+# Save the plot for the report\n+p_txnSB\n+ggsave(paste0(output_temp, "-Report.png"), width=18)\n+\n+# Delete the empty plot created by the script\n+if (file.exists("Rplots.pdf")) invisible( file.remove("Rplots.pdf") )\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c README.txt --- a/README.txt Tue Jun 28 02:59:32 2016 -0400 +++ b/README.txt Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -1,86 +1,87 @@\n-==============================\r\n- MutSpec-Suite \r\n-==============================\r\n-\r\n-Created by Maude Ardin and Vincent Cahais (Mechanisms of Carcinogenesis Section, International Agency for Research on Cancer F69372 Lyon France,\r\n-http://www.iarc.fr/)\r\n-\r\n-Version 1.0\r\n-\r\n-Released under GNU public license version 2 (GPL v2)\r\n-\r\n-Package description: Ardin et al. - 2016 - MutSpec: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse\r\n-cancer genomes - BMC Bioinformatics\r\n-http://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-1011-z\r\n-\r\n-Test data: https://usegalaxy.org/u/maude-ardin/p/mutspectestdata\r\n-\r\n-\r\n-\r\n-### Requirements\r\n-\r\n-\t# python-dev\r\n-build-essential and python-dev packages must be installed on your machine before installing MutSpec tools:\r\n-$ sudo apt-get install build-essential python-dev\r\n-\r\n-\r\n-\t# Annovar\r\n-If you do not have ANNOVAR installed, you can download it here: http://www.openbioinformatics.org/annovar/annovar_download_form.php\r\n-\r\n-1) Once downloaded, install annovar per the installation instructions and edit the PATH variable in galaxy deamon (/etc/init.d/galaxy)\r\n-to reflect the location of directory containing perl scripts.\r\n-\r\n-2) Create directories for saving Annovar databases\r\n-\t2-a Create a folder (annovardb) for saving all Annovar databases, e.g. hg19db\r\n-\t2-b Create a subfolder (seqFolder) for saving the reference genome, e.g. hg19db/hg19_seq\r\n-\r\n-3) Download the reference genome (by chromosome) from UCSC for all desired builds as follows:\r\n-$ annotate_variation.pl -buildver <build> -downdb seq <seqFolder>\r\n-\r\n-where <build> can be hg18, hg19 or hg38 for the human genome or mm9, mm10 for the mouse genome.\r\n-and <seqFolder> is the location where the sequences (by chromosme) should be stored, e.g. hg19db/hg19_seq\r\n-\r\n-\r\n-4) Download all desired databases for all desired builds as follows:\r\n-$ annotate_variation.pl -buildver <build> [-webfrom annovar] -downdb <database> <annovardb>\r\n-\r\n-/!\\ At least the database refGene must be downloaded /!\\\r\n-\r\n-where <build> can be hg18, hg19 or hg38 for the human genome or mm9, mm10 for the mouse genome.\r\n-and <database> is the database file to download, e.g. refGene\r\n-and <annovardb> is the location where all database files should be stored, e.g. hg19db\r\n-\r\n-The list of all available databases can be found here: http://annovar.openbioinformatics.org/en/latest/user-guide/download/\r\n-\r\n-\r\n-5) Edit the annovar_index.loc file (in the folder galaxy-dist/tool-data/toolshed/repos/iarc/mutspec/revision/) to reflect the location\r\n-of annovardb folder (containing all the databases files downloaded from Annovar).\r\n-Restart galaxy instance for changes in .loc file to take effect or reload it into the admin interface.\r\n-\r\n-6) Edit the file build_listAVDB.txt in the mutspec install directory to reflect the name and the type of the databases installed\r\n-\r\n-\r\n-### Installation\r\n-\r\n-\t# MutSpec-Stat and MutSpec-NMF\r\n-By default 8 CPUs are used by these tools, but you may edit mutspecStat_wrapper.sh and mutspecNmf_wrapper.sh to change this number\r\n-to the maximum number of CPU available on your server.\r\n-\r\n-MutSpec-Stat and MutSpec-NMF tools allow parallel computations that are time consuming.\r\n-It is recommended to use the highest number of cores available on the Galaxy server to reduce the computation time of these tools.\r\n-\r\n-\r\n-\r\n-\r\n-\t# MutSpec-Annot\r\n-The maximum CPU value needs to be specified when installing MutSpec package by editing the file mutspecAnnot.pl to reflect the maximum number\r\n-of CPU available on your server.\r\n-\r\n-This tool may be time consuming for large files. For example, annotating a file of more than 25,000 variants takes 1 hour using 1 CPU (2.6 GHz),\r\n-while annotating this file using 8 CPUs takes only 5 minutes.\r\n-We have optimized MutSpec-Annot so that the tool uses more CPUs, if available, as follows:\r\n--files with less than 5,000 lines: 1 CPU is used\r\n-'..b" v2)\n+\n+Package description: Ardin et al. - 2016 - MutSpec: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse\n+cancer genomes - BMC Bioinformatics\n+http://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-016-1011-z\n+\n+Test data: https://usegalaxy.org/u/maude-ardin/p/mutspectestdata\n+\n+Source code: https://github.com/IARCbioinfo/mutspec\n+\n+\n+### Requirements\n+\n+\t# python-dev\n+build-essential and python-dev packages must be installed on your machine before installing MutSpec tools:\n+$ sudo apt-get install build-essential python-dev\n+\n+\n+\t# Annovar\n+If you do not have ANNOVAR installed, you can download it here: http://www.openbioinformatics.org/annovar/annovar_download_form.php\n+\n+1) Once downloaded, install annovar per the installation instructions and edit the PATH variable in galaxy deamon (/etc/init.d/galaxy)\n+to reflect the location of directory containing perl scripts.\n+\n+2) Create directories for saving Annovar databases\n+\t2-a Create a folder (annovardb) for saving all Annovar databases, e.g. hg19db\n+\t2-b Create a subfolder (seqFolder) for saving the reference genome, e.g. hg19db/hg19_seq\n+\n+3) Download the reference genome (by chromosome) from UCSC for all desired builds as follows:\n+$ annotate_variation.pl -buildver <build> -downdb seq <seqFolder>\n+\n+where <build> can be hg18, hg19 or hg38 for the human genome or mm9, mm10 for the mouse genome.\n+and <seqFolder> is the location where the sequences (by chromosme) should be stored, e.g. hg19db/hg19_seq\n+\n+\n+4) Download all desired databases for all desired builds as follows:\n+$ annotate_variation.pl -buildver <build> [-webfrom annovar] -downdb <database> <annovardb>\n+\n+/!\\ At least the database refGene must be downloaded /!\\\n+\n+where <build> can be hg18, hg19 or hg38 for the human genome or mm9, mm10 for the mouse genome.\n+and <database> is the database file to download, e.g. refGene\n+and <annovardb> is the location where all database files should be stored, e.g. hg19db\n+\n+The list of all available databases can be found here: http://annovar.openbioinformatics.org/en/latest/user-guide/download/\n+\n+\n+5) Edit the annovar_index.loc file (in the folder galaxy-dist/tool-data/toolshed/repos/iarc/mutspec/revision/) to reflect the location\n+of annovardb folder (containing all the databases files downloaded from Annovar).\n+Restart galaxy instance for changes in .loc file to take effect or reload it into the admin interface.\n+\n+6) Edit the file build_listAVDB.txt in the mutspec install directory to reflect the name and the type of the databases installed\n+\n+\n+### Installation\n+\n+\t# MutSpec-Stat and MutSpec-NMF\n+By default 8 CPUs are used by these tools, but you may edit mutspecStat_wrapper.sh and mutspecNmf.xml to change this number\n+to the maximum number of CPU available on your server.\n+\n+MutSpec-Stat and MutSpec-NMF tools allow parallel computations that are time consuming.\n+It is recommended to use the highest number of cores available on the Galaxy server to reduce the computation time of these tools.\n+\n+\n+\n+\n+\t# MutSpec-Annot\n+The maximum CPU value needs to be specified when installing MutSpec package by editing the file mutspecAnnot.pl to reflect the maximum number\n+of CPU available on your server.\n+\n+This tool may be time consuming for large files. For example, annotating a file of more than 25,000 variants takes 1 hour using 1 CPU (2.6 GHz),\n+while annotating this file using 8 CPUs takes only 5 minutes.\n+We have optimized MutSpec-Annot so that the tool uses more CPUs, if available, as follows:\n+-files with less than 5,000 lines: 1 CPU is used\n+-files with more than 5,000 and less than 25,000 lines: 2 CPUs are used\n+-files with more than 25,000 and less than 100,000 lines: 8 (or maximum CPUs, if less than 8 CPUs are available) are used (our benchmark\n+results didn't show any time saving using more than 8 cores for files with more than 25,000 but less than 100,000 lines)\n+-files with more than 100,000: maximum CPUs are used \n" |
b |
diff -r 46a10309dfe2 -r eda59b985b1c hg18_listAVDB.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hg18_listAVDB.txt Mon Mar 13 08:21:19 2017 -0400 |
b |
@@ -0,0 +1,32 @@ +#This is a sample file distributed with Galaxy that is used by the +#MutSpec-Annot tools. The hg18_listAVDB.txt has this format (white space +#characters are TAB characters): +# +#<RefGenome_DatabaseName> <operation> +# +# +# +#hg18_refGene.txt g +#hg18_genomicSuperDups.txt r +#hg18_snp138.txt f +hg18_refGene.txt g +hg18_knownGene.txt g +hg18_ensGene.txt g +hg18_cytoBand.txt r +hg18_gwasCatalog.txt r +hg18_genomicSuperDups.txt r +hg18_snp138.txt f +hg18_ALL.sites.2014_10.txt f +hg18_AFR.sites.2014_10.txt f +hg18_AMR.sites.2014_10.txt f +hg18_EAS.sites.2014_10.txt f +hg18_EUR.sites.2014_10.txt f +hg18_SAS.sites.2014_10.txt f +hg18_esp6500siv2_all.txt f +hg18_esp6500siv2_aa.txt f +hg18_esp6500siv2_ea.txt f +hg18_ljb26_sift.txt f +hg18_ljb26_pp2hdiv.txt f +hg18_ljb26_pp2hvar.txt f +hg18_cosmic70.txt f +hg18_exac03.txt f |
b |
diff -r 46a10309dfe2 -r eda59b985b1c hg19_listAVDB.txt --- a/hg19_listAVDB.txt Tue Jun 28 02:59:32 2016 -0400 +++ b/hg19_listAVDB.txt Mon Mar 13 08:21:19 2017 -0400 |
b |
@@ -29,9 +29,7 @@ hg19_ljb26_sift.txt f hg19_ljb26_pp2hdiv.txt f hg19_ljb26_pp2hvar.txt f -hg19_cosmic70.txt f +hg19_ljb26_mt.txt f +hg19_cosmic77.txt f hg19_exac03.txt f hg19_exac03nontcga.txt f -hg19_exac03nonpsych.txt f -hg19_kaviar20150923.txt f -hg19_hrcr1.txt f |
b |
diff -r 46a10309dfe2 -r eda59b985b1c hg38_listAVDB.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hg38_listAVDB.txt Mon Mar 13 08:21:19 2017 -0400 |
b |
@@ -0,0 +1,32 @@ +#This is a sample file distributed with Galaxy that is used by the +#MutSpec-Annot tools. The hg38_listAVDB.txt has this format (white space +#characters are TAB characters): +# +#<RefGenome_DatabaseName> <operation> +# +# +# +#hg38_refGene.txt g +#hg38_genomicSuperDups.txt r +#hg38_snp142.txt f +hg38_refGene.txt g +hg38_knownGene.txt g +hg38_cytoBand.txt r +hg38_gwasCatalog.txt r +hg38_genomicSuperDups.txt r +hg38_snp142.txt f +hg38_ALL.sites.2015_08.txt f +hg38_AFR.sites.2015_08.txt f +hg38_AMR.sites.2015_08.txt f +hg38_EAS.sites.2015_08.txt f +hg38_EUR.sites.2015_08.txt f +hg38_SAS.sites.2015_08.txt f +hg38_esp6500siv2_all.txt f +hg38_esp6500siv2_aa.txt f +hg38_esp6500siv2_ea.txt f +hg38_ljb26_sift.txt f +hg38_ljb26_pp2hdiv.txt f +hg38_ljb26_pp2hvar.txt f +hg38_cosmic70.txt f +hg38_exac03.txt f +hg38_exac03nontcga.txt f |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mm10_listAVDB.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mm10_listAVDB.txt Mon Mar 13 08:21:19 2017 -0400 |
b |
@@ -0,0 +1,17 @@ +#This is a sample file distributed with Galaxy that is used by the +#MutSpec-Annot tools. The mm10_listAVDB.txt has this format (white space +#characters are TAB characters): +# +#<RefGenome_DatabaseName> <operation> +# +# +# +#mm10_refGene.txt g +#mm10_genomicSuperDups.txt r +#mm10_snp137.txt f +mm10_refGene.txt g +mm10_knownGene.txt g +mm10_ensGene.txt g +mm10_cytoBand.txt r +mm10_genomicSuperDups.txt r +mm10_snp142.txt f |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mm9_listAVDB.txt --- a/mm9_listAVDB.txt Tue Jun 28 02:59:32 2016 -0400 +++ b/mm9_listAVDB.txt Mon Mar 13 08:21:19 2017 -0400 |
b |
@@ -1,17 +1,17 @@ -#This is a sample file distributed with Galaxy that is used by the -#MutSpec-Annot tools. The mm9_listAVDB.txt has this format (white space -#characters are TAB characters): -# -#<RefGenome_DatabaseName> <operation> -# -# -# -#mm9_refGene.txt g -#mm9_genomicSuperDups.txt r -#mm9_snp128.txt f -mm9_refGene.txt g -mm9_knownGene.txt g -mm9_ensGene.txt g -mm9_cytoBand.txt r -mm9_genomicSuperDups.txt r -mm9_snp128.txt f +#This is a sample file distributed with Galaxy that is used by the +#MutSpec-Annot tools. The mm9_listAVDB.txt has this format (white space +#characters are TAB characters): +# +#<RefGenome_DatabaseName> <operation> +# +# +# +#mm9_refGene.txt g +#mm9_genomicSuperDups.txt r +#mm9_snp128.txt f +mm9_refGene.txt g +mm9_knownGene.txt g +mm9_ensGene.txt g +mm9_cytoBand.txt r +mm9_genomicSuperDups.txt r +mm9_snp128.txt f |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecAnnot.pl --- a/mutspecAnnot.pl Tue Jun 28 02:59:32 2016 -0400 +++ b/mutspecAnnot.pl Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -1,1235 +1,1327 @@\n-#!/usr/bin/env perl\r\n-\r\n-#-----------------------------------#\r\n-# Author: Maude #\r\n-# Script: mutspecAnnot.pl #\r\n-# Last update: 21/06/16 #\r\n-#-----------------------------------#\r\n-\r\n-use strict;\r\n-use warnings;\r\n-use Getopt::Long;\r\n-use Pod::Usage;\r\n-use File::Basename; # my ($filename, $directories, $suffix) = fileparse($file, qr/\\.[^.]*/);\r\n-use File::Path;\r\n-use Parallel::ForkManager;\r\n-\r\n-\r\n-our ($verbose, $man, $help) = (0, 0, 0); # Parse options and print usage if there is a syntax error, or if usage was explicitly requested.\r\n-our ($refGenome, $output, $path_AVDB, $pathAVDBList, $folder_temp) = ("empty", "empty", "empty", "empty", "empty"); # The reference genome to use; The path for saving the result; The path to Annovar database; Text file with the list of the databases for Annovar; the path for saving the temporary files\r\n-our ($intervalEnd) = (10); # Number of bases for the flanking region for the sequence context.\r\n-our ($fullAVDB) = "yes"; # Add an option for using all Annovar databases for the annotation or only refGene + strand + context for having a quicker annotation (for large file with million of lines)\r\n-\r\n-GetOptions(\'verbose|v\'=>\\$verbose, \'help|h\'=>\\$help, \'man|m\'=>\\$man, \'refGenome=s\'=>\\$refGenome, \'interval=i\' => \\$intervalEnd, \'fullAnnotation=s\' => \\$fullAVDB, \'outfile|o=s\' => \\$output, \'pathAnnovarDB|AVDB=s\' => \\$path_AVDB, \'pathAVDBList=s\' => \\$pathAVDBList, \'pathTemporary|temp=s\' => \\$folder_temp) or pod2usage(2);\r\n-\r\n-our ($input) = @ARGV;\r\n-\r\n-pod2usage(-verbose=>1, -exitval=>1, -output=>\\*STDERR) if ($help);\r\n-pod2usage(-verbose=>2, -exitval=>1, -output=>\\*STDERR) if ($man);\r\n-pod2usage(-verbose=>0, -exitval=>1, -output=>\\*STDERR) if(@ARGV == 0); # No argument is pass to the command line print the usage of the script\r\n-pod2usage(-verbose=>0, -exitval=>1, -output=>\\*STDERR) if(@ARGV == 2); # Only one argument is expected to be pass to @ARGV (the input)\r\n-\r\n-\r\n-\r\n-######################################################################################################################################################\r\n-#\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tGLOBAL VARIABLES\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t #\r\n-######################################################################################################################################################\r\n-\r\n-#########################################\r\n-### SPECIFY THE NUMBER OF CPU ###\r\n-#########################################\r\n-our $max_cpu = 1; # Max number of CPU to use for the annotation\r\n-\r\n-\r\n-# Recover the current path\r\n-our $pwd = `pwd`;\r\n-chomp($pwd);\r\n-\r\n-# Input file path\r\n-our @pathInput = split("/", $input);\r\n-# Output directories\r\n-our ($folderMutAnalysis, $folderAnnovar) = ("", "");\r\n-# File with the list of Annovar databases to use\r\n-our $listAVDB = "";\r\n-# Initialisation of chromosome, position, ref and alt values\r\n-our ($chrValue, $positionValue, $refValue, $altValue) = ("c", "s", "r", "a");\r\n-\r\n-\r\n-######################################################################################################################################################\r\n-#\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tMAIN \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t #\r\n-######################################################################################################################################################\r\n-## Check the presence of the flags and create the output and temp directories\r\n-CheckFlags();\r\n-\r\n-## Format the file in the correct format if they are vcf or MuTect output and recover the column positions\r\n-FormatingInputFile();\r\n-\r\n-# Annotate the file with Annovar, add the strand orientation and the sequence context\r\n-FullAnnotation();\r\n-\r\n-######################################################################################################################################################\r\n-#\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tFUNCTIONS\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t'..b'= split("\\t",$search_header);\n+ close F1;\n+ # The number of the column\n+ my $name_of_column_NB = "toto";\n+ for(my $i=0; $i<=$#tab_search_header; $i++)\n+ {\n+ if($tab_search_header[$i] eq $name_of_column) { $name_of_column_NB = $i; last; }\n+ }\n+ if($name_of_column_NB eq "toto")\n+ {\n+ \tprint STDERR "Error message:\\n";\n+ \tprint STDERR "Error recoverNumCol(): the column named $name_of_column doesn\'t exits in the input file $input!!!!!\\n";\n+ }\n+ else { return $name_of_column_NB; }\n+}\n+\n+\n+\n+\n+=head1 NAME\n+\n+mutspec-Annot\n+\n+=head1 SYNOPSIS\n+\n+\tmutspecannot.pl [arguments] <query-file>\n+\n+ <query-file> can be a folder with multiple VCF or a single VCF\n+\n+ Arguments:\n+ -h, --help print help message\n+ -m, --man print complete documentation\n+ -v, --verbose use verbose output\n+ --refGenome the reference genome to use\n+ --interval <interger> the number of bases for the sequence context\n+ -o, --outfile <string> output directory for the result. If none is specify the result will be write in the same directory as the input file\n+ -AVDB --pathAnnovarDB <string> the path to Annovar database and the files with the chromosome size\n+ --pathAVDBList the path to a text file containing the list of Annovar databases installed\n+ -temp --pathTemporary <string> the path for saving the temporary files\n+ --fullAnnotation <string> recover all Annovar annotations (yes) or only the minimum for MutSpec-Stat (no)\n+ --max_cpu <integer> number of CPUs to be used for the annotation\n+\n+\n+Function: automatically run a pipeline on a list of variants and annote them using Annovar\n+\n+ Example: # Annotation only\n+ mutspecannot.pl --refGenome hg19 --interval 10 --outfile output_directory --pathAnnovarDB path_to_annovar_database --pathAVDBList path_to_the_list_of_annovar_DB --temp path_to_temporary_directory --fullAnnotation yes|no input\n+\n+\n+ Version: 03-2017 (March 2017)\n+\n+\n+=head1 OPTIONS\n+\n+=over 8\n+\n+=item B<--help>\n+\n+print a brief usage message and detailed explanation of options.\n+\n+=item B<--man>\n+\n+print the complete manual of the program.\n+\n+=item B<--verbose>\n+\n+use verbose output.\n+\n+=item B<--refGenome>\n+\n+the reference genome to use, could be hg19 or mm9.\n+\n+=item B<--interval>\n+\n+the number of bases surrounding the mutated bases, for the sequence context analysis.\n+\n+=item B<--outfile>\n+\n+the directory of output file names. If it is nor specify the same directory as the input file is used.\n+\n+=item B<--pathAnnovarDB>\n+\n+the path to the directory containing the Annovar databases and the files with the chromosome size.\n+\n+=item B<--pathAVDBList>\n+\n+the path to a text file containing the list of Annovar databases installed.\n+\n+=item B<--pathTemporary>\n+\n+the path for saving temporary files generated by the script.\n+If any is specify a temporary folder is created in the same directory where the script is running.\n+Deleted when the script is finish\n+\n+=item B<--fullAnnotation>\n+\n+Use all Annovar databases for the annotation (set to yes) or only refGene + strand + context (set to no) for having a quicker annotation (for large file with million of lines)\n+\n+=item B<--max_cpu>\n+\n+Specify the number of CPUs to be used. This number is used for spliting the file in n part and running the annotations in each part in parallel.\n+\n+\n+=head1 DESCRIPTION\n+\n+MutSpec-Annot is a perl script for added annotations on a list of genetic variants generated with NGS.\n+Functional annotations are added using ANNOVAR software. Strand transcript orientation is added using RefSeq database and the sequence context for x bases flanking the variant positions is also added.\n+A text tab delimited file is produced.\n+\n+=cut\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecAnnot.xml --- a/mutspecAnnot.xml Tue Jun 28 02:59:32 2016 -0400 +++ b/mutspecAnnot.xml Mon Mar 13 08:21:19 2017 -0400 |
b |
b'@@ -1,192 +1,221 @@\n-<tool id="mutSpecannot" name="MutSpec Annot" version="0.1" hidden="false">\r\n-<description>Annotate variants with ANNOVAR and other databases</description>\r\n-\r\n-<requirements>\r\n- <requirement type="set_environment">SCRIPT_PATH</requirement>\r\n- <requirement type="package" version="5.18.1">perl</requirement>\r\n-</requirements>\r\n-\r\n-<command interpreter="bash">\r\n- mutspecAnnot_wrapper.sh\r\n- $output\r\n- --refGenome ${refGenome}\r\n- --AVDB ${refGenome.fields.path}\r\n- --interval $interval\r\n- --fullAnnotation ${annotation_type}\r\n- $input\r\n-</command>\r\n-\r\n-<inputs>\r\n-\t<param name="input" type="data" format="txt" label="Input file" help="Select a single file, multiple files or a dataset collection"/>\r\n-\t\r\n-\t<param name="refGenome" type="select" label="Reference genome" help="Select the reference genome that was used for generating your data">\r\n- <options from_data_table="annovar_index" />\r\n- </param>\r\n- \r\n-\t<param name="interval" type="text" value="10" label="Sequence context of variants" help="Number of retrieved bases that flank variants in 5\' and 3\'"/>\r\n-\r\n- <param name="annotation_type" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Complete annotations" help="Select No if you have a file with millions of variants and you are just interested in having a quick overview of the mutational spectrum. Only the annotation from refGene, the strand orientation and the sequence context will be added." />\r\n-\r\n-</inputs>\r\n-\r\n-<outputs>\r\n-\t<data name="output" type="data" format="tabular" label="${input.name} annotated" />\r\n-</outputs> \r\n-\r\n-<stdio>\r\n- <regex match="ANNOVAR LOG FILE"\r\n- source="stdout"\r\n- level="fatal"\r\n- description="Read Annovar log file for more information" />\r\n-</stdio>\r\n-\r\n-<help>\r\n-\r\n-**What it does**\r\n-\r\n-MutSpect-Annot provides functional annotations from `ANNOVAR software`__ (Feb 2016 version is provided here), as well as the strand transcript orientation (from refGene database) and sequence context of variants (extrated from the reference genome selected).\r\n-\r\n-.. __: http://www.openbioinformatics.org/annovar/\r\n-\r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n-**Input formats**\r\n-\r\n-MutSpect-Annot accepts files in VCF (version 4.1 and 4.2) or in tab-delimited (TAB) format.\r\n-\r\n-.. class:: infomark\r\n-\r\n-TIP: If your data is not TAB delimited, use *Text manipulation -> convert*\r\n-\r\n-.. class:: warningmark\r\n-\r\n-Filenames must be <= 31 characters.\r\n-\r\n-.. class:: warningmark\r\n-\r\n-These files should contain at least four columns describing for each variant, the chromosome number, the start genomic position, the reference allele and the alternate allele\r\n-\r\n-.. class:: warningmark \r\n-\r\n-The tool supports different column names (**names are case-sensitive**) depending on the source file as follows:\r\n-\r\n-**mutect** : contig position ref_allele alt_allele\r\n-\r\n-**cosmic** : Mutation_GRCh37_chromosome_number Mutation_GRCh37_genome_position Description_Ref_Genomic Description_Alt_Genomic\r\n-\r\n-**icgc** : chromosome chromosome_start reference_genome_allele mutated_to_allele\r\n-\r\n-**tcga** : Chromosome Start_position Reference_Allele Tumor_Seq_Allele2\r\n-\r\n-**ionTorrent** : chr Position Ref Alt \r\n-\r\n-**proton** : Chrom Position Ref Variant \r\n-\r\n-**varScan2** : Chrom Position Ref VarAllele\r\n-\r\n-**annovar** : Chr Start Ref Obs \r\n-\r\n-**custom** : Chromosome Start Wild_Type Mutant\r\n-\r\n-.. class:: infomark\r\n-\r\n-For MuTect and MuTect2 output files, only confident calls are considered (variants containing the string REJECT in the judgement column or not passing MuTect2 filters, are not annotated and excluded from the MutSpect-Annot output) as other calls are very likely to be dubious calls or artefacts.\r\n-\r\n-.. class:: infomar'..b'0999 A G\n+ chr12 8082458 8082458 C T exonic SLC2A3 nonsynonymous SNV SLC2A3:NM_006931:exon6:c.G683A:p.R228Q NA rs200481428 0.000199681 NA - CCG chr12 8082458 8082458 C T\n+ chr4 70156391 70156391 T C exonic UGT2B28 nonsynonymous SNV UGT2B28:NM_053039:exon5:c.T1172C:p.V391A score=0.949699;Name=chr4:70035680 NA 0.000199681 NA + GTA chr4 70156391 70156391 T C\n+\n+\n+--------------------------------------------------------------------------------------------------------------------------------------------------\n+\n+**Contact**\n+\n+ardinm@fellows.iarc.fr; cahaisv@iarc.fr\n+\n+--------------------------------------------------------------------------------------------------------------------------------------------------\n+\n+**Code**\n+\n+The source code is available on `GitHub`__\n+\n+.. __: https://github.com/IARCbioinfo/mutspec.git\n+\n+\n+</help>\n+\n+\n+<citations>\n+ <citation type="bibtex">\n+ @article{ardin_mutspec:_2016,\n+ title = {{MutSpec}: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse cancer genomes},\n+ volume = {17},\n+ issn = {1471-2105},\n+ doi = {10.1186/s12859-016-1011-z},\n+ shorttitle = {{MutSpec}},\n+ abstract = {{BACKGROUND}: The nature of somatic mutations observed in human tumors at single gene or genome-wide levels can reveal information on past carcinogenic exposures and mutational processes contributing to tumor development. While large amounts of sequencing data are being generated, the associated analysis and interpretation of mutation patterns that may reveal clues about the natural history of cancer present complex and challenging tasks that require advanced bioinformatics skills. To make such analyses accessible to a wider community of researchers with no programming expertise, we have developed within the web-based user-friendly platform Galaxy a first-of-its-kind package called {MutSpec}.\n+ {RESULTS}: {MutSpec} includes a set of tools that perform variant annotation and use advanced statistics for the identification of mutation signatures present in cancer genomes and for comparing the obtained signatures with those published in the {COSMIC} database and other sources. {MutSpec} offers an accessible framework for building reproducible analysis pipelines, integrating existing methods and scripts developed in-house with publicly available R packages. {MutSpec} may be used to analyse data from whole-exome, whole-genome or targeted sequencing experiments performed on human or mouse genomes. Results are provided in various formats including rich graphical outputs. An example is presented to illustrate the package functionalities, the straightforward workflow analysis and the richness of the statistics and publication-grade graphics produced by the tool.\n+ {CONCLUSIONS}: {MutSpec} offers an easy-to-use graphical interface embedded in the popular Galaxy platform that can be used by researchers with limited programming or bioinformatics expertise to analyse mutation signatures present in cancer genomes. {MutSpec} can thus effectively assist in the discovery of complex mutational processes resulting from exogenous and endogenous carcinogenic insults.},\n+ pages = {170},\n+ number = {1},\n+ journaltitle = {{BMC} Bioinformatics},\n+ author = {Ardin, Maude and Cahais, Vincent and Castells, Xavier and Bouaoun, Liacine and Byrnes, Graham and Herceg, Zdenko and Zavadil, Jiri and Olivier, Magali},\n+ date = {2016},\n+ pmid = {27091472},\n+ keywords = {Galaxy, Mutation signatures, Mutation spectra, Single base substitutions}\n+ }\n+ </citation>\n+</citations>\n+\n+</tool>\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecAnnot_wrapper.sh --- a/mutspecAnnot_wrapper.sh Tue Jun 28 02:59:32 2016 -0400 +++ b/mutspecAnnot_wrapper.sh Mon Mar 13 08:21:19 2017 -0400 |
[ |
@@ -1,26 +1,29 @@ -#!/bin/bash - -output=$1;shift -refg=$2 -input=${9} - -command -v table_annovar.pl >/dev/null 2>&1 || { - echo "ERROR : table_annovar.pl not found. Add annovar scripts to your galaxy path !" ; - return 1 ; -} - -mkdir out -name=${input##*/} -name=${name%%.*} - -perl $SCRIPT_PATH/mutspecAnnot.pl \ - --outfile out \ - --pathAVDBList $SCRIPT_PATH \ - --temp "./temp" \ - $* 2>&1 - -ls out/Mutational_Analysis/Annovar/ -cp out/Mutational_Analysis/Annovar/${name}.${refg}_multianno.txt $output - -exit 0 - +#!/bin/bash + +output=$1;shift +refg=$2 +input=${9} + +command -v table_annovar.pl >/dev/null 2>&1 || { + echo "ERROR : table_annovar.pl not found. Add annovar scripts to your galaxy path !" ; + return 1 ; +} + +mkdir out +name=${input##*/} +name=${name%%.*} + +perl $SCRIPT_PATH/mutspecAnnot.pl \ + --outfile out \ + --pathAVDBList $SCRIPT_PATH \ + --temp "./temp" \ + $* + +ls out/Mutational_Analysis/Annovar/ + +if [ -e "out/Mutational_Analysis/Annovar/${name}.${refg}_multianno.txt" ]; then + cp out/Mutational_Analysis/Annovar/${name}.${refg}_multianno.txt $output +fi + +exit 0 + |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecCompare.xml --- a/mutspecCompare.xml Tue Jun 28 02:59:32 2016 -0400 +++ b/mutspecCompare.xml Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -1,132 +1,151 @@\n-<?xml version="1.0"?>\r\n-<tool id="mutSpeccompare" name="MutSpec Compare" version="0.0.1">\r\n-<description>Compare signatures with the cosine similarity method</description>\r\n-\r\n-<requirements>\r\n- <requirement type="set_environment">SCRIPT_PATH</requirement>\r\n- <requirement type="package" version="3.1.2">R</requirement>\r\n- <requirement type="package" version="0.1">mutspec</requirement>\r\n-</requirements>\r\n-\r\n-<command interpreter="bash">\r\n-\tmutspecCompare_wrapper.sh\r\n-\t$newsign\r\n-\t$output\r\n-\t#if $refSignatureSource.source == "fromtable":\r\n-\t\t\\$SCRIPT_PATH/Frequency-COSMICv72-Hupki.txt\r\n-\t#else\r\n-\t\t${refSignatureSource.h_publish}\r\n-\t#end if\r\n-</command>\r\n-\r\n-<inputs>\r\n-\t<conditional name="refSignatureSource">\r\n-\t\t<param name="source" type="select" label="Reference signatures" help="You may select the provided file that includes published signatures (see details further below) or your own reference file">\r\n-\t\t\t<option value="fromtable">Use COSMICv72_Hupki2014</option>\r\n-\t\t\t<option value="history">Use one from my history</option>\r\n-\t\t</param>\r\n-\t\t<when value="fromtable">\r\n-\t\t\t<options from_data_table="published_signature_matrice" />\r\n-\t\t</when>\r\n-\t\t<when value="history">\r\n-\t\t\t<param name="h_publish" type="data" format="tabular" label="Select a file from my history" help="Matrix correctly formated (see details further below)"/>\r\n-\t\t</when>\r\n- </conditional>\r\n- \t\r\n-\t<param name="newsign" type="data" format="html" label="Newly identified signature" help="Select an output of the tool MutSpec-NMF"/>\r\n-\t\r\n-</inputs>\r\n-\r\n-<outputs>\r\n-\t<data name="output" format="html" label="Similarity_Matrix on dataset ${newsign.name}" />\r\n-</outputs> \r\n-\r\n-\r\n-<help>\r\n-\r\n-**What it does**\r\n-\r\n-Compare two matrices containing published and newly identified mutation signatures using the `cosine similarity`__ method as already used by `Alexandrov et al. 2013`__, `Olivier et al. 2014`__ or `Schulze et al. 2015`__\r\n-\r\n-.. __: http://en.wikipedia.org/wiki/Cosine_similarity\r\n-\r\n-.. __: http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3588146/\r\n-\r\n-.. __: http://www.nature.com/srep/2014/140327/srep04482/full/srep04482.html\r\n-\r\n-.. __: http://www.nature.com/ng/journal/v47/n5/fig_tab/ng.3252_SF3.html\r\n-\r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n-**Output**\r\n-\r\n-A HTML page displaying a heatmap representing the similarity between the new signatures and the published ones.\r\n-\r\n-Values close to 1 (red) indicate a high similarity between the signatures.\r\n-\r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n-**Published signatures**\r\n-\r\n-The reference signatures matrix (COSMICv72-Hupki2014 matrix) includes\r\n-\r\n-1. The 30 signatures published in `COSMIC database, v72`__\r\n-\r\n-2. The 4 experimental signatures obtained in mouse cells for AA, MNNG, BaP and AID that were published in `Olivier et al. 2014`__\r\n-\r\n-\r\n-.. __: http://cancer.sanger.ac.uk/cosmic/signatures\r\n-\r\n-.. __: http://www.nature.com/srep/2014/140327/srep04482/full/srep04482.html\r\n-\r\n-\r\n-\r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n-**Example**\r\n-\r\n-Matrix of known signatures\r\n-\r\n-+-------------------+---------------+-----------------------+--------------+--------------+\r\n-| Substitution Type | Trinucleotide | Somatic Mutation Type | Signature 1 | Signature 2 |\r\n-+===================+===============+=======================+==============+==============+\r\n-| C>A | ACA | A[C>A]A | 0.0110983262 | 0.0006827082 +\r\n-+-------------------+---------------+-----------------------+--------------+--------------+\r\n-| C>A | ACC | A[C>A]C | 0.0091493407 | 0.0006191072 +\r\n-+-------------------+--------'..b'----------+--------------+--------------+\n+| C>A | ACT | A[C>A]T | 0.0062338852 | 0.0003238914 +\n++-------------------+---------------+-----------------------+--------------+--------------+\n+| C>A | CCA | C[C>A]A | 0.0065958701 | 0.000677445 +\n++-------------------+---------------+-----------------------+--------------+--------------+\n+| C>A | CCC | C[C>A]C | 0.0073423678 | 0.000213681 +\n++-------------------+---------------+-----------------------+--------------+--------------+\n+\n+\n+--------------------------------------------------------------------------------------------------------------------------------------------------\n+\n+**Contact**\n+\n+ardinm@fellows.iarc.fr; cahaisv@iarc.fr\n+\n+--------------------------------------------------------------------------------------------------------------------------------------------------\n+\n+**Code**\n+\n+The source code is available on `GitHub`__\n+\n+.. __: https://github.com/IARCbioinfo/mutspec.git\n+\n+</help>\n+\n+\n+<citations>\n+ <citation type="bibtex">\n+ @article{ardin_mutspec:_2016,\n+ title = {{MutSpec}: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse cancer genomes},\n+ volume = {17},\n+ issn = {1471-2105},\n+ doi = {10.1186/s12859-016-1011-z},\n+ shorttitle = {{MutSpec}},\n+ abstract = {{BACKGROUND}: The nature of somatic mutations observed in human tumors at single gene or genome-wide levels can reveal information on past carcinogenic exposures and mutational processes contributing to tumor development. While large amounts of sequencing data are being generated, the associated analysis and interpretation of mutation patterns that may reveal clues about the natural history of cancer present complex and challenging tasks that require advanced bioinformatics skills. To make such analyses accessible to a wider community of researchers with no programming expertise, we have developed within the web-based user-friendly platform Galaxy a first-of-its-kind package called {MutSpec}.\n+ {RESULTS}: {MutSpec} includes a set of tools that perform variant annotation and use advanced statistics for the identification of mutation signatures present in cancer genomes and for comparing the obtained signatures with those published in the {COSMIC} database and other sources. {MutSpec} offers an accessible framework for building reproducible analysis pipelines, integrating existing methods and scripts developed in-house with publicly available R packages. {MutSpec} may be used to analyse data from whole-exome, whole-genome or targeted sequencing experiments performed on human or mouse genomes. Results are provided in various formats including rich graphical outputs. An example is presented to illustrate the package functionalities, the straightforward workflow analysis and the richness of the statistics and publication-grade graphics produced by the tool.\n+ {CONCLUSIONS}: {MutSpec} offers an easy-to-use graphical interface embedded in the popular Galaxy platform that can be used by researchers with limited programming or bioinformatics expertise to analyse mutation signatures present in cancer genomes. {MutSpec} can thus effectively assist in the discovery of complex mutational processes resulting from exogenous and endogenous carcinogenic insults.},\n+ pages = {170},\n+ number = {1},\n+ journaltitle = {{BMC} Bioinformatics},\n+ author = {Ardin, Maude and Cahais, Vincent and Castells, Xavier and Bouaoun, Liacine and Byrnes, Graham and Herceg, Zdenko and Zavadil, Jiri and Olivier, Magali},\n+ date = {2016},\n+ pmid = {27091472},\n+ keywords = {Galaxy, Mutation signatures, Mutation spectra, Single base substitutions}\n+ }\n+ </citation>\n+</citations>\n+\n </tool>\n\\ No newline at end of file\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecCompare_wrapper.sh --- a/mutspecCompare_wrapper.sh Tue Jun 28 02:59:32 2016 -0400 +++ b/mutspecCompare_wrapper.sh Mon Mar 13 08:21:19 2017 -0400 |
b |
@@ -1,29 +1,29 @@ -#!/bin/bash - -newsign=$1 -html=$2 -ref=$3 - -output_dir=${html%%.*}_files - -matrix=${newsign%.*}_files/NMF/Files/MatrixW-Normto100.txt - -mkdir $output_dir - -Rscript --no-save $SCRIPT_PATH/R/compareSignature_Galaxy.r $ref $matrix $output_dir 2>&1 - -# Convert the image into png format -cd $output_dir - -echo "<html><body>" >> $html -echo "<center> <h2> Cosine similarity comparison </h2> </center>" >> $html - -echo "<table>" >> $html -echo "<tr> <td> <center> <br/> <a href="Similarity_Matrix.txt">Similarity_Matrix.txt</a> </center> </td> </tr>" >> $html -echo "<tr>" >> $html -echo "<td><a href="Similarity_Matrix.png">" >> $html -echo "<img width="1000" src="Similarity_Matrix.png" /></a></td>" >> $html -echo "</tr>" >> $html -echo "</table>" >> $html - -exit 0 +#!/bin/bash + +newsign=$1 +html=$2 +ref=$3 + +output_dir=${html%%.*}_files + +matrix=${newsign%.*}_files/NMF/Files/MatrixW-Normto100.txt + +mkdir $output_dir + +Rscript --no-save $SCRIPT_PATH/R/compareSignature_Galaxy.r $ref $matrix $output_dir 2>&1 + +# Convert the image into png format +cd $output_dir + +echo "<html><body>" >> $html +echo "<center> <h2> Cosine similarity comparison </h2> </center>" >> $html + +echo "<table>" >> $html +echo "<tr> <td> <center> <br/> <a href="Similarity_Matrix.txt">Similarity_Matrix.txt</a> </center> </td> </tr>" >> $html +echo "<tr>" >> $html +echo "<td><a href="Similarity_Matrix.png">" >> $html +echo "<img width="1000" src="Similarity_Matrix.png" /></a></td>" >> $html +echo "</tr>" >> $html +echo "</table>" >> $html + +exit 0 |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecFilter.pl --- a/mutspecFilter.pl Tue Jun 28 02:59:32 2016 -0400 +++ b/mutspecFilter.pl Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -1,378 +1,477 @@\n-# !/usr/bin/perl\r\n-\r\n-#-----------------------------------#\r\n-# Author: Maude #\r\n-# Script: mutspecFilter.pl #\r\n-# Last update: 18/03/16 #\r\n-#-----------------------------------#\r\n-\r\n-use strict;\r\n-use warnings;\r\n-use Getopt::Long;\r\n-use Pod::Usage;\r\n-use File::Basename; # my ($filename, $directories, $suffix) = fileparse($file, qr/\\.[^.]*/);\r\n-use File::Path;\r\n-\r\n-################################################################################################################################################################################\r\n-#\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tFilter an Annotaed file with Annovar\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t #\r\n-################################################################################################################################################################################\r\n-\r\n-our ($verbose, $man, $help) = (0, 0, 0); # Parse options and print usage if there is a syntax error, or if usage was explicitly requested.\r\n-our ($dbSNP_value, $segDup, $esp, $thG) = (0, 0, 0, 0); # For filtering agains the databases dbSNP, genomic duplicate segments, Exome Sequencing Project and 1000 genome.\r\n-our ($output, $refGenome) = ("", ""); # The path for saving the result; The reference genome to use.\r\n-our ($listAVDB) = "empty"; # Text file with the list Annovar databases.\r\n-our ($dir) = "";\r\n-\r\n-GetOptions(\'dir|d=s\'=>\\$dir,\'verbose|v\'=>\\$verbose, \'help|h\'=>\\$help, \'man|m\'=>\\$man, \'dbSNP=i\'=>\\$dbSNP_value, \'segDup\'=>\\$segDup, \'esp\'=>\\$esp, \'thG\'=>\\$thG, \'outfile|o=s\' => \\$output, \'refGenome=s\'=>\\$refGenome, \'pathAVDBList=s\' => \\$listAVDB) or pod2usage(2);\r\n-\r\n-our ($input) = @ARGV;\r\n-\r\n-pod2usage(-verbose=>1, -exitval=>1, -output=>\\*STDERR) if ($help);\r\n-pod2usage(-verbose=>2, -exitval=>1, -output=>\\*STDERR) if ($man);\r\n-pod2usage(-verbose=>0, -exitval=>1, -output=>\\*STDERR) if(@ARGV == 0); # No argument is pass to the command line print the usage of the script\r\n-pod2usage(-verbose=>0, -exitval=>1, -output=>\\*STDERR) if(@ARGV == 2); # Only one argument is expected to be pass to @ARGV (the input)\r\n-\r\n-\r\n-\r\n-# If the dbSNP value is not equal to zero filter using the dbSNP column specify\r\n-our $dbSNP = 0;\r\n-if($dbSNP_value > 0) { $dbSNP = 1; }\r\n-\r\n-\r\n-############ Check flags ############\r\n-if($listAVDB eq "empty") { $listAVDB = "$dir/${refGenome}_listAVDB.txt" }\r\n-\r\n-# Zero databases is specified\r\n-if( ($dbSNP == 0) && ($segDup == 0) && ($esp == 0) && ($thG == 0) )\r\n-{\r\n-\tprint STDERR "There is no databases selected for filtering against!!!\\nPlease chose at least one between dbSNP, SegDup, ESP (only for human genome) or 1000 genome (only for human genome)\\n";\r\n-\texit;\r\n-}\r\n-\r\n-\r\n-\r\n-############ Recover the name of the databases to filter against ############\r\n-my ($segDup_name, $espAll_name, $thousandGenome_name) = ("", "", "");\r\n-my @tab_protocol = ();\r\n-\r\n-if( ($segDup == 1) || ($esp == 1) || ($thG == 1) )\r\n-{\r\n-\t### Recover the name of the column\r\n-\tmy $protocol = "";\r\n-\tExtractAVDBName($listAVDB, \\$protocol);\r\n-\t@tab_protocol = split(",", $protocol);\r\n-\r\n-\tfor(my $i=0; $i<=$#tab_protocol; $i++)\r\n-\t{\r\n-\t\tif($tab_protocol[$i] =~ /genomicSuperDups/) { $segDup_name = $tab_protocol[$i]; }\r\n-\t\telsif($tab_protocol[$i] =~ /1000g/) { $thousandGenome_name = $tab_protocol[$i]; }\r\n-\t\telsif($tab_protocol[$i] =~ /esp/) { $espAll_name = $tab_protocol[$i]; }\r\n-\t}\r\n-}\r\n-\r\n-\r\n-############ Filter the file ############\r\n-filterAgainstPublicDB();\r\n-\r\n-\r\n-print STDOUT "\\tFilter selected\\tdbSNP = ".$dbSNP."\\tsegDup = ".$segDup."\\tesp = ".$esp."\\tthG = ".$thG."\\n";\r\n-\r\n-\r\n-sub filterAgainstPublicDB\r\n-{\r\n-\topen(FILTER, ">", "$output") or die "$!: $output\\n";\r\n-\r\n-\topen(F1, $input) or die "$!: $input\\n";\r\n-\tmy $header = <F1>; print FILTER $header;\r\n-\twhile(<F1>)\r\n-\t{\r\n-\t\t$_ =~ s/[\\r\\n]+$//;\r\n-\t\tmy @tab = split("\\t", $_);\r\n-\r\n-\t\tmy ($segDupI'..b'ror no table file";\n+\topen(OUT, ">$output") or die "error cannot open output file";\n+\tprint OUT $headers;\n+\t$NL=1;\n+\tmy $line = <F2>;\n+\twhile(<F1>)\n+\t{\n+ my @NR=split("\\t", $_);\n+ while( $NL < $NR[3]){ $line = <F2>; $NL++; }\n+ print OUT $line;\n+\t}\n+\tclose F1;\n+\tclose F2;\n+\tclose OUT;\n+\n+}\n+\n+\n+\n+=head1 NAME\n+\n+mutspecFilter - Filter a file annotated with MutSpec-Annot tool. Variants present in public databases (dbSNP, SegDup, ESP, 1000 genome obtained from Annovar) will be removed from the input file (with frequency limits described above)\n+\n+=head1 SYNOPSIS\n+\n+\tmutspecFilter.pl [arguments] <query-file>\n+\n+ <query-file> an annotated file\n+\n+ Arguments:\n+ -h, --help print help message\n+ -m, --man print complete documentation\n+ -v, --verbose use verbose output\n+\t\t\t\t\t\t\t\t\t --dbSNP <value> filter against dbSNP database. Specify the number of the dbSNP column in the file (start to count from 1)\n+\t\t\t\t\t\t\t\t\t --segDup filter against genomic duplicate database\n+\t\t\t\t\t\t\t\t\t --esp filter against Exome Sequencing Project database (only for human)\n+\t\t\t\t\t\t\t\t\t --thG filter against 1000 genome database (onyl for human)\n+\t\t\t -o, --outfile <string> path to output file\n+\t\t\t --refGenome reference genome to use\n+\t\t\t --pathAVDBList path to the list of Annovar databases installed\n+\t\t\t --filter path to a bed file\n+\n+\n+Function: Filter out variants present in public databases\n+\n+ Example: # Filter against dbSNP\n+ \t\t\t\t\tmutspecFilter.pl --dbSNP col_number (start to count from 1) --refGenome hg19 --pathAVDBList path_to_the_list_of_annovar_DB --outfile output_filename input\n+\n+ \t\t\t\t\t# Filter against all Annovar databases\n+ \t\t\t\t\tmutspecFilter.pl --dbSNP col_number (start to count from 1) --segDup --esp --thG --exac --refGenome hg19 --pathAVDBList path_to_the_list_of_annovar_DB --outfile output_filename input\n+\n+ \t\t\t\t\t# Filter against additional databases in BED format\n+ \t\t\t\t\tmutspecFilter.pl --filter path_to_bed --refGenome hg19 --pathAVDBList path_to_the_list_of_annovar_DB --outfile output_filename input\n+\n+\n+ Version: 02-2017 (February 2017)\n+\n+\n+=head1 OPTIONS\n+\n+=over 8\n+\n+=item B<--help>\n+\n+print a brief usage message and detailed explanation of options.\n+\n+=item B<--man>\n+\n+print the complete manual of the program.\n+\n+=item B<--verbose>\n+\n+use verbose output.\n+\n+=item B<--dbSNP>\n+\n+Remove all the variants presents in the dbSNP databases\n+Specify the number of the dbSNP column in the file (start to count from 1)\n+For human and mouse genome\n+\n+=item B<--segDup>\n+\n+Remove all the variants with a frequency greater or equal to 0.9 in genomic duplicate segments database\n+For human and mouse genome\n+\n+=item B<--esp>\n+\n+Remove all the variants with a frequency greater than 0.001 in Exome sequencing project\n+For human genome only\n+\n+=item B<--thG>\n+\n+Remove all the variants with a frequency greater than 0.001 in 1000 genome database\n+\n+\n+=item B<--exac>\n+\n+Remove all the variants with a frequency greater than 0.001 in ExAC database\n+\n+\n+=item B<--filter>\n+\n+Remove all variants present in the BED file\n+\n+\n+=item B<--refGenome>\n+\n+The reference genome to use.\n+\n+=item B<--outfile>\n+\n+path to output file\n+\n+=item B<--pathAVDBList>\n+\n+the path to a texte file containing the list of the Annovar databases installed.\n+\n+=back\n+\n+=head1 DESCRIPTION\n+\n+mutspecFilter - Filter a file annotated with MutSpec-Annot tool.\n+Variants present in public databases (dbSNP, SegDup, ESP, 1000 genome, exac obtained from Annovar) will be removed from the input file (with frequency limits described above).\n+Additionally, using the --filter option, any variants present in a specified bed file will be removed from the input file.\n+\n+=cut\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecFilter.xml --- a/mutspecFilter.xml Tue Jun 28 02:59:32 2016 -0400 +++ b/mutspecFilter.xml Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -1,120 +1,172 @@\n-<tool id="MutSpecfilter" name="MutSpec Filter" version="0.1" hidden="false">\r\n-<description>Filter out variants present in public databases</description>\r\n-\r\n-<requirements>\r\n- <requirement type="set_environment">SCRIPT_PATH</requirement>\r\n- <requirement type="package" version="5.18.1">perl</requirement>\r\n-</requirements>\r\n-\r\n-<command interpreter="perl">\r\n- mutspecFilter.pl \r\n- --dir \\$SCRIPT_PATH \r\n- $segDup\r\n- $esp\r\n- $thG\r\n- #if str($FilterdbSNP.dbSNP) == "true" or $FilterdbSNP.dbSNP == True:\r\n- --dbSNP ${FilterdbSNP.column}\r\n- #else\r\n- --dbSNP 0\r\n- #end if\r\n- --refGenome ${refGenome} \r\n- --outfile $output\r\n-\t\t$input\r\n-</command>\r\n-\r\n-<inputs>\r\n-\t<param name="input" type="data" format="txt" label="Input file"/>\r\n-\t\r\n-\t<param name="refGenome" type="select" label="Reference genome" help="All your data should have been annotated with the selected genome">\r\n- <options from_data_table="annovar_index" />\r\n- </param>\r\n-\r\n- <conditional name="FilterdbSNP">\r\n- <param name="dbSNP" type="boolean" checked="true" truevalue="true" label="Filter against dbSNP database" help="Remove variants with a RS number" />\r\n- <when value="true">\r\n- <param name="column" type="data_column" data_ref="input" label="Select the dbSNP column for filtering" use_header_names="true" help="Select a column name snp or snpNonFlagged" />\r\n- </when>\r\n- </conditional> \r\n-\r\n-\r\n- <param name="segDup" type="boolean" checked="true" truevalue="--segDup" falsevalue="" label="Filter against SegDup database" help="Remove variants present at >= 0.9 frequency in the genomic duplicate segments database" />\r\n- <param name="esp" type="boolean" checked="true" truevalue="--esp" falsevalue="" label="Filter against the ESP database" help="Remove variants present at frequency > 0.001 in the Exome Sequencing Project database (only valid for human genomes)" />\r\n- <param name="thG" type="boolean" checked="true" truevalue="--thG" falsevalue="" label="Filter against the 1000g database project" help="Remove variants present at frequency > 0.001 in the 1000 genome database (only valid for human genomes)" />\r\n-</inputs>\r\n-\r\n-<outputs>\r\n- \t<data type="data" name="output" format="tabular" label="${input.name.split(\' \')[0]} filtered" />\r\n-</outputs>\r\n-\r\n-<help>\r\n-\r\n-**What it does**\r\n-\r\n-Filter a file annotated with MutSpec-Annot tool. Variants present in public databases (dbSNP, SegDup, ESP, 1000 genome obtained from Annovar) will be removed from the input file (with frequency limits described above).\r\n-\r\n-.. class:: warningmark\r\n-\r\n-The databases ESP and 1000 genome can be used only for human genomes\r\n-\r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n-**Input**\r\n-\r\n-.. class:: warningmark\r\n-\r\n-Tab delimited text files generated by MutSpec-Annot tool.\r\n-\r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n-**Output**\r\n-\r\n-Tab delimited text file filtered for variants considered as neutral polymorphisms.\r\n-\r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n-**Example**\r\n-\r\n-Filter the following file::\r\n-\r\n- Chr Start End Ref Alt Func.refGene Gene.refGene ExonicFunc.refGene AAChange.refGene genomicSuperDups snp138 1000g2014oct_all esp6500si_all Strand context Chromosome Start_Position End_Position Reference_Allele Tumor_Seq_Allele2\r\n- chr7 121717919 121717920 - G exonic AASS frameshift insertion AASS:NM_005763:exon23:c.2634dupC:p.A879fs NA rs147476318 NA '..b'46235 T A\n+ chr14 33290999 33290999 A G exonic AKAP6 nonsynonymous SNV AKAP6:NM_004274:exon13:c.A3980G:p.D1327G NA NA NA NA + GAC chr14 33290999 33290999 A G\n+ chr4 70156391 70156391 T C exonic UGT2B28 nonsynonymous SNV UGT2B28:NM_053039:exon5:c.T1172C:p.V391A score=0.949699;Name=chr4:70035680 NA 0.000199681 NA + GTA chr4 70156391 70156391 T C\n+\n+\n+--------------------------------------------------------------------------------------------------------------------------------------------------\n+\n+**Contact**\n+\n+ardinm@fellows.iarc.fr; cahaisv@iarc.fr\n+\n+--------------------------------------------------------------------------------------------------------------------------------------------------\n+\n+**Code**\n+\n+The source code is available on `GitHub`__\n+\n+.. __: https://github.com/IARCbioinfo/mutspec.git\n+\n+\n+</help>\n+\n+\n+<citations>\n+ <citation type="bibtex">\n+ @article{ardin_mutspec:_2016,\n+ title = {{MutSpec}: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse cancer genomes},\n+ volume = {17},\n+ issn = {1471-2105},\n+ doi = {10.1186/s12859-016-1011-z},\n+ shorttitle = {{MutSpec}},\n+ abstract = {{BACKGROUND}: The nature of somatic mutations observed in human tumors at single gene or genome-wide levels can reveal information on past carcinogenic exposures and mutational processes contributing to tumor development. While large amounts of sequencing data are being generated, the associated analysis and interpretation of mutation patterns that may reveal clues about the natural history of cancer present complex and challenging tasks that require advanced bioinformatics skills. To make such analyses accessible to a wider community of researchers with no programming expertise, we have developed within the web-based user-friendly platform Galaxy a first-of-its-kind package called {MutSpec}.\n+ {RESULTS}: {MutSpec} includes a set of tools that perform variant annotation and use advanced statistics for the identification of mutation signatures present in cancer genomes and for comparing the obtained signatures with those published in the {COSMIC} database and other sources. {MutSpec} offers an accessible framework for building reproducible analysis pipelines, integrating existing methods and scripts developed in-house with publicly available R packages. {MutSpec} may be used to analyse data from whole-exome, whole-genome or targeted sequencing experiments performed on human or mouse genomes. Results are provided in various formats including rich graphical outputs. An example is presented to illustrate the package functionalities, the straightforward workflow analysis and the richness of the statistics and publication-grade graphics produced by the tool.\n+ {CONCLUSIONS}: {MutSpec} offers an easy-to-use graphical interface embedded in the popular Galaxy platform that can be used by researchers with limited programming or bioinformatics expertise to analyse mutation signatures present in cancer genomes. {MutSpec} can thus effectively assist in the discovery of complex mutational processes resulting from exogenous and endogenous carcinogenic insults.},\n+ pages = {170},\n+ number = {1},\n+ journaltitle = {{BMC} Bioinformatics},\n+ author = {Ardin, Maude and Cahais, Vincent and Castells, Xavier and Bouaoun, Liacine and Byrnes, Graham and Herceg, Zdenko and Zavadil, Jiri and Olivier, Magali},\n+ date = {2016},\n+ pmid = {27091472},\n+ keywords = {Galaxy, Mutation signatures, Mutation spectra, Single base substitutions}\n+ }\n+ </citation>\n+</citations>\n+\n+</tool>\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecNmf.xml --- a/mutspecNmf.xml Tue Jun 28 02:59:32 2016 -0400 +++ b/mutspecNmf.xml Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -1,131 +1,149 @@\n-<?xml version="1.0"?>\r\n-<tool id="mutSpecnmf" name="MutSpec NMF" version="0.0.1">\r\n-<description>Extract mutation signatures with the Non negative Matrix Factorization algorithm</description>\r\n-\r\n-<requirements>\r\n- <requirement type="set_environment">SCRIPT_PATH</requirement>\r\n- <requirement type="package" version="5.18.1">perl</requirement>\r\n- <requirement type="package" version="3.1.2">R</requirement>\r\n- <requirement type="package" version="1.7.1">numpy</requirement>\r\n- <requirement type="package" version="0.1">mutspec</requirement>\r\n-</requirements>\r\n-\r\n-<command interpreter="bash">\r\n-\tmutspecNmf_wrapper.sh\r\n-\t$html_file\r\n-\t"--nbSign $nbsign"\r\n-\t${refGenomeSource.source}\r\n-\t#if $refGenomeSource.source == "html":\r\n-\t\t${refGenomeSource.reportHTML}\r\n-\t#else\r\n-\t\t${refGenomeSource.matrix}\r\n-\t#end if\r\n-</command>\r\n-\r\n-<inputs>\r\n-\t<conditional name="refGenomeSource">\r\n-\t\t<param name="source" type="select" label="Input a MutSpec Stats report or a matrix" help="You may select either a report generated by MutSpec-Stats or a tab-delimited text matrix">\r\n-\t\t\t<option value="html">Dataset generated by the tool MutSpec-Stats</option>\r\n-\t\t\t<option value="tab">Tab-delimited matrix</option>\r\n-\t\t</param>\r\n-\t\t<when value="html">\r\n-\t\t\t<param name="reportHTML" type="data" format="html" label="Input dataset" help="Select a report generated by the MutSpec-Stats tool"/>\r\n-\t\t</when>\r\n-\t\t<when value="tab">\r\n-\t\t\t<param name="matrix" type="data" format="tabular" label="Input matrix" help="Select a matrix formatted as shown further below"/>\r\n-\t\t</when>\r\n-\t</conditional> \r\n-\t<param name="nbsign" type="text" value="2" label="Number of expected signatures" help="min=2" />\r\n-</inputs>\r\n-\r\n-<outputs>\r\n-\t<data name="html_file" format="html" label="NMF result on ${on_string} ($nbsign signatures)" />\r\n-</outputs> \r\n-\r\n-<help>\r\n-\r\n-**What it does**\r\n-\r\n-Extract mutation signatures composed of 96 SBS types (6 SBS types in their trinucleotide sequence context) using the non-negative matrix (`NMF`__) factorisation algorithm of Brunet with the Kullback-Leibler divergence penalty implemented in a `R package`__.\r\n-\r\n-.. __: http://www.nature.com/nature/journal/v401/n6755/full/401788a0.html\r\n-.. __: http://www.biomedcentral.com/1471-2105/11/367\r\n-\r\n-\r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n-**Input formats**\r\n-\r\n-The tool accepts a HTML report produces by the tool MutSpec-Stats or a matrix of mutation count in a tab-delimited text file format (see example below).\r\n-\r\n-.. class:: warningmark\r\n-\r\n-If the input is a matrix of mutation count, the sum of mutation counts for each row should be not null.\r\n-\r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n-**Output**\r\n-\r\n-Matrices and graphs representing the composition of the mutation signatures found by NMF (Matrix W) and the contributions of each sample to the signatures (Matrix H). The tool also produces a matrice that can be used with the tool MutSpec-compare for comparing the identified signatures with known signatures.\r\n-\r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n-**Example: matrix of mutation count (96 rows + a header with the samples names)**\r\n-\r\n-+--------+----------+----------+----------+\r\n-| | Sample_1 | Sample_2 | Sample_3 |\r\n-+========+==========+==========+==========+\r\n-|A[C>A]A | 4 | 3 | 1 |\r\n-+--------+----------+----------+----------+\r\n-|A[C>T]A | 2 | 1 | 0 |\r\n-+--------+----------+----------+----------+\r\n-|A[C>G]A | 13 | 2 | 1 |\r\n-+--------+----------+----------+----------+\r\n-|A[T>A]A | 10 | 3 | 6 |\r\n-+--------+----------+----------+----------+\r\n-|A[T>C]A | '..b' ... |\n++--------+----------+----------+----------+\n+|T[C>A]T | 5 | 2 | 2 |\n++--------+----------+----------+----------+\n+|T[C>G]T | 5 | 2 | 0 |\n++--------+----------+----------+----------+\n+|T[C>T]T | 11 | 4 | 2 |\n++--------+----------+----------+----------+\n+|T[T>A]T | 3 | 0 | 5 |\n++--------+----------+----------+----------+\n+|T[T>C]T | 39 | 17 | 1 |\n++--------+----------+----------+----------+\n+|T[T>G]T | 12 | 8 | 1 |\n++--------+----------+----------+----------+\n+\n+\n+--------------------------------------------------------------------------------------------------------------------------------------------------\n+\n+**Contact**\n+\n+ardinm@fellows.iarc.fr; cahaisv@iarc.fr\n+\n+--------------------------------------------------------------------------------------------------------------------------------------------------\n+\n+**Code**\n+\n+The source code is available on `GitHub`__\n+\n+.. __: https://github.com/IARCbioinfo/mutspec.git\n+\n+</help>\n+\n+\n+<citations>\n+ <citation type="bibtex">\n+ @article{ardin_mutspec:_2016,\n+ title = {{MutSpec}: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse cancer genomes},\n+ volume = {17},\n+ issn = {1471-2105},\n+ doi = {10.1186/s12859-016-1011-z},\n+ shorttitle = {{MutSpec}},\n+ abstract = {{BACKGROUND}: The nature of somatic mutations observed in human tumors at single gene or genome-wide levels can reveal information on past carcinogenic exposures and mutational processes contributing to tumor development. While large amounts of sequencing data are being generated, the associated analysis and interpretation of mutation patterns that may reveal clues about the natural history of cancer present complex and challenging tasks that require advanced bioinformatics skills. To make such analyses accessible to a wider community of researchers with no programming expertise, we have developed within the web-based user-friendly platform Galaxy a first-of-its-kind package called {MutSpec}.\n+ {RESULTS}: {MutSpec} includes a set of tools that perform variant annotation and use advanced statistics for the identification of mutation signatures present in cancer genomes and for comparing the obtained signatures with those published in the {COSMIC} database and other sources. {MutSpec} offers an accessible framework for building reproducible analysis pipelines, integrating existing methods and scripts developed in-house with publicly available R packages. {MutSpec} may be used to analyse data from whole-exome, whole-genome or targeted sequencing experiments performed on human or mouse genomes. Results are provided in various formats including rich graphical outputs. An example is presented to illustrate the package functionalities, the straightforward workflow analysis and the richness of the statistics and publication-grade graphics produced by the tool.\n+ {CONCLUSIONS}: {MutSpec} offers an easy-to-use graphical interface embedded in the popular Galaxy platform that can be used by researchers with limited programming or bioinformatics expertise to analyse mutation signatures present in cancer genomes. {MutSpec} can thus effectively assist in the discovery of complex mutational processes resulting from exogenous and endogenous carcinogenic insults.},\n+ pages = {170},\n+ number = {1},\n+ journaltitle = {{BMC} Bioinformatics},\n+ author = {Ardin, Maude and Cahais, Vincent and Castells, Xavier and Bouaoun, Liacine and Byrnes, Graham and Herceg, Zdenko and Zavadil, Jiri and Olivier, Magali},\n+ date = {2016},\n+ pmid = {27091472},\n+ keywords = {Galaxy, Mutation signatures, Mutation spectra, Single base substitutions}\n+ }\n+ </citation>\n+</citations>\n+\n+</tool>\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecNmf_wrapper.sh --- a/mutspecNmf_wrapper.sh Tue Jun 28 02:59:32 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,94 +0,0 @@ -#!/bin/bash - -######################################### -### SPECIFY THE NUMBER OF CPU ### -######################################### -cpu=1 - - - - -html=$1;shift -parameters=$1;shift -source=$1;shift -input=$1 - -if [[ $source == "html" ]] -then input=${input%%.*}_files/Mutational_Analysis/Figures/Input_NMF/Input_NMF_Count.txt -fi - -output_dir=${html%%.*}_files -mkdir $output_dir - -Rscript $SCRIPT_PATH/R/somaticSignature_Galaxy.r $parameters --cpu $cpu --input $input --output $output_dir 2>&1 - - -## Test the existence of the files and graphs produced by NMF -if [[ ! -e "$output_dir/NMF/Files/MatrixW-Normto100.txt" ]]; then - >&2 echo "error" - exit -fi - - -echo "<html><body>" >> $html -echo "<center> <h2> NMF Mutational signatures analysis </h2> </center>" >> $html - - -echo "<table>" >> $html -echo "<tr> <br/> <th><h3>Heatmap of the mixture coefficient matrix</h3></th> </tr>" >> $html -echo "<tr> <td> <center> <br/> <a href="NMF/Files/Cluster_MixtureCoeff.txt">Cluster_MixtureCoeff.txt</a> </center> </td> </tr>" >> $html -echo "<tr>" >> $html - -if [[ ! -e "$output_dir/NMF/Figures/Heatmap_MixtureCoeff.png" ]]; then - echo "WARNING: NMF package can't plot the heatmap when the samples size is above 300. <br/>" >> $html -else - echo "<td> <center> <a href="NMF/Figures/Heatmap_MixtureCoeff.png">" >> $html - echo "<img src="NMF/Figures/Heatmap_MixtureCoeff.png" /></a> <center> </td>" >> $html -fi -echo "</tr>" >> $html -echo "</table>" >> $html - -echo "<br/><br/>" >> $html - -echo "<table>" >> $html -echo "<tr>" >> $html -echo "<th><h3>Signature composition</h3></th>" >> $html -echo "</tr>" >> $html -echo "<tr><td> <center> <a href="NMF/Files/MatrixW-Normto100.txt">Composition somatic mutation (input matrix for the tool MutSpec-Compare)</a><center></td></tr>" >> $html -echo "<tr>" >> $html -echo "<td><a href="NMF/Figures/CompositionSomaticMutation.png">" >> $html -echo "<img width="1000" src="NMF/Figures/CompositionSomaticMutation.png" /></a></td>" >> $html -echo "</tr> " >> $html -echo "</table>" >> $html -echo "<br/><br/>" >> $html - -echo "<table>" >> $html -echo "<tr>" >> $html -echo "<th><h3>Sample contribution to signatures</h3></th>" >> $html -echo "</tr>" >> $html -echo "<tr><td> <center> <a href="NMF/Files/MatrixH-Inputggplot2.txt">Contribution mutation signature matrix</a></center></td></tr>" >> $html -echo "<tr>" >> $html -echo "<td><a href="NMF/Figures/ContributionMutationSignature.png">" >> $html -echo "<img width="700" src="NMF/Figures/ContributionMutationSignature.png" /></a></td>" >> $html -echo "</tr> " >> $html -echo "</table>" >> $html -echo "<br/><br/>" >> $html - - -echo "<table>" >> $html -echo "<tr>" >> $html -echo "<th><h3>Average contributions of each signatures in each cluster</h3></th>" >> $html -echo "</tr>" >> $html -echo "<tr><td> <center> <a href="NMF/Files/Average_ContriByCluster.txt">Average contributions</a></center></td></tr>" >> $html -echo "<tr>" >> $html -echo "<td><a href="NMF/Figures/Average_ContriByCluster.png">" >> $html -echo "<img width="700" src="NMF/Figures/Average_ContriByCluster.png" /></a></td>" >> $html -echo "</tr> " >> $html -echo "</table>" >> $html -echo "<br/><br/>" >> $html - -echo "<br/><br/><br/><br/>" >> $html - - - -exit 0 |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecSplit.pl --- a/mutspecSplit.pl Tue Jun 28 02:59:32 2016 -0400 +++ b/mutspecSplit.pl Mon Mar 13 08:21:19 2017 -0400 |
[ |
@@ -1,64 +1,82 @@ -# !/usr/bin/perl - -#-----------------------------------# -# Author: Vincent # -# Script: mutspecSplit.pl # -# Last update: 01/07/14 # -#-----------------------------------# - - -use strict; -use warnings; -use Getopt::Long; - -our $file=""; -our $column=""; -our $path=""; -our $key=""; - - -GetOptions('file|f=s' =>\$file, - 'key|k=s' =>\$key, - 'column|i=s' =>\$column, - 'path|p=s' =>\$path); - - -mkdir ("outputFiles") or die ("Erreur creation repertoire\n"); -# print $file,"\n", $key,"\n", $column,"\n", $path,"\n"; exit; - -my %tab; -if ($column==0) {$column++;} -$column--; - -open(FILE, "$file") or die "cannot open $file\n"; - -$_=<FILE>; #skip headers -chomp; -my @line = split(/\t/,$_); -my $headers = join("\t", @line[0..($column-1),($column+1)..$#line]); - -while(<FILE>){ - chomp; - my @line = split(/\t/,$_); - #if (!exists($tab{$line[$column]})) { $tab{$line[$column]}=[]; } - #push( @{ $tab{$line[$column]} }, join("\t", @line[0..($column-1),($column+1)..$#line]) ); - my $tmp = join("\t", @line[0..($column-1),($column+1)..$#line]) ; - my $id = $line[$column]; - push( @{ $tab{$id} }, $tmp); -} - - -while( my ($name,$lines) = each(%tab) ) { - my $output="outputFiles/$name"; - #my $output="primary_$key" . "_$name" . "_visible_tabular"; - # my $output=$name; - open(FILE, ">$output") or die "cannot create file $output \n"; - print FILE $headers."\n"; - foreach my $line (@{$lines}){ - print FILE "$line\n"; - } - close FILE; -} - -my $list=`ls outputFiles/*`; -print ($list); +# !/usr/bin/perl + +#-----------------------------------# +# Author: Vincent # +# Script: mutspecSplit.pl # +# Last update: 24/02/17 # +#-----------------------------------# + + +use strict; +use warnings; +use Getopt::Long; + +our $file=""; +our $column=""; +our $path=""; +our $key=""; +our $help=0; + + +GetOptions('file|f=s' =>\$file, + 'column|i=s' =>\$column, + 'help|h' =>\$help) or do_help(); # 'key|k=s' =>\$key, + +if($help) +{ + do_help(); +} + +if ( ($file eq "") || ($column eq "") ) +{ + do_help(); +} + +sub do_help +{ + print "Usage: mutspecSplit.pl --file <input_file> --column <value>\n + Option --file: Input file to split\n + Option --column: Column number containing the samples ids (start to count from 1)\n\n"; + exit; +} + + +mkdir ("outputFiles") or die ("Erreur creation repertoire\n"); +# print $file,"\n", $key,"\n", $column,"\n", $path,"\n"; exit; + +my %tab; +if ($column==0) {$column++;} # Start to count from 1 +$column--; + +open(FILE, "$file") or die "cannot open $file\n"; + +$_=<FILE>; #skip headers +chomp; +my @line = split(/\t/,$_); +my $headers = join("\t", @line[0..($column-1),($column+1)..$#line]); + +while(<FILE>){ + chomp; + my @line = split(/\t/,$_); + #if (!exists($tab{$line[$column]})) { $tab{$line[$column]}=[]; } + #push( @{ $tab{$line[$column]} }, join("\t", @line[0..($column-1),($column+1)..$#line]) ); + my $tmp = join("\t", @line[0..($column-1),($column+1)..$#line]) ; + my $id = $line[$column]; + push( @{ $tab{$id} }, $tmp); +} + + +while( my ($name,$lines) = each(%tab) ) { + my $output="outputFiles/$name"; + #my $output="primary_$key" . "_$name" . "_visible_tabular"; + # my $output=$name; + open(FILE, ">$output") or die "cannot create file $output \n"; + print FILE $headers."\n"; + foreach my $line (@{$lines}){ + print FILE "$line\n"; + } + close FILE; +} + +my $list=`ls outputFiles/*`; +print ($list); |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecSplit.xml --- a/mutspecSplit.xml Tue Jun 28 02:59:32 2016 -0400 +++ b/mutspecSplit.xml Mon Mar 13 08:21:19 2017 -0400 |
b |
b'@@ -1,108 +1,122 @@\n-<tool id="mutSpecsplit" name="MutSpec Split" version="0.1" hidden="false" force_history_refresh="True">\r\n-<description>Split a tabular file by sample ID</description>\r\n-\r\n-<requirements>\r\n- <requirement type="set_environment">SCRIPT_PATH</requirement>\r\n- <requirement type="package" version="5.18.1">perl</requirement>\r\n-</requirements>\r\n-\r\n-<command interpreter="perl">\r\n- mutspecSplit.pl -f $input -c $column\r\n-</command>\r\n-\r\n-<inputs>\r\n-\t<param name="input" type="data" format="tabular" label="Input file" help="If using the batch mode (multiple datasets), all files must contain the same sample id column. The tool doesn\'t support dataset list as input !" />\r\n-\t<param name="column" type="data_column" data_ref="input" label="Split by" use_header_names="true"/>\r\n-</inputs>\r\n-\r\n-<outputs>\r\n- <collection name="splitted_output" type="list" label="collection">\r\n- \t <discover_datasets pattern="__name__" ext="tabular" directory="outputFiles"/>\r\n- </collection> \r\n-</outputs>\r\n-\r\n-<help>\r\n-\r\n-**What it does**\r\n-\r\n-This tool splits a file into several files based on the content of the selected column.\r\n-It can be used for example to split a file that contains data on 10 samples into 10 files using the same sample ID column.\r\n-The resulting files are saved into a dataset list/collection.\r\n-\r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n-**Input**\r\n-\r\n-One or multiple tab delimited text files.\r\n-\r\n-If multiple files are selected, they should all have the same column on which you want to do the split.\r\n-\r\n-.. class:: warningmark\r\n-\r\n-The tool doesn\'t support dataset list as input !!!\r\n-\r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n-**Output**\r\n-\r\n-A dataset list containing tab delimited text files resulting from splitting the input file(s).\r\n-\r\n-.. class:: warningmark\r\n-\r\n-If a large number of file are generated, you\'ll need to refresh the history to see all files included in the dataset list. The entire list of file may still not be correctly displayed due to a known bug in Galaxy that may be fixed in future versions.\r\n- \r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n-**Example**\r\n-\r\n-Split by sample ID the following file::\r\n-\r\n- Chr Start End Ref Alt Func.refGene Gene.refGene ExonicFunc.refGene AAChange.refGene genomicSuperDups 1000g2012apr_all snp137 esp6500si_all cosmic67 Strand Context Mutation_GRCh37_chromosome_number Mutation_GRCh37_genome_position Description_Ref_Genomic Description_Alt_Genomic Sample_name Pubmed_PMID Age Comments\r\n- chr12 82752552 82752552 G A exonic METTL25 nonsynonymous SNV NM_032230:c.G208A:p.E70K NA NA NA NA NA + GTCGGAGACGGAGGCCCTGCC chr12 82752552 G A APA29 23913001 2 NA\r\n- chr11 86663436 86663436 C A exonic FZD4 nonsynonymous SNV NM_012193:c.G362T:p.C121F NA NA NA NA NA - GACTGAAAGACACATGCCGCC chr11 86663436 C A APA12 21311022 34 Tissue Remark Fixed:Remark\r\n- chr12 57872994 57872994 G A exonic ARHGAP9 nonsynonymous SNV NM_001080157:c.C196T:p.R66C NA NA NA 0.000077 ID=COSM431582;OCCURENCE=2(breast) - GCTTCTAGGCGTCTTGCCAAC chr12 '..b' GACTGAAAGACACATGCCGCC chr11 86663436 C A APA12 21311022 34 Tissue Remark Fixed:Remark\n+ chr12 57872994 57872994 G A exonic ARHGAP9 nonsynonymous SNV NM_001080157:c.C196T:p.R66C NA NA NA 0.000077 ID=COSM431582;OCCURENCE=2(breast) - GCTTCTAGGCGTCTTGCCAAC chr12 57872994 G A APA12 21311022 34 Tissue Remark Fixed:Remark\n+\n+\n+--------------------------------------------------------------------------------------------------------------------------------------------------\n+\n+**Contact**\n+\n+ardinm@fellows.iarc.fr; cahaisv@iarc.fr\n+\n+--------------------------------------------------------------------------------------------------------------------------------------------------\n+\n+**Code**\n+\n+The source code is available on `GitHub`__\n+\n+.. __: https://github.com/IARCbioinfo/mutspec.git\n+\n+\n+</help>\n+\n+\n+<citations>\n+ <citation type="bibtex">\n+ @article{ardin_mutspec:_2016,\n+ title = {{MutSpec}: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse cancer genomes},\n+ volume = {17},\n+ issn = {1471-2105},\n+ doi = {10.1186/s12859-016-1011-z},\n+ shorttitle = {{MutSpec}},\n+ abstract = {{BACKGROUND}: The nature of somatic mutations observed in human tumors at single gene or genome-wide levels can reveal information on past carcinogenic exposures and mutational processes contributing to tumor development. While large amounts of sequencing data are being generated, the associated analysis and interpretation of mutation patterns that may reveal clues about the natural history of cancer present complex and challenging tasks that require advanced bioinformatics skills. To make such analyses accessible to a wider community of researchers with no programming expertise, we have developed within the web-based user-friendly platform Galaxy a first-of-its-kind package called {MutSpec}.\n+ {RESULTS}: {MutSpec} includes a set of tools that perform variant annotation and use advanced statistics for the identification of mutation signatures present in cancer genomes and for comparing the obtained signatures with those published in the {COSMIC} database and other sources. {MutSpec} offers an accessible framework for building reproducible analysis pipelines, integrating existing methods and scripts developed in-house with publicly available R packages. {MutSpec} may be used to analyse data from whole-exome, whole-genome or targeted sequencing experiments performed on human or mouse genomes. Results are provided in various formats including rich graphical outputs. An example is presented to illustrate the package functionalities, the straightforward workflow analysis and the richness of the statistics and publication-grade graphics produced by the tool.\n+ {CONCLUSIONS}: {MutSpec} offers an easy-to-use graphical interface embedded in the popular Galaxy platform that can be used by researchers with limited programming or bioinformatics expertise to analyse mutation signatures present in cancer genomes. {MutSpec} can thus effectively assist in the discovery of complex mutational processes resulting from exogenous and endogenous carcinogenic insults.},\n+ pages = {170},\n+ number = {1},\n+ journaltitle = {{BMC} Bioinformatics},\n+ author = {Ardin, Maude and Cahais, Vincent and Castells, Xavier and Bouaoun, Liacine and Byrnes, Graham and Herceg, Zdenko and Zavadil, Jiri and Olivier, Magali},\n+ date = {2016},\n+ pmid = {27091472},\n+ keywords = {Galaxy, Mutation signatures, Mutation spectra, Single base substitutions}\n+ }\n+ </citation>\n+</citations>\n+\n+</tool> \n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecStat.pl --- a/mutspecStat.pl Tue Jun 28 02:59:32 2016 -0400 +++ b/mutspecStat.pl Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -1,3340 +1,2767 @@\n-#!/usr/bin/env perl\r\n-\r\n-#-----------------------------------#\r\n-# Author: Maude #\r\n-# Script: mutspecStat.pl #\r\n-# Last update: 16/06/16 #\r\n-#-----------------------------------#\r\n-\r\n-use strict;\r\n-use warnings;\r\n-use Getopt::Long;\r\n-use Pod::Usage;\r\n-use File::Basename; # my ($filename, $directories, $suffix) = fileparse($file, qr/\\.[^.]*/);\r\n-use File::Path;\r\n-use Statistics::R;\r\n-use Spreadsheet::WriteExcel;\r\n-\r\n-our ($verbose, $man, $help) = (0, 0, 0); # Parse options and print usage if there is a syntax error, or if usage was explicitly requested.\r\n-our ($refGenome, $output, $folder_temp, $path_R_Scripts, $path_SeqrefGenome) = ("empty", "empty", "empty", "empty", "empty"); # The reference genome to use; The path for saving the result; The path for saving the temporary files; The path to R scripts; The path to the fasta reference sequences\r\n-our ($poolData, $oneReportPerSample) = (2, 2); # If a folder is pass as input file pool all the data and generate the report on the pool and for each samples; # Generate one report for each samples\r\n-\r\n-\r\n-GetOptions(\'verbose|v\'=>\\$verbose, \'help|h\'=>\\$help, \'man|m\'=>\\$man, \'refGenome=s\'=>\\$refGenome, \'outfile|o=s\' => \\$output, \'pathTemporary|temp=s\' => \\$folder_temp, \'pathRscript=s\' => \\$path_R_Scripts, \'pathSeqRefGenome=s\' => \\$path_SeqrefGenome, \'poolData\' => \\$poolData, \'reportSample\' => \\$oneReportPerSample) or pod2usage(2);\r\n-\r\n-our ($input) = @ARGV;\r\n-\r\n-pod2usage(-verbose=>1, -exitval=>1, -output=>\\*STDERR) if ($help);\r\n-pod2usage(-verbose=>2, -exitval=>1, -output=>\\*STDERR) if ($man);\r\n-pod2usage(-verbose=>0, -exitval=>1, -output=>\\*STDERR) if(@ARGV == 0); # No argument is pass to the command line print the usage of the script\r\n-pod2usage(-verbose=>0, -exitval=>1, -output=>\\*STDERR) if(@ARGV == 2); # Only one argument is expected to be pass to @ARGV (the input)\r\n-\r\n-\r\n-\r\n-######################################################################################################################################################\r\n-#\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tGLOBAL VARIABLES\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t #\r\n-######################################################################################################################################################\r\n-# Recover the current path\r\n-our $pwd = `pwd`;\r\n-chomp($pwd);\r\n-\r\n-# Path to R scripts\r\n-our $pathRScriptTxnSB = "$path_R_Scripts/R/transciptionalStrandBias.r";\r\n-our $pathRScriptMutSpectrum = "$path_R_Scripts/R/mutationSpectra_Galaxy.r";\r\n-\r\n-our $folderMutAnalysis = "";\r\n-our @pathInput = split("/", $input);\r\n-\r\n-# Hash table with the length of each chromosomes\r\n-our %chromosomes;\r\n-\r\n-######################################################################################################################################################\r\n-#\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tMAIN \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t #\r\n-######################################################################################################################################################\r\n-# Check the presence of the flags and create the output and temp directories\r\n-CheckFlags();\r\n-\r\n-# Retrieve chromosomes length\r\n-checkChrDir();\r\n-\r\n-\r\n-print "-----------------------------------------------------------------\\n";\r\n-print "-----------------Report Mutational Analysis----------------------\\n";\r\n-print"-----------------------------------------------------------------\\n";\r\n-\r\n-# First check if the file is annotated or not\r\n-CheckAnnotationFile($input);\r\n-\r\n-# Calculate the statistics and generate the report\r\n-my @colInfoAV = qw(Chr Start Ref Alt);\r\n-ReportMutDist($input, $folderMutAnalysis, $folder_temp, \\@colInfoAV, $refGenome);\r\n-\r\n-# Remove the temporary directory\r\n-rmtree($folder_temp);\r\n-\r\n-\r\n-######################################################################################################################################################\r\n-#\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t'..b"middleHeader = $wb->add_format(valign=>'center', bg_color=>$bgColor_totallighGray, font=>'Arial', bold=>1, size=>10);\n+\t$$table_middleHeader2 = $wb->add_format(valign=>'center', bg_color=>$bgColor_totallighGray, font=>'Arial', bold=>1, size=>10); $$table_middleHeader2->set_bottom(1);\n+}\n+# Define the color\n+sub Color\n+{\n+\tmy ($wb, $blue, $black, $red, $gray, $green, $pink) = @_;\n+\n+\t$$blue = $wb->set_custom_color(40, 0, 0, 204);# C:G>A:T in blue\n+\t$$black = $wb->set_custom_color(41, 0, 0, 0);# C:G>G:C in black\n+\t$$red = $wb->set_custom_color(42, 255, 0, 0);# C:G>T:A in red\n+\t$$gray = $wb->set_custom_color(43, 205, 205, 205); # T:A>A:T in light gray\n+\t$$green = $wb->set_custom_color(44, 0, 204, 51);# T:A>C:G in green\n+\t$$pink = $wb->set_custom_color(45, 255, 192, 203);# T:A>G:C in pink\n+}\n+sub BackgroundColor\n+{\n+\tmy ($wb, $bgColor_blue, $bgColor_black, $bgColor_red, $bgColor_gray, $bgColor_green, $bgColor_pink) = @_;\n+\n+\t$$bgColor_blue = $wb->set_custom_color(48, 0, 0, 204);\n+\t$$bgColor_black = $wb->set_custom_color(49, 0, 0, 0);\n+\t$$bgColor_red = $wb->set_custom_color(50, 255, 0, 0);\n+\t$$bgColor_gray = $wb->set_custom_color(51, 205, 205, 205);\n+\t$$bgColor_green = $wb->set_custom_color(52, 0, 204, 51);\n+\t$$bgColor_pink = $wb->set_custom_color(53, 255, 192, 203);\n+}\n+\n+\n+\n+\n+=head1 NAME\n+\n+mutSpec-Stat\n+\n+=head1 SYNOPSIS\n+\n+\tmutSpecstat.pl [arguments] <query-file>\n+\n+ <query-file> a folder with one or multiple VCFs\n+\n+ Arguments:\n+ -h, --help print help message\n+ -m, --man print complete documentation\n+ -v, --verbose use verbose output\n+ --refGenome the reference genome to use (human, mouse or rat genomes)\n+ -o, --outfile <string> output directory for the result. If none is specify the result will be write in the same directory as the input file\n+ --temp <string> the path for saving the temporary files\n+ --pathSeqRefGenome the path to the fasta reference sequences\n+ --poolData generate the pool of all the samples (optional)\n+ --reportSample generate a report for each sample (optional)\n+\n+\n+Function: automatically run a pipeline and calculate various statistics on mutations\n+\n+ Example: mutSpecstat.pl --refGenome hg19 --outfile output_directory --temp path_to_temporary_directory --pathRscript path_to_R_scripts --pathSeqRefGenome path_fasta_ref_seq --poolData --reportSample inputFolder\n+\n+ Version: 02-2017 (February 2016)\n+\n+\n+=head1 OPTIONS\n+\n+=over 8\n+\n+=item B<--help>\n+\n+print a brief usage message and detailed explanation of options.\n+\n+=item B<--man>\n+\n+print the complete manual of the program.\n+\n+=item B<--verbose>\n+\n+use verbose output.\n+\n+=item B<--refGenome>\n+\n+the reference genome to use, could be human, mouse or rat genomes.\n+\n+=item B<--outfile>\n+\n+the directory of output file names. If it is nor specify the same directory as the input file is used.\n+\n+=item B<--temp>\n+\n+the path for saving temporary files generated by the script.\n+If any is specify a temporary folder is created in the same directory where the script is running.\n+Deleted when the script is finish\n+\n+=item B<--pathSeqRefGenome>\n+\n+The path to the fasta reference sequences\n+\n+=item B<--poolData only for the report>\n+\n+calculate the statistics on the pool of all the data pass in input\n+\n+=item B<--reportSample only for the report>\n+\n+generate a report for each samples\n+\n+=head1 DESCRIPTION\n+\n+mutSpecstat is a perl script for calculated various statistics on mutations\n+An Excel report containing the mutation type distribution per functional region, the strand bias and the sequence context on genomic and coding sequence is created.\n+The different statistics are illustrated using ggplot2.\n+\n+=cut\n" |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecStat.xml --- a/mutspecStat.xml Tue Jun 28 02:59:32 2016 -0400 +++ b/mutspecStat.xml Mon Mar 13 08:21:19 2017 -0400 |
b |
b'@@ -1,179 +1,198 @@\n-<tool id="mutSpecStat" name="MutSpec Stat" version="0.1" hidden="false">\r\n-<description>Calculate various statistics on mutations</description>\r\n-\r\n-<requirements>\r\n- <requirement type="set_environment">SCRIPT_PATH</requirement>\r\n- <requirement type="package" version="5.18.1">perl</requirement>\r\n- <requirement type="package" version="3.3">weblogo</requirement>\r\n- <requirement type="package" version="1.7.1">numpy</requirement>\r\n- <requirement type="package" version="3.1.2">R</requirement>\r\n- <requirement type="package" version="0.1">mutspec</requirement>\r\n-</requirements>\r\n-\r\n-<command interpreter="bash">\r\n- mutspecStat_wrapper.sh\r\n- $html\r\n- ${GALAXY_DATA_INDEX_DIR}/shared/ucsc/chrom/\r\n- #if str($estimateSignature.estimSign) == "true" or $estimateSignature.estimSign == True:\r\n- ${estimateSignature.estimT}\r\n- #else\r\n- 0\r\n- #end if\r\n-\r\n- "--refGenome ${refGenome} --pathSeqRefGenome ${refGenome.fields.path} $pooldata $reportSample"\r\n- #import re\r\n- #for $f in $dataset_list\r\n- \t#set $regexp = $re.compile("\\((.*)\\)")\r\n- \t#if $regexp.search($f.name)\r\n- \t\t#set filename=$regexp.search($f.name)\r\n-\t\t\t\t "$f=${filename.group(1)}"\r\n- \t#else\r\n-\t\t\t\t "$f=${f.name}"\r\n-\t\t\t#end if\r\n-\t\t#end for\r\n-</command>\r\n-\r\n-<inputs>\r\n-\t<param name="dataset_list" type="data_collection" format="tabular" collection_type="list" label="Annotated Dataset List" help="Select a dataset list/collection from your history" />\r\n-\t<param name="refGenome" type="select" label="Reference genome" help="All data in your dataset list should have been generated with the selected genome">\r\n-\t <options from_data_table="annovar_index" />\r\n-\t</param>\r\n-\r\n-\t<param name="pooldata" type="boolean" checked="true" truevalue="--pooldata" falsevalue="" label="Include statistics on the pooled samples" />\r\n-\t<param name="reportSample" type="boolean" checked="false" truevalue="--reportSample" falsevalue="" label="Generate one output file for each sample" help="By default, one output Excel file will be generated with statistics of each sample shown in different data sheets. Setting this option to true will generate one Excel file for each sample instead. It is recommended to use this option if your dataset list contains more than 250 files as the Excel output file may be too heavy to open easily on a computer with limited RAM"/>\r\n-\r\n- <conditional name="estimateSignature">\r\n- <param name="estimSign" type="boolean" checked="false" truevalue="true" label="Compute statistics for estimating the number of signatures" help="This option gererates different statistics that can be used to estimate the number of signatures to extract with NMF (this number should be used in the MutSpec-NMF tool"/>\r\n- <when value="true">\r\n- <param name="estimT" type="text" value="8" label="Maximum number of signatures to compute" help="Warning: Selecting a number above 8 may not work on small datasets"/>\r\n- </when>\r\n- </conditional>\r\n-\r\n-</inputs>\r\n-\r\n-<outputs>\r\n-\t<data name="html" type="data" format="html" label="mutation spectra report on ${dataset_list.name}" />\r\n-</outputs>\r\n-\r\n-<stdio>\r\n- <regex match="FutureWarning"\r\n- source="both"\r\n- level="warning"\r\n- description="FutureWarning" />\r\n-</stdio>\r\n-\r\n-<help>\r\n-\r\n-**What it does**\r\n-\r\n-MutSpec-Stat calculates various statistics describing mutation characteristics extracted from a dataset collection, and estimate (optional) the number of signatures present in the dataset.\r\n-The statistics include overall distribution of mutations, mutation distribution for single base substitutions (SBS) by functional regions, chromosomes, or in their trinucleotide sequence context (see details below).\r\n-\r\n---------------------------------------------------------------------------------------------------------------------------------------------------\r\n-\r\n'..b' or bar graphs. The heatmap colors are scaled to the maximum value of the corresponding table. The bar graph is scaled to the maximum frequency value (total number of mutation by SBS type is shown in parenthesis).\n+\n+\n+\n+**Panel 2. Stranded analysis of trinucleotide sequence context of SBS**\n+SBS within their trinucleotide sequence context are counted on the non-transcribed and transcribed strands of the gene region they are located in. Counts and frequencies are shown as tables or bar graphs.\n+Only SBS with strand orientation annotation are considered in this analysis (strand annotation retrieved from RefSeq database).\n+\n+\n+\n+--------------------------------------------------------------------------------------------------------------------------------------------------\n+\n+**Contact**\n+\n+ardinm@fellows.iarc.fr; cahaisv@iarc.fr\n+\n+--------------------------------------------------------------------------------------------------------------------------------------------------\n+\n+**Code**\n+\n+The source code is available on `GitHub`__\n+\n+.. __: https://github.com/IARCbioinfo/mutspec.git\n+\n+</help>\n+\n+<citations>\n+ <citation type="bibtex">\n+ @article{ardin_mutspec:_2016,\n+ title = {{MutSpec}: a Galaxy toolbox for streamlined analyses of somatic mutation spectra in human and mouse cancer genomes},\n+ volume = {17},\n+ issn = {1471-2105},\n+ doi = {10.1186/s12859-016-1011-z},\n+ shorttitle = {{MutSpec}},\n+ abstract = {{BACKGROUND}: The nature of somatic mutations observed in human tumors at single gene or genome-wide levels can reveal information on past carcinogenic exposures and mutational processes contributing to tumor development. While large amounts of sequencing data are being generated, the associated analysis and interpretation of mutation patterns that may reveal clues about the natural history of cancer present complex and challenging tasks that require advanced bioinformatics skills. To make such analyses accessible to a wider community of researchers with no programming expertise, we have developed within the web-based user-friendly platform Galaxy a first-of-its-kind package called {MutSpec}.\n+ {RESULTS}: {MutSpec} includes a set of tools that perform variant annotation and use advanced statistics for the identification of mutation signatures present in cancer genomes and for comparing the obtained signatures with those published in the {COSMIC} database and other sources. {MutSpec} offers an accessible framework for building reproducible analysis pipelines, integrating existing methods and scripts developed in-house with publicly available R packages. {MutSpec} may be used to analyse data from whole-exome, whole-genome or targeted sequencing experiments performed on human or mouse genomes. Results are provided in various formats including rich graphical outputs. An example is presented to illustrate the package functionalities, the straightforward workflow analysis and the richness of the statistics and publication-grade graphics produced by the tool.\n+ {CONCLUSIONS}: {MutSpec} offers an easy-to-use graphical interface embedded in the popular Galaxy platform that can be used by researchers with limited programming or bioinformatics expertise to analyse mutation signatures present in cancer genomes. {MutSpec} can thus effectively assist in the discovery of complex mutational processes resulting from exogenous and endogenous carcinogenic insults.},\n+ pages = {170},\n+ number = {1},\n+ journaltitle = {{BMC} Bioinformatics},\n+ author = {Ardin, Maude and Cahais, Vincent and Castells, Xavier and Bouaoun, Liacine and Byrnes, Graham and Herceg, Zdenko and Zavadil, Jiri and Olivier, Magali},\n+ date = {2016},\n+ pmid = {27091472},\n+ keywords = {Galaxy, Mutation signatures, Mutation spectra, Single base substitutions}\n+ }\n+ </citation>\n+</citations>\n+\n+</tool>\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c mutspecStat_wrapper.sh --- a/mutspecStat_wrapper.sh Tue Jun 28 02:59:32 2016 -0400 +++ b/mutspecStat_wrapper.sh Mon Mar 13 08:21:19 2017 -0400 |
[ |
b'@@ -1,507 +1,527 @@\n-#!/bin/bash\r\n-\r\n-#########################################\r\n-### SPECIFY THE NUMBER OF CPU ###\r\n-#########################################\r\n-cpu=1\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-#########################################\r\n-### Recover the arguments ###\r\n-#########################################\r\n-html=$1;shift\r\n-len_file_path=$1;shift\r\n-estimSign=$1;shift\r\n-parameters=$1;shift\r\n-working_dir=`pwd`\r\n-\r\n-\r\n-\r\n-mkdir in\r\n-cd in\r\n-\r\n-names=$(sed \'s/\\s/_/g\' <<< $*)\r\n-names=$(sed \'s/_\\// \\//g\' <<< $names)\r\n-names=$(sed \'s/_annotated//g\' <<< $names)\r\n-names=$(sed \'s/_filtered//g\' <<< $names)\r\n-names=$(sed \'s/\\.txt_/_/\' <<< $names)\r\n-\r\n-for name in ${names}\r\n-do\r\n- file=$(sed \'s/=/ /\' <<< $name);\r\n- echo $file\r\n- ln -s $file\r\n-done\r\n-cd ..\r\n-\r\n-output_dir=${html%%.*}_files\r\n-\r\n-\r\n-#########################################\r\n-### Calculates the statistics ###\r\n-#########################################\r\n-\r\n-perl $SCRIPT_PATH/mutspecStat.pl --outfile $output_dir \\\r\n-\t--temp "$working_dir/temp" \\\r\n-\t--pathRscript $SCRIPT_PATH \\\r\n-\t$parameters \\\r\n-\t$working_dir/in\r\n-\r\n-\r\n-#########################################\r\n-### Estimate the number of signatures ###\r\n-#########################################\r\n-if [[ $estimSign > 0 ]]; then\r\n- Rscript $SCRIPT_PATH/R/estimateSign_Galaxy.r --input $output_dir/Mutational_Analysis/Figures/Input_NMF/Input_NMF_Count.txt --stop $estimSign --cpu $cpu --output $output_dir/Mutational_Analysis/Figures/Estimate_Number_Signatures.png 2>&1\r\n-fi\r\n-\r\n-\r\n-#########################################\r\n-### Create css #\r\n-#########################################\r\n-css=$output_dir/Mutational_Analysis/style.css\r\n-echo ".legend{position:relative}.legend .legend-hidden{display:none;position:absolute;background-color:#fff;border:3px solid #03F;padding:3px;color:#000;font-size:1em;border-radius:10px;margin-top:-40px}.legend:hover .legend-hidden{display:block}" > $css\r\n-\r\n-\r\n-\r\n-# HMTL page for the result of the tool\r\n-echo "<html>" >> $html\r\n-echo "<body>" >> $html\r\n-\r\n-if [ -d $output_dir/Mutational_Analysis/Figures ]; then\r\n-\r\n-echo "<center> <h2>Mutational spectra report summary</h2> </center>" >> $html\r\n-\r\n-echo "<br/> Download the full report in Excel" >> $html\r\n-\r\n-## One report with all the samples. Specify the full path\r\n-if [[ -e "$output_dir/Mutational_Analysis/Report_Mutation_Spectra.xls" ]]\r\n-then\r\n-\t# Interpreted by Galaxy so don\'t need the full path\r\n-\techo "<br/><a href="Mutational_Analysis/Report_Mutation_Spectra.xls">Report_Mutation_Spectra.xls</a>" >> $html\r\n-fi\r\n-## One report for each samples\r\n-for file in $names\r\n-do\r\n- name=$(echo ${file}| cut -d"=" -f2)\r\n- name=${name%.*}\r\n-\r\n- # One report for each samples\r\n- if [[ -e "$output_dir/Mutational_Analysis/Report_Mutation_Spectra-$name.xls" ]]\r\n- then\r\n- echo "<br/><a href="Mutational_Analysis/Report_Mutation_Spectra-$name.xls">Report_Mutation_Spectra-$name.xls</a>" >> $html\r\n- fi\r\n-done\r\n-## One report for each samples: Pool_Data\r\n-if [[ $parameters =~ "--pooldata" ]]; then\r\n- if [[ -e "$output_dir/Mutational_Analysis/Report_Mutation_Spectra-Pool_Data.xls" ]]; then\r\n- echo "<br/><a href="Mutational_Analysis/Report_Mutation_Spectra-Pool_Data.xls">Report_Mutation_Spectra-Pool_Data.xls</a>" >> $html\r\n- fi\r\n-fi\r\n-\r\n-\r\n-## Input file for NMF\r\n-if [[ -e "$output_dir/Mutational_Analysis/Figures/Input_NMF/Input_NMF_Count.txt" ]]\r\n-then\r\n- # Interpreted by Galaxy so don\'t need the full path\r\n- echo "<br/><br/> Download the input file for the tool mutSpec-NMF" >> $html\r\n- echo "<br/><a href="Mutational_Analysis/Figures/Input_NMF/Input_NMF_Count.txt">Input_NMF_Count.txt</a><br/>" >> $html\r\n-fi\r\n-\r\n-## Computed statistics for estimating the number of signatures\r\n-if [[ $estimSign > 0 ]]; then\r\n- echo "<br/> Link to the computed statistics for estimating the number of signatures <br/>" >> $html\r\n- if [[ -e "$output_dir/Mutational_Analysis/Figures/Estimate_Number_Signatures.png" ]]; then\r\n'..b'ignaturePercent.txt</a> </center> </td>" >> $outfilePoolData\n+ echo "</tr><tr>" >> $outfilePoolData\n+\n+ echo "<td>" >> $outfilePoolData\n+ echo "<span class="legend"><img src="Figures/Stranded_Analysis/Pool_Data/Pool_Data-StrandedSignaturePercent.png width="1300""/>" >> $outfilePoolData\n+ echo "<span class="legend-hidden">" >> $outfilePoolData\n+ echo "<center><B>Panel 2. Stranded analysis of trinucleotide sequence context of SBS</center></B><br/>Proportion of SBS with their trinucleotide context considering the non-transcribed and transcribed strand<br/>" >> $outfilePoolData\n+ echo "</td>" >> $outfilePoolData\n+ echo "</tr>" >> $outfilePoolData\n+ echo "</table>" >> $outfilePoolData\n+\n+ echo "<br/><br/>" >> $outfilePoolData\n+\n+ #####################################################\n+ # Sequence logo generated with Weblogo3: Pool #\n+ #####################################################\n+ echo "<table>" >> $outfilePoolData\n+ echo "<h3>Sequence logo generated with Weblogo3</h3>" >> $outfilePoolData\n+ # C>A\n+ echo "<tr>" >> $outfilePoolData\n+ if [[ ! -e "$output_dir/Mutational_Analysis/Figures/WebLogo/Pool_Data/Pool_Data-CA-Probability.png" ]]; then\n+ echo "<td>WARNING: No sequence for C>A </br> </td>" >> $outfilePoolData\n+ else\n+ echo "<td><a href="Figures/WebLogo/Pool_Data/Pool_Data-CA.fa">Pool_Data-CA.fa</a><br/>" >> $outfilePoolData\n+ echo "<img src="Figures/WebLogo/Pool_Data/Pool_Data-CA-Probability.png"/><br/></td>" >> $outfilePoolData\n+ fi\n+ # C>G\n+ if [[ ! -e "$output_dir/Mutational_Analysis/Figures/WebLogo/Pool_Data/Pool_Data-CG-Probability.png" ]]; then\n+ echo "<td>WARNING: No sequence for C>G </br> </td>" >> $outfilePoolData\n+ else\n+ echo "<td><a href="Figures/WebLogo/Pool_Data/Pool_Data-CG.fa">Pool_Data-CG.fa</a><br/>" >> $outfilePoolData\n+ echo "<img src="Figures/WebLogo/Pool_Data/Pool_Data-CG-Probability.png"/><br/></td>" >> $outfilePoolData\n+ fi\n+ # C>T\n+ if [[ ! -e "$output_dir/Mutational_Analysis/Figures/WebLogo/Pool_Data/Pool_Data-CT-Probability.png" ]]; then\n+ echo "<td>WARNING: No sequence for C>T </br> </td>" >> $outfilePoolData\n+ else\n+ echo "<td><a href="Figures/WebLogo/Pool_Data/Pool_Data-CT.fa">Pool_Data-CT.fa</a><br/>" >> $outfilePoolData\n+ echo "<img src="Figures/WebLogo/Pool_Data/Pool_Data-CT-Probability.png"/><br/></td>" >> $outfilePoolData\n+ fi\n+ echo "</tr>" >> $outfilePoolData\n+\n+ # T>A\n+ echo "<tr>" >> $outfilePoolData\n+ if [[ ! -e "$output_dir/Mutational_Analysis/Figures/WebLogo/Pool_Data/Pool_Data-TA-Probability.png" ]]; then\n+ echo "<td>WARNING: No sequence for T>A </br> </td>" >> $outfilePoolData\n+ else\n+ echo "<td><a href="Figures/WebLogo/Pool_Data/Pool_Data-TA.fa">Pool_Data-TA.fa</a><br/>" >> $outfilePoolData\n+ echo "<img src="Figures/WebLogo/Pool_Data/Pool_Data-TA-Probability.png"/><br/></td>" >> $outfilePoolData\n+ fi\n+ # T>C\n+ if [[ ! -e "$output_dir/Mutational_Analysis/Figures/WebLogo/Pool_Data/Pool_Data-TC-Probability.png" ]]; then\n+ echo "<td>WARNING: No sequence for T>C </br> </td>" >> $outfilePoolData\n+ else\n+ echo "<td><a href="Figures/WebLogo/Pool_Data/Pool_Data-TC.fa">Pool_Data-TC.fa</a><br/>" >> $outfilePoolData\n+ echo "<img src="Figures/WebLogo/Pool_Data/Pool_Data-TC-Probability.png"/><br/></td>" >> $outfilePoolData\n+ fi\n+ # T>G\n+ if [[ ! -e "$output_dir/Mutational_Analysis/Figures/WebLogo/Pool_Data/Pool_Data-TG-Probability.png" ]]; then\n+ echo "<td>WARNING: No sequence for T>G </br> </td>" >> $outfilePoolData\n+ else\n+ echo "<td><a href="Figures/WebLogo/Pool_Data/Pool_Data-TG.fa">Pool_Data-TG.fa</a><br/>" >> $outfilePoolData\n+ echo "<img src="Figures/WebLogo/Pool_Data/Pool_Data-TG-Probability.png"/><br/></td>" >> $outfilePoolData\n+ fi\n+ echo "</tr>" >> $outfilePoolData\n+ echo "</table>" >> $outfilePoolData\n+\n+ echo "</body></html>" >> $outfilePoolData\n+\n+fi # End if --poolData\n+\n+fi # End if [ -d $output_dir/Mutational_Analysis/Figures ]\n+\n+echo "</body></html>" >> $html\n+\n+exit 0\n+\n' |
b |
diff -r 46a10309dfe2 -r eda59b985b1c rn6_listAVDB.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rn6_listAVDB.txt Mon Mar 13 08:21:19 2017 -0400 |
b |
@@ -0,0 +1,14 @@ +#This is a sample file distributed with Galaxy that is used by the +#MutSpec-Annot tools. The hg18_listAVDB.txt has this format (white space +#characters are TAB characters): +# +#<RefGenome_DatabaseName> <operation> +# +# +# +#rn6_refGene.txt g +#rn6_genomicSuperDups.txt r +#rn6_snp138.txt f +rn6_refGene.txt g +rn6_ensGene.txt g +rn6_snp146.txt f \ No newline at end of file |
b |
diff -r 46a10309dfe2 -r eda59b985b1c tool-data/annovar_index.loc.sample --- a/tool-data/annovar_index.loc.sample Tue Jun 28 02:59:32 2016 -0400 +++ b/tool-data/annovar_index.loc.sample Mon Mar 13 08:21:19 2017 -0400 |
b |
@@ -1,7 +1,7 @@ -# -# Database name (value), dbkey, type, and path. -# -# -#hg19 hg19 filter /home/galaxy/annovar/hg19db/ - - +# +# Database name (value), dbkey, type, and path. +# +# +#hg19 hg19 filter /home/galaxy/annovar/hg19db/ + + |
b |
diff -r 46a10309dfe2 -r eda59b985b1c tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Tue Jun 28 02:59:32 2016 -0400 +++ b/tool_data_table_conf.xml.sample Mon Mar 13 08:21:19 2017 -0400 |
b |
@@ -1,7 +1,7 @@ -<!-- ANNOVAR files --> -<tables> -<table name="annovar_index" comment_char="#"> -<columns>value, dbkey, type, path</columns> -<file path="tool-data/annovar_index.loc" /> -</table> -</tables> +<!-- ANNOVAR files --> +<tables> +<table name="annovar_index" comment_char="#"> +<columns>value, dbkey, type, path</columns> +<file path="tool-data/annovar_index.loc" /> +</table> +</tables> |
b |
diff -r 46a10309dfe2 -r eda59b985b1c tool_dependencies.xml --- a/tool_dependencies.xml Tue Jun 28 02:59:32 2016 -0400 +++ b/tool_dependencies.xml Mon Mar 13 08:21:19 2017 -0400 |
b |
@@ -12,17 +12,13 @@ <package name="perl" prior_installation_required="True" version="5.18.1" /> </repository> - <repository changeset_revision="4d2fd1413b56" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu"> - <package name="R" version="3.0.1" /> - </repository> - <!-- allow downloading and installing an Perl package from cpan.org--> <package>http://search.cpan.org/CPAN/authors/id/T/TO/TODDR/IPC-Run-0.94.tar.gz</package> <package>http://search.cpan.org/CPAN/authors/id/A/AB/ABIGAIL/Regexp-Common-2013031301.tar.gz</package> - <package>http://search.cpan.org/CPAN/authors/id/F/FA/FANGLY/Statistics-R-0.33.tar.gz</package> <package>http://search.cpan.org/CPAN/authors/id/J/JM/JMCNAMARA/OLE-Storage_Lite-0.19.tar.gz</package> <package>http://search.cpan.org/CPAN/authors/id/J/JM/JMCNAMARA/Spreadsheet-WriteExcel-2.40.tar.gz</package> <package>http://search.cpan.org/CPAN/authors/id/D/DL/DLUX/Parallel-ForkManager-0.7.5.tar.gz</package> + <package>http://search.cpan.org/CPAN/authors/id/G/GR/GROMMEL/Math-Round-0.06.tar.gz</package> </action> <action type="set_environment"> <environment_variable action="prepend_to" name="PERL5LIB">$INSTALL_DIR/lib/perl5</environment_variable> @@ -34,21 +30,42 @@ <package name="perl" version="5.18.1"> <repository changeset_revision="35f117d7396b" name="package_perl_5_18" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> - - <package name="weblogo" version="3.3"> - <repository changeset_revision="648e4b32f15c" name="package_weblogo_3_3" owner="devteam" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + + <!-- package_weblogo_3_3 has broken link in main Galaxy toolshed (February 2017) --> + <package name="weblogo" version="3.5"> + <install version="1.0"> + <actions> + <action type="download_by_url">https://github.com/WebLogo/weblogo/archive/master.zip</action> + <!-- Use screen instead of printer to work around a color bug in ghostscript. --> + <action type="shell_command">sed -i.bak 's#"-dPDFSETTINGS=/printer",#"-dPDFSETTINGS=/screen",#' weblogolib/__init__.py</action> + <action type="move_directory_files"> + <source_directory>.</source_directory> + <destination_directory>$INSTALL_DIR</destination_directory> + </action> + <action type="set_environment"> + <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR</environment_variable> + <environment_variable action="prepend_to" name="PYTHONPATH">$INSTALL_DIR</environment_variable> + </action> + </actions> + </install> + <readme /> </package> - + <package name="numpy" version="1.9"> <repository changeset_revision="f24fc0b630fc" name="package_python_2_7_numpy_1_9" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> - <package name="R" version="3.1.2"> - <repository changeset_revision="4d2fd1413b56" name="package_r_3_1_2" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + <package name="R" version="3.2.1"> + <repository changeset_revision="d9f7d84125b7" name="package_r_3_2_1" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> - + + <!-- package_R_3_2_1 doesn't load fontconfig, load it manually --> + <package name="fontconfig" version="2.11.1"> + <repository changeset_revision="d88d844df0cb" name="package_fontconfig_2_11_1" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="mutspec" version="0.1"> - <repository changeset_revision="63cc1719e1aa" name="package_r_mutspec_0_1" owner="iarc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="47d4bc32a67c" name="package_r_mutspec_0_1" owner="iarc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency> |