Next changeset 1:796a42e10f77 (2018-06-17) |
Commit message:
planemo upload |
added:
protein_rna_correlation.r protein_rna_correlation.xml test_data/GE_mouse_singlesample.txt test_data/PE_abundance_GE_abundance_pearson.html test_data/PE_mouse_singlesample.txt test_data/mmusculus_gene_ensembl__GRCm38.p6.map |
b |
diff -r 000000000000 -r fc89f8c3b777 protein_rna_correlation.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protein_rna_correlation.r Sun Jun 17 04:20:06 2018 -0400 |
[ |
b'@@ -0,0 +1,1025 @@\n+#==================================================================================\n+# About the script\n+#==================================================================================\n+\t# Version: V1\n+\t# This script works for single sample only \n+\t# It takes GE (Gene Expression) and PE (Protein expression) data of one sample and perform correlation, regression analysis between PE and GE data\n+\t# Input data can be of tsv format\n+\t# Script also need a parameter or option file\n+\n+#==================================================================================\n+# Dependencies\n+#==================================================================================\n+\t# Following R package has to be installed.\n+\t\t# data.table\n+\t\t# gplots\n+\t\t# MASS\n+\t\t# DMwR\n+\t\t# mgcv\n+\t# It can be installed by following R command in R session. e.g. install.packages("data.table")\n+\n+#==================================================================================\n+# How to Run\n+#==================================================================================\n+\t# Rscript PE_GE_association_singleSample_V1.r <PE_file> <GE_file> <Option_file containing tool parameters> <Ensembl map file containing directory path> <outdir>\n+\n+#==================================================================================\n+# Arguments\n+#==================================================================================\n+\t# Arg1. <PE file>: PE data (tsv format)\n+\t# Arg2. <GE file>: GE data (tsv format)\n+\t# Arg3. <Option file>: tsv format, key\\tvalue\n+\t#\t Options are\n+\t# \t\tPE_idcolno: Column number of PE file containing protein IDs\t\n+\t# \t\tGE_idcolno: Column number of GE file containing transcript IDs\n+\t# \t\tPE_expcolno: Column number of PE file containing protein expression values\n+\t# \t\tGE_expcolno: Column number of GE file containing transcript expression values\n+\t#\t\tPE_idtype: protein id type. 
It can be either Uniprot or Ensembl or HGNC_symbol\n+\t# \t\tGE_idtype: transcript id type. At present it is only one type i.e. Ensembl or HGNC_symbol\n+\t# \t\tOrganism: Organism\n+\t#\t\twriteMapUnmap: Whether to write mapped and unmapped data in input data format. It takes value as 1 or 0. If 1, mapped and unmapped data is written. Default is 1.\n+\t#\t\tdoscale: Whether to perform scaling on input data or not. If yes, abundance values are normalized by standard normalization. Default is 1.\n+\t# Arg4. <Ensembl map file containing directory>: Path to Ensembl map file containing directory e.g. /home/user/Ensembl/mapfiles\n+\t# Arg5. <Outdir>: output directory (e.g. /home/user/out1)\n+\n+#==================================================================================\n+# Sample option file\n+#==================================================================================\n+\t#PE_idcolno\t7\n+\t#GE_idcolno\t1\n+\t#PE_expcolno\t2\n+\t#GE_expcolno\t3\n+\t#PE_idtype\tEnsembl\n+\t#GE_idtype\tEnsembl\n+\t#Organism\tmmusculus\n+\t#writeMapUnmap\t1\n+\t#doscale\t1\n+\n+#==================================================================================\n+# Output\n+#==================================================================================\n+\t# The script outputs image and data folders along with the Correlation_result.html and Result.log files\n+\t# Result.log: Log file\n+\t# Correlation_result.html: main result file in html format\n+\t\n+\t# The data folder contains the following output files\n+\n+\t# PE_abundance.tsv: 2 column tsv file containing mapped id and protein expression values\n+\t# GE_abundance.tsv: 2 column tsv file containing mapped id and transcript expression values\n+\t\n+\t# If writeMapUnmap is 1 i.e. 
to write mapped and unmapped data, 4 additional file will be written\n+\t\t# PE_unmapped.tsv: Output format is same as input, PE unmapped data is written\n+\t\t# GE_unmapped.tsv: Output format is same as input, GE unmapped data is written\n+\t\t# PE_mapped.tsv: Output format is same as input, PE mapped data is written\n+\t\t# GE_mapped.tsv: Output format is same as input, GE mapped data is written\n+\t\n+\t# PE_GE_influential_observation.ts'..b'ized additive models");\n+\tpoints(PE_GE_data[,"GE_abundance"], regmodel_gam_predictedy, col="red");\n+\tpdf(outplot);\t\n+\tplot(regmodel_gam,pages=1,residuals=TRUE); ## show partial residuals\n+\tplot(regmodel_gam,pages=1,seWithMean=TRUE) ## `with intercept\' CIs\n+\tdev.off();\n+\tdev.off(devnum);\n+\n+\tcat(\n+\t"<font color=\'blue\'><h3>Other regression model fitting</h3></font>\\n",\n+\tfile = htmloutfile, append = TRUE);\n+\t\n+\tcat(\n+\t"<ul>\n+\t<li>MAE:mean absolute error</li>\n+\t<li>MSE: mean squared error</li>\n+\t<li>RMSE:root mean squared error ( sqrt(MSE) )</li>\n+\t<li>MAPE:mean absolute percentage error</li>\n+\t</ul>\n+\t",\n+\tfile = htmloutfile, append = TRUE);\n+\t\n+\tcat(\n+\t\'<h4><a href="PE_GE_modelfit.pdf" target="_blank">Comparison of model fits</a></h4>\',\n+\tfile = htmloutfile, append = TRUE);\n+\t\n+\tcat(\n+\t\'<table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>Model</th><th>MAE</th><th>MSE</th><th>RMSE</th><th>MAPE</th><th>Diagnostics Plot</th></tr>\',\n+\tfile = htmloutfile, append = TRUE);\n+\t\n+\tcat(\n+\t"<tr><td>Linear regression with all data</td><td>",regmodel_metrics[1],"</td><td>",regmodel_metrics[2],"</td><td>",regmodel_metrics[3],"</td><td>",regmodel_metrics[4],"</td><td>",\'<a href="PE_GE_lm.pdf" target="_blank">Link</a>\',"</td></tr>",\n+\t\n+\t"<tr><td>Linear regression with removal of 
outliers</td><td>",regmodel_no_outlier_metrics[1],"</td><td>",regmodel_no_outlier_metrics[2],"</td><td>",regmodel_no_outlier_metrics[3],"</td><td>",regmodel_no_outlier_metrics[4],"</td><td>",\'<a href="PE_GE_lm_without_outliers.pdf" target="_blank">Link</a>\',"</td></tr>",\n+\t\n+\t"<tr><td>Resistant regression (lqs / least trimmed squares method)</td><td>",regmodel_lqs_metrics[1],"</td><td>",regmodel_lqs_metrics[2],"</td><td>",regmodel_lqs_metrics[3],"</td><td>",regmodel_lqs_metrics[4],"</td><td>", \'<a href="PE_GE_lqs.pdf" target="_blank">Link</a>\',"</td></tr>",\n+\t\n+\t"<tr><td>Robust regression (rlm / Huber M-estimator method)</td><td>",regmodel_rlm_metrics[1],"</td><td>",regmodel_rlm_metrics[2],"</td><td>",regmodel_rlm_metrics[3],"</td><td>",regmodel_rlm_metrics[4],"</td><td>",\'<a href="PE_GE_rlm.pdf" target="_blank">Link</a>\',"</td></tr>",\n+\t\n+\t\n+\t"<tr><td>Polynomial regression with degree 2</td><td>",regmodel_poly2_metrics[1],"</td><td>",regmodel_poly2_metrics[2],"</td><td>",regmodel_poly2_metrics[3],"</td><td>",regmodel_poly2_metrics[4],"</td><td>",\'<a href="PE_GE_poly2.pdf" target="_blank">Link</a>\',"</td></tr>",\n+\t\n+\t"<tr><td>Polynomial regression with degree 3</td><td>",regmodel_poly3_metrics[1],"</td><td>",regmodel_poly3_metrics[2],"</td><td>",regmodel_poly3_metrics[3],"</td><td>",regmodel_poly3_metrics[4],"</td><td>",\'<a href="PE_GE_poly3.pdf" target="_blank">Link</a>\',"</td></tr>",\n+\t\n+\t"<tr><td>Polynomial regression with degree 4</td><td>",regmodel_poly4_metrics[1],"</td><td>",regmodel_poly4_metrics[2],"</td><td>",regmodel_poly4_metrics[3],"</td><td>",regmodel_poly4_metrics[4],"</td><td>",\'<a href="PE_GE_poly4.pdf" target="_blank">Link</a>\',"</td></tr>",\n+\t\n+\t"<tr><td>Polynomial regression with degree 5</td><td>",regmodel_poly5_metrics[1],"</td><td>",regmodel_poly5_metrics[2],"</td><td>",regmodel_poly5_metrics[3],"</td><td>",regmodel_poly5_metrics[4],"</td><td>",\'<a href="PE_GE_poly5.pdf" 
target="_blank">Link</a>\',"</td></tr>",\n+\t\n+\t"<tr><td>Polynomial regression with degree 6</td><td>",regmodel_poly6_metrics[1],"</td><td>",regmodel_poly6_metrics[2],"</td><td>",regmodel_poly6_metrics[3],"</td><td>",regmodel_poly6_metrics[4],"</td><td>",\'<a href="PE_GE_poly6.pdf" target="_blank">Link</a>\',"</td></tr>",\n+\t\n+\t"<tr><td>Generalized additive models</td><td>",regmodel_gam_metrics[1],"</td><td>",regmodel_gam_metrics[2],"</td><td>",regmodel_gam_metrics[3],"</td><td>",regmodel_gam_metrics[4],"</td><td>",\'<a href="PE_GE_gam.pdf" target="_blank">Link</a>\',"</td></tr>",\n+\t\n+\t"</table>",\t\n+\tfile = htmloutfile, append = TRUE);\n+\t\n+\t\n+\t# Warning On\n+\toptions(warn = oldw)\n+\t\n+\t\n+\t\n\\ No newline at end of file\n' |
b |
diff -r 000000000000 -r fc89f8c3b777 protein_rna_correlation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protein_rna_correlation.xml Sun Jun 17 04:20:06 2018 -0400 |
[ |
@@ -0,0 +1,96 @@ +<tool id="protein_rna_correlation" name="protein_rna_correlation" version="0.1.0"> + <description>Correlation between protein and rna expression (Single Sample)</description> + <requirements> + <requirement type="package" version="3.3.1">r-base</requirement> + <requirement type="package" version="1.18.0">bioconductor-rgalaxy</requirement> + <requirement type="package" version="1.21.0">bioconductor-biocinstaller</requirement> + <requirement type="package" version="1.9">rmarkdown</requirement> + <requirement type="package" version="1.9">MASS</requirement> + <requirement type="package" version="1.8-23">mgcv</requirement> + <requirement type="package" version="0.4.1">DMwR</requirement> + <requirement type="package" version="1.11.4">data.table</requirement> + <requirement type="package" version="3.0.1">gplots</requirement> + </requirements> + <command detect_errors="exit_code" interpreter="Rscript"><![CDATA[protein_rna_correlation.r $pe_exp $ge_exp $pe_idcol $ge_idcol $pe_expcol $ge_expcol $pe_idtype $ge_idtype $organism_map $writeMapUnmap $doScale "$html_file" "$html_file.files_path"]]></command> + + <inputs> + <param name="pe_exp" type="data" format="tabular"> + <label>Input Protein Expression File</label> + </param> + <param name="pe_idcol" type="integer"> + <label>Column: Protein/Gene ID</label> + </param> + <param name="pe_expcol" type="integer"> + <label>Column: Protein Expression Values</label> + </param> + + <param name="ge_exp" type="data" format="tabular"> + <label>Input RNA Expression File</label> + </param> + + <param name="ge_idcol" type="integer"> + <label>Column: RNA/Gene ID</label> + </param> + <param name="ge_expcol" type="integer"> + <label>Column: RNA Expression Values</label> + </param> + + <param name="pe_idtype" type="select"> + <option value='ensembl' selected>Ensembl</option> + <option value='uniprot'>Uniprot</option> + <option value='hgnc'>HGNC</option> + </param> + + <param name="ge_idtype" type="select"> + <option 
value='ensembl' selected>Ensembl</option> + <option value='uniprot'>Uniprot</option> + <option value='hgnc'>HGNC</option> + </param> + + <param name="organism_map" type="data" format="tabular"> + <label>Biomart ID Mapping file (.map)</label> + </param> + + <!--<param name="method" type="select" label="Correlation Method"> + <option value="pearson" selected="true">Pearson</option> + <option value="spearman">Spearman</option> + <option value="kendall">Kendall</option> + </param>--> + + <param name="writeMapUnmap" type="boolean"> + <label>Create the list of Mapped and Unmapped Identifiers in HTML</label> + </param> + + <param name="doScale" type="boolean"> + <label>Scale the abundance values</label> + </param> + + </inputs> + + <outputs> + <data format="html" name="html_file" label="protein_rna_corr_${tool_name}.html"/> + </outputs> + + <tests> + <test> + <param name="pe_exp" value="PE_mouse_singlesample.txt"/> + <param name="pe_idcol" value="7"/> + <param name="ge_exp" value="GE_mouse_singlesample.txt"/> + <param name="ge_idcol" value="1"/> + <param name="method" value="pearson"/> + <param name="pe_expcol" value="13"/> + <param name="ge_expcol" value="10"/> + <param name="pe_idtype" value="Ensembl_with_version"/> + <param name="ge_idtype" value="Ensembl_with_version"/> + <param name="organism_map" value="mmusculus_gene_ensembl__GRCm38.p6.map"/> + <param name="writeMapUnmap" value="1"/> + <param name="doScale" value="1"/> + <output name="html_file" file="PE_abundance_GE_abundance_pearson.html"/> + </test> + </tests> + + <help> +Proteome Transcriptome Correlation +Developer: Priyabrata Panigrahi + </help> +</tool> |
b |
diff -r 000000000000 -r fc89f8c3b777 test_data/GE_mouse_singlesample.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/GE_mouse_singlesample.txt Sun Jun 17 04:20:06 2018 -0400 |
b |
b'@@ -0,0 +1,191651 @@\n+test_id\tgene_id\tgene\tlocus\tsample_1\tsample_2\tstatus\tvalue_1\tvalue_2\tlog2(fold_change)\ttest_stat\tp_value\tq_value\tsignificant\n+ENSMUST00000000001\tMSTRG.47743\tGnai3\t3:108107279-108146146\tGrp1\tGrp2\tOK\t40.0527\t38.1887\t-0.0687541\t-0.0482803\t0.9623\t0.986143\tno\n+ENSMUST00000000003\tENSMUSG00000000003\tPbsn\tX:77837900-77853623\tGrp1\tGrp2\tNOTEST\t0\t0\t0\t0\t1\t1\tno\n+ENSMUST00000000010\tENSMUSG00000020875\tHoxb9\t11:96271456-96276595\tGrp1\tGrp2\tNOTEST\t0\t6.47449e-06\tinf\t0\t1\t1\tno\n+ENSMUST00000000028\tMSTRG.27984\tCdc45\t16:18780446-18835306\tGrp1\tGrp2\tOK\t3.67585\t13.8308\t1.91174\t0.603655\t0.4881\t0.985955\tno\n+ENSMUST00000000033\tENSMUSG00000048583\tIgf2\t7:142650765-142670356\tGrp1\tGrp2\tOK\t1.15289\t0.0663305\t-4.11944\t-1.95411\t0.1407\t0.866662\tno\n+ENSMUST00000000049\tMSTRG.13684\tApoh\t11:107933386-108414396\tGrp1\tGrp2\tNOTEST\t0\t0\t0\t0\t1\t1\tno\n+ENSMUST00000000058\tENSMUSG00000000058\tCav2\t6:17197750-17385604\tGrp1\tGrp2\tNOTEST\t0.0272761\t0.223174\t3.03246\t0\t1\t1\tno\n+ENSMUST00000000080\tMSTRG.18095\tKlf6\t13:5861481-5870394\tGrp1\tGrp2\tOK\t8.10148\t11.8782\t0.552065\t0.48068\t0.617\t0.985955\tno\n+ENSMUST00000000087\tMSTRG.51759\tScmh1\t4:120405280-120530186\tGrp1\tGrp2\tOK\t0.635323\t0\t-inf\t-nan\t0.13515\t0.86537\tno\n+ENSMUST00000000090\tMSTRG.71988\tCox5a\t9:57521273-57543184\tGrp1\tGrp2\tOK\t14.3539\t17.9177\t0.319945\t0.11626\t0.63655\t0.985955\tno\n+ENSMUST00000000094\tMSTRG.43839\tDlgap4\t2:156613704-156764363\tGrp1\tGrp2\tNOTEST\t0.000265083\t0\t-inf\t0\t1\t1\tno\n+ENSMUST00000000095\tENSMUSG00000000093\tTbx2\t11:85832550-85841948\tGrp1\tGrp2\tNOTEST\t0.166062\t0.0582297\t-1.5119\t0\t1\t1\tno\n+ENSMUST00000000096\tENSMUSG00000000094\tTbx4\t11:85886421-85916097\tGrp1\tGrp2\tNOTEST\t0.000308944\t0.000301493\t-0.0352199\t0\t1\t1\tno\n+ENSMUST00000000109\tENSMUSG00000055022\tCntn1\t15:92051164-92341967\tGrp1\tGrp2\tNOTEST\t0\t0.0215102\tinf\t0\t1\t1\tno\n+ENSMUST00000000122\tEN
SMUSG00000000120\tNgfr\t11:95568817-95587735\tGrp1\tGrp2\tNOTEST\t0.111538\t0\t-inf\t0\t1\t1\tno\n+ENSMUST00000000127\tENSMUSG00000000125\tWnt3\t11:103774149-103817957\tGrp1\tGrp2\tNOTEST\t0.00899237\t0.0693697\t2.94753\t0\t1\t1\tno\n+ENSMUST00000000128\tENSMUSG00000000126\tWnt9a\t11:59306927-59333552\tGrp1\tGrp2\tOK\t0.301105\t0\t-inf\t-nan\t0.1185\t0.864173\tno\n+ENSMUST00000000129\tENSMUSG00000000127\tFer\t17:63896017-64139494\tGrp1\tGrp2\tOK\t0.242907\t0.568402\t1.22651\t0.580561\t0.4202\t0.985955\tno\n+ENSMUST00000000137\tMSTRG.9587\tActr2\t11:20062303-20112913\tGrp1\tGrp2\tOK\t84.9559\t62.2068\t-0.449641\t-0.313631\t0.762\t0.985955\tno\n+ENSMUST00000000145\tENSMUSG00000020333\tAcsl6\t11:54303797-54364756\tGrp1\tGrp2\tNOTEST\t0\t0\t0\t0\t1\t1\tno\n+ENSMUST00000000153\tMSTRG.58097\tGna12\t5:140724126-140830431\tGrp1\tGrp2\tOK\t11.4348\t2.7398\t-2.06129\t-1.28412\t0.22425\t0.985955\tno\n+ENSMUST00000000161\tENSMUSG00000000157\tItgb2l\t16:96361667-96525793\tGrp1\tGrp2\tNOTEST\t0.00420308\t0\t-inf\t0\t1\t1\tno\n+ENSMUST00000000163\tENSMUSG00000000159\tIgsf5\t16:96361667-96525793\tGrp1\tGrp2\tNOTEST\t0\t0\t0\t0\t1\t1\tno\n+ENSMUST00000000171\tMSTRG.71697\tPih1d2\t9:50603909-50625000\tGrp1\tGrp2\tNOTEST\t0\t0\t0\t0\t1\t1\tno\n+ENSMUST00000000175\tMSTRG.71694\tSdhd\t9:50596356-50603812\tGrp1\tGrp2\tOK\t82.4595\t65.8385\t-0.324754\t-0.292372\t0.7828\t0.985955\tno\n+ENSMUST00000000186\tENSMUSG00000000182\tFgf23\t6:127072901-127081408\tGrp1\tGrp2\tNOTEST\t0\t0.288512\tinf\t0\t1\t1\tno\n+ENSMUST00000000187\tENSMUSG00000000183\tFgf6\t6:127015585-127028187\tGrp1\tGrp2\tNOTEST\t0\t0.0223783\tinf\t0\t1\t1\tno\n+ENSMUST00000000188\tMSTRG.62131\tCcnd2\t6:127122965-127212411\tGrp1\tGrp2\tOK\t23.9548\t18.3554\t-0.384111\t-0.329916\t0.74875\t0.985955\tno\n+ENSMUST00000000193\tENSMUSG00000035385\tCcl2\t11:82028223-82038562\tGrp1\tGrp2\tNOTEST\t0.168996\t0\t-inf\t0\t1\t1\tno\n+ENSMUST00000000194\tENSMUSG00000035352\tCcl12\t11:82101844-82103400\tGrp1\tGrp2\tNOTEST\t0\t0\t0\t0\t1\t1\t
no\n+ENSMUST00000000199\tMSTRG.39378\tNcs1\t2:31245822-31295989\tGrp1\tGrp2\tOK\t1.76532\t0.552493\t-1.6759\t-1.01327\t0.2932\t0.985955\tno\n+ENSMUST00000000201\tMSTRG.24766\tNalcn\t14:123276633-123627144\tGrp1\tGrp2\tNOTEST\t0\t0\t0\t0\t1\t1\tno\n+ENSMUST00000000206\tENSMUSG00000000202\tBtbd17\t11:114791216-114795945\tGrp1\tGrp2\tNOTEST\t0\t0\t0\t0\t1\t1\tno\n+ENSMUST00000000208\tENSMUSG00000000204\tSlfn4\t11:83175185-83190216\tGrp1\tGrp2\tNOTEST\t0\t0\t0\t0\t1\t1\tno\n+ENSMUST000000002'..b'\t-nan\t0.13405\t0.86537\tno\n+MSTRG.9970.1\tMSTRG.9970\t-\t11:30885357-31102704\tGrp1\tGrp2\tOK\t6.19575\t1.64816\t-1.91042\t-0.0911642\t0.4795\t0.985955\tno\n+MSTRG.9971.1\tMSTRG.9971\t-\t11:30885357-31102704\tGrp1\tGrp2\tOK\t2.55443\t10.3999\t2.0255\t0.0911279\t0.45045\t0.985955\tno\n+MSTRG.9972.1\tMSTRG.9972\t-\t11:30885357-31102704\tGrp1\tGrp2\tOK\t3.18003\t4.36117\t0.455677\t0.0380224\t0.8508\t0.985955\tno\n+MSTRG.9973.1\tMSTRG.9973\t-\t11:30885357-31102704\tGrp1\tGrp2\tOK\t0.714891\t20.2176\t4.82175\t0.0988331\t0.32585\t0.985955\tno\n+MSTRG.9974.1\tMSTRG.9974\t-\t11:30885357-31102704\tGrp1\tGrp2\tOK\t2.38802\t2.42592\t0.02272\t0.00288644\t0.98405\t0.989359\tno\n+MSTRG.9975.1\tMSTRG.9975\t-\t11:30885357-31102704\tGrp1\tGrp2\tOK\t1.71757\t7.57117\t2.14015\t0.104932\t0.4328\t0.985955\tno\n+MSTRG.9976.1\tMSTRG.9976\t-\t11:30885357-31102704\tGrp1\tGrp2\tOK\t4.13424\t2.59593\t-0.671374\t-0.053745\t0.7914\t0.985955\tno\n+MSTRG.9977.1\tMSTRG.9977\t-\t11:30885357-31102704\tGrp1\tGrp2\tOK\t2.86051\t5.97639\t1.063\t0.0866481\t0.66895\t0.985955\tno\n+MSTRG.998.1\tMSTRG.998\t-\t1:44551510-44796838\tGrp1\tGrp2\tOK\t0\t3.39939\tinf\t-nan\t0.14295\t0.866668\tno\n+MSTRG.9980.1\tMSTRG.9980\t-\t11:34787784-34788343\tGrp1\tGrp2\tOK\t4.18965\t0.92874\t-2.17348\t-1.08065\t0.16065\t0.881798\tno\n+MSTRG.9981.1\tMSTRG.9981\t-\t11:34794972-34795555\tGrp1\tGrp2\tOK\t3.18169\t0.783335\t-2.02209\t-0.986648\t0.1871\t0.927387\tno\n+MSTRG.9982.1\tMSTRG.9982\t-\t11:34797053-34797427\tGrp1\tGrp2\tOK\t7.01
613\t4.03054\t-0.799701\t-0.409199\t0.6435\t0.985955\tno\n+MSTRG.9983.1\tMSTRG.9983\t-\t11:34800093-34800342\tGrp1\tGrp2\tOK\t8.51451\t0.547361\t-3.95936\t-1.49321\t0.26925\t0.985955\tno\n+MSTRG.9984.1\tMSTRG.9984\t-\t11:34834643-34834893\tGrp1\tGrp2\tOK\t0\t12.9798\tinf\t-nan\t0.00385\t0.217368\tno\n+MSTRG.9985.1\tMSTRG.9985\t-\t11:34835419-34835632\tGrp1\tGrp2\tOK\t0.693285\t18.7187\t4.75489\t1.74918\t0.26605\t0.985955\tno\n+MSTRG.9986.1\tMSTRG.9986\t-\t11:35769027-35769249\tGrp1\tGrp2\tOK\t0\t14.8332\tinf\t-nan\t0.0042\t0.217368\tno\n+MSTRG.9987.3\tMSTRG.9987\t-\t11:34809189-34833641\tGrp1\tGrp2\tOK\t1.25602\t2.72447\t1.11711\t0.1319\t0.68495\t0.985955\tno\n+MSTRG.9988.1\tMSTRG.9988\t-\t11:34809189-34833641\tGrp1\tGrp2\tOK\t0.692226\t6.4918\t3.2293\t0.494203\t0.38545\t0.985955\tno\n+MSTRG.9989.1\tMSTRG.9989\t-\t11:34809189-34833641\tGrp1\tGrp2\tOK\t10.1922\t7.56016\t-0.430979\t-0.0413738\t0.8606\t0.985955\tno\n+MSTRG.9990.1\tMSTRG.9990\t-\t11:34809189-34833641\tGrp1\tGrp2\tOK\t0.274983\t4.9497\t4.16993\t0.233166\t0.33295\t0.985955\tno\n+MSTRG.9991.1\tMSTRG.9991\t-\t11:34809189-34833641\tGrp1\tGrp2\tOK\t1.10444\t4.91537\t2.15398\t0.442262\t0.5498\t0.985955\tno\n+MSTRG.9992.1\tMSTRG.9992\t-\t11:34809189-34833641\tGrp1\tGrp2\tOK\t0\t20.388\tinf\t-nan\t0.0844\t0.844716\tno\n+MSTRG.9993.1\tMSTRG.9993\t-\t11:34809189-34833641\tGrp1\tGrp2\tOK\t1.26821\t13.8598\t3.45003\t0.184368\t0.36755\t0.985955\tno\n+MSTRG.9994.1\tMSTRG.9994\t-\t11:34809189-34833641\tGrp1\tGrp2\tOK\t0.234847\t5.38012\t4.51785\t0.601977\t0.35495\t0.985955\tno\n+MSTRG.9995.1\tMSTRG.9995\t-\t11:34809189-34833641\tGrp1\tGrp2\tOK\t1.98826\t16.1901\t3.02554\t0.190949\t0.3608\t0.985955\tno\n+MSTRG.9996.10\tMSTRG.9996\t-\t11:23306894-23499661\tGrp1\tGrp2\tOK\t2.81672\t2.06278\t-0.449425\t-0.0309266\t0.85\t0.985955\tno\n+MSTRG.9996.14\tMSTRG.9996\t-\t11:23306894-23499661\tGrp1\tGrp2\tOK\t2.51718\t2.36244\t-0.0915281\t-0.0207139\t0.92695\t0.985955\tno\n+MSTRG.9996.3\tMSTRG.9996\t-\t11:23306894-23499661\tGrp1\t
Grp2\tOK\t9.93914\t5.72822\t-0.795034\t-0.0363089\t0.73525\t0.985955\tno\n+MSTRG.9996.4\tMSTRG.9996\t-\t11:23306894-23499661\tGrp1\tGrp2\tOK\t2.70843\t2.18448\t-0.310169\t-0.0141147\t0.89915\t0.985955\tno\n+MSTRG.9996.5\tMSTRG.9996\t-\t11:23306894-23499661\tGrp1\tGrp2\tOK\t2.41244\t4.73318\t0.972316\t0.0341829\t0.69865\t0.985955\tno\n+MSTRG.9996.6\tMSTRG.9996\t-\t11:23306894-23499661\tGrp1\tGrp2\tOK\t7.44431\t4.04894\t-0.878593\t-0.0818482\t0.72945\t0.985955\tno\n+MSTRG.9996.9\tMSTRG.9996\t-\t11:23306894-23499661\tGrp1\tGrp2\tOK\t3.14913\t1.31346\t-1.26158\t-0.0732764\t0.6069\t0.985955\tno\n+MSTRG.9997.1\tMSTRG.9997\t-\t11:23306894-23499661\tGrp1\tGrp2\tOK\t1.95521\t4.11356\t1.07306\t0.0440485\t0.6565\t0.985955\tno\n+MSTRG.9998.1\tMSTRG.9998\t-\t11:23306894-23499661\tGrp1\tGrp2\tOK\t4.66021\t4.34398\t-0.101377\t-0.00487762\t0.96305\t0.986149\tno\n+MSTRG.9999.1\tMSTRG.9999\t-\t11:23306894-23499661\tGrp1\tGrp2\tOK\t2.61942\t1.87673\t-0.481028\t-0.0393643\t0.84695\t0.985955\tno\n' |
b |
diff -r 000000000000 -r fc89f8c3b777 test_data/PE_abundance_GE_abundance_pearson.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/PE_abundance_GE_abundance_pearson.html Sun Jun 17 04:20:06 2018 -0400 |
b |
b'@@ -0,0 +1,56 @@\n+<html><body>\n+<h1>Association between proteomics and transcriptomics data</h1>\n+ <font color=\'blue\'><h3>Input data summary</h3></font> <ul> <li>Abbrebiations used: PE (Proteomics) and GE (Transcriptomics) </li> <li>Input PE data dimension (Row Column): 3597 58 </li> <li>Input GE data dimension (Row Column): 191650 14 </li> <li>Protein ID fetched from column: 7 </li> <li>Transcript ID fetched from column: 1 </li> <li>Protein ID type: ensembl_peptide_id_version </li> <li>Transcript ID type: ensembl_transcript_id_version </li> <li>Protein expression data fetched from column: 13 </li> <li>Transcript expression data fetched from column: 10 </li><li>Total Protein ID mapped: 3582 </li> <li>Total Protein ID unmapped: 15 </li> <li>Total Transcript ID mapped: 3582 </li> <li>Total Transcript ID unmapped: 188068 </li></ul><font color=\'blue\'><h3>Download mapped unmapped data</h3></font> <ul><li>Protein mapped data: <a href=" output_fold/PE_mapped.tsv " target="_blank"> Link</a> </li> <li>Protein unmapped data: <a href=" output_fold/PE_unmapped.tsv " target="_blank"> Link</a> </li> <li>Transcript mapped data: <a href=" output_fold/GE_mapped.tsv " target="_blank"> Link</a> </li> <li>Transcript unmapped data: <a href=" output_fold/GE_unmapped.tsv " target="_blank"> Link</a> </li><li>Protein abundance data: <a href=" output_fold/PE_abundance.tsv " target="_blank"> Link</a> </li> <li>Transcript abundance data: <a href=" output_fold/GE_abundance.tsv " target="_blank"> Link</a> </li></ul><ul> <li>Number of entries in Transcriptome data used for correlation: 3582 </li> <li>Number of entries in Proteome data used for correlation: 3582 </li></ul><font color=\'blue\'><h3>Filtering</h3></font> Checking for NA or Inf or -Inf in either Transcriptome or Proteome data, if found, remove those entry<br> <ul> <li>Number of NA found: 88 </li> <li>Number of Inf or -Inf found: 559 </li></ul><ul><li>Protein excluded data with NA or Inf or -Inf: <a href=" 
output_fold/PE_excluded_NA_Inf.tsv " target="_blank"> Link</a> </li> <li>Transcript excluded data with NA or Inf or -Inf: <a href=" output_fold/GE_excluded_NA_Inf.tsv " target="_blank"> Link</a> </li></ul><font color=\'blue\'><h3>Filtered data summary</h3></font> Excluding entires with abundance values: NA/Inf/-Inf<br> <ul> <li>Number of entries in Transcriptome data remained: 2949 </li> <li>Number of entries in Proteome data remained: 2949 </li></ul><font color=\'blue\'><h3>Proteome data summary</h3></font>\n+ <table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>Parameter</th><th>Value</th></tr><tr><td> </td><td> Min. :-2.98277 </td></tr>\n+<tr><td> </td><td> 1st Qu.:-0.40393 </td></tr>\n+<tr><td> </td><td> Median :-0.07986 </td></tr>\n+<tr><td> </td><td> Mean : 0.00000 </td></tr>\n+<tr><td> </td><td> 3rd Qu.: 0.26061 </td></tr>\n+<tr><td> </td><td> Max. :15.13211 </td></tr>\n+</table>\n+<font color=\'blue\'><h3>Transcriptome data summary</h3></font>\n+ <table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>Parameter</th><th>Value</th></tr><tr><td> </td><td> Min. :-8.33003 </td></tr>\n+<tr><td> </td><td> 1st Qu.:-0.06755 </td></tr>\n+<tr><td> </td><td> Median : 0.09635 </td></tr>\n+<tr><td> </td><td> Mean : 0.00000 </td></tr>\n+<tr><td> </td><td> 3rd Qu.: 0.18103 </td></tr>\n+<tr><td> </td><td> Max. 
: 8.50430 </td></tr>\n+</table>\n+<font color=\'blue\'><h3>Distribution of Proteome and Transcripome abundance (Box plot and Density plot)</h3></font>\n+ <img src="AbundancePlot.png"><font color=\'blue\'><h3>Scatter plot between Proteome and Transcriptome Abundance</h3></font>\n+ <img src="AbundancePlot_scatter.png"><font color=\'blue\'><h3>Correlation with all data</h3></font>\n+ <table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>Parameter</th><th>Method 1</th><th>'..b'3 </td> <td> 0.9756628 </td> <td> 0.001209039 </td></tr><tr> <td> ENSMUSP00000081956.8 </td> <td> 3.674308 </td> <td> ENSMUST00000005607 </td> <td> 1.306612 </td> <td> 0.006223403 </td></tr></table><font color=\'blue\'><h3>Scatter plot between Proteome and Transcriptome Abundance, after removal of outliers/influential observations</h3></font>\n+ <img src="AbundancePlot_scatter_without_outliers.png"><font color=\'blue\'><h3>Correlation with removal of outliers / influential observations</h3></font>\n+ <p>We removed the influential observations and reestimated the correlation values.</p><table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>Parameter</th><th>Method 1</th><th>Method 2</th><th>Method 3</th></tr><tr><td>Correlation method used</td><td> Pearson\'s product-moment correlation </td><td> Spearman\'s rank correlation rho </td><td> Kendall\'s rank correlation tau </td></tr> <tr><td>Correlation</td><td> 0.01485058 </td><td> 0.0246989 </td><td> 0.01689519 </td></tr> <tr><td>Pvalue</td><td> 0.4273403 </td><td> 0.1867467 </td><td> 0.1918906 </td></tr></table>\n+<font color=\'blue\'><h3>Heatmap of PE and GE abundance values</h3></font>\n+<img src="PE_GE_heatmap.png"><font color=\'blue\'><h3>Kmean clustering</h3></font>\n+Number of Clusters: 5<br><a href=" output_fold/PE_GE_kmeans_clusterpoints.txt " target="_blank">Download cluster list</a><br><img 
src="PE_GE_kmeans.png"><font color=\'blue\'><h3>Other regression model fitting</h3></font>\n+<ul>\n+\t<li>MAE:mean absolute error</li>\n+\t<li>MSE: mean squared error</li>\n+\t<li>RMSE:root mean squared error ( sqrt(MSE) )</li>\n+\t<li>MAPE:mean absolute percentage error</li>\n+\t</ul>\n+\t<h4><a href="PE_GE_modelfit.pdf" target="_blank">Comparison of model fits</a></h4><table class="embedded-table" border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#c3f0d6"><th>Model</th><th>MAE</th><th>MSE</th><th>RMSE</th><th>MAPE</th><th>Diagnostics Plot</th></tr><tr><td>Linear regression with all data</td><td> 0.5463329 </td><td> 0.9996481 </td><td> 0.999824 </td><td> 0.9996321 </td><td> <a href="PE_GE_lm.pdf" target="_blank">Link</a> </td></tr> <tr><td>Linear regression with removal of outliers</td><td> 0.5404805 </td><td> 1.006281 </td><td> 1.003136 </td><td> 1.455637 </td><td> <a href="PE_GE_lm_without_outliers.pdf" target="_blank">Link</a> </td></tr> <tr><td>Resistant regression (lqs / least trimmed squares method)</td><td> 0.5407598 </td><td> 1.007932 </td><td> 1.003958 </td><td> 1.537172 </td><td> <a href="PE_GE_lqs.pdf" target="_blank">Link</a> </td></tr> <tr><td>Robust regression (rlm / Huber M-estimator method)</td><td> 0.5404879 </td><td> 1.005054 </td><td> 1.002524 </td><td> 1.411806 </td><td> <a href="PE_GE_rlm.pdf" target="_blank">Link</a> </td></tr> <tr><td>Polynomial regression with degree 2</td><td> 0.546322 </td><td> 0.9996472 </td><td> 0.9998236 </td><td> 0.9993865 </td><td> <a href="PE_GE_poly2.pdf" target="_blank">Link</a> </td></tr> <tr><td>Polynomial regression with degree 3</td><td> 0.5469588 </td><td> 0.9976384 </td><td> 0.9988185 </td><td> 1.043158 </td><td> <a href="PE_GE_poly3.pdf" target="_blank">Link</a> </td></tr> <tr><td>Polynomial regression with degree 4</td><td> 0.5467885 </td><td> 0.9975077 </td><td> 0.9987531 </td><td> 1.041541 </td><td> <a href="PE_GE_poly4.pdf" target="_blank">Link</a> </td></tr> 
<tr><td>Polynomial regression with degree 5</td><td> 0.5467813 </td><td> 0.9975076 </td><td> 0.998753 </td><td> 1.041209 </td><td> <a href="PE_GE_poly5.pdf" target="_blank">Link</a> </td></tr> <tr><td>Polynomial regression with degree 6</td><td> 0.5465911 </td><td> 0.996652 </td><td> 0.9983246 </td><td> 1.056632 </td><td> <a href="PE_GE_poly6.pdf" target="_blank">Link</a> </td></tr> <tr><td>Generalized additive models</td><td> 0.5463695 </td><td> 0.9976796 </td><td> 0.9988391 </td><td> 1.032766 </td><td> <a href="PE_GE_gam.pdf" target="_blank">Link</a> </td></tr> </table>\n\\ No newline at end of file\n' |
b |
diff -r 000000000000 -r fc89f8c3b777 test_data/PE_mouse_singlesample.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/PE_mouse_singlesample.txt Sun Jun 17 04:20:06 2018 -0400 |
[ |
b'@@ -0,0 +1,3598 @@\n+N\tUnused\tTotal\t%Cov\t%Cov(50)\t%Cov(95)\tAccession\tName\tSpecies\tPeptides(95%)\t\t\t115:114\tPVal 115:114\tEF 115:114\tLowerCI 115:114\tUpperCI 115:114\tGene Ontology\tGene Names\tPathway\tInteractions\tProtein Families\tSubcellular Location\tFunction\tDisease\tTissue Specificity\tKeywords\tModifications\tNatural Variants\tGlycosylations\tPropeptide\tSignal Peptide\tInitiator Methionine\tSequence Conflicts\tSequence Uncertainties\tAlternative Sequence\tNon-Standard Residues\tLipidations\tDisulfide Bonds\tCross Links\tProtein Existence\tInterPro\tPolymorphism\tPTM\tRNA Editing\tActive Site\tBinding Site\tEntry\tEntry Name\tEntry Status\tSequence\tProtein Names\tOrganism\tSequence Length\tFeatures\tMass Spectrometry\tVirus Hosts\tEntry Information\n+1\t151.83\t151.83\t59.03000236\t50.45999885\t37.70000041\tENSMUSP00000016771.6\tpep chromosome:GRCm38:15:77760587:77842175:-1 gene:ENSMUSG00000022443.16 transcript:ENSMUST00000016771.12 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Myh9 description:myosin, heavy polypeptide 9, non-muscle [Source:MGI Symbol;Acc:MGI:107717]\tSource:MGI Symbol;Acc:MGI:107717\t95\t\t\t0.959251702\t6.30E-02\t1.04489696\t0.918034732\t1.002319217\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+2\t146.36\t143.05\t35.35999954\t24.4599998\t16.13000035\tENSMUSP00000075772.5\tpep chromosome:GRCm38:15:76170976:76206322:-1 gene:ENSMUSG00000022565.15 transcript:ENSMUST00000076442.11 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Plec description:plectin [Source:MGI Symbol;Acc:MGI:1277961]\tSource:MGI Symbol;Acc:MGI:1277961\t80\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+3\t130.22\t127.4\t42.37000048\t31.04000092\t24.42999929\tENSMUSP00000033310.7\tpep chromosome:GRCm38:7:135689784:135716361:-1 gene:ENSMUSG00000031004.8 transcript:ENSMUST00000033310.8 gene_biotype:protein_coding 
transcript_biotype:protein_coding gene_symbol:Mki67 description:antigen identified by monoclonal antibody Ki 67 [Source:MGI Symbol;Acc:MGI:106035]\tSource:MGI Symbol;Acc:MGI:106035\t74\t\t\t1.258249998\t2.58E-11\t1.058794022\t1.188380361\t1.332227588\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+4\t122.82\t121.97\t44.33999956\t36.10999882\t28.38000059\tENSMUSP00000112606.2\tpep chromosome:GRCm38:1:150393250:150449935:1 gene:ENSMUSG00000006005.17 transcript:ENSMUST00000119161.8 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Tpr description:translocated promoter region, nuclear basket protein [Source:MGI Symbol;Acc:MGI:1922066]\tSource:MGI Symbol;Acc:MGI:1922066\t70\t\t\t0.956575096\t1.00E-01\t1.054646015\t0.907010555\t1.008848071\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+5\t121.55\t119.73\t40.77000022\t35.13999879\t28.45000029\tENSMUSP00000030187.7\tpep chromosome:GRCm38:4:43531519:43562422:-1 gene:ENSMUSG00000028465.16 transcript:ENSMUST00000030187.13 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Tln1 description:talin 1 [Source:MGI Symbol;Acc:MGI:1099832]\tSource:MGI Symbol;Acc:MGI:1099832\t76\t\t\t0.827451229\t8.32E-10\t1.052091002\t0.786482573\t0.87055397\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+6\t108.13\t107.1\t43.70000064\t35.42999923\t26.26999915\tENSMUSP00000081912.5\tpep chromosome:GRCm38:7:101969821:102014964:1 gene:ENSMUSG00000066306.13 transcript:ENSMUST00000084852.12 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Numa1 description:nuclear mitotic apparatus protein 1 [Source:MGI Symbol;Acc:MGI:2443665]\tSource:MGI 
Symbol;Acc:MGI:2443665\t61\t\t\t0.799315274\t4.78E-08\t1.071599007\t0.745908916\t0.856545448\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+7\t105.08\t103.7\t38.01999986\t30.23999929\t24.81999993\tENSMUSP00000121082.1\tpep chromosome:GRCm38:X:74223789:74246364:-1 gene:ENSMUSG00000031328.15 transcript:ENSMUST00000130007.7 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Flna description:filamin, alpha [Source:MGI Symbol;Acc:MGI:95556]\tSource:MGI Symbol;Acc:MGI:95556\t61\t\t\t0.953702807\t7.01E-02\t1.052806973\t0.905866742\t1.004064918\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+8\t94.13\t92.93\t28.18999887\t18'..b'29\t20.04999965\t8.883000165\t0\tENSMUSP00000034000.8\tpep chromosome:GRCm38:8:41340197:41374773:-1 gene:ENSMUSG00000031591.14 transcript:ENSMUST00000034000.14 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Asah1 description:N-acylsphingosine amidohydrolase 1 [Source:MGI Symbol;Acc:MGI:1277124]\tSource:MGI Symbol;Acc:MGI:1277124\t0\t\t\t0.753979385\t0.220781296\t3.6574049\t0.206151471\t2.757607937\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+3590\t0.25\t0.29\t10.96000001\t7.017999887\t0\tENSMUSP00000131171.1\tpep chromosome:GRCm38:10:128401474:128409722:-1 gene:ENSMUSG00000025374.13 transcript:ENSMUST00000166608.7 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Nabp2 description:nucleic acid binding protein 2 [Source:MGI Symbol;Acc:MGI:1917167]\tSource:MGI Symbol;Acc:MGI:1917167\t0\t\t\t1.09097302\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+3591\t0.24\t3.65\t23.48999977\t11.41000018\t7.15899989\tENSMUSP00000015435.4\tpep chromosome:GRCm38:X:74304998:74311862:1 gene:ENSMUSG00000015291.10 transcript:ENSMUST00000015435.10 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Gdi1 description:guanosine 
diphosphate (GDP) dissociation inhibitor 1 [Source:MGI Symbol;Acc:MGI:99846]\tSource:MGI Symbol;Acc:MGI:99846\t3\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+3593\t0.24\t0.28\t16.88999981\t6.019999832\t2.507999912\tENSMUSP00000105070.2\tpep chromosome:GRCm38:13:96542735:96640167:1 gene:ENSMUSG00000021669.15 transcript:ENSMUST00000109444.2 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Col4a3bp description:collagen, type IV, alpha 3 (Goodpasture antigen) binding protein [Source:MGI Symbol;Acc:MGI:1915268]\tSource:MGI Symbol;Acc:MGI:1915268\t1\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+3594\t0.24\t0.27\t16.70999974\t6.921000034\t0\tENSMUSP00000046027.5\tpep chromosome:GRCm38:12:102568582:102704930:-1 gene:ENSMUSG00000057963.9 transcript:ENSMUST00000046518.11 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Itpk1 description:inositol 1,3,4-triphosphate 5/6 kinase [Source:MGI Symbol;Acc:MGI:2446159]\tSource:MGI Symbol;Acc:MGI:2446159\t0\t\t\t0.355408788\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+3595\t0.23\t1.45\t17.14999974\t7.440000027\t1.260999963\tENSMUSP00000123590.1\tpep chromosome:GRCm38:8:119910360:119957555:1 gene:ENSMUSG00000031826.20 transcript:ENSMUST00000144458.7 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Usp10 description:ubiquitin specific peptidase 10 [Source:MGI Symbol;Acc:MGI:894652]\tSource:MGI Symbol;Acc:MGI:894652\t1\t\t\t1.09217\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+3596\t0.23\t0.89\t10.09000018\t6.210000068\t1.55199999\tENSMUSP00000009321.4\tpep chromosome:GRCm38:16:18253948:18289246:-1 gene:ENSMUSG00000022718.11 transcript:ENSMUST00000009321.10 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Dgcr8 description:DGCR8, microprocessor 
complex subunit [Source:MGI Symbol;Acc:MGI:2151114]\tSource:MGI Symbol;Acc:MGI:2151114\t2\t\t\t1.030024052\t0.890802383\t8.756504059\t0.117629595\t9.019410133\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+3597\t0.23\t0.82\t13.94000053\t8.477000147\t0.446199998\tENSMUSP00000063999.5\tpep chromosome:GRCm38:12:24974925:25059697:1 gene:ENSMUSG00000036333.11 transcript:ENSMUST00000066652.6 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Kidins220 description:kinase D-interacting substrate 220 [Source:MGI Symbol;Acc:MGI:1924730]\tSource:MGI Symbol;Acc:MGI:1924730\t2\t\t\t1.125159979\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n+3599\t0.22\t0.38\t20.75999975\t13.56000006\t3.601999953\tENSMUSP00000075614.6\tpep chromosome:GRCm38:X:37091678:37110322:-1 gene:ENSMUSG00000036572.16 transcript:ENSMUST00000076265.12 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:Upf3b description:UPF3 regulator of nonsense transcripts homolog B (yeast) [Source:MGI Symbol;Acc:MGI:1915384]\tSource:MGI Symbol;Acc:MGI:1915384\t2\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n' |
b |
diff -r 000000000000 -r fc89f8c3b777 test_data/mmusculus_gene_ensembl__GRCm38.p6.map --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/mmusculus_gene_ensembl__GRCm38.p6.map Sun Jun 17 04:20:06 2018 -0400 |
b |
b'@@ -0,0 +1,65606 @@\n+ensembl_gene_id\tensembl_gene_id_version\tensembl_transcript_id\tensembl_transcript_id_version\tensembl_peptide_id\tensembl_peptide_id_version\thgnc_symbol\tuniprotswissprot\tuniprotsptrembl\n+ENSMUSG00000064341\tENSMUSG00000064341.1\tENSMUST00000082392\tENSMUST00000082392.1\tENSMUSP00000080991\tENSMUSP00000080991.2\t\tP03888\tQ4JFN6\n+ENSMUSG00000064345\tENSMUSG00000064345.1\tENSMUST00000082396\tENSMUST00000082396.1\tENSMUSP00000080992\tENSMUSP00000080992.1\t\tP03893\tQ9MD59\n+ENSMUSG00000064351\tENSMUSG00000064351.1\tENSMUST00000082402\tENSMUST00000082402.1\tENSMUSP00000080993\tENSMUSP00000080993.1\t\tP00397\tQ9MD68\n+ENSMUSG00000064354\tENSMUSG00000064354.1\tENSMUST00000082405\tENSMUST00000082405.1\tENSMUSP00000080994\tENSMUSP00000080994.1\t\tP00405\tQ7JCZ1\n+ENSMUSG00000064356\tENSMUSG00000064356.3\tENSMUST00000082407\tENSMUST00000082407.1\tENSMUSP00000080995\tENSMUSP00000080995.1\t\tP03930\tQ7JCZ0\n+ENSMUSG00000064357\tENSMUSG00000064357.1\tENSMUST00000082408\tENSMUST00000082408.1\tENSMUSP00000080996\tENSMUSP00000080996.1\t\tP00848\tQ7JCY9\n+ENSMUSG00000064358\tENSMUSG00000064358.1\tENSMUST00000082409\tENSMUST00000082409.1\tENSMUSP00000080997\tENSMUSP00000080997.1\t\tP00416\tQ7JCX7\n+ENSMUSG00000064360\tENSMUSG00000064360.1\tENSMUST00000082411\tENSMUST00000082411.1\tENSMUSP00000080998\tENSMUSP00000080998.2\t\tP03899\tQ7GIP5\n+ENSMUSG00000065947\tENSMUSG00000065947.3\tENSMUST00000084013\tENSMUST00000084013.1\tENSMUSP00000081021\tENSMUSP00000081021.1\t\tP03903\tQ9MD77\n+ENSMUSG00000064363\tENSMUSG00000064363.1\tENSMUST00000082414\tENSMUST00000082414.1\tENSMUSP00000081000\tENSMUSP00000081000.1\t\tP03911\tQ7JCY6\n+ENSMUSG00000064367\tENSMUSG00000064367.1\tENSMUST00000082418\tENSMUST00000082418.1\tENSMUSP00000081001\tENSMUSP00000081001.2\t\tP03921\tQ9MD82\n+ENSMUSG00000064368\tENSMUSG00000064368.1\tENSMUST00000082419\tENSMUST00000082419.1\tENSMUSP00000081002\tENSMUSP00000081002.1\t\tP03925\tQ7JCY4\n+ENSMUSG00000064370\tENSMUSG00000064370.1\tE
NSMUST00000082421\tENSMUST00000082421.1\tENSMUSP00000081003\tENSMUSP00000081003.1\t\tP00158\tQ7JCZ3\n+ENSMUSG00000096427\tENSMUSG00000096427.3\tENSMUST00000212156\tENSMUST00000212156.2\tENSMUSP00000148605\tENSMUSP00000148605.1\t\t\tA0A1D5RM29\n+ENSMUSG00000036790\tENSMUSG00000036790.5\tENSMUST00000036043\tENSMUST00000036043.4\tENSMUSP00000044094\tENSMUSP00000044094.4\t\tQ810C0\tQ14DT0\n+ENSMUSG00000050015\tENSMUSG00000050015.3\tENSMUST00000215100\tENSMUST00000215100.1\tENSMUSP00000149667\tENSMUSP00000149667.1\t\t\tQ8VFP8\n+ENSMUSG00000073117\tENSMUSG00000073117.2\tENSMUST00000095004\tENSMUST00000095004.3\tENSMUSP00000092613\tENSMUSP00000092613.3\t\t\tD3YYI9\n+ENSMUSG00000096427\tENSMUSG00000096427.3\tENSMUST00000093864\tENSMUST00000093864.1\tENSMUSP00000091387\tENSMUSP00000091387.1\t\t\tL7N210\n+ENSMUSG00000066242\tENSMUSG00000066242.3\tENSMUST00000208563\tENSMUST00000208563.2\tENSMUSP00000147125\tENSMUSP00000147125.1\t\t\tQ7TRV2\n+ENSMUSG00000094258\tENSMUSG00000094258.7\tENSMUST00000163738\tENSMUST00000163738.7\tENSMUSP00000126261\tENSMUSP00000126261.1\t\t\tK7N6Y0\n+ENSMUSG00000050015\tENSMUSG00000050015.3\tENSMUST00000214457\tENSMUST00000214457.1\tENSMUSP00000150158\tENSMUSP00000150158.1\t\t\tQ8VFP8\n+ENSMUSG00000043102\tENSMUSG00000043102.2\tENSMUST00000057407\tENSMUST00000057407.2\tENSMUSP00000055746\tENSMUSP00000055746.2\t\tQ8CE23\t\n+ENSMUSG00000036858\tENSMUSG00000036858.8\tENSMUST00000041012\tENSMUST00000041012.8\tENSMUSP00000035683\tENSMUSP00000035683.8\t\t\tA0A0R4J0F4\n+ENSMUSG00000091933\tENSMUSG00000091933.2\tENSMUST00000170064\tENSMUST00000170064.2\tENSMUSP00000127481\tENSMUSP00000127481.2\t\t\tL7N291\n+ENSMUSG00000094258\tENSMUSG00000094258.7\tENSMUST00000170546\tENSMUST00000170546.7\tENSMUSP00000127748\tENSMUSP00000127748.1\t\t\tE9Q0R1\n+ENSMUSG00000066242\tENSMUSG00000066242.3\tENSMUST00000214253\tENSMUST00000214253.1\tENSMUSP00000149295\tENSMUSP00000149295.1\t\t\tQ7TRV2\n+ENSMUSG00000091926\tENSMUSG00000091926.2\tENSMUST00000169130\tENSMUST00000169
130.1\tENSMUSP00000130557\tENSMUSP00000130557.1\t\t\tK7N712\n+ENSMUSG00000050015\tENSMUSG00000050015.3\tENSMUST00000055130\tENSMUST00000055130.2\tENSMUSP00000053105\tENSMUSP00000053105.2\t\t\tQ8VFP8\n+ENSMUSG00000094258\tENSMUSG00000094258.7\tENSMUST00000179231\tENS'..b'877.3\tENSMUST00000200962\tENSMUST00000200962.1\tENSMUSP00000143935\tENSMUSP00000143935.1\t\t\t\n+ENSMUSG00000111226\tENSMUSG00000111226.2\tENSMUST00000214518\tENSMUST00000214518.1\tENSMUSP00000149828\tENSMUSP00000149828.1\t\t\tQ3TTI1\n+ENSMUSG00000111226\tENSMUSG00000111226.2\tENSMUST00000230099\tENSMUST00000230099.1\tENSMUSP00000154987\tENSMUSP00000154987.1\t\t\tQ3TTI1\n+ENSMUSG00000098615\tENSMUSG00000098615.4\tENSMUST00000184717\tENSMUST00000184717.4\tENSMUSP00000139078\tENSMUSP00000139078.1\t\tP98203\t\n+ENSMUSG00000098615\tENSMUSG00000098615.4\tENSMUST00000230969\tENSMUST00000230969.1\tENSMUSP00000155564\tENSMUSP00000155564.1\t\t\t\n+ENSMUSG00000098615\tENSMUSG00000098615.4\tENSMUST00000230794\tENSMUST00000230794.1\tENSMUSP00000155535\tENSMUSP00000155535.1\t\t\tQ80WW5\n+ENSMUSG00000098615\tENSMUSG00000098615.4\tENSMUST00000230355\tENSMUST00000230355.1\tENSMUSP00000155348\tENSMUSP00000155348.1\t\t\tQ80WW5\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000201065\tENSMUST00000201065.3\tENSMUSP00000143947\tENSMUSP00000143947.1\t\tQ9QUG9\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000202484\tENSMUST00000202484.3\tENSMUSP00000144241\tENSMUSP00000144241.1\t\tQ9QUG9\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000201962\tENSMUST00000201962.3\tENSMUSP00000144041\tENSMUSP00000144041.1\t\t\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000202181\tENSMUST00000202181.3\tENSMUSP00000144528\tENSMUSP00000144528.1\t\t\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000201729\tENSMUST00000201729.3\tENSMUSP00000144318\tENSMUSP00000144318.1\t\t\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000201063\tENSMUST00000201063.3\tENSMUSP00000144621\tENSMUSP00000144621.1\t
\t\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000200858\tENSMUST00000200858.3\tENSMUSP00000143974\tENSMUSP00000143974.1\t\tQ9QUG9\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000201596\tENSMUST00000201596.3\tENSMUSP00000143992\tENSMUSP00000143992.1\t\t\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000201411\tENSMUST00000201411.3\tENSMUSP00000144081\tENSMUSP00000144081.1\t\t\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000200777\tENSMUST00000200777.3\tENSMUSP00000143871\tENSMUSP00000143871.1\t\tQ9QUG9\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000202609\tENSMUST00000202609.3\tENSMUSP00000144289\tENSMUSP00000144289.1\t\t\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000200711\tENSMUST00000200711.3\tENSMUSP00000144159\tENSMUSP00000144159.1\t\tQ9QUG9\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000202628\tENSMUST00000202628.1\tENSMUSP00000144037\tENSMUSP00000144037.1\t\t\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000201395\tENSMUST00000201395.2\tENSMUSP00000144508\tENSMUSP00000144508.1\t\tQ9QUG9\t\n+ENSMUSG00000106685\tENSMUSG00000106685.3\tENSMUST00000202309\tENSMUST00000202309.1\tENSMUSP00000144066\tENSMUSP00000144066.1\t\t\t\n+ENSMUSG00000107104\tENSMUSG00000107104.3\tENSMUST00000202301\tENSMUST00000202301.3\tENSMUSP00000143991\tENSMUSP00000143991.1\t\t\t\n+ENSMUSG00000107104\tENSMUSG00000107104.3\tENSMUST00000201950\tENSMUST00000201950.3\tENSMUSP00000144193\tENSMUSP00000144193.1\t\t\t\n+ENSMUSG00000107104\tENSMUSG00000107104.3\tENSMUST00000202580\tENSMUST00000202580.3\tENSMUSP00000144625\tENSMUSP00000144625.1\t\t\t\n+ENSMUSG00000107104\tENSMUSG00000107104.3\tENSMUST00000202463\tENSMUST00000202463.3\tENSMUSP00000144319\tENSMUSP00000144319.1\t\t\t\n+ENSMUSG00000107104\tENSMUSG00000107104.3\tENSMUST00000201938\tENSMUST00000201938.3\tENSMUSP00000144389\tENSMUSP00000144389.1\t\t\t\n+ENSMUSG00000107104\tENSMUSG00000107104.3\tENSMUST00000201577\tENSMUST00000201577.1\tENSMUSP00000144493\tENS
MUSP00000144493.1\t\t\t\n+ENSMUSG00000107104\tENSMUSG00000107104.3\tENSMUST00000201188\tENSMUST00000201188.3\tENSMUSP00000144068\tENSMUSP00000144068.1\t\t\t\n+ENSMUSG00000107104\tENSMUSG00000107104.3\tENSMUST00000200719\tENSMUST00000200719.1\tENSMUSP00000144689\tENSMUSP00000144689.1\t\t\t\n+ENSMUSG00000107099\tENSMUSG00000107099.3\tENSMUST00000202867\tENSMUST00000202867.3\tENSMUSP00000144526\tENSMUSP00000144526.1\t\t\t\n+ENSMUSG00000107099\tENSMUSG00000107099.3\tENSMUST00000202211\tENSMUST00000202211.1\tENSMUSP00000144375\tENSMUSP00000144375.1\t\t\t\n' |