Repository 'proteore_goprofiles'
hg clone https://toolshed.g2.bx.psu.edu/repos/proteore/proteore_goprofiles

Changeset 5:781072a65600 (2018-09-19)
Previous changeset 4:715002a394ec (2018-03-23) Next changeset 6:6afe8166a9a4 (2018-09-21)
Commit message:
planemo upload commit 0ce4c81e6d2f7af8c9b52f6c07e83b0319c2adb1-dirty
modified:
README.rst
goprofiles.R
goprofiles.xml
added:
test-data/GO_Profile_text_output.txt
test-data/ID_Converted_FKW_Lacombe_et_al_2017_OK.txt
test-data/profile.BP.pdf
test-data/profile.CC.pdf
test-data/profile.MF.pdf
removed:
test-data/GO_Profile_diagram_outputs__profile.BP.pdf
test-data/GO_Profile_diagram_outputs__profile.CC.pdf
test-data/GO_Profile_diagram_outputs__profile.MF.pdf
test-data/ID_Converter_FKW_Lacombe_et_al_2017_OK.txt
b
diff -r 715002a394ec -r 781072a65600 README.rst
--- a/README.rst Fri Mar 23 10:47:17 2018 -0400
+++ b/README.rst Wed Sep 19 05:49:06 2018 -0400
b
@@ -15,7 +15,7 @@
 
 -----------------------------------
 
-This tool, based on the goProfiles R package, performs statistical analysis of functional profiles. It is based on GO ontology and considers either a gene set ('Entrez’ Identifiers) or a protein set (Uniprot accession number) as input. 
+This tool, based on the goProfiles R package, performs statistical analysis of functional profiles. It is based on GO ontology and considers either a gene set ('Entrez' Identifiers) or a protein set (Uniprot ID) as input. 
 
 You can choose one or more GO categories: 
 
@@ -23,8 +23,8 @@
 * Cellular Component (CC) 
 * Molecular Function (MF) 
 
-Functional profile at a given GO level is obtained by counting the number of identifiers having a hit in each category of this level (2 by default). Results are displayed as bar plots (with absolute or relative frequencies) and can be exported in pdf, png and jpeg formats; textual output with GO terms and their computed frequencies is also provided.  
+Functional profile at a given GO level is obtained by counting the number of identifiers having a hit in each category of this level (2 by default). Results are displayed as bar plots (with absolute or relative frequencies) and can be exported in pdf, png and jpeg formats.  
 
 For more details about GoProfiles, please read: Salicrú et al. Comparison of lists of genes based on functional profiles. BMC Bioinformatics. 2011;12:401.(https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-401)  
 
-If your type of identifiers is not supported (i.e. different from Uniprot and Entrez), please use the **ID Converter** tool in the ProteoRE section to convert your list of IDs first.
+If your type of identifiers is not supported (i.e. different from Uniprot and Entrez), please use the **ID Converter** component in the ProteoRE section to convert your list of IDs first.
b
diff -r 715002a394ec -r 781072a65600 goprofiles.R
--- a/goprofiles.R Fri Mar 23 10:47:17 2018 -0400
+++ b/goprofiles.R Wed Sep 19 05:49:06 2018 -0400
[
@@ -1,6 +1,5 @@
 # Load necessary libraries
-library(org.Hs.eg.db)
-library(goProfiles)
+library(goProfiles,quietly = TRUE)
 
 # Read file and return file content as data.frame
 readfile = function(filename, header) {
@@ -22,32 +21,51 @@
   return(file)
 }
 
-getprofile = function(ids, id_type, level, duplicate) {
+# Test each ID against the expected format for the selected ID type.
+#   - vector: character vector of IDs to check
+#   - type: "Entrez" or "UniProt"
+# Returns a logical vector with one value per ID. Any other type falls
+# through both branches, so the function yields NULL in that case.
+check_ids <- function(vector,type) {
+  uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
+  # The alternatives are wrapped in one group so that ^ and $ anchor every
+  # one of them; the first alternative accepts purely numeric IDs.
+  entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
+  if (type == "Entrez"){
+    return(grepl(entrez_id,vector))
+  } else if (type == "UniProt") {
+    return(grepl(uniprot_pattern,vector))
+  }
+}
+
+getprofile = function(ids, id_type, level, duplicate,species) {
   ####################################################################
   # Arguments
   #   - ids: list of input IDs
   #   - id_type: type of input IDs (UniProt/ENTREZID)
   #   - level
   #   - duplicate: if the duplicated IDs should be removed or not (TRUE/FALSE)
+  #   - species
   ####################################################################
   
+  library(species, character.only = TRUE, quietly = TRUE)
+  
+  if (species=="org.Hs.eg.db"){
+    package=org.Hs.eg.db
+  } else if (species=="org.Mm.eg.db"){
+    package=org.Mm.eg.db
+  }
+  
+  
+  
   # Check if level is number
   if (! as.numeric(level) %% 1 == 0) {
     stop("Please enter an integer for level")
-  }
-  else {
+  } else {
     level = as.numeric(level)
   }
   #genes = as.vector(file[,ncol])
   
   # Extract Gene Entrez ID
   if (id_type == "Entrez") {
-    id = select(org.Hs.eg.db, ids, "ENTREZID", multiVals = "first")
+    id = select(package, ids, "ENTREZID", multiVals = "first")
     genes_ids = id$ENTREZID[which( ! is.na(id$ENTREZID))]
-  }
-  else {
+  } else {
     genes_ids = c()
-    id = select(org.Hs.eg.db, ids, "ENTREZID", "UNIPROT", multiVals = "first")
+    id = select(package, ids, "ENTREZID", "UNIPROT", multiVals = "first")
     if (duplicate == "TRUE") {
       id = unique(id)
     }
@@ -60,10 +78,10 @@
   }
   
   # Create basic profiles
-  profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
-  profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
-  profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
-  profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
+  profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
+  profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
+  profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
+  profile.ALL = basicProfile(genes_ids, onto='ANY', level=level, orgPackage=species, empty.cats=F, ord=T, na.rm=T)
   
   # Print profile
   # printProfiles(profile)
@@ -165,7 +183,8 @@
         --per
         --title: title of the plot
         --duplicate: remove dupliate input IDs (true/false)
-        --text_output: text output filename \n")
+        --text_output: text output filename \n
+        --species")
     q(save="no")
   }
   
@@ -175,18 +194,20 @@
   args <- as.list(as.character(argsDF$V2))
   names(args) <- argsDF$V1
 
+  #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/goprofiles/args.Rda")
+  #load("/home/dchristiany/proteore_project/ProteoRE/tools/goprofiles/args.Rda")
+  
+  id_type = args$id_type
   input_type = args$input_type
   if (input_type == "text") {
     input = strsplit(args$input, "[ \t\n]+")[[1]]
-  }
-  else if (input_type == "file") {
+  } else if (input_type == "file") {
     filename = args$input
     ncol = args$ncol
     # Check ncol
     if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
       stop("Please enter an integer for level")
-    }
-    else {
+    } else {
       ncol = as.numeric(gsub("c", "", ncol))
     }
     header = args$header
@@ -198,7 +219,11 @@
       input = c(input, strsplit(row, ";")[[1]][1])
     }
   }
-  id_type = args$id_type
+  
+  if (! any(check_ids(input,id_type))){
+    stop(paste(id_type,"not found in your ids list, please check your IDs in input or the selected column of your input file"))
+  }
+  
   ontoopt = strsplit(args$onto_opt, ",")[[1]]
   #print(ontoopt)
   #plotopt = strsplit(args[3], ",")
@@ -208,8 +233,9 @@
   title = args$title
   duplicate = args$duplicate
   text_output = args$text_output
+  species=args$species
 
-  profiles = getprofile(input, id_type, level, duplicate)
+  profiles = getprofile(input, id_type, level, duplicate,species)
   profile.CC = profiles[1]
   #print(profile.CC)
   profile.MF = profiles[2]
b
diff -r 715002a394ec -r 781072a65600 goprofiles.xml
--- a/goprofiles.xml Fri Mar 23 10:47:17 2018 -0400
+++ b/goprofiles.xml Wed Sep 19 05:49:06 2018 -0400
b
@@ -1,11 +1,11 @@
-<tool id="goProfiles" name="goProfiles" version="0.1.0">
-    <description> Statistical analysis of functional profiles
-    </description>
-    <requirements>
+<tool id="goProfiles" name="goProfiles" version="2018.09.04">
+    <description>Statistical analysis of functional profiles</description>
+    <requirements> 
         <requirement type="package" version="3.4.1">R</requirement>
-        <requirement type="package" version="3.4.1">bioconductor-org.hs.eg.db</requirement>
-        <requirement type="package" version="1.38.2">bioconductor-annotationdbi</requirement>
-        <requirement type="package" version="2.36.2">bioconductor-biobase</requirement>
+        <requirement type="package" version="3.5.0">bioconductor-org.hs.eg.db</requirement>
+        <requirement type="package" version="3.5.0">bioconductor-org.mm.eg.db</requirement>
+        <requirement type="package" version="1.40.0">bioconductor-annotationdbi</requirement>
+        <requirement type="package" version="2.38.0">bioconductor-biobase</requirement>
         <requirement type="package" version="1.38.0">goprofiles</requirement>
     </requirements>
     <stdio>
@@ -39,6 +39,8 @@
 
         --text_output="$text_output"
 
+        --species="$species"
+
     ]]></command>
     <inputs>
         <conditional name="input" >
@@ -73,6 +75,10 @@
             </when>            
         </conditional>
         <param name="duplicate" type="boolean" label="Remove duplicated IDs" truevalue="TRUE" falsevalue="FALSE" />
+        <param name="species" type="select" label="Select your species">
+            <option value="org.Hs.eg.db">Human (Homo sapiens)</option>
+            <option value="org.Mm.eg.db">Mouse (Mus musculus)</option>
+        </param>
         <param type="select" name="onto_opt" label="Please select GO terms category" multiple="True" display="checkboxes" >
             <option value="CC">Cellular Component (CC)</option>
             <option value="MF">Molecular Function (MF)</option>
@@ -94,7 +100,7 @@
         </section>
     </inputs>
     <outputs>
-     <collection type="list" label="GO Profile diagram outputs" name="output" >
+     <collection type="list" label="GO Profile diagram output" name="output" >
          <discover_datasets pattern="(?P&lt;designation&gt;.+\.png)" ext="png" />
          <discover_datasets pattern="(?P&lt;designation&gt;.+\.jpeg)" ext="jpg" />
          <discover_datasets pattern="(?P&lt;designation&gt;.+\.pdf)" ext="pdf" />
@@ -105,9 +111,9 @@
         <test>
             <conditional name="input">
                 <param name="ids" value="file" />
-                <param name="file" value="ID_Converter_FKW_Lacombe_et_al_2017_OK.txt" />
+                <param name="file" value="ID_Converted_FKW_Lacombe_et_al_2017_OK.txt" />
                 <param name="ncol" value="c1" />
-                <param name="header" value="talse" /> 
+                <param name="header" value="true" /> 
                 <param name="id_type" value="UniProt" />              
             </conditional>
             <param name="duplicate" value="false"/>
@@ -119,9 +125,9 @@
                 <param name="plot_opt" value="PDF" />
             </section>
             <output_collection name="output" type="list" >
-                <element name="GO_Profile_diagram_outputs__profile.BP.pdf" file="GO_Profile_diagram_outputs__profile.BP.pdf" ftype="pdf" />
-                <element name="GO_Profile_diagram_outputs__profile.CC.pdf" file="GO_Profile_diagram_outputs__profile.CC.pdf" ftype="pdf" />
-                <element name="GO_Profile_diagram_outputs__profile.MF.pdf" file="GO_Profile_diagram_outputs__profile.MF.pdf" ftype="pdf" />
+                <element name="profile.BP.pdf" file="profile.BP.pdf" ftype="pdf" compare="sim_size"/>
+                <element name="profile.CC.pdf" file="profile.CC.pdf" ftype="pdf" compare="sim_size"/>
+                <element name="profile.MF.pdf" file="profile.MF.pdf" ftype="pdf" compare="sim_size"/>
             </output_collection>
             <output name="text_output" file="GO_Profile_text_output.txt"/>
         </test>
b
diff -r 715002a394ec -r 781072a65600 test-data/GO_Profile_diagram_outputs__profile.BP.pdf
b
Binary file test-data/GO_Profile_diagram_outputs__profile.BP.pdf has changed
b
diff -r 715002a394ec -r 781072a65600 test-data/GO_Profile_diagram_outputs__profile.CC.pdf
b
Binary file test-data/GO_Profile_diagram_outputs__profile.CC.pdf has changed
b
diff -r 715002a394ec -r 781072a65600 test-data/GO_Profile_diagram_outputs__profile.MF.pdf
b
Binary file test-data/GO_Profile_diagram_outputs__profile.MF.pdf has changed
b
diff -r 715002a394ec -r 781072a65600 test-data/GO_Profile_text_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/GO_Profile_text_output.txt Wed Sep 19 05:49:06 2018 -0400
b
@@ -0,0 +1,55 @@
+CC.Description CC.GOID CC.Frequency
+cell GO:0005623 128
+cell junction GO:0030054 33
+cell part GO:0044464 128
+extracellular region GO:0005576 132
+extracellular region part GO:0044421 132
+macromolecular complex GO:0032991 41
+membrane GO:0016020 70
+membrane part GO:0044425 29
+membrane-enclosed lumen GO:0031974 67
+organelle GO:0043226 132
+organelle part GO:0044422 97
+other organism GO:0044215 3
+other organism part GO:0044217 3
+supramolecular complex GO:0099080 27
+synapse GO:0045202 5
+synapse part GO:0044456 3
+MF.Description MF.GOID MF.Frequency
+antioxidant activity GO:0016209 8
+binding GO:0005488 129
+catalytic activity GO:0003824 60
+hijacked molecular function GO:0104005 6
+molecular carrier activity GO:0140104 1
+molecular function regulator GO:0098772 30
+molecular transducer activity GO:0060089 4
+signal transducer activity GO:0004871 4
+structural molecule activity GO:0005198 27
+transcription regulator activity GO:0140110 4
+transporter activity GO:0005215 9
+BP.Description BP.GOID BP.Frequency
+behavior GO:0007610 2
+biological adhesion GO:0022610 27
+biological regulation GO:0065007 104
+cell killing GO:0001906 10
+cell proliferation GO:0008283 25
+cellular component organization or biogenesis GO:0071840 64
+cellular process GO:0009987 134
+detoxification GO:0098754 8
+developmental process GO:0032502 76
+growth GO:0040007 13
+immune system process GO:0002376 69
+localization GO:0051179 84
+locomotion GO:0040011 19
+metabolic process GO:0008152 65
+multi-organism process GO:0051704 38
+multicellular organismal process GO:0032501 97
+negative regulation of biological process GO:0048519 61
+positive regulation of biological process GO:0048518 57
+presynaptic process involved in chemical synaptic transmission GO:0099531 1
+regulation of biological process GO:0050789 98
+reproduction GO:0000003 8
+reproductive process GO:0022414 10
+response to stimulus GO:0050896 103
+rhythmic process GO:0048511 2
+signaling GO:0023052 56
b
diff -r 715002a394ec -r 781072a65600 test-data/ID_Converted_FKW_Lacombe_et_al_2017_OK.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ID_Converted_FKW_Lacombe_et_al_2017_OK.txt Wed Sep 19 05:49:06 2018 -0400
b
b'@@ -0,0 +1,152 @@\n+Protein accession number (UniProt)\tProtein name\tNumber of peptides (razor + unique)\tneXtProt\tUniProt.ID\tGeneID\tMIM\tEnsembl\n+P15924\tDesmoplakin\t69\tNX_P15924\tDESP_HUMAN\t1832\t125647; 605676; 607450; 607655; 609638; 612908; 615821\tENSG00000096696\n+P02538\tKeratin, type II cytoskeletal 6A\t53\tNX_P02538\tK2C6A_HUMAN\t3853\t148041; 615726\tENSG00000205420\n+P02768\tSerum albumin\t44\tNX_P02768\tALBU_HUMAN\t213\t103600; 615999; 616000\tENSG00000163631\n+P08779\tKeratin, type I cytoskeletal 16\t29\tNX_P08779\tK1C16_HUMAN\t3868\t148067; 167200; 613000\tENSG00000186832\n+Q02413\tDesmoglein-1\t24\tNX_Q02413\tDSG1_HUMAN\t1828\t125670; 148700; 615508\tENSG00000134760\n+P07355\t"Annexin A2;Putative annexin A2-like protein"\t22\tNX_P07355\tANXA2_HUMAN\t302\t151740\tENSG00000182718\n+P14923\tJunction plakoglobin\t22\tNX_P14923\tPLAK_HUMAN\t3728\t173325; 601214; 611528\tENSG00000173801\n+P02788\tLactotransferrin\t21\tNX_P02788\tTRFL_HUMAN\t4057\t150210\tENSG00000012223\n+Q9HC84\tMucin-5B\t21\tNX_Q9HC84\tMUC5B_HUMAN\t727897\t178500; 600770\tENSG00000117983\n+P29508\tSerpin B3\t20\tNX_P29508\tSPB3_HUMAN\t6317\t600517\tENSG00000057149\n+P63261\tActin, cytoplasmic 2\t19\tNX_P63261\tACTG_HUMAN\t71\t102560; 604717; 614583\tENSG00000184009\n+Q8N1N4\tKeratin, type II cytoskeletal 78\t18\tNX_Q8N1N4\tK2C78_HUMAN\t196374\t611159\tENSG00000170423\n+Q04695\tKeratin, type I cytoskeletal 17\t18\tNX_Q04695\tK1C17_HUMAN\t3872\t148069; 167210; 184500\tENSG00000128422\n+P01876\tIg alpha-1 chain C region\t16\tNX_P01876\tIGHA1_HUMAN\tNA\t146900\tENSG00000211895; ENSG00000282633\n+Q01469\tFatty acid-binding protein 5, epidermal\t15\tNX_Q01469\tFABP5_HUMAN\t2171\t605168\tENSG00000164687\n+P31944\tCaspase-14\t15\tNX_P31944\tCASPE_HUMAN\t23581\t605848; 617320\tENSG00000105141\n+P01833\tPolymeric immunoglobulin 
receptor\t15\tNX_P01833\tPIGR_HUMAN\t5284\t173880\tENSG00000162896\n+P06733\tAlpha-enolase\t15\tNX_P06733\tENOA_HUMAN\t2023\t172430\tENSG00000074800\n+P25311\tZinc-alpha-2-glycoprotein\t15\tNX_P25311\tZA2G_HUMAN\t563\t194460\tENSG00000160862\n+Q15149\tPlectin\t15\tNX_Q15149\tPLEC_HUMAN\t5339\t131950; 226670; 601282; 612138; 613723; 616487\tENSG00000178209\n+P19013\tKeratin, type II cytoskeletal 4\t13\tNX_P19013\tK2C4_HUMAN\tNA\t123940; 193900\tENSG00000170477\n+Q6KB66\tKeratin, type II cytoskeletal 80\t13\tNX_Q6KB66\tK2C80_HUMAN\t144501\t611161\tENSG00000167767\n+Q08188\tProtein-glutamine gamma-glutamyltransferase E\t12\tNX_Q08188\tTGM3_HUMAN\t7053\t600238; 617251\tENSG00000125780\n+P13646\tKeratin, type I cytoskeletal 13\t11\tNX_P13646\tK1C13_HUMAN\t3860\t148065; 615785\tENSG00000171401\n+Q86YZ3\tHornerin\t11\tNX_Q86YZ3\tHORN_HUMAN\t388697\t616293\tENSG00000197915\n+P04259\tKeratin, type II cytoskeletal 6B\t10\tNX_P04259\tK2C6B_HUMAN\t3854\t148042; 615728\tENSG00000185479\n+P02545\t"Prelamin-A/C;Lamin-A/C"\t10\tNX_P02545\tLMNA_HUMAN\t4000\t115200; 150330; 151660; 159001; 176670; 181350; 212112; 248370; 275210; 605588; 610140; 613205; 616516\tENSG00000160789\n+P04083\tAnnexin A1\t10\tNX_P04083\tANXA1_HUMAN\t301\t151690\tENSG00000135046\n+P11021\t78 kDa glucose-regulated protein\t10\tNX_P11021\tBIP_HUMAN\t3309\t138120\tENSG00000044574\n+P02787\tSerotransferrin\t9\tNX_P02787\tTRFE_HUMAN\t7018\t190000; 209300\tENSG00000091513\n+P04040\tCatalase\t9\tNX_P04040\tCATA_HUMAN\t847\t115500; 614097\tENSG00000121691\n+P31151\tProtein S100-A7\t9\tNX_P31151\tS10A7_HUMAN\t6278\t600353\tENSG00000143556\n+P31947\t14-3-3 protein sigma\t9\tNX_P31947\t1433S_HUMAN\t2810\t601290\tENSG00000175793\n+Q96P63\tSerpin B12\t9\tNX_Q96P63\tSPB12_HUMAN\t89777\t615662\tENSG00000166634\n+P14618\tPyruvate kinase PKM\t9\tNX_P14618\tKPYM_HUMAN\t5315\t179050\tENSG00000067225\n+P60174\tTriosephosphate isomerase\t9\tNX_P60174\tTPIS_HUMAN\t7167\t190450; 
615512\tENSG00000111669\n+Q06830\tPeroxiredoxin-1\t9\tNX_Q06830\tPRDX1_HUMAN\t5052\t176763\tENSG00000117450\n+P01040\tCystatin-A\t8\tNX_P01040\tCYTA_HUMAN\t1475\t184600; 607936\tENSG00000121552\n+P05089\tArginase-1\t8\tNX_P05089\tARGI1_HUMAN\t383\t207800; 608313\tENSG00000118520\n+P01834\tIg kappa chain C region\t8\tNX_P01834\tIGKC_HUMAN\tNA\t147200; 614102\tNA\n+P04406\tGlyceraldehyde-3-phosphate dehydrogenase\t8\tNX_P04406\tG3P_HUMAN\t2597\t138400\tENSG00000111640\n+P0DMV9\tHeat shock 70 kDa protein 1B\t8\tNX_P0DMV9\tHS71B_HUMAN\t3303; 3304\t140550; 6'..b'X_P36952\tSPB5_HUMAN\t5268\t154790\tENSG00000206075\n+P40926\tMalate dehydrogenase, mitochondrial\t3\tNX_P40926\tMDHM_HUMAN\t4191\t154100; 617339\tENSG00000146701\n+Q9Y6R7\tIgGFc-binding protein\t3\tNX_Q9Y6R7\tFCGBP_HUMAN\t8857\t617553\tENSG00000281123\n+O95274\tLy6/PLAUR domain-containing protein 3\t2\tNX_O95274\tLYPD3_HUMAN\t27076\t609484\tENSG00000124466\n+P00491\tPurine nucleoside phosphorylase\t2\tNX_P00491\tPNPH_HUMAN\t4860\t164050; 613179\tENSG00000198805\n+P04080\tCystatin-B\t2\tNX_P04080\tCYTB_HUMAN\t1476\t254800; 601145\tENSG00000160213\n+P09972\tFructose-bisphosphate aldolase C\t2\tNX_P09972\tALDOC_HUMAN\t230\t103870\tENSG00000109107\n+P19012\tKeratin, type I cytoskeletal 15\t2\tNX_P19012\tK1C15_HUMAN\t3866\t148030\tENSG00000171346\n+P20930\tFilaggrin\t2\tNX_P20930\tFILA_HUMAN\t2312\t135940; 146700; 605803\tENSG00000143631\n+Q96FX8\tp53 apoptosis effector related to PMP-22\t2\tNX_Q96FX8\tPERP_HUMAN\t64065\t609301\tENSG00000112378\n+Q9UIV8\tSerpin B13\t2\tNX_Q9UIV8\tSPB13_HUMAN\t5275\t604445\tENSG00000197641\n+P01625\tIg kappa chain V-IV region Len\t2\tNA\tNA\tNA\tNA\tNA\n+P01765\tIg heavy chain V-III region TIL\t2\tNA\tNA\tNA\tNA\tNA\n+P01766\tIg heavy chain V-III region BRO\t2\tNX_P01766\tHV313_HUMAN\tNA\tNA\tENSG00000211942; ENSG00000282286\n+P01860\tIg gamma-3 chain C region\t2\tNX_P01860\tIGHG3_HUMAN\tNA\t147120\tNA\n+P01871\tIg mu chain C region\t2\tNX_P01871\tIGHM_HUMAN\tNA\t147020; 
601495\tENSG00000211899; ENSG00000282657\n+P05090\tApolipoprotein D\t2\tNX_P05090\tAPOD_HUMAN\t347\t107740\tENSG00000189058\n+P06870\tKallikrein-1\t2\tNX_P06870\tKLK1_HUMAN\t3816\t147910; 615953\tENSG00000167748\n+P07858\tCathepsin B\t2\tNX_P07858\tCATB_HUMAN\t1508\t116810\tENSG00000164733\n+P08865\t40S ribosomal protein SA\t2\tNX_P08865\tRSSA_HUMAN\t3921\t150370; 271400\tENSG00000168028\n+P11279\tLysosome-associated membrane glycoprotein 1\t2\tNX_P11279\tLAMP1_HUMAN\t3916\t153330\tENSG00000185896\n+P13473\tLysosome-associated membrane glycoprotein 2\t2\tNX_P13473\tLAMP2_HUMAN\t3920\t300257; 309060\tENSG00000005893\n+P19971\tThymidine phosphorylase\t2\tNX_P19971\tTYPH_HUMAN\t1890\t131222; 603041\tENSG00000025708\n+P23284\tPeptidyl-prolyl cis-trans isomerase B\t2\tNX_P23284\tPPIB_HUMAN\t5479\t123841; 259440\tENSG00000166794\n+P23396\t40S ribosomal protein S3\t2\tNX_P23396\tRS3_HUMAN\t6188\t600454\tENSG00000149273\n+P25705\tATP synthase subunit alpha, mitochondrial\t2\tNX_P25705\tATPA_HUMAN\t498\t164360; 615228; 616045\tENSG00000152234\n+P27482\tCalmodulin-like protein 3\t2\tNX_P27482\tCALL3_HUMAN\t810\t114184\tENSG00000178363\n+P31949\tProtein S100-A11\t2\tNX_P31949\tS10AB_HUMAN\t6282\t603114\tENSG00000163191\n+P40121\tMacrophage-capping protein\t2\tNX_P40121\tCAPG_HUMAN\t822\t153615\tENSG00000042493\n+P42357\tHistidine ammonia-lyase\t2\tNX_P42357\tHUTH_HUMAN\t3034\t235800; 609457\tENSG00000084110\n+P47756\tF-actin-capping protein subunit beta\t2\tNX_P47756\tCAPZB_HUMAN\t832\t601572\tENSG00000077549\n+P48637\tGlutathione synthetase\t2\tNX_P48637\tGSHB_HUMAN\t2937\t231900; 266130; 601002\tENSG00000100983\n+P49720\tProteasome subunit beta type-3\t2\tNX_P49720\tPSB3_HUMAN\t5691\t602176\tENSG00000277791; ENSG00000275903\n+P50395\tRab GDP dissociation inhibitor beta\t2\tNX_P50395\tGDIB_HUMAN\t2665\t600767\tENSG00000057608\n+P59998\tActin-related protein 2/3 complex subunit 4\t2\tNX_P59998\tARPC4_HUMAN\t10093\t604226\tENSG00000241553\n+P61160\tActin-related protein 
2\t2\tNX_P61160\tARP2_HUMAN\t10097\t604221\tENSG00000138071\n+P61916\tEpididymal secretory protein E1\t2\tNX_P61916\tNPC2_HUMAN\t10577\t601015; 607625\tENSG00000119655\n+P04745\tAlpha-amylase 1\t23\tNX_P04745\tAMY1_HUMAN\t276; 277; 278\t104700; 104701; 104702\tENSG00000174876; ENSG00000187733; ENSG00000237763\n+Q9NZT1\tCalmodulin-like protein 5\t8\tNX_Q9NZT1\tCALL5_HUMAN\t51806\t605183\tENSG00000178372\n+P12273\tProlactin-inducible protein\t6\tNX_P12273\tPIP_HUMAN\t5304\t176720\tENSG00000159763\n+Q96DA0\tZymogen granule protein 16 homolog B\t5\tNX_Q96DA0\tZG16B_HUMAN\t124220\tNA\tENSG00000162078; ENSG00000283056\n+P01036\tCystatin-S\t5\tNX_P01036\tCYTS_HUMAN\t1472\t123857\tENSG00000101441\n+Q8TAX7\tMucin-7\t2\tNX_Q8TAX7\tMUC7_HUMAN\t4589\t158375; 600807\tENSG00000171195\n+P01037\tCystatin-SN\t2\tNX_P01037\tCYTN_HUMAN\t1469\t123855\tENSG00000170373\n+P09228\tCystatin-SA\t2\tNX_P09228\tCYTT_HUMAN\t1470\t123856\tENSG00000170369\n'
b
diff -r 715002a394ec -r 781072a65600 test-data/ID_Converter_FKW_Lacombe_et_al_2017_OK.txt
--- a/test-data/ID_Converter_FKW_Lacombe_et_al_2017_OK.txt Fri Mar 23 10:47:17 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,152 +0,0 @@\n-Protein accession number (UniProt)\tProtein name\tNumber of peptides (razor + unique)\tneXtProt_ID\tUniProt.ID\tGeneID\tMIM\tEnsembl\n-P15924\tDesmoplakin\t69\tNX_P15924\tDESP_HUMAN\t1832\t125647; 605676; 607450; 607655; 609638; 612908; 615821\tENSG00000096696\n-P02538\tKeratin, type II cytoskeletal 6A\t53\tNX_P02538\tK2C6A_HUMAN\t3853\t148041; 615726\tENSG00000205420\n-P02768\tSerum albumin\t44\tNX_P02768\tALBU_HUMAN\t213\t103600; 615999; 616000\tENSG00000163631\n-P08779\tKeratin, type I cytoskeletal 16\t29\tNX_P08779\tK1C16_HUMAN\t3868\t148067; 167200; 613000\tENSG00000186832\n-Q02413\tDesmoglein-1\t24\tNX_Q02413\tDSG1_HUMAN\t1828\t125670; 148700; 615508\tENSG00000134760\n-P07355\tAnnexin A2;Putative annexin A2-like protein\t22\tNX_P07355\tANXA2_HUMAN\t302\t151740\tENSG00000182718\n-P14923\tJunction plakoglobin\t22\tNX_P14923\tPLAK_HUMAN\t3728\t173325; 601214; 611528\tENSG00000173801\n-P02788\tLactotransferrin\t21\tNX_P02788\tTRFL_HUMAN\t4057\t150210\tENSG00000012223\n-Q9HC84\tMucin-5B\t21\tNX_Q9HC84\tMUC5B_HUMAN\t727897\t178500; 600770\tENSG00000117983\n-P29508\tSerpin B3\t20\tNX_P29508\tSPB3_HUMAN\t6317\t600517\tENSG00000057149\n-P63261\tActin, cytoplasmic 2\t19\tNX_P63261\tACTG_HUMAN\t71\t102560; 604717; 614583\tENSG00000184009\n-Q8N1N4\tKeratin, type II cytoskeletal 78\t18\tNX_Q8N1N4\tK2C78_HUMAN\t196374\t611159\tENSG00000170423\n-Q04695\tKeratin, type I cytoskeletal 17\t18\tNX_Q04695\tK1C17_HUMAN\t3872\t148069; 167210; 184500\tENSG00000128422\n-P01876\tIg alpha-1 chain C region\t16\tNX_P01876\tIGHA1_HUMAN\tNA\t146900\tENSG00000211895; ENSG00000282633\n-Q01469\tFatty acid-binding protein 5, epidermal\t15\tNX_Q01469\tFABP5_HUMAN\t2171\t605168\tENSG00000164687\n-P31944\tCaspase-14\t15\tNX_P31944\tCASPE_HUMAN\t23581\t605848; 617320\tENSG00000105141\n-P01833\tPolymeric immunoglobulin 
receptor\t15\tNX_P01833\tPIGR_HUMAN\t5284\t173880\tENSG00000162896\n-P06733\tAlpha-enolase\t15\tNX_P06733\tENOA_HUMAN\t2023\t172430\tENSG00000074800\n-P25311\tZinc-alpha-2-glycoprotein\t15\tNX_P25311\tZA2G_HUMAN\t563\t194460\tENSG00000160862\n-Q15149\tPlectin\t15\tNX_Q15149\tPLEC_HUMAN\t5339\t131950; 226670; 601282; 612138; 613723; 616487\tENSG00000178209\n-P19013\tKeratin, type II cytoskeletal 4\t13\tNX_P19013\tK2C4_HUMAN\tNA\t123940; 193900\tENSG00000170477\n-Q6KB66\tKeratin, type II cytoskeletal 80\t13\tNX_Q6KB66\tK2C80_HUMAN\t144501\t611161\tENSG00000167767\n-Q08188\tProtein-glutamine gamma-glutamyltransferase E\t12\tNX_Q08188\tTGM3_HUMAN\t7053\t600238; 617251\tENSG00000125780\n-P13646\tKeratin, type I cytoskeletal 13\t11\tNX_P13646\tK1C13_HUMAN\t3860\t148065; 615785\tENSG00000171401\n-Q86YZ3\tHornerin\t11\tNX_Q86YZ3\tHORN_HUMAN\t388697\t616293\tENSG00000197915\n-P04259\tKeratin, type II cytoskeletal 6B\t10\tNX_P04259\tK2C6B_HUMAN\t3854\t148042; 615728\tENSG00000185479\n-P02545\tPrelamin-A/C;Lamin-A/C\t10\tNX_P02545\tLMNA_HUMAN\t4000\t115200; 150330; 151660; 159001; 176670; 181350; 212112; 248370; 275210; 605588; 610140; 613205; 616516\tENSG00000160789\n-P04083\tAnnexin A1\t10\tNX_P04083\tANXA1_HUMAN\t301\t151690\tENSG00000135046\n-P11021\t78 kDa glucose-regulated protein\t10\tNX_P11021\tGRP78_HUMAN\t3309\t138120\tENSG00000044574\n-P02787\tSerotransferrin\t9\tNX_P02787\tTRFE_HUMAN\t7018\t190000; 209300\tENSG00000091513\n-P04040\tCatalase\t9\tNX_P04040\tCATA_HUMAN\t847\t115500; 614097\tENSG00000121691\n-P31151\tProtein S100-A7\t9\tNX_P31151\tS10A7_HUMAN\t6278\t600353\tENSG00000143556\n-P31947\t14-3-3 protein sigma\t9\tNX_P31947\t1433S_HUMAN\t2810\t601290\tENSG00000175793\n-Q96P63\tSerpin B12\t9\tNX_Q96P63\tSPB12_HUMAN\t89777\t615662\tENSG00000166634\n-P14618\tPyruvate kinase PKM\t9\tNX_P14618\tKPYM_HUMAN\t5315\t179050\tENSG00000067225\n-P60174\tTriosephosphate isomerase\t9\tNX_P60174\tTPIS_HUMAN\t7167\t190450; 
615512\tENSG00000111669\n-Q06830\tPeroxiredoxin-1\t9\tNX_Q06830\tPRDX1_HUMAN\t5052\t176763\tENSG00000117450\n-P01040\tCystatin-A\t8\tNX_P01040\tCYTA_HUMAN\t1475\t184600; 607936\tENSG00000121552\n-P05089\tArginase-1\t8\tNX_P05089\tARGI1_HUMAN\t383\t207800; 608313\tENSG00000118520\n-P01834\tIg kappa chain C region\t8\tNX_P01834\tIGKC_HUMAN\tNA\t147200; 614102\tNA\n-P04406\tGlyceraldehyde-3-phosphate dehydrogenase\t8\tNX_P04406\tG3P_HUMAN\t2597\t138400\tENSG00000111640\n-P0DMV9\tHeat shock 70 kDa protein 1B\t8\tNX_P0DMV9\tHS71B_HUMAN\t3303; 3304\t140550; '..b'X_P36952\tSPB5_HUMAN\t5268\t154790\tENSG00000206075\n-P40926\tMalate dehydrogenase, mitochondrial\t3\tNX_P40926\tMDHM_HUMAN\t4191\t154100; 617339\tENSG00000146701\n-Q9Y6R7\tIgGFc-binding protein\t3\tNX_Q9Y6R7\tFCGBP_HUMAN\t8857\t617553\tENSG00000281123\n-O95274\tLy6/PLAUR domain-containing protein 3\t2\tNX_O95274\tLYPD3_HUMAN\t27076\t609484\tENSG00000124466\n-P00491\tPurine nucleoside phosphorylase\t2\tNX_P00491\tPNPH_HUMAN\t4860\t164050; 613179\tENSG00000198805\n-P04080\tCystatin-B\t2\tNX_P04080\tCYTB_HUMAN\t1476\t254800; 601145\tENSG00000160213\n-P09972\tFructose-bisphosphate aldolase C\t2\tNX_P09972\tALDOC_HUMAN\t230\t103870\tENSG00000109107\n-P19012\tKeratin, type I cytoskeletal 15\t2\tNX_P19012\tK1C15_HUMAN\t3866\t148030\tENSG00000171346\n-P20930\tFilaggrin\t2\tNX_P20930\tFILA_HUMAN\t2312\t135940; 146700; 605803\tENSG00000143631\n-Q96FX8\tp53 apoptosis effector related to PMP-22\t2\tNX_Q96FX8\tPERP_HUMAN\t64065\t609301\tENSG00000112378\n-Q9UIV8\tSerpin B13\t2\tNX_Q9UIV8\tSPB13_HUMAN\t5275\t604445\tENSG00000197641\n-P01625\tIg kappa chain V-IV region Len\t2\tNA\tNA\tNA\tNA\tNA\n-P01765\tIg heavy chain V-III region TIL\t2\tNA\tNA\tNA\tNA\tNA\n-P01766\tIg heavy chain V-III region BRO\t2\tNX_P01766\tHV313_HUMAN\tNA\tNA\tENSG00000211942; ENSG00000282286\n-P01860\tIg gamma-3 chain C region\t2\tNX_P01860\tIGHG3_HUMAN\tNA\t147120\tNA\n-P01871\tIg mu chain C region\t2\tNX_P01871\tIGHM_HUMAN\tNA\t147020; 
601495\tENSG00000211899; ENSG00000282657\n-P05090\tApolipoprotein D\t2\tNX_P05090\tAPOD_HUMAN\t347\t107740\tENSG00000189058\n-P06870\tKallikrein-1\t2\tNX_P06870\tKLK1_HUMAN\t3816\t147910; 615953\tENSG00000167748\n-P07858\tCathepsin B\t2\tNX_P07858\tCATB_HUMAN\t1508\t116810\tENSG00000164733\n-P08865\t40S ribosomal protein SA\t2\tNX_P08865\tRSSA_HUMAN\t3921\t150370; 271400\tENSG00000168028\n-P11279\tLysosome-associated membrane glycoprotein 1\t2\tNX_P11279\tLAMP1_HUMAN\t3916\t153330\tENSG00000185896\n-P13473\tLysosome-associated membrane glycoprotein 2\t2\tNX_P13473\tLAMP2_HUMAN\t3920\t300257; 309060\tENSG00000005893\n-P19971\tThymidine phosphorylase\t2\tNX_P19971\tTYPH_HUMAN\t1890\t131222; 603041\tENSG00000025708\n-P23284\tPeptidyl-prolyl cis-trans isomerase B\t2\tNX_P23284\tPPIB_HUMAN\t5479\t123841; 259440\tENSG00000166794\n-P23396\t40S ribosomal protein S3\t2\tNX_P23396\tRS3_HUMAN\t6188\t600454\tENSG00000149273\n-P25705\tATP synthase subunit alpha, mitochondrial\t2\tNX_P25705\tATPA_HUMAN\t498\t164360; 615228; 616045\tENSG00000152234\n-P27482\tCalmodulin-like protein 3\t2\tNX_P27482\tCALL3_HUMAN\t810\t114184\tENSG00000178363\n-P31949\tProtein S100-A11\t2\tNX_P31949\tS10AB_HUMAN\t6282\t603114\tENSG00000163191\n-P40121\tMacrophage-capping protein\t2\tNX_P40121\tCAPG_HUMAN\t822\t153615\tENSG00000042493\n-P42357\tHistidine ammonia-lyase\t2\tNX_P42357\tHUTH_HUMAN\t3034\t235800; 609457\tENSG00000084110\n-P47756\tF-actin-capping protein subunit beta\t2\tNX_P47756\tCAPZB_HUMAN\t832\t601572\tENSG00000077549\n-P48637\tGlutathione synthetase\t2\tNX_P48637\tGSHB_HUMAN\t2937\t231900; 266130; 601002\tENSG00000100983\n-P49720\tProteasome subunit beta type-3\t2\tNX_P49720\tPSB3_HUMAN\t5691\t602176\tENSG00000277791; ENSG00000275903\n-P50395\tRab GDP dissociation inhibitor beta\t2\tNX_P50395\tGDIB_HUMAN\t2665\t600767\tENSG00000057608\n-P59998\tActin-related protein 2/3 complex subunit 4\t2\tNX_P59998\tARPC4_HUMAN\t10093\t604226\tENSG00000241553\n-P61160\tActin-related protein 
2\t2\tNX_P61160\tARP2_HUMAN\t10097\t604221\tENSG00000138071\n-P61916\tEpididymal secretory protein E1\t2\tNX_P61916\tNPC2_HUMAN\t10577\t601015; 607625\tENSG00000119655\n-P04745\tAlpha-amylase 1\t23\tNX_P04745\tAMY1_HUMAN\t276; 277; 278\t104700; 104701; 104702\tENSG00000174876; ENSG00000187733; ENSG00000237763\n-Q9NZT1\tCalmodulin-like protein 5\t8\tNX_Q9NZT1\tCALL5_HUMAN\t51806\t605183\tENSG00000178372\n-P12273\tProlactin-inducible protein\t6\tNX_P12273\tPIP_HUMAN\t5304\t176720\tENSG00000159763\n-Q96DA0\tZymogen granule protein 16 homolog B\t5\tNX_Q96DA0\tZG16B_HUMAN\t124220\tNA\tENSG00000162078; ENSG00000283056\n-P01036\tCystatin-S\t5\tNX_P01036\tCYTS_HUMAN\t1472\t123857\tENSG00000101441\n-Q8TAX7\tMucin-7\t2\tNX_Q8TAX7\tMUC7_HUMAN\t4589\t158375; 600807\tENSG00000171195\n-P01037\tCystatin-SN\t2\tNX_P01037\tCYTN_HUMAN\t1469\t123855\tENSG00000170373\n-P09228\tCystatin-SA\t2\tNX_P09228\tCYTT_HUMAN\t1470\t123856\tENSG00000170369\n'
b
diff -r 715002a394ec -r 781072a65600 test-data/profile.BP.pdf
b
Binary file test-data/profile.BP.pdf has changed
b
diff -r 715002a394ec -r 781072a65600 test-data/profile.CC.pdf
b
Binary file test-data/profile.CC.pdf has changed
b
diff -r 715002a394ec -r 781072a65600 test-data/profile.MF.pdf
b
Binary file test-data/profile.MF.pdf has changed