Repository 'proteore_clusterprofiler'
hg clone https://toolshed.g2.bx.psu.edu/repos/proteore/proteore_clusterprofiler

Changeset 0:bd052861852b (2018-03-01)
Next changeset 1:09ba28df72ad (2018-03-01)
Commit message:
planemo upload commit ffa3be72b850aecbfbd636de815967c06a8f643f-dirty
added:
GO-enrich.R
README.rst
cluster_profiler.xml
test-data/Lacombe_et_al_2017_OK.txt
test-data/clusterProfiler_diagram_outputs__GGO.CC.png
test-data/clusterProfiler_text_output.tabular
b
diff -r 000000000000 -r bd052861852b GO-enrich.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GO-enrich.R Thu Mar 01 10:05:18 2018 -0500
[
@@ -0,0 +1,167 @@
+library(clusterProfiler)
+
+#library(org.Sc.sgd.db)
+library(org.Hs.eg.db)
+library(org.Mm.eg.db)
+
+# Load a tab-separated file into a data.frame, discarding every row whose
+# cells are all NA or empty. When `header` is the string "true", the first
+# line of the file supplies the column names; otherwise the default names
+# assigned by read.table are kept.
+readfile <- function(filename, header) {
+  # One reader shared by every read so the parsing options stay in sync.
+  load_table <- function(...) {
+    read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE,
+               fill = TRUE, na.strings = c("", "NA"), blank.lines.skip = TRUE,
+               ...)
+  }
+  has_header <- header == "true"
+  content <- if (has_header) load_table(skip = 1) else load_table()
+  # A row is dropped when each of its cells is NA or the empty string.
+  is_blank_row <- apply(is.na(content) | content == "", 1, all)
+  content <- content[!is_blank_row, , drop = FALSE]
+  if (has_header) {
+    # The first line is read as a one-row table and reused as column names.
+    names(content) <- load_table(nrows = 1)
+  }
+  return(content)
+}
+
+# Classify genes by GO term at a fixed level (groupGO) and save the bar plot
+# of the result as "GGO.<ontology>.png" in the working directory.
+#
+# Arguments:
+#   geneid   - gene identifiers, passed to groupGO as `gene`
+#   orgdb    - annotation database, passed to groupGO as `OrgDb`
+#   ontology - ontology code, passed as `ont` and used in the file name
+#   level    - GO level handed to groupGO (default 3)
+#   readable - forwarded to groupGO's `readable` argument (default TRUE)
+# Returns the object produced by groupGO.
+repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) {
+  # `readable` is now forwarded; it used to be ignored in favour of a
+  # hard-coded TRUE.
+  ggo <- groupGO(gene = geneid,
+                 OrgDb = orgdb,
+                 ont = ontology,
+                 level = level,
+                 readable = readable)
+  png(paste0("GGO.", ontology, ".png"))
+  # Close the graphics device even when plotting raises an error.
+  on.exit(dev.off(), add = TRUE)
+  print(barplot(ggo))
+  return(ggo)
+}
+
+# GO over-representation test (enrichGO) for Entrez gene ids.
+#
+# Writes two plots of the result to the working directory:
+#   "EGO.<ontology>.bar.png" (barplot) and "EGO.<ontology>.dot.png" (dotplot).
+#
+# Arguments:
+#   geneid      - gene identifiers, passed to enrichGO as `gene`
+#   orgdb       - annotation database, passed to enrichGO as `OrgDb`
+#   ontology    - ontology code, passed as `ont` and used in the file names
+#   pval_cutoff - passed as `pvalueCutoff`
+#   qval_cutoff - passed as `qvalueCutoff`
+# Returns the object produced by enrichGO.
+enrich.GO <- function(geneid, orgdb, ontology, pval_cutoff, qval_cutoff) {
+  ego <- enrichGO(gene = geneid,
+                  OrgDb = orgdb,
+                  keytype = "ENTREZID",
+                  ont = ontology,
+                  pAdjustMethod = "BH",
+                  pvalueCutoff = pval_cutoff,
+                  qvalueCutoff = qval_cutoff,
+                  readable = TRUE)
+  # Open a PNG device, draw into it, and always close it again, so a failing
+  # plot cannot leave the device open. The plot is built inside the helper,
+  # i.e. after the device has been opened, as in the original sequence.
+  render_png <- function(path, make_plot) {
+    png(path)
+    on.exit(dev.off(), add = TRUE)
+    print(make_plot())
+  }
+  render_png(paste0("EGO.", ontology, ".bar.png"), function() barplot(ego))
+  render_png(paste0("EGO.", ontology, ".dot.png"), function() dotplot(ego))
+  return(ego)
+}
+
+# Command-line entry point. Parses "--name=value" arguments, collects the
+# identifiers (pasted text or one column of a tabular file), converts UniProt
+# accessions to Entrez gene ids when requested, then for every selected
+# ontology runs repartition.GO and/or enrich.GO and appends each result table
+# to the text output file. Stops with an explicit message when a required
+# argument is missing or has an unsupported value.
+clusterProfiler <- function() {
+  args <- commandArgs(TRUE)
+  if (length(args) < 1) {
+    args <- c("--help")
+  }
+
+  # Help section
+  if ("--help" %in% args) {
+    cat("clusterProfiler Enrichment Analysis
+    Arguments:
+        --input_type: type of input (text: list of ids / file: filename)
+        --input: input
+        --ncol: the column number which you would like to apply...
+        --header: true/false if your file contains a header
+        --id_type: the type of input IDs (Uniprot/Entrez)
+        --species: human/mouse/rat
+        --onto_opt: ontology options
+        --go_represent: true/false, run groupGO
+        --go_enrich: true/false, run enrichGO
+        --level: 1-3
+        --pval_cutoff
+        --qval_cutoff
+        --text_output: text output filename \n")
+    q(save="no")
+  }
+
+  # Parse arguments. Each argument is split on its FIRST "=" only, so values
+  # that themselves contain "=" stay intact, and an argument without a value
+  # yields "" instead of a recycled name.
+  stripped <- sub("^--", "", args)
+  eq_pos <- as.integer(regexpr("=", stripped, fixed = TRUE))
+  has_value <- eq_pos > 0
+  arg_names <- ifelse(has_value, substr(stripped, 1, eq_pos - 1), stripped)
+  arg_values <- ifelse(has_value,
+                       substr(stripped, eq_pos + 1, nchar(stripped)),
+                       "")
+  args <- as.list(arg_values)
+  names(args) <- arg_names
+
+  # Fetch a mandatory argument by exact name (`$` would partially match).
+  require_arg <- function(name) {
+    value <- args[[name]]
+    if (is.null(value) || !nzchar(value)) {
+      stop("Missing required argument --", name, call. = FALSE)
+    }
+    value
+  }
+
+  input_type <- require_arg("input_type")
+  if (input_type == "text") {
+    # Pasted identifiers arrive as a single whitespace-separated string.
+    input <- strsplit(require_arg("input"), "[ \t\n]+")[[1]]
+  } else if (input_type == "file") {
+    filename <- require_arg("input")
+    # Any "c" in the value is stripped before conversion (e.g. "c1" -> 1);
+    # validate the result before it is used as a column index.
+    id_col <- suppressWarnings(as.numeric(gsub("c", "", require_arg("ncol"))))
+    if (is.na(id_col) || id_col < 1 || id_col %% 1 != 0) {
+      stop("Please enter a positive integer for ncol (e.g. c1)", call. = FALSE)
+    }
+    header <- require_arg("header")
+    # Get file content
+    file <- readfile(filename, header)
+    if (id_col > ncol(file)) {
+      stop("Column ", id_col, " does not exist in the input file",
+           call. = FALSE)
+    }
+    # Keep only the first identifier of each ';'-separated cell.
+    input <- vapply(strsplit(as.character(file[, id_col]), ";"),
+                    function(parts) parts[1], character(1))
+  } else {
+    stop("Unknown --input_type: ", input_type, call. = FALSE)
+  }
+  # Discard missing / empty identifiers before any lookup.
+  input <- trimws(input)
+  input <- input[!is.na(input) & nzchar(input)]
+  if (length(input) == 0) {
+    stop("No identifiers found in the input", call. = FALSE)
+  }
+
+  # Annotation database for the selected species. The rat package is not
+  # loaded at the top of the script, so it is resolved on demand.
+  species <- require_arg("species")
+  orgdb <- switch(species,
+    human = org.Hs.eg.db,
+    mouse = org.Mm.eg.db,
+    rat = {
+      if (!requireNamespace("org.Rn.eg.db", quietly = TRUE)) {
+        stop("Package org.Rn.eg.db is required for species 'rat'",
+             call. = FALSE)
+      }
+      org.Rn.eg.db::org.Rn.eg.db
+    },
+    stop("Unsupported species: ", species, call. = FALSE)
+  )
+
+  # ID format conversion: UniProt accessions are mapped to Entrez gene ids
+  # with bitr (clusterProfiler); Entrez ids are used as they are.
+  id_type <- require_arg("id_type")
+  if (id_type == "Uniprot") {
+    gene <- bitr(input, fromType = "UNIPROT", toType = "ENTREZID",
+                 OrgDb = orgdb)
+    entrez_ids <- gene$ENTREZID
+  } else if (id_type == "Entrez") {
+    # Plain vector: the former `gene$ENTREZID` access failed for this case.
+    entrez_ids <- input
+  } else {
+    stop("Unknown --id_type: ", id_type, call. = FALSE)
+  }
+
+  ontology <- strsplit(require_arg("onto_opt"), ",")[[1]]
+  run_represent <- identical(args[["go_represent"]], "true")
+  run_enrich <- identical(args[["go_enrich"]], "true")
+  if (run_represent) {
+    level <- as.numeric(require_arg("level"))
+  }
+  if (run_enrich) {
+    pval_cutoff <- as.numeric(require_arg("pval_cutoff"))
+    qval_cutoff <- as.numeric(require_arg("qval_cutoff"))
+  }
+  text_output <- require_arg("text_output")
+
+  # One pass per ontology; every result table (with its own header line) is
+  # appended to the same text output file.
+  for (onto in ontology) {
+    if (run_represent) {
+      ggo <- repartition.GO(entrez_ids, orgdb, onto, level, readable = TRUE)
+      write.table(ggo, text_output, append = TRUE, sep = "\t",
+                  row.names = FALSE, quote = FALSE)
+    }
+    if (run_enrich) {
+      ego <- enrich.GO(entrez_ids, orgdb, onto, pval_cutoff, qval_cutoff)
+      write.table(ego, text_output, append = TRUE, sep = "\t",
+                  row.names = FALSE, quote = FALSE)
+    }
+  }
+}
+
+clusterProfiler()
b
diff -r 000000000000 -r bd052861852b README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Thu Mar 01 10:05:18 2018 -0500
[
@@ -0,0 +1,45 @@
+Wrapper for clusterProfiler Enrichment Analysis Tool
+====================================================
+
+**Authors**

+clusterProfiler R package reference : 
+G Yu, LG Wang, Y Han, QY He. clusterProfiler: an R package for comparing biological themes among gene clusters. 
+OMICS: A Journal of Integrative Biology 2012, 16(5):284-287. 
+doi: `10.1089/omi.2011.0118 <http://dx.doi.org/10.1089/omi.2011.0118>`_
+
+
+**Galaxy integration**
+
+T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+
+Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
+
+This work has been partially funded through the French National Agency for Research (ANR) IFB project.
+
+Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
+
+
+===================
+
+**Galaxy component based on R package clusterProfiler (see reference above)**

+This component performs GO enrichment analyses. 
+Given a list of IDs, the tool either 
+(i)  performs gene classification based on GO distribution at a specific level, or
+(ii) calculates GO categories enrichment (over- or under-representation) for the IDs of the input list, 
+compared to a background (whole organism or user-defined list). 
+
+**Input required**
+    
+This component works with Entrez Gene IDs (e.g. 4151, 7412) or UniProt accession numbers (e.g. P31946). 
+You can copy/paste these identifiers or supply a tabular file (.csv, .tsv, .txt, .tab) that contains them.
+

+**Output**
+
+Text (tables) and graphics representing the distribution and/or enrichment of GO categories. 
+
+**User manual / Documentation** of the clusterProfiler R package (functions and parameters):
+https://bioconductor.org/packages/3.7/bioc/vignettes/clusterProfiler/inst/doc/clusterProfiler.html
+(Very well explained)
\ No newline at end of file
b
diff -r 000000000000 -r bd052861852b cluster_profiler.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster_profiler.xml Thu Mar 01 10:05:18 2018 -0500
[
b'@@ -0,0 +1,205 @@\n+<tool id="cluter_profiler" name="clusterProfiler Enrichment Analysis" version="0.1.0">\n+    <requirements>\n+        <requirement type="package" version="3.4.1">R</requirement>\n+        <requirement type="package" version="3.5.0">bioconductor-org.hs.eg.db</requirement>\n+        <requirement type="package" version="3.5.0">bioconductor-org.mm.eg.db</requirement>\n+        <requirement type="package" version="3.2.0">bioconductor-dose</requirement>\n+        <requirement type="package" version="3.0.5">bioconductor-clusterprofiler</requirement>\n+    </requirements>\n+    <command detect_errors="exit_code"><![CDATA[\n+        Rscript "$__tool_directory__/GO-enrich.R"\n+        #if $input.ids == "text"\n+            --input_type="text"\n+            --input="$input.text"\n+        #else\n+            --input_type="file"\n+            --input="$input.file"\n+            --ncol="$input.ncol"\n+            --header="$input.header"\n+        #end if\n+        \n+        --id_type="$idti.idtypein"\n+\n+        --species="$species"\n+\n+        #if $ggo.go_represent == "true"\n+            --go_represent="true"\n+            --level="$ggo.level"\n+        #else\n+            --go_represent="false"\n+        #end if\n+\n+        #if $ego.go_enrich == "true"\n+            --go_enrich="true"\n+            --pval_cutoff="$ego.pval"\n+            --qval_cutoff="$ego.qval"\n+        #else\n+            --go_enrich="false"\n+        #end if\n+        \n+        --onto_opt="$ontology"\n+\n+        --text_output="$text_output"\n+    ]]></command>\n+    <inputs>\n+        <conditional name="input" >\n+            <param name="ids" type="select" label="Provide your identifiers" help="Copy/paste or ID list from a file (e.g. 
table)" >\n+                <option value="text">Copy/paste your identifiers</option>\n+                <option value="file">Input file containing your identifiers</option>\n+            </param>\n+            <when value="text" >\n+                <param name="txt" type="text" label="Copy/paste your identifiers" help=\'IDs must be separated by spaces into the form field, for example: P31946 P62258\' >\n+                    <sanitizer>\n+                        <valid initial="string.printable">\n+                            <remove value="&apos;"/>\n+                        </valid>\n+                        <mapping initial="none">\n+                            <add source="&apos;" target="__sq__"/>\n+                        </mapping>\n+                    </sanitizer>\n+                </param>\n+            </when>\n+            <when value="file" >\n+                <param name="file" type="data" format="txt,tabular" label="Choose a file that contains your list of IDs" help="" />\n+                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" 
/>\n+                <param name="ncol" type="text" label="The column number of IDs to map" help=\'For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on\' />                \n+            </when>\n+        </conditional>\n+        <conditional name="idti" >\n+            <param name="idtypein" type="select" label="Select type/source of identifier of your list" help="Please see example of IDs in help section" >\n+                <option value="Uniprot">UniProt accession number</option>\n+                <option value="Entrez">Entrez Gene ID</option>\n+            </param>\n+            <when value="Uniprot"/>\n+            <when value="Entrez"/>\n+        </conditional>\n+        <param name="species" type="select" label="Select a species" >\n+            <option value="human">Human</option>\n+            <option value="mouse">Mouse</option>\n+            <option value="rat">Rat</option>\n+        </param>\n+        <conditional name="ggo">\n+            <param name="go_represent" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Do you want to perform GO categories representation analysis?"/>\n+      '..b'  <when value="ego" >\n+\t\t\t    <param name="pval" type="float" value="0.01" label="P-value cut off"/>\n+\t\t\t    <param name="qval" type="float" value="0.05" label="Q-value cut off"/>\n+\t\t    </when>\n+\t    </conditional-->\n+\t\t\t\t\n+\t\t<param name="ontology" type="select" display="checkboxes" multiple="true" label="Please select GO terms category">\n+            <option value="CC">Cellular Component</option>\n+            <option value="BP">Biological Process</option>\n+            <option value="MF">Molecular Function</option>\n+        </param>\n+\t    \n+\t    \n+\t    \n+    </inputs>\n+    <outputs>\n+        <data name="text_output" format="tabular" label="clusterProfiler text output" />\n+        <collection type="list" label="clusterProfiler diagram outputs" name="output" >\n+\t    
<discover_datasets pattern="(?P&lt;designation&gt;.+\\.png)" ext="png" />\n+\t</collection>\n+    </outputs>\n+    <tests>\n+        <test>\n+            <conditional name="input">\n+                <param name="ids" value="file"/>\n+                <param name="file" value="Lacombe_et_al_2017_OK.txt"/>\n+                <param name="header" value="true"/>\n+                <param name="ncol" value="c1"/>\n+            </conditional>\n+            <conditional name="idti">\n+                <param name="idtypein" value="Uniprot"/>\n+            </conditional>\n+            <param name="species" value="human"/>\n+            <conditional name="ggo">\n+                <param name="go_represent" value="true"/>\n+                <param name="level" value="3"/>\n+            </conditional>\n+            <conditional name="ego">\n+                <param name="go_enrich" value="false"/>\n+            </conditional>\n+            <param name="ontology" value="CC"/>\n+            <output name="text_output" file="clusterProfiler_text_output.tabular"/>\n+            <output_collection name="output">\n+                <element name="clusterProfiler_diagram_outputs__GGO.CC.png" file="clusterProfiler_diagram_outputs__GGO.CC.png" ftype="png"/>\n+            </output_collection>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+       \n+ \t**Galaxy component based on R package clusterProfiler (see ref below)**\n+ \t\n+\tThis component allows to perform GO enrichment-analyses. \n+\tGiven a list of IDs, the tool either \n+\t(i)  performs gene classification based on GO distribution at a specific level, or\n+\t(ii) calculates GO categories enrichment (over- or under-representation) for the IDs of the input list, \n+\tcompared to a background (whole organism or user-defined list). \n+\n+\t**Input required**\n+    \n+\tThis component works with Gene ids (e.g : 4151, 7412) or Uniprot accession number (e.g. P31946). 
\n+\tYou can copy/paste these identifiers or supply a tabular file (.csv, .tsv, .txt, .tab) where there are contained.\n+\n+ \n+\t**Output**\n+\n+\tText (tables) and graphics representing the repartition and/or enrichment of GO categories. \n+\n+\t**User manual / Documentation** of the clusterProfiler R package (functions and parameters):\n+\thttps://bioconductor.org/packages/3.7/bioc/vignettes/clusterProfiler/inst/doc/clusterProfiler.html\n+\t(Very well explained)\n+\n+\t**Reference**\n+ \n+\tclusterProfiler R package reference : \n+\tG Yu, LG Wang, Y Han, QY He. clusterProfiler: an R package for comparing biological themes among gene clusters. \n+\tOMICS: A Journal of Integrative Biology 2012, 16(5):284-287. \n+\tdoi:[10.1089/omi.2011.0118](http://dx.doi.org/10.1089/omi.2011.0118)\n+\n+\t\n+\t**Galaxy integration**\n+\n+\tT.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR\n+\n+\tSandra D\xc3\xa9rozier, Olivier Ru\xc3\xa9, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform\n+\n+\tThis work has been partially funded through the French National Agency for Research (ANR) IFB project.\n+\n+\tContact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.\n+\n+\n+    ]]></help>\n+    <citations>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r bd052861852b test-data/Lacombe_et_al_2017_OK.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Lacombe_et_al_2017_OK.txt Thu Mar 01 10:05:18 2018 -0500
[
@@ -0,0 +1,165 @@
+Protein accession number (UniProt) Protein name Number of peptides (razor + unique)
+P15924 Desmoplakin 69
+P02538 Keratin, type II cytoskeletal 6A 53
+P02768 Serum albumin 44
+P08779 Keratin, type I cytoskeletal 16 29
+Q02413 Desmoglein-1 24
+P07355 "Annexin A2;Putative annexin A2-like protein" 22
+P14923 Junction plakoglobin 22
+P02788 Lactotransferrin 21
+Q9HC84 Mucin-5B 21
+P29508 Serpin B3 20
+P63261 Actin, cytoplasmic 2 19
+Q8N1N4 Keratin, type II cytoskeletal 78 18
+Q04695 Keratin, type I cytoskeletal 17 18
+P01876 Ig alpha-1 chain C region 16
+Q01469 Fatty acid-binding protein 5, epidermal 15
+P31944 Caspase-14 15
+P01833 Polymeric immunoglobulin receptor 15
+P06733 Alpha-enolase 15
+P25311 Zinc-alpha-2-glycoprotein 15
+Q15149 Plectin 15
+P19013 Keratin, type II cytoskeletal 4 13
+Q6KB66 Keratin, type II cytoskeletal 80 13
+Q08188 Protein-glutamine gamma-glutamyltransferase E 12
+P13646 Keratin, type I cytoskeletal 13 11
+Q86YZ3 Hornerin 11
+P04259 Keratin, type II cytoskeletal 6B 10
+P02545 "Prelamin-A/C;Lamin-A/C" 10
+P04083 Annexin A1 10
+P11021 78 kDa glucose-regulated protein 10
+P02787 Serotransferrin 9
+P04040 Catalase 9
+P31151 Protein S100-A7 9
+P31947 14-3-3 protein sigma 9
+Q96P63 Serpin B12 9
+P14618 Pyruvate kinase PKM 9
+P60174 Triosephosphate isomerase 9
+Q06830 Peroxiredoxin-1 9
+P01040 Cystatin-A 8
+P05089 Arginase-1 8
+P01834 Ig kappa chain C region 8
+P04406 Glyceraldehyde-3-phosphate dehydrogenase 8
+P0DMV9 Heat shock 70 kDa protein 1B 8
+P13639 Elongation factor 2 8
+P35579 Myosin-9 8
+P68371 Tubulin beta-4B chain 8
+Q8WVV4 Protein POF1B 8
+O75635 Serpin B7 7
+P01857 Ig gamma-1 chain C region 7
+P61626 Lysozyme C 7
+P68363 Tubulin alpha-1B chain 7
+P01009 "Alpha-1-antitrypsin;Short peptide from AAT" 6
+P07900 Heat shock protein HSP 90-alpha 6
+Q9NZH8 Interleukin-36 gamma 6
+O43707 "Alpha-actinin-4;Alpha-actinin-1" 6
+O75223 Gamma-glutamylcyclotransferase 6
+P00338 L-lactate dehydrogenase A chain 6
+P07339 Cathepsin D 6
+P62987 Ubiquitin-60S ribosomal protein L40 6
+P10599 Thioredoxin 6
+Q9UGM3 Deleted in malignant brain tumors 1 protein 6
+Q9UI42 Carboxypeptidase A4 6
+P47929 Galectin-7 5
+Q13867 Bleomycin hydrolase 5
+Q6P4A8 Phospholipase B-like 1 5
+O75369 Filamin-B 5
+P00441 Superoxide dismutase [Cu-Zn] 5
+P04792 Heat shock protein beta-1 5
+P11142 Heat shock cognate 71 kDa protein 5
+P58107 Epiplakin 5
+P60842 Eukaryotic initiation factor 4A-I 5
+P62937 Peptidyl-prolyl cis-trans isomerase A 5
+P63104 14-3-3 protein zeta/delta 5
+Q92820 Gamma-glutamyl hydrolase 5
+O75342 Arachidonate 12-lipoxygenase, 12R-type 4
+P09211 Glutathione S-transferase P 4
+P31025 Lipocalin-1 4
+P48594 Serpin B4 4
+Q14574 Desmocollin-3 4
+Q5T750 Skin-specific protein 32 4
+Q6UWP8 Suprabasin 4
+O60911 Cathepsin L2 4
+P00558 Phosphoglycerate kinase 1 4
+P04075 Fructose-bisphosphate aldolase A 4
+P07384 Calpain-1 catalytic subunit 4
+P0CG05 Ig lambda-2 chain C regions 4
+P18206 Vinculin 4
+P62258 14-3-3 protein epsilon 4
+P68871 Hemoglobin subunit beta 4
+Q9C075 Keratin, type I cytoskeletal 23 4
+A8K2U0 Alpha-2-macroglobulin-like protein 1 3
+P00738 Haptoglobin 3
+P01011 Alpha-1-antichymotrypsin 3
+P02763 Alpha-1-acid glycoprotein 1 3
+P18510 Interleukin-1 receptor antagonist protein 3
+P22528 Cornifin-B 3
+P30740 Leukocyte elastase inhibitor 3
+P80188 Neutrophil gelatinase-associated lipocalin 3
+Q15828 Cystatin-M 3
+Q9HCY8 Protein S100-A14 3
+P01623 Ig kappa chain V-III region 3
+P01877 Ig alpha-2 chain C region 3
+P06396 Gelsolin 3
+P14735 Insulin-degrading enzyme 3
+P20933 N(4)-(beta-N-acetylglucosaminyl)-L-asparaginase 3
+P25788 Proteasome subunit alpha type-3 3
+P26641 Elongation factor 1-gamma 3
+P36952 Serpin B5 3
+P40926 Malate dehydrogenase, mitochondrial 3
+Q9Y6R7 IgGFc-binding protein 3
+O95274 Ly6/PLAUR domain-containing protein 3 2
+P00491 Purine nucleoside phosphorylase 2
+P04080 Cystatin-B 2
+P09972 Fructose-bisphosphate aldolase C 2
+P19012 Keratin, type I cytoskeletal 15 2
+P20930 Filaggrin 2
+Q96FX8 p53 apoptosis effector related to PMP-22 2
+Q9UIV8 Serpin B13 2
+P01625 Ig kappa chain V-IV region Len 2
+P01765 Ig heavy chain V-III region TIL 2
+P01766 Ig heavy chain V-III region BRO 2
+P01860 Ig gamma-3 chain C region 2
+P01871 Ig mu chain C region 2
+P05090 Apolipoprotein D 2
+P06870 Kallikrein-1 2
+P07858 Cathepsin B 2
+P08865 40S ribosomal protein SA 2
+P11279 Lysosome-associated membrane glycoprotein 1 2
+P13473 Lysosome-associated membrane glycoprotein 2 2
+P19971 Thymidine phosphorylase 2
+P23284 Peptidyl-prolyl cis-trans isomerase B 2
+P23396 40S ribosomal protein S3 2
+P25705 ATP synthase subunit alpha, mitochondrial 2
+P27482 Calmodulin-like protein 3 2
+P31949 Protein S100-A11 2
+P40121 Macrophage-capping protein 2
+P42357 Histidine ammonia-lyase 2
+P47756 F-actin-capping protein subunit beta 2
+P48637 Glutathione synthetase 2
+P49720 Proteasome subunit beta type-3 2
+P50395 Rab GDP dissociation inhibitor beta 2
+P59998 Actin-related protein 2/3 complex subunit 4 2
+P61160 Actin-related protein 2 2
+P61916 Epididymal secretory protein E1 2
+P04745 Alpha-amylase 1 23
+Q9NZT1 Calmodulin-like protein 5 8
+P12273 Prolactin-inducible protein 6
+Q96DA0 Zymogen granule protein 16 homolog B 5
+P01036 Cystatin-S 5
+Q8TAX7 Mucin-7 2
+P01037 Cystatin-SN 2
+P09228 Cystatin-SA 2
+P04264 Keratin, type II cytoskeletal 1 61
+P35908 Keratin, type II cytoskeletal 2 epidermal 40
+P13645 Keratin, type I cytoskeletal 10 40
+Q5D862 Filaggrin-2 14
+Q5T749 Keratinocyte proline-rich protein 13
+Q8IW75 Serpin A12 3
+P81605 Dermcidin 3
+P22531 Small proline-rich protein 2E 3
+P59666 Neutrophil defensin 3 2
+P78386 Keratin, type II cuticular Hb5 2
+
+
+
b
diff -r 000000000000 -r bd052861852b test-data/clusterProfiler_diagram_outputs__GGO.CC.png
b
Binary file test-data/clusterProfiler_diagram_outputs__GGO.CC.png has changed
b
diff -r 000000000000 -r bd052861852b test-data/clusterProfiler_text_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/clusterProfiler_text_output.tabular Thu Mar 01 10:05:18 2018 -0500
b
b'@@ -0,0 +1,378 @@\n+ID\tDescription\tCount\tGeneRatio\tgeneID\n+GO:0005886\tplasma membrane\t56\t56/153\tDSP/DSG1/ANXA2/JUP/MUC5B/ACTG1/FABP5/PIGR/ENO1/AZGP1/PLEC/TGM3/HRNR/ANXA1/HSPA5/TF/CAT/SERPINB12/CSTA/GAPDH/EEF2/MYH9/HSP90AA1/UBA52/FLNB/SOD1/HSPB1/HSPA8/EPPK1/GSTP1/DSC3/C1orf68/CTSV/CAPN1/VCL/YWHAE/IL1RN/SPRR1B/CST6/GSN/IDE/LYPD3/FLG/PERP/RPSA/LAMP1/LAMP2/RPS3/ATP5A1/PIP/MUC7/KRT1/KRT2/KRT10/SERPINA12/SPRR2E\n+GO:0005628\tprospore membrane\t0\t0/153\t\n+GO:0005789\tendoplasmic reticulum membrane\t1\t1/153\tHSPA5\n+GO:0019867\touter membrane\t2\t2/153\tARG1/UBA52\n+GO:0031090\torganelle membrane\t24\t24/153\tDSP/DSG1/ANXA2/FABP5/PIGR/LMNA/ANXA1/TF/CAT/SFN/SERPINB12/ARG1/GAPDH/SERPINA1/UBA52/DMBT1/HSPA8/YWHAZ/YWHAE/MDH2/LAMP1/LAMP2/RPS3/ATP5A1\n+GO:0034357\tphotosynthetic membrane\t0\t0/153\t\n+GO:0036362\tascus membrane\t0\t0/153\t\n+GO:0042175\tnuclear outer membrane-endoplasmic reticulum membrane network\t1\t1/153\tHSPA5\n+GO:0044425\tmembrane part\t28\t28/153\tDSP/DSG1/ANXA2/JUP/PIGR/TGM3/ANXA1/HSPA5/TF/EEF2/MYH9/TUBA1B/HSP90AA1/CTSD/DMBT1/FLNB/HSPA8/EPPK1/DSC3/CTSV/PGK1/LYPD3/PERP/LAMP1/LAMP2/RPS3/ATP5A1/PIP\n+GO:0048475\tcoated membrane\t0\t0/153\t\n+GO:0055036\tvirion membrane\t0\t0/153\t\n+GO:0098552\tside of membrane\t8\t8/153\tDSG1/JUP/TGM3/ANXA1/TF/HSPA8/CTSV/LAMP1\n+GO:0098589\tmembrane region\t7\t7/153\tANXA2/TF/EEF2/TUBA1B/CTSD/PGK1/LAMP2\n+GO:0098590\tplasma membrane region\t10\t10/153\tDSP/DSG1/ANXA2/JUP/ANXA1/TF/HSP90AA1/EPPK1/RPS3/PIP\n+GO:0098796\tmembrane protein complex\t3\t3/153\tJUP/MYH9/ATP5A1\n+GO:0098805\twhole membrane\t19\t19/153\tDSP/DSG1/ANXA2/FABP5/PIGR/ANXA1/TF/CAT/SERPINB12/ARG1/EEF2/TUBA1B/CTSD/UBA52/DMBT1/HSPA8/PGK1/LAMP1/LAMP2\n+GO:1990195\tmacrolide transmembrane transporter complex\t0\t0/153\t\n+GO:1990196\tMacAB-TolC complex\t0\t0/153\t\n+GO:1990578\tperinuclear endoplasmic reticulum membrane\t0\t0/153\t\n+GO:1990850\tH-gal-GP complex\t0\t0/153\t\n+GO:0010367\textracellular isoamylase 
complex\t0\t0/153\t\n+GO:0031012\textracellular matrix\t28\t28/153\tDSP/DSG1/ANXA2/JUP/ACTG1/CASP14/PLEC/LMNA/HSPA5/PKM/PRDX1/CSTA/GAPDH/EEF2/MYH9/TUBB4B/SERPINA1/CTSD/FLNB/SOD1/HSPB1/HSPA8/EIF4A1/SBSN/RPS3/ATP5A1/KRT1/DCD\n+GO:0043083\tsynaptic cleft\t0\t0/153\t\n+GO:0043230\textracellular organelle\t130\t130/153\tDSP/KRT6A/ALB/KRT16/DSG1/ANXA2/JUP/LTF/MUC5B/SERPINB3/ACTG1/KRT78/KRT17/FABP5/CASP14/PIGR/ENO1/AZGP1/PLEC/TGM3/KRT13/HRNR/KRT6B/ANXA1/HSPA5/TF/CAT/S100A7/SFN/SERPINB12/PKM/TPI1/PRDX1/CSTA/ARG1/GAPDH/HSPA1A/HSPA1B/EEF2/MYH9/TUBB4B/LYZ/TUBA1B/SERPINA1/HSP90AA1/ACTN4/GGCT/LDHA/CTSD/UBA52/TXN/DMBT1/LGALS7B/BLMH/FLNB/SOD1/HSPB1/HSPA8/EIF4A1/PPIA/YWHAZ/GGH/GSTP1/LCN1/SERPINB4/C1orf68/SBSN/CTSV/PGK1/ALDOA/CAPN1/VCL/YWHAE/HBB/A2ML1/HP/SERPINA3/ORM1/IL1RN/SPRR1B/SERPINB1/LCN2/CST6/S100A14/GSN/AGA/PSMA3/EEF1G/SERPINB5/MDH2/FCGBP/PNP/CSTB/ALDOC/KRT15/SERPINB13/APOD/KLK1/CTSB/RPSA/LAMP1/LAMP2/PPIB/RPS3/ATP5A1/CALML3/S100A11/CAPG/CAPZB/GSS/PSMB3/GDI2/ARPC4/ACTR2/NPC2/AMY1A/AMY1B/AMY1C/CALML5/PIP/ZG16B/CST4/MUC7/KRT1/KRT2/KRT10/FLG2/KPRP/DCD/DEFA3\n+GO:0044421\textracellular region part\t141\t141/153\tDSP/KRT6A/ALB/KRT16/DSG1/ANXA2/JUP/LTF/MUC5B/SERPINB3/ACTG1/KRT78/KRT17/FABP5/CASP14/PIGR/ENO1/AZGP1/PLEC/TGM3/KRT13/HRNR/KRT6B/LMNA/ANXA1/HSPA5/TF/CAT/S100A7/SFN/SERPINB12/PKM/TPI1/PRDX1/CSTA/ARG1/GAPDH/HSPA1A/HSPA1B/EEF2/MYH9/TUBB4B/SERPINB7/LYZ/TUBA1B/SERPINA1/HSP90AA1/IL36G/ACTN4/GGCT/LDHA/CTSD/UBA52/TXN/DMBT1/CPA4/LGALS7B/BLMH/PLBD1/FLNB/SOD1/HSPB1/HSPA8/EIF4A1/PPIA/YWHAZ/GGH/GSTP1/LCN1/SERPINB4/C1orf68/SBSN/CTSV/PGK1/ALDOA/CAPN1/VCL/YWHAE/HBB/A2ML1/HP/SERPINA3/ORM1/IL1RN/SPRR1B/SERPINB1/LCN2/CST6/S100A14/GSN/IDE/AGA/PSMA3/EEF1G/SERPINB5/MDH2/FCGBP/LYPD3/PNP/CSTB/ALDOC/KRT15/SERPINB13/APOD/KLK1/CTSB/RPSA/LAMP1/LAMP2/PPIB/RPS3/ATP5A1/CALML3/S100A11/CAPG/CAPZB/GSS/PSMB3/GDI2/ARPC4/ACTR2/NPC2/AMY1A/AMY1B/AMY1C/CALML5/PIP/ZG16B/CST4/MUC7/CST1/CST2/KRT1/KRT2/KRT10/FLG2/KPRP/SERPINA12/DCD/DEFA3/KRT85\n+GO:0048046\tapoplast\t0\t0/153\t\n+GO:0070062\textracellular 
exosome\t130\t130/153\tDSP/KRT6A/ALB/KRT16/DSG1/ANXA2/JUP/LTF/MUC5B/SERPINB3/ACTG1/KRT78/KRT17/FABP5/CASP14/PIGR/ENO1/AZGP1/PLEC/TGM3/KRT13/HRNR/KRT6B/ANXA1/HSPA5/TF/CAT/S100A7/SFN/SERPINB'..b'A2\n+GO:0043684\ttype IV secretion system complex\t0\t0/153\t\n+GO:0044099\tpolar tube\t0\t0/153\t\n+GO:0044297\tcell body\t6\t6/153\tARG1/FLNB/SOD1/CTSV/APOD/LAMP1\n+GO:0044424\tintracellular part\t133\t133/153\tDSP/KRT6A/ALB/KRT16/DSG1/ANXA2/JUP/LTF/MUC5B/SERPINB3/ACTG1/KRT78/KRT17/FABP5/CASP14/PIGR/ENO1/AZGP1/PLEC/KRT80/TGM3/KRT13/HRNR/KRT6B/LMNA/ANXA1/HSPA5/TF/CAT/S100A7/SFN/SERPINB12/PKM/TPI1/PRDX1/CSTA/ARG1/GAPDH/HSPA1A/HSPA1B/EEF2/MYH9/TUBB4B/SERPINB7/LYZ/TUBA1B/SERPINA1/HSP90AA1/ACTN4/GGCT/LDHA/CTSD/UBA52/TXN/DMBT1/LGALS7B/BLMH/PLBD1/FLNB/SOD1/HSPB1/HSPA8/EPPK1/EIF4A1/PPIA/YWHAZ/GGH/ALOX12B/GSTP1/SERPINB4/DSC3/C1orf68/CTSV/PGK1/ALDOA/CAPN1/VCL/YWHAE/HBB/KRT23/HP/SERPINA3/ORM1/IL1RN/SPRR1B/SERPINB1/LCN2/S100A14/GSN/IDE/AGA/PSMA3/EEF1G/SERPINB5/MDH2/PNP/CSTB/ALDOC/KRT15/FLG/PERP/SERPINB13/APOD/KLK1/CTSB/RPSA/LAMP1/LAMP2/TYMP/PPIB/RPS3/ATP5A1/S100A11/CAPG/HAL/CAPZB/GSS/PSMB3/GDI2/ARPC4/ACTR2/NPC2/CALML5/PIP/MUC7/KRT1/KRT2/KRT10/FLG2/KPRP/SPRR2E/DEFA3/KRT85\n+GO:0044457\tcell septum part\t0\t0/153\t\n+GO:0044462\texternal encapsulating structure part\t0\t0/153\t\n+GO:0044463\tcell projection part\t5\t5/153\tHSP90AA1/SOD1/HSPB1/YWHAE/RPS3\n+GO:0044697\tHICS complex\t0\t0/153\t\n+GO:0045177\tapical part of cell\t5\t5/153\tDSG1/ANXA1/TF/CTSV/PIP\n+GO:0045178\tbasal part of cell\t1\t1/153\tTF\n+GO:0051286\tcell tip\t0\t0/153\t\n+GO:0060187\tcell pole\t0\t0/153\t\n+GO:0061835\tventral surface of cell\t0\t0/153\t\n+GO:0070056\tprospore membrane leading edge\t0\t0/153\t\n+GO:0070258\tinner membrane complex\t0\t0/153\t\n+GO:0070331\tCD20-Lck-Fyn complex\t0\t0/153\t\n+GO:0070332\tCD20-Lck-Lyn-Fyn complex\t0\t0/153\t\n+GO:0070938\tcontractile ring\t1\t1/153\tMYH9\n+GO:0071944\tcell 
periphery\t59\t59/153\tDSP/DSG1/ANXA2/JUP/MUC5B/ACTG1/KRT17/FABP5/PIGR/ENO1/AZGP1/PLEC/TGM3/HRNR/ANXA1/HSPA5/TF/CAT/SERPINB12/CSTA/GAPDH/EEF2/MYH9/HSP90AA1/ACTN4/UBA52/FLNB/SOD1/HSPB1/HSPA8/EPPK1/GSTP1/DSC3/C1orf68/CTSV/CAPN1/VCL/YWHAE/IL1RN/SPRR1B/CST6/GSN/IDE/LYPD3/FLG/PERP/RPSA/LAMP1/LAMP2/RPS3/ATP5A1/ACTR2/PIP/MUC7/KRT1/KRT2/KRT10/SERPINA12/SPRR2E\n+GO:0072324\tascus epiplasm\t0\t0/153\t\n+GO:0090543\tFlemming body\t1\t1/153\tCAPG\n+GO:0090635\textracellular core region of desmosome\t0\t0/153\t\n+GO:0090636\touter dense plaque of desmosome\t0\t0/153\t\n+GO:0090637\tinner dense plaque of desmosome\t0\t0/153\t\n+GO:0097223\tsperm part\t0\t0/153\t\n+GO:0097268\tcytoophidium\t0\t0/153\t\n+GO:0097458\tneuron part\t10\t10/153\tARG1/ACTN4/FLNB/SOD1/HSPB1/HSPA8/CTSV/YWHAE/APOD/LAMP1\n+GO:0097569\tlateral shield\t0\t0/153\t\n+GO:0097574\tlateral part of cell\t0\t0/153\t\n+GO:0097610\tcell surface furrow\t1\t1/153\tMYH9\n+GO:0097613\tdinoflagellate epicone\t0\t0/153\t\n+GO:0097614\tdinoflagellate hypocone\t0\t0/153\t\n+GO:0097653\tunencapsulated part of cell\t0\t0/153\t\n+GO:0097683\tdinoflagellate apex\t0\t0/153\t\n+GO:0097684\tdinoflagellate antapex\t0\t0/153\t\n+GO:0098046\ttype V protein secretion system complex\t0\t0/153\t\n+GO:0098862\tcluster of actin-based cell projections\t4\t4/153\tPLEC/MYH9/ACTN4/FLNB\n+GO:1990015\tensheathing process\t0\t0/153\t\n+GO:1990016\tneck portion of tanycyte\t0\t0/153\t\n+GO:1990018\ttail portion of tanycyte\t0\t0/153\t\n+GO:1990225\trhoptry neck\t0\t0/153\t\n+GO:1990794\tbasolateral part of cell\t0\t0/153\t\n+GO:0031594\tneuromuscular junction\t1\t1/153\tMYH9\n+GO:0044456\tsynapse part\t3\t3/153\tHSPA8/LAMP1/ACTR2\n+GO:0060076\texcitatory synapse\t0\t0/153\t\n+GO:0060077\tinhibitory synapse\t0\t0/153\t\n+GO:0097470\tribbon synapse\t0\t0/153\t\n+GO:0098685\tSchaffer collateral - CA1 synapse\t0\t0/153\t\n+GO:0098686\thippocampal mossy fiber to CA3 synapse\t0\t0/153\t\n+GO:0098978\tglutamatergic 
synapse\t0\t0/153\t\n+GO:0098979\tpolyadic synapse\t0\t0/153\t\n+GO:0098981\tcholinergic synapse\t0\t0/153\t\n+GO:0098982\tGABA-ergic synapse\t0\t0/153\t\n+GO:0098984\tneuron to neuron synapse\t1\t1/153\tACTR2\n+GO:0009506\tplasmodesma\t0\t0/153\t\n+GO:0005818\taster\t0\t0/153\t\n+GO:0097740\tparaflagellar rod\t0\t0/153\t\n+GO:0097741\tmastigoneme\t0\t0/153\t\n+GO:0098644\tcomplex of collagen trimers\t0\t0/153\t\n+GO:0099081\tsupramolecular polymer\t30\t30/153\tDSP/KRT6A/KRT16/JUP/ACTG1/KRT78/KRT17/CASP14/ENO1/PLEC/KRT80/KRT13/KRT6B/LMNA/TUBB4B/TUBA1B/ACTN4/FLNB/HSPB1/EPPK1/ALDOA/VCL/KRT23/KRT15/FLG/CAPZB/KRT1/KRT2/KRT10/KRT85\n'