Repository 'mqppep_anova'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/mqppep_anova

Changeset 0:dbff53e6f75f (2022-07-11)
Next changeset 1:08678c931f5d (2022-10-28)
Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
added:
MaxQuantProcessingScript.R
PhosphoPeptide_Upstream_Kinase_Mapping.pl
macros.xml
mqppep_anova.R
mqppep_anova.xml
mqppep_anova_script.Rmd
mqppep_mrgfltr.py
search_ppep.py
test-data/alpha_levels.tabular
test-data/pSTY_motifs.tabular
test-data/test_input_for_anova.sqlite
test-data/test_input_for_anova.tabular
test-data/test_input_for_preproc.tabular
test-data/test_kinase_substrate.tabular
test-data/test_networkin.tabular
test-data/test_regulatory_sites.tabular
test-data/test_swissprot.fasta
workflow/ppenrich_suite_wf.ga
b
diff -r 000000000000 -r dbff53e6f75f MaxQuantProcessingScript.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MaxQuantProcessingScript.R Mon Jul 11 19:22:25 2022 +0000
[
b'@@ -0,0 +1,705 @@\n+#!/usr/bin/env Rscript\n+\n+# This is the implementation for the\n+#   "MaxQuant Phosphopeptide Localization Probability Cutoff"\n+#   Galaxy tool (mqppep_lclztn_filter)\n+# It is adapted from the MaxQuant Processing Script written by Larry Cheng.\n+\n+# libraries\n+library(optparse)\n+library(data.table)\n+library(stringr)\n+library(ggplot2)\n+\n+# title: "MaxQuant Processing Script"\n+# author: "Larry Cheng"\n+# date: "February 19, 2018"\n+#\n+# # MaxQuant Processing Script\n+# Takes MaxQuant Phospho (STY)sites.txt file as input\n+# and performs the following (in order):\n+# 1) Runs the Proteomics Quality Control software\n+# 2) Remove contaminant and reverse sequence rows\n+# 3) Filters rows based on localization probability\n+# 4) Extract the quantitative data\n+# 5) Sequences phosphopeptides\n+# 6) Merges multiply phosphorylated peptides\n+# 7) Filters out phosphopeptides based on enrichment\n+# The output file contains the phosphopeptide (first column)\n+# and the quantitative values for each sample.\n+#\n+# ## Revision History\n+# Rev. 2022-02-10 :wrap for inclusion in Galaxy\n+# Rev. 2018-02-19 :break up analysis script into "MaxQuant Processing Script"\n+#                  and "Phosphopeptide Processing Script"\n+# Rev. 2017-12-12 :added PTXQC\n+#                  added additional plots and table outputs for quality control\n+#                  allowed for more than 2 samples to be grouped together\n+#                  (up to 26 (eg, 1A, 1B, 1C, etc))\n+#                  converted from .r to .rmd file to knit report\n+#                  for quality control\n+# Rev. 2016-09-11 :automated the FDR cutoffs; removed the option to data\n+#                  impute multiple times\n+# Rev. 2016-09-09 :added filter to eliminate contaminant & reverse sequence rows\n+# Rev. 2016-09-01 :moved the collapse step from after ANOVA filter to prior to\n+#                  preANOVA file output\n+# Rev. 
2016-08-22 :use regexSampleNames <- "\\\\.(\\\\d + )[AB]$"\n+#                  so that it looks at the end of string\n+# Rev. 2016-08-05 :Removed vestigial line (ppeptides <- ....)\n+# Rev. 2016-07-03 :Removed row names from the write.table() output for\n+#                  ANOVA and PreANOVA\n+# Rev. 2016-06-25 :Set default Localization Probability cutoff to 0.75\n+# Rev. 2016-06-23 :fixed a bug in filtering for pY enrichment by resetting\n+#                  the row numbers afterwards\n+# Rev. 2016-06-21 :test18 + standardized the regexpression in protocol\n+\n+\n+### FUNCTION DECLARATIONS begin ----------------------------------------------\n+\n+# Read first line of file at filePath\n+# adapted from: https://stackoverflow.com/a/35761217/15509512\n+read_first_line <- function(filepath) {\n+  con <- file(filepath, "r")\n+  line <- readLines(con, n = 1)\n+  close(con)\n+  return(line)\n+}\n+\n+# Move columns to the end of dataframe\n+# - data: the dataframe\n+# - move: a vector of column names, each of which is an element of names(data)\n+movetolast <- function(data, move) {\n+  data[c(setdiff(names(data), move), move)]\n+}\n+\n+# Generate phosphopeptide and build list when applied\n+phosphopeptide_func <- function(df) {\n+  # generate peptide sequence and list of phosphopositions\n+  phosphoprobsequence <-\n+    strsplit(as.character(df["Phospho (STY) Score diffs"]), "")[[1]]\n+  output <- vector()\n+  phosphopeptide <- ""\n+  counter <- 0 # keep track of position in peptide\n+  phosphopositions <-\n+    vector() # keep track of phosphorylation positions in peptide\n+  score_diff <- ""\n+  for (chara in phosphoprobsequence) {\n+    # build peptide sequence\n+    if (!(\n+      chara == " " ||\n+      chara == "(" ||\n+      chara == ")" ||\n+      chara == "." 
||\n+      chara == "-" ||\n+      chara == "0" ||\n+      chara == "1" ||\n+      chara == "2" ||\n+      chara == "3" ||\n+      chara == "4" ||\n+      chara == "5" ||\n+      chara == "6" ||\n+      chara == "7" ||\n+      chara == "8" ||\n+      chara == "9")\n+    ) {\n+      phosphopeptide <- paste(phosphopeptide, chara, sep = "")\n+      counter <- counter + 1'..b' column\n+# ...\n+\n+\n+# Collapse multiphosphorylated peptides\n+# ---\n+quant_data_qc_collapsed <-\n+  data.table(quant_data_qc, key = "Phosphopeptide")\n+quant_data_qc_collapsed <-\n+  aggregate(. ~ Phosphopeptide, quant_data_qc, FUN = collapse_fn)\n+# ...\n+print("quant_data_qc_collapsed")\n+head(quant_data_qc_collapsed)\n+\n+# Compute (as string) % of phosphopeptides that are multiphosphorylated\n+#   (for use in next step)\n+# ---\n+pct_multiphos <-\n+  (\n+    nrow(quant_data_qc) - nrow(quant_data_qc_collapsed)\n+  ) / (2 * nrow(quant_data_qc))\n+pct_multiphos <- sprintf("%0.1f%s", 100 * pct_multiphos, "%")\n+# ...\n+\n+\n+# Compute and visualize breakdown of pY, pS, and pT before enrichment filter\n+# ---\n+py_data <-\n+  quant_data_qc_collapsed[\n+    str_detect(quant_data_qc_collapsed$Phosphopeptide, "pY"),\n+    ]\n+ps_data <-\n+  quant_data_qc_collapsed[\n+    str_detect(quant_data_qc_collapsed$Phosphopeptide, "pS"),\n+    ]\n+pt_data <-\n+  quant_data_qc_collapsed[\n+     str_detect(quant_data_qc_collapsed$Phosphopeptide, "pT"),\n+     ]\n+\n+py_num <- nrow(py_data)\n+ps_num <- nrow(ps_data)\n+pt_num <- nrow(pt_data)\n+\n+# Visualize enrichment\n+enrich_graph_data <- data.frame(group = c("pY", "pS", "pT"),\n+                                value = c(py_num, ps_num, pt_num))\n+\n+enrich_graph_data <-\n+  enrich_graph_data[\n+    enrich_graph_data$value > 0,\n+    ]\n+\n+# Plot pie chart with legend\n+# start: https://stackoverflow.com/a/62522478/15509512\n+# refine: https://www.statology.org/ggplot-pie-chart/\n+# colors: https://colorbrewer2.org/#type=diverging&scheme=BrBG&n=8\n+slices 
<- enrich_graph_data$value\n+phosphoresidue <- enrich_graph_data$group\n+pct    <- round(100 * slices / sum(slices))\n+lbls   <-\n+  paste(enrich_graph_data$group, "\\n", pct, "%\\n(", slices, ")", sep = "")\n+slc_ctr <- c()\n+run_tot <- 0\n+for (p in pct) {\n+  slc_ctr <- c(slc_ctr, run_tot + p / 2.0)\n+  run_tot <- run_tot + p\n+}\n+lbl_y  <- 100 - slc_ctr\n+df     <-\n+  data.frame(slices,\n+             pct,\n+             lbls,\n+             phosphoresidue = factor(phosphoresidue, levels = phosphoresidue))\n+gigi <- ggplot(df\n+               , aes(x = 1, y = pct, fill = phosphoresidue)) +\n+  geom_col(position = "stack", orientation = "x") +\n+  geom_text(aes(x = 1, y = lbl_y, label = lbls), col = "black") +\n+  coord_polar(theta = "y", direction = -1) +\n+  labs(\n+    x = NULL\n+    ,\n+    y = NULL\n+    ,\n+    title = "Percentages (and counts) of phosphosites, by type of residue"\n+    ,\n+    caption = sprintf(\n+      "Roughly %s of peptides have multiple phosphosites.",\n+      pct_multiphos\n+    )\n+  ) +\n+  labs(x = NULL, y = NULL, fill = NULL) +\n+  theme_classic() +\n+  theme(\n+    legend.position = "right"\n+    ,\n+    axis.line = element_blank()\n+    ,\n+    axis.text = element_blank()\n+    ,\n+    axis.ticks = element_blank()\n+    ,\n+    plot.title = element_text(hjust = 0.5)\n+    ,\n+    plot.subtitle = element_text(hjust = 0.5)\n+    ,\n+    plot.caption = element_text(hjust = 0.5)\n+    ,\n+    plot.title.position = "plot"\n+  ) +\n+  scale_fill_manual(breaks = phosphoresidue,\n+                    values = c("#c7eae5", "#f6e8c3", "#dfc27d"))\n+\n+pdf(enrich_graph_filename)\n+print(gigi)\n+dev.off()\n+svg(enrich_graph_filename_svg)\n+print(gigi)\n+dev.off()\n+# ...\n+\n+\n+# Filter phosphopeptides by enrichment\n+# --\n+if (enriched == "Y") {\n+  quant_data_qc_enrichment <- quant_data_qc_collapsed[\n+    str_detect(quant_data_qc_collapsed$Phosphopeptide, "pY"),\n+    ]\n+} else if (enriched == "ST") {\n+  quant_data_qc_enrichment 
<- quant_data_qc_collapsed[\n+    str_detect(quant_data_qc_collapsed$Phosphopeptide, "pS") |\n+    str_detect(quant_data_qc_collapsed$Phosphopeptide, "pT"),\n+    ]\n+} else {\n+  print("Error in enriched variable. Set to either \'Y\' or \'ST\'")\n+}\n+# ...\n+\n+print("quant_data_qc_enrichment")\n+head(quant_data_qc_enrichment)\n+\n+# Write phosphopeptides filtered by enrichment\n+# --\n+write.table(\n+  quant_data_qc_enrichment,\n+  file = output_filename,\n+  sep = "\\t",\n+  quote = FALSE,\n+  row.names = FALSE\n+)\n+# ...\n'
b
diff -r 000000000000 -r dbff53e6f75f PhosphoPeptide_Upstream_Kinase_Mapping.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/PhosphoPeptide_Upstream_Kinase_Mapping.pl Mon Jul 11 19:22:25 2022 +0000
[
b'@@ -0,0 +1,2192 @@\n+#!/usr/local/bin/perl\n+###############################################################################################################################\n+#    perl Kinase_enrichment_analysis_complete_v0.pl\n+#\n+#    Nick Graham, USC\n+#    2016-02-27\n+#\n+#    Built from scripts written by NG at UCLA in Tom Graeber\'s lab:\n+#        CombinePhosphoSites.pl\n+#        Retrieve_p_motifs.pl\n+#        NetworKIN_Motif_Finder_v7.pl\n+#\n+#    Given a list of phospho-peptides, find protein information and upstream kinases.\n+#    Output file can be used for KS enrichment score calculations using Enrichment_Score4Directory.pl\n+#\n+#    Updated 2022-01-13, Art Eschenlauer, UMN on behalf of Justin Drake\'s lab:\n+#        Added warnings and used strict;\n+#        fixed some code paths resulting in more NetworKIN matches;\n+#        applied Aho-Corasick algorithm (via external Python script because Perl implementation was still too slow)\n+#        to speed up "Match the non_p_peptides to the @sequences array";\n+#        added support for SQLite-formatted UniProtKB/Swiss-Prot data as an alternative to FASTA-formatted data;\n+#        added support for SQLite output in addition to tabular files.\n+#\n+#\n+###############################################################################################################################\n+\n+use strict;\n+use warnings \'FATAL\' => \'all\';\n+\n+use Getopt::Std;\n+use DBD::SQLite::Constants qw/:file_open/;\n+use DBI qw(:sql_types);\n+use File::Copy;\n+use File::Basename;\n+use POSIX qw(strftime);\n+use Time::HiRes qw(gettimeofday);\n+#use Data::Dump qw(dump);\n+\n+my $USE_SEARCH_PPEP_PY = 1;\n+#my $FAILED_MATCH_SEQ = "Failed match";\n+my $FAILED_MATCH_SEQ = \'No Sequence\';\n+my $FAILED_MATCH_GENE_NAME = \'No_Gene_Name\';\n+\n+my $dirname = dirname(__FILE__);\n+my %opts;\n+my ($file_in, $average_or_sum, $db_out, $file_out, $file_melt, $phospho_type);\n+my $dbtype;\n+my ($fasta_in, $networkin_in, 
$motifs_in, $PSP_Kinase_Substrate_in, $PSP_Regulatory_Sites_in);\n+my (@samples, %sample_id_lut, %ppep_id_lut, %data, @tmp_data, %n);\n+my $line = 0;\n+my @failed_match = ($FAILED_MATCH_SEQ);\n+my @failed_matches;\n+my (%all_data);\n+my (@p_peptides, @non_p_peptides);\n+my @parsed_fasta;\n+my (@accessions, @names, @sequences, @databases, $database);\n+my ($dbfile, $dbh, $stmth);\n+my @col_names;\n+my (%matched_sequences, %accessions,     %names,     %sites,   );\n+my (@tmp_matches,       @tmp_accessions, @tmp_names, @tmp_sites);\n+my (%p_residues, @tmp_p_residues, @p_sites, $left, $right, %p_motifs, @tmp_motifs_array, $tmp_motif, $tmp_site, %residues);\n+my (@kinases_observed, $kinases);\n+my (@kinases_observed_lbl, @phosphosites_observed_lbl);\n+my ($p_sequence_kinase, $p_sequence, $kinase);\n+my (@motif_sequence, @motif_description, @motif_type_key_ary, %motif_type, %motif_count);\n+my (@kinases_PhosphoSite, $kinases_PhosphoSite);\n+my ($p_sequence_kinase_PhosphoSite, $p_sequence_PhosphoSite, $kinase_PhosphoSite);\n+my (%regulatory_sites_PhosphoSite_hash);\n+my (%domain, %ON_FUNCTION, %ON_PROCESS, %ON_PROT_INTERACT, %ON_OTHER_INTERACT, %notes, %organism);\n+my (%unique_motifs);\n+my ($kinase_substrate_NetworKIN_matches, $kinase_substrate_PhosphoSite_matches);\n+my %psp_regsite_protein_2;\n+my (%domain_2, %ON_FUNCTION_2, %ON_PROCESS_2, %ON_PROT_INTERACT_2, %N_PROT_INTERACT, %ON_OTHER_INTERACT_2, %notes_2, %organism_2);\n+my @timeData;\n+my $PhosphoSitePlusCitation;\n+my (%site_description, %site_id);\n+\n+my %kinase_substrate_NetworKIN_matches;\n+my %kinase_motif_matches;\n+my $regulatory_sites_PhosphoSite;\n+my ($seq_plus5aa, $seq_plus7aa, %seq_plus7aa_2);\n+my %kinase_substrate_PhosphoSite_matches;\n+my @formatted_sequence;\n+my $pSTY_sequence;\n+my $i;\n+my @a;\n+my $use_sqlite;\n+my $verbose;\n+\n+##########\n+## opts ##\n+##########\n+  ## input files\n+    # i : path to input file, e.g., \'outputfile_STEP2.txt\'\n+    # f : path to UniProtKB/SwissProt 
FASTA\n+    # s : optional species argument\n+    # n : path to NetworKIN_201612_cutoffscore2.0.txt\n+    # m : path to pSTY_Mot'..b're-to-SQLite "ppep_gene_site" table\n+        }\n+        else { print OUT "\\t";}\n+    }\n+    my %wrote_motif;\n+    my $motif_parts_0;\n+    my @motif_split;\n+    my $one_motif;\n+    \n+    for my $i (0 .. $#motif_type_keys) {\n+        if (exists($kinase_motif_matches{$peptide}{$motif_type_keys[$i]})) {\n+            print OUT "X\\t";\n+            #ACE-2022.06.20 $motif_parts_0 = $motif_type{$motif_sequence[$i]}." ".$motif_sequence[$i];\n+            $motif_parts_0 = $motif_type{$motif_type_keys[$i]};\n+            @motif_split = split("[|]", $motif_parts_0);\n+            #ACE-2022.06.20 my $key = "$peptide\\t$gene_names\\t$motif_parts_0";\n+            for my $j (0 .. $#motif_split) {\n+                $one_motif = $motif_split[$j];\n+                #ACE-2022.06.20 my $key = "$peptide\\t$gene_names\\t$motif_parts_0";\n+                my $key = "$peptide\\t$gene_names\\t$one_motif";\n+                if (!exists($wrote_motif{$key})) {\n+                    $wrote_motif{$key} = $key;\n+                    print MELT "$peptide\\t$gene_names\\t$motif_description[$i]\\t$one_motif\\n";\n+                    # print "Line 657: i is $i\\t$kinase_motif_matches{$peptide}{$motif_sequence[$i]}\\n";            #debug\n+                    # begin store-to-SQLite "ppep_gene_site" table\n+                    # ---\n+                    $ppep_gene_site_stmth->bind_param(1, $ppep_id);        # ppep_gene_site.ppep_id\n+                    $ppep_gene_site_stmth->bind_param(2, $gene_names);     # ppep_gene_site.gene_names\n+                    $ppep_gene_site_stmth->bind_param(3, $one_motif);  # ppep_gene_site.kinase_map\n+                    $ppep_gene_site_stmth->bind_param(4, $site_id{$motif_description[$i]});     # ppep_gene_site.site_type_id\n+                    if (not $ppep_gene_site_stmth->execute()) {\n+                        print 
"Error writing tuple ($peptide,$gene_names,$one_motif): $ppep_gene_site_stmth->errstr\\n";\n+                    }\n+                    # ...\n+                    # end store-to-SQLite "ppep_gene_site" table\n+                }\n+            }\n+        }\n+        else { print OUT "\\t";}\n+    }\n+    for my $i (0 .. $#kinases_PhosphoSite) {\n+        if (exists($kinase_substrate_PhosphoSite_matches{$peptide}{$kinases_PhosphoSite[$i]})) {\n+            print MELT "$peptide\\t$gene_names\\t$site_description{$SITE_PHOSPHOSITE}\\t$phosphosites_observed_lbl[$i]\\n";\n+            if ($i < $#kinases_PhosphoSite) {\n+                print OUT "X\\t";\n+            }\n+            else {\n+                print OUT "X\\n";\n+            }\n+            # begin store-to-SQLite "ppep_gene_site" table\n+            # ---\n+            $ppep_gene_site_stmth->bind_param(1, $ppep_id);                       # ppep_gene_site.ppep_id\n+            $ppep_gene_site_stmth->bind_param(2, $gene_names);                    # ppep_gene_site.gene_names\n+            $ppep_gene_site_stmth->bind_param(3, $phosphosites_observed_lbl[$i]); # ppep_gene_site.kinase_map\n+            $ppep_gene_site_stmth->bind_param(4, $SITE_PHOSPHOSITE);              # ppep_gene_site.site_type_id\n+            if (not $ppep_gene_site_stmth->execute()) {\n+                print "Error writing tuple ($peptide,$gene_names,$phosphosites_observed_lbl[$i]): $ppep_gene_site_stmth->errstr\\n";\n+            }\n+            # ...\n+            # end store-to-SQLite "ppep_gene_site" table\n+        }\n+        else {\n+            if ($i < $#kinases_PhosphoSite) {\n+                print OUT "\\t";\n+            }\n+            elsif ($i == $#kinases_PhosphoSite) {\n+                print OUT "\\n";\n+            }\n+        }\n+    }\n+}\n+\n+close OUT;\n+close MELT;\n+$ppep_gene_site_stmth->finish;\n+print "begin DB commit at " . format_localtime_iso8601() . 
"\\n";\n+$dbh->{AutoCommit} = $auto_commit;\n+$dbh->disconnect if ( defined $dbh );\n+\n+print "\\nFinished writing output at " . format_localtime_iso8601() ."\\n\\n";\n+\n+###############################################################################################################################\n'
b
diff -r 000000000000 -r dbff53e6f75f macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jul 11 19:22:25 2022 +0000
b
@@ -0,0 +1,89 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.1.13</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.56.0"  >bioconductor-preprocesscore</requirement>
+            <requirement type="package" version="1.22.2"  >numpy</requirement>
+            <requirement type="package" version="0.3.3"   >openblas</requirement>
+            <requirement type="package" version="1.4.1"   >pandas</requirement>
+            <requirement type="package" version="1.64"    >perl-dbd-sqlite</requirement>
+            <requirement type="package" version="5.26.2"  >perl</requirement>
+            <requirement type="package" version="1.4.0"   >pyahocorasick</requirement>
+            <requirement type="package" version="3.9.10"  >python</requirement>
+            <requirement type="package" version="1.14.2"  >r-data.table</requirement>
+            <requirement type="package" version="1.1.2"   >r-dbi</requirement>
+            <requirement type="package" version="3.3.5"   >r-ggplot2</requirement>
+            <requirement type="package" version="3.1.3"   >r-gplots</requirement>
+            <requirement type="package" version="0.9.4"   >r-latex2exp</requirement>
+            <requirement type="package" version="1.7.1"   >r-optparse</requirement>
+            <requirement type="package" version="1.4.4"   >r-reshape2</requirement>
+            <requirement type="package" version="2.11"    >r-rmarkdown</requirement>
+            <requirement type="package" version="2.2.8"   >r-rsqlite</requirement>
+            <requirement type="package" version="0.4.0"   >r-sass</requirement>
+            <requirement type="package" version="0.4_11"  >r-sqldf</requirement>
+            <requirement type="package" version="1.4.0"   >r-stringr</requirement>
+            <requirement type="package" version="0.37"    >r-tinytex</requirement>
+            <requirement type="package" version="0.3.7"   >r-vioplot</requirement>
+            <!--
+            It would be nice to use conda-forge/texlive-core rather than r-tinytex because the
+            former installs texlive when the package is built, but issue 23 blocked PDF-creation.
+            In addition, texlive-core gave pango font errors (output had missing symbols replaced
+            with boxes) unless I specified the build as well as the version when building a
+            conda environment, e.g.:  texlive-core=20210325=h97429d4_0
+            -->
+        </requirements>
+        <!-- I specified the versions above because it takes a VERY long time to search for package versions when they are omitted; also, pinned version numbers should lead to reproducible behavior.  Contrast the execution time of this (about 18 seconds):
+            echo n | time conda create -n mqppep_ver -c conda-forge -c bioconda \
+              bioconductor-preprocesscore=1.56.0 \
+              numpy=1.22.2 \
+              openblas=0.3.3 \
+              pandas=1.4.1 \
+              perl-dbd-sqlite=1.64 \
+              perl-dbd-sqlite=1.64 \
+              perl=5.26.2 \
+              pyahocorasick=1.4.0 \
+              python=3.9.10 \
+              r-data.table=1.14.2 \
+              r-dbi=1.1.2 \
+              r-ggplot2=3.3.5 \
+              r-gplots=3.1.3 \
+              r-latex2exp=0.9.4 \
+              r-optparse=1.7.1 \
+              r-reshape2=1.4.4 \
+              r-rmarkdown=2.11 \
+              r-rsqlite=2.2.8 \
+              r-sass=0.4.0 \
+              r-sqldf=0.4_11 \
+              r-stringr=1.4.0 \
+              r-tinytex=0.37 \
+              r-vioplot=0.3.7
+          with this (42 or more seconds):
+            echo n | time conda create -n mqppep_nover -c conda-forge -c bioconda \
+              bioconductor-preprocesscore \
+              numpy \
+              openblas=0.3.3 \
+              pandas \
+              perl \
+              perl-dbd-sqlite \
+              perl-dbd-sqlite \
+              pyahocorasick \
+              python \
+              r-data.table \
+              r-dbi \
+              r-ggplot2 \
+              r-gplots \
+              r-latex2exp \
+              r-optparse \
+              r-reshape2 \
+              r-rmarkdown \
+              r-rsqlite \
+              r-sass \
+              r-sqldf \
+              r-stringr \
+              r-tinytex \
+              r-vioplot
+
+        -->
+    </xml>
+</macros>
b
diff -r 000000000000 -r dbff53e6f75f mqppep_anova.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mqppep_anova.R Mon Jul 11 19:22:25 2022 +0000
[
b'@@ -0,0 +1,297 @@\n+#!/usr/bin/env Rscript\n+# libraries\n+library(optparse)\n+library(data.table)\n+library(stringr)\n+\n+# ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285\n+\n+# parse options\n+option_list <- list(\n+  make_option(\n+    c("-i", "--inputFile"),\n+    action = "store",\n+    default = NA,\n+    type = "character",\n+    help = "Phosphopeptide Intensities sparse input file path"\n+  ),\n+  make_option(\n+    c("-a", "--alphaFile"),\n+    action = "store",\n+    default = NA,\n+    type = "character",\n+    help = paste0("List of alpha cutoff values for significance testing;",\n+             " path to text file having one column and no header")\n+  ),\n+  make_option(\n+    c("-S", "--preproc_sqlite"),\n+    action = "store",\n+    default = NA,\n+    type = "character",\n+    help = "Path to \'preproc_sqlite\' produced by `mqppep_mrgfltr.py`"\n+  ),\n+  make_option(\n+    c("-K", "--ksea_sqlite"),\n+    action = "store",\n+    default = NA,\n+    type = "character",\n+    help = "Path to \'ksea_sqlite\' output produced by this tool"\n+  ),\n+  make_option(\n+    c("-f", "--firstDataColumn"),\n+    action = "store",\n+    default = "^Intensity[^_]",\n+    type = "character",\n+    help = "First column of intensity values"\n+  ),\n+  make_option(\n+    c("-m", "--imputationMethod"),\n+    action = "store",\n+    default = "random",\n+    type = "character",\n+    help = paste0("Method for missing-value imputation,",\n+             " one of c(\'group-median\',\'median\',\'mean\',\'random\')")\n+  ),\n+  make_option(\n+    c("-p", "--meanPercentile"),\n+    action = "store",\n+    default = 3,\n+    type = "integer",\n+    help = paste0("Mean percentile for randomly generated imputed values;",\n+              ", range [1,99]")\n+  ),\n+  make_option(\n+    c("-d", "--sdPercentile"),\n+    action = "store",\n+    default = 3,\n+    type = "double",\n+    help = paste0("Adjustment value for standard deviation of",\n+           
   " randomly generated imputed values; real")\n+  ),\n+  make_option(\n+    c("-s", "--regexSampleNames"),\n+    action = "store",\n+    default = "\\\\.(\\\\d+)[A-Z]$",\n+    type = "character",\n+    help = "Regular expression extracting sample-names"\n+  ),\n+  make_option(\n+    c("-g", "--regexSampleGrouping"),\n+    action = "store",\n+    default = "(\\\\d+)",\n+    type = "character",\n+    help = paste0("Regular expression extracting sample-group",\n+             " from an extracted sample-name")\n+  ),\n+  make_option(\n+    c("-o", "--imputedDataFile"),\n+    action = "store",\n+    default = "output_imputed.tsv",\n+    type = "character",\n+    help = "Imputed Phosphopeptide Intensities output file path"\n+  ),\n+  make_option(\n+    c("-n", "--imputedQNLTDataFile"),\n+    action = "store",\n+    default = "output_imp_qn_lt.tsv",\n+    type = "character",\n+    help =\n+      paste(\n+        "Imputed, Quantile-Normalized Log-Transformed Phosphopeptide",\n+        "Intensities output file path"\n+        )\n+  ),\n+  make_option(\n+    c("-r", "--reportFile"),\n+    action = "store",\n+    default = "QuantDataProcessingScript.html",\n+    type = "character",\n+    help = "HTML report file path"\n+  ),\n+  make_option(\n+    c("-k", "--ksea_cutoff_statistic"),\n+    action = "store",\n+    default = "FDR",\n+    type = "character",\n+    help = paste0("Method for missing-value imputation,",\n+             " one of c(\'FDR\',\'p.value\'), but don\'t expect \'p.value\' to work well.")\n+  ),\n+  make_option(\n+    c("-t", "--ksea_cutoff_threshold"),\n+    action = "store",\n+    default = 0.05,\n+    type = "double",\n+    help = paste0("Maximum score to be used to score a kinase enrichment as significant")\n+  ),\n+  make_option(\n+    c("-M", "--anova_ksea_metadata"),\n+    action = "store",\n+    default = "anova_ksea_metadata.tsv",\n+    type = "character",\n+    help = "Phosphopeptide metadata, ANOVA FDR, and KSEA enribhments"\n+  )\n+)\n+args <- 
parse_args(OptionParser(option_list = option_list))\n+print("args is:")\n+cat(str(args))\n+\n+# Check parameter values\n+\n+if (! file.exists(args$inputFile)) {\n+  stop(('..b' limit) {\n+  # eliminate any leading whitespace\n+  result    <- gsub("^[ \\t\\n]*", "", readChar(fname, limit))\n+  # eliminate any trailing whitespace\n+  result    <- gsub("[ \\t\\n]*$", "", result)\n+  # substitute characters escaped by Galaxy sanitizer\n+  result <- gsub("__lt__", "<",  result)\n+  result <- gsub("__le__", "<=", result)\n+  result <- gsub("__eq__", "==", result)\n+  result <- gsub("__ne__", "!=", result)\n+  result <- gsub("__gt__", ">",  result)\n+  result <- gsub("__ge__", ">=", result)\n+  result <- gsub("__sq__", "\'",  result)\n+  result <- gsub("__dq__", \'"\',  result)\n+  result <- gsub("__ob__", "[",  result)\n+  result <- gsub("__cb__", "]",  result)\n+}\n+cat(paste0("first_data_column file: ", args$firstDataColumn, "\\n"))\n+cat(paste0("regex_sample_names file: ", args$regexSampleNames, "\\n"))\n+cat(paste0("regex_sample_grouping file: ", args$regexSampleGrouping, "\\n"))\n+nc <- 1000\n+regex_sample_names <- read_config_file_string(args$regexSampleNames, nc)\n+regex_sample_grouping <- read_config_file_string(args$regexSampleGrouping, nc)\n+first_data_column <- read_config_file_string(args$firstDataColumn,  nc)\n+cat(paste0("first_data_column: ",     first_data_column,     "\\n"))\n+cat(paste0("regex_sample_names: ",    regex_sample_names,    "\\n"))\n+cat(paste0("regex_sample_grouping: ", regex_sample_grouping, "\\n"))\n+\n+# from: https://github.com/molgenis/molgenis-pipelines/wiki/\n+#   How-to-source-another_file.R-from-within-your-R-script\n+# Function location_of_this_script returns the location of this .R script\n+#   (may be needed to source other files in same dir)\n+location_of_this_script <- function() {\n+    this_file <- NULL\n+    # This file may be \'sourced\'\n+    for (i in - (1:sys.nframe())) {\n+        if (identical(sys.function(i), 
base::source)) {\n+            this_file <- (normalizePath(sys.frame(i)$ofile))\n+        }\n+    }\n+\n+    if (!is.null(this_file)) return(dirname(this_file))\n+\n+    # But it may also be called from the command line\n+    cmd_args <- commandArgs(trailingOnly = FALSE)\n+    cmd_args_trailing <- commandArgs(trailingOnly = TRUE)\n+    cmd_args <- cmd_args[\n+      seq.int(\n+        from = 1,\n+        length.out = length(cmd_args) - length(cmd_args_trailing)\n+        )\n+      ]\n+    res <- gsub("^(?:--file=(.*)|.*)$", "\\\\1", cmd_args)\n+\n+    # If multiple --file arguments are given, R uses the last one\n+    res <- tail(res[res != ""], 1)\n+    if (0 < length(res)) return(dirname(res))\n+\n+    # Both are not the case. Maybe we are in an R GUI?\n+    return(NULL)\n+}\n+\n+script_dir <-  location_of_this_script()\n+\n+rmarkdown_params <- list(\n+    inputFile = input_file\n+  , alphaFile = alpha_file\n+  , preprocDb = preproc_sqlite\n+  , firstDataColumn = first_data_column\n+  , imputationMethod = imputation_method\n+  , meanPercentile = mean_percentile\n+  , sdPercentile = sd_percentile\n+  , regexSampleNames = regex_sample_names\n+  , regexSampleGrouping = regex_sample_grouping\n+  , imputedDataFilename = imputed_data_file_name\n+  , imputedQNLTDataFile = imp_qn_lt_data_filenm\n+  , anovaKseaMetadata = anova_ksea_metadata\n+  , kseaAppPrepDb = ksea_sqlite\n+  , kseaCutoffThreshold = ksea_cutoff_threshold\n+  , kseaCutoffStatistic = ksea_cutoff_statistic\n+  )\n+\n+print("rmarkdown_params")\n+str(rmarkdown_params)\n+\n+# freeze the random number generator so the same results will be produced\n+#  from run to run\n+set.seed(28571)\n+\n+# BUG (or "opportunity")\n+# To render as PDF for the time being requires installing the conda\n+# package `r-texlive` until this issue in `texlive-core` is resolved:\n+#   https://github.com/conda-forge/texlive-core-feedstock/issues/19\n+# This workaround is detailed in the fourth comment of:\n+#   
https://github.com/conda-forge/texlive-core-feedstock/issues/61\n+\n+library(tinytex)\n+tinytex::install_tinytex()\n+rmarkdown::render(\n+  input = paste(script_dir, "mqppep_anova_script.Rmd", sep = "/")\n+, output_format = rmarkdown::pdf_document(toc = TRUE)\n+, output_file = report_file_name\n+, params = rmarkdown_params\n+)\n'
b
diff -r 000000000000 -r dbff53e6f75f mqppep_anova.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mqppep_anova.xml Mon Jul 11 19:22:25 2022 +0000
[
b'@@ -0,0 +1,341 @@\n+<tool\n+  id="mqppep_anova"\n+  name="MaxQuant Phosphopeptide ANOVA"\n+  version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"\n+  profile="21.05"\n+  >\n+    <description>Runs ANOVA and KSEA for phosphopeptides.</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <edam_topics>\n+        <edam_topic>topic_0121</edam_topic><!-- proteomics -->\n+        <edam_topic>topic_3520</edam_topic><!-- proteomics experiment-->\n+    </edam_topics>\n+    <edam_operations>\n+        <edam_operation>operation_0276</edam_operation><!-- Analyse a network of protein interactions. -->\n+        <edam_operation>operation_0531</edam_operation><!-- Heat map generation -->\n+        <edam_operation>operation_2938</edam_operation><!-- Dendrogram generation -->\n+        <edam_operation>operation_2938</edam_operation><!-- Imputation -->\n+        <edam_operation>operation_3435</edam_operation><!-- Standardisation and normalisation -->\n+        <edam_operation>operation_3501</edam_operation><!-- Enrichment analysis -->\n+        <edam_operation>operation_3658</edam_operation><!-- Statistical inference -->\n+    </edam_operations>\n+    <expand macro="requirements"/>\n+    <!--\n+      The weird invocation used here is because knitr and install_tinytex\n+      both need access to a writeable directory, but most directories in a\n+      biocontainer are read-only, so this builds a pseudo-home under /tmp\n+    -->\n+    <command detect_errors="exit_code"><![CDATA[\n+      cp \'$__tool_directory__/mqppep_anova_script.Rmd\' . &&\n+      cp \'$__tool_directory__/mqppep_anova.R\'          . 
&&\n+      Rscript mqppep_anova.R\n+        --inputFile \'$input_file\'\n+        --alphaFile \'$alpha_file\'\n+        --preproc_sqlite \'$preproc_sqlite\'\n+        --firstDataColumn $intensity_column_regex_f\n+        --imputationMethod $imputation.imputation_method\n+        #if $imputation.imputation_method == "random"\n+          --meanPercentile \'$imputation.meanPercentile\'\n+          --sdPercentile   \'$imputation.sdPercentile\'\n+        #end if\n+        --regexSampleNames $sample_names_regex_f\n+        --regexSampleGrouping $sample_grouping_regex_f\n+        --imputedDataFile $imputed_data_file\n+        --imputedQNLTDataFile \'$imp_qn_lt_file\'\n+        --ksea_sqlite \'$ksea_sqlite\'\n+        --ksea_cutoff_threshold \'$ksea_cutoff_threshold\'\n+        --ksea_cutoff_statistic \'FDR\'\n+        --reportFile \'$report_file\'\n+        --anova_ksea_metadata \'$anova_ksea_metadata\'\n+    ]]></command>\n+    <configfiles>\n+      <configfile name="sample_names_regex_f">\n+        $sample_names_regex\n+      </configfile>\n+      <configfile name="sample_grouping_regex_f">\n+        $sample_grouping_regex\n+      </configfile>\n+      <configfile name="intensity_column_regex_f">\n+        $intensity_column_regex\n+      </configfile>\n+    </configfiles>\n+    <inputs>\n+        <param name="input_file" type="data" format="tabular" label="Filtered Phosphopeptide Intensities"\n+               help="Phosphopeptide intensities filtered for minimal quality.  
First column label \'Phosphopeptide\'; sample-intensities must begin in column 10 and must have column labels to match argument [sample_names_regex]"\n+        />\n+        <param name="alpha_file" type="data" format="tabular" label="ANOVA alpha cutoff level"\n+               help="ANOVA alpha cutoff values for significance testing: tabular data having one column and no header"\n+        />\n+        <param name="preproc_sqlite" type="data" format="sqlite" label="preproc_sqlite dataset from mqppep_preproc"\n+               help="\'preproc_sqlite\' dataset produced by \'MaxQuant Phosphopeptide Preprocessing\' tool"\n+                />\n+        <param name="intensity_column_regex" type="text" value="^Intensity[^_]"\n+               label="Intensity-column pattern"\n+               help="Pattern matching columns that have peptide intensity data (PERL-compatible regular expression matching column label)"\n+        />\n+        <!-- imputation_method <- c("group-median'..b'oproteomic Enrichment Pipeline Merge and Filter"\n+  (``mqppep_mrgflt``) tool.\n+\n+``ANOVA alpha cutoff level``\n+  List of alpha cutoff values for significance testing; text file having one column and no header.  For example:\n+\n+::\n+\n+  0.2\n+  0.1\n+  0.05\n+\n+**Input parameters**\n+\n+``Intensity-column pattern``\n+  First column of ``input_file`` having intensity values (integer or PERL-compatible regular expression matching column label). Default: **Intensity**\n+\n+``Imputation method``\n+  Impute missing values by:\n+\n+    1. ``group-median`` - use median for each sample-group;\n+    2. ``mean`` - use mean across all samples; or\n+    3. ``median`` - use median across all samples;\n+    4. ``random`` - use randomly generated values where:\n+\n+      - ``Mean percentile for random values`` specifies the percentile among non-missing values to be used as mean of random values, and\n+      - ``Percentile std. dev. 
for random values`` specifies the factor to be multiplied by the standard deviation among the non-missing values (across all samples) to determine the standard deviation of random values.\n+\n+``Sample-extraction pattern``\n+  PERL-compatible regular expression extracting the sample-name from the the name of a column of instensities (from ``input_file``) for one sample.\n+\n+    - For example, ``"\\.\\d+[A-Z]$"`` applied to ``Intensity.splunge.10A`` would produce ``.10A``\n+    - Note that *this is case sensitive* by default.\n+\n+``Group-extraction pattern``\n+  PERL-compatible regular expression extracting the sample-grouping from the sample-name that was extracted with ``sample_names_regex`` from a column of intensites (from ``input_file``).\n+\n+    - For example, ``"\\d+$"`` applied to ``.10A`` would produce ``10``\n+    - Note that *this is case sensitive* by default.\n+\n+``KSEA threshold level``\n+  Specifies minimum FDR at which a kinase will be considered to be enriched; the default choice of 0.05 is arbitrary.\n+\n+**Outputs**\n+\n+``imputed_intensities (input_file.imputation_method-imputed_intensities)``\n+  Phosphopeptide MS intensities where missing values have been **imputed** by the chosen method, in tabular format.\n+\n+``imputed_QN_LT_intensities (input_file.imputation_method-imputed_QN_LT_intensities)``\n+  Phosphopeptide MS intensities where missing values have been **imputed** by the chosen method, quantile-normalized (**QN**), and log10-transformed (**LT**), in tabular format.\n+\n+``report_file (input_file.imputation_method-imputed_report)``\n+  Summary report for normalization, imputation, and **ANOVA**, in PDF format.\n+\n+``anova_ksea_metadata (input_file.imputation_method-imputed_anova_ksea_metadata)``\n+  Phosphopeptide metadata including ANOVA significance and KSEA enrichments.\n+\n+``ksea_sqlite (input_file.imputation_method-imputed_ksea_sqlite)``\n+  SQLite database for ad-hoc report creation.\n+\n+**Algorithm**\n+\n+The KSEA 
algorithm used here is as in the KSEAapp package as reported in [Wiredja 2017].\n+The code is adapted from "Danica D. Wiredja (2017). KSEAapp: Kinase-Substrate Enrichment Analysis. R package version 0.99.0." to work with output from the "MaxQuant Phosphopeptide Preprocessing" Galaxy tool.\n+\n+**Authors**\n+\n+``Larry C. Cheng``\n+  (`ORCiD 0000-0002-6922-6433 <https://orcid.org/0000-0002-6922-6433>`_) wrote the original script.\n+\n+``Arthur C. Eschenlauer``\n+  (`ORCiD 0000-0002-2882-0508 <https://orcid.org/0000-0002-2882-0508>`_) adapted the script to run in Galaxy.\n+\n+===================================\n+PERL-compatible regular expressions\n+===================================\n+\n+Note that the PERL-compatible regular expressions accepted by this tool are documented at http://rdrr.io/r/base/regex.html\n+\n+    ]]></help>\n+    <citations>\n+        <!-- Cheng_2018 "Phosphopeptide Enrichment ..." PMID: 30124664 -->\n+        <citation type="doi">10.3791/57996</citation>\n+        <!-- Wiredja_2017 "The KSEA App ..." PMID: 28655153 -->\n+        <citation type="doi">10.1093/bioinformatics/btx415</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r dbff53e6f75f mqppep_anova_script.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mqppep_anova_script.Rmd Mon Jul 11 19:22:25 2022 +0000
[
b'@@ -0,0 +1,3536 @@\n+---\n+title: "MaxQuant Phosphoproteomic Enrichment Pipeline ANOVA/KSEA"\n+author:\n+- "Nick Graham^[ORCiD 0000-0002-6811-1941, University of Southern California: Los Angeles, CA, US]"\n+- "Larry Cheng^[ORCiD 0000-0002-6922-6433, Rutgers School of Graduate Studies: New Brunswick, NJ, US]"\n+- "Art Eschenlauer^[ORCiD 0000-0002-2882-0508, University of Minnesota: Minneapolis, Minnesota, US]"\n+date:\n+- "May 28, 2018"\n+- "; revised June 23, 2022"\n+output:\n+  pdf_document:\n+    toc: true\n+    toc_depth: 3\n+    keep_tex: true\n+header-includes:\n+  - \\usepackage{longtable}\n+  - \\newcommand\\T{\\rule{0pt}{2.6ex}}       % Top strut\n+  - \\newcommand\\B{\\rule[-1.2ex]{0pt}{0pt}} % Bottom strut\n+params:\n+  alphaFile:            "test-data/alpha_levels.tabular"\n+  inputFile:            "test-data/test_input_for_anova.tabular"\n+  preprocDb:            "test-data/test_input_for_anova.sqlite"\n+  kseaAppPrepDb:        !r c(":memory:", "test-data/mqppep.sqlite")[2]\n+  show_toc:             true\n+  firstDataColumn:      "^Intensity[^_]"\n+  imputationMethod:     !r c("group-median", "median", "mean", "random")[1]\n+  meanPercentile:       1\n+  sdPercentile:         1.0\n+  regexSampleNames:     "\\\\.\\\\d+[A-Z]$"\n+  regexSampleGrouping:  "\\\\d+"\n+  imputedDataFilename:  "test-data/limbo/imputedDataFilename.txt"\n+  imputedQNLTDataFile:  "test-data/limbo/imputedQNLTDataFile.txt"\n+  anovaKseaMetadata:    "test-data/limbo/anovaKseaMetadata.txt"\n+  oneWayManyCategories: !r c("aov", "kruskal.test", "oneway.test")[1]\n+  oneWayTwoCategories:  !r c("aov", "kruskal.test", "oneway.test")[3]\n+  kseaCutoffStatistic:  !r c("p.value", "FDR")[2]\n+  kseaCutoffThreshold:  !r c( 0.1, 0.05)[2]\n+  kseaMinKinaseCount:   1\n+  intensityHeatmapRows: 75\n+---\n+<!--\n+  kseaCutoffStatistic:  !r c("p.value", "FDR")[2]\n+  kseaCutoffThreshold:  !r c(0.05, 0.1)[1]\n+\n+  alphaFile:            "test-data/alpha_levels.tabular"\n+  inputFile:            
"test-data/test_input_for_anova.tabular"\n+  preprocDb:            "test-data/test_input_for_anova.sqlite"\n+  kseaAppPrepDb:        !r c(":memory:", "test-data/mqppep.sqlite")[2]\n+\n+  alphaFile:            "test-data/alpha_levels.tabular"\n+  inputFile:            "test-data/UT_phospho_ST_sites.preproc.tabular"\n+  preprocDb:            "test-data/UT_phospho_ST_sites.preproc.sqlite"\n+  kseaAppPrepDb:        !r c(":memory:", "test-data/UT_phospho_ST_sites.ksea.sqlite")[2]\n+\n+  alphaFile:            "test-data/alpha_levels.tabular"\n+  inputFile:            "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.tabular"\n+  preprocDb:            "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.sqlite"\n+  kseaAppPrepDb:        !r c(":memory:", "test-data/pST_Sites_NancyDu.ksea.sqlite")[2]\n+\n+  alphaFile:            "test-data/alpha_levels.tabular"\n+  inputFile:            "test-data/pST_Sites_NancyDu.txt.preproc.tabular"\n+  preprocDb:            "test-data/pST_Sites_NancyDu.txt.preproc.sqlite"\n+  kseaAppPrepDb:        !r c(":memory:", "test-data/pST_Sites_NancyDu.ksea.sqlite")[2]\n+\n+  inputFile:            "test-data/density_failure.preproc_tab.tabular"\n+  kseaAppPrepDb:        !r c(":memory:", "mqppep.sqlite")[2]\n+  latex_document: default\n+-->\n+```{r setup, include = FALSE}\n+#ref for debugging: https://yihui.org/tinytex/r/#debugging\n+options(tinytex.verbose = TRUE)\n+\n+# ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285\n+# ref for top and bottom struts: https://tex.stackexchange.com/a/50355\n+knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10))\n+\n+# freeze the random number generator so the same results will be produced\n+#  from run to run\n+set.seed(28571)\n+\n+### LIBRARIES\n+library(gplots)\n+library(DBI)\n+library(RSQLite)\n+# Suppress "Warning: no DISPLAY variable so Tk is not available"\n+suppressWarnings(suppressMessages(library(sqldf)))\n+\n+# required but not added to search 
list:\n+# - DBI\n+# - RSQLite\n+# - ggplot2\n+# - knitr\n+# - latex2exp\n+# - preprocessCore\n+# - reshape2\n+# - vioplot'..b's_p f\n+      LEFT JOIN kinase_ppep_label k\n+        ON f.Phosphopeptide = k.ppep,\n+    impish q\n+  WHERE\n+    f.Phosphopeptide = q.Phosphopeptide\n+  "\n+data_table_imputed <- sqldf(data_table_imputed_sql)\n+# Zap the duplicated \'Phosphopeptide\' column named \'ppep\'\n+data_table_imputed <-\n+    data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))]\n+\n+# Output with imputed, un-normalized data\n+\n+write.table(\n+    data_table_imputed\n+  , file = imputed_data_filename\n+  , sep = "\\t"\n+  , col.names = TRUE\n+  , row.names = FALSE\n+  , quote = FALSE\n+  )\n+\n+\n+#output quantile normalized data\n+impish <- cbind(rownames(quant_data_imp_qn_log), quant_data_imp_qn_log)\n+colnames(impish)[1] <- "Phosphopeptide"\n+data_table_imputed <- sqldf(data_table_imputed_sql)\n+# Zap the duplicated \'Phosphopeptide\' column named \'ppep\'\n+data_table_imputed <-\n+    data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))]\n+write.table(\n+  data_table_imputed,\n+  file = imp_qn_lt_data_filenm,\n+  sep = "\\t",\n+  col.names = TRUE,\n+  row.names = FALSE,\n+  quote = FALSE\n+)\n+\n+ppep_kinase <- sqldf("\n+  SELECT DISTINCT k.ppep, k.kinase\n+    FROM (\n+      SELECT DISTINCT gene AS kinase, SUB_MOD_RSD AS ppep\n+        FROM pseudo_ksdata\n+        WHERE GENE IN (SELECT kinase FROM enriched_kinases)\n+      ) k\n+    ORDER BY k.ppep, k.kinase\n+  ")\n+\n+RSQLite::dbWriteTable(\n+  conn = db,\n+  name = "ksea_enriched_ks",\n+  value = ppep_kinase,\n+  append = FALSE\n+  )\n+\n+RSQLite::dbWriteTable(\n+  conn = db,\n+  name = "anova_signif",\n+  value = p_value_data,\n+  append = FALSE\n+  )\n+\n+  ddl_exec(db, "\n+    DROP VIEW IF EXISTS stats_metadata_v;\n+    "\n+  )\n+  dml_no_rows_exec(db, "\n+      CREATE VIEW stats_metadata_v\n+        AS\n+      SELECT DISTINCT  m.*,\n+          p.raw_anova_p,\n+          
p.fdr_adjusted_anova_p,\n+          kek.kinase AS ksea_enrichments\n+        FROM\n+          mrgfltr_metadata_view m\n+            LEFT JOIN anova_signif p\n+              ON m.phospho_peptide = p.phosphopeptide\n+            LEFT JOIN ksea_enriched_ks kek\n+              ON m.phospho_peptide = kek.ppep\n+      ;\n+    "\n+  )\n+\n+write.table(\n+  dbReadTable(db, "stats_metadata_v"),\n+  file = anova_ksea_mtdt_file,\n+  sep = "\\t",\n+  col.names = TRUE,\n+  row.names = FALSE,\n+  quote = FALSE\n+  )\n+\n+\n+```\n+\n+```{r parmlist, echo = FALSE, fig.dim = c(9, 10), results = \'asis\'}\n+cat("\\\\leavevmode\\n\\n\\n")\n+\n+# write parameters to report\n+\n+param_unlist <- unlist(as.list(params))\n+param_df <- data.frame(\n+  parameter = paste0("\\\\verb@", names(param_unlist), "@"),\n+  value = paste0("\\\\verb@", gsub("$", "\\\\$", param_unlist, fixed = TRUE), "@")\n+  )\n+\n+data_frame_latex(\n+  x = param_df,\n+  justification = "p{0.35\\\\linewidth} p{0.6\\\\linewidth}",\n+  centered = TRUE,\n+  caption = "Input parameters",\n+  anchor = const_table_anchor_bp,\n+  underscore_whack = FALSE\n+  )\n+\n+# write parameters to SQLite output\n+\n+mqppep_anova_script_param_df <- data.frame(\n+  script    = "mqppep_anova_script.Rmd",\n+  parameter = names(param_unlist),\n+  value     = param_unlist\n+  )\n+ddl_exec(db, "\n+  DROP TABLE IF EXISTS script_parameter;\n+  "\n+)\n+ddl_exec(db, "\n+  CREATE TABLE IF NOT EXISTS script_parameter(\n+    script    TEXT,\n+    parameter TEXT,\n+    value     ANY,\n+    UNIQUE (script, parameter) ON CONFLICT REPLACE\n+    )\n+    ;\n+  "\n+)\n+RSQLite::dbWriteTable(\n+  conn = db,\n+  name = "script_parameter",\n+  value = mqppep_anova_script_param_df,\n+  append = TRUE\n+)\n+\n+# We are done with output\n+RSQLite::dbDisconnect(db)\n+```\n+<!--\n+There\'s gotta be a better way...\n+\n+loaded_packages_df <-  sessioninfo::package_info("loaded")\n+loaded_packages_df[, "library"] <- 
as.character(loaded_packages_df$library)\n+loaded_packages_df <- data.frame(\n+  package = loaded_packages_df$package,\n+  version = loaded_packages_df$loadedversion,\n+  date    = loaded_packages_df$date\n+  )\n+data_frame_latex(\n+  x = loaded_packages_df,\n+  justification = "l | l l",\n+  centered = FALSE,\n+  caption = "Loaded R packages",\n+  anchor = const_table_anchor_bp\n+  )\n+-->\n'
b
diff -r 000000000000 -r dbff53e6f75f mqppep_mrgfltr.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mqppep_mrgfltr.py Mon Jul 11 19:22:25 2022 +0000
[
b'@@ -0,0 +1,1551 @@\n+#!/usr/bin/env python\n+\n+# Import the packages needed\n+import argparse\n+import operator  # for operator.itemgetter\n+import os.path\n+import re\n+import shutil  # for shutil.copyfile(src, dest)\n+import sqlite3 as sql\n+import sys  # import the sys module for exc_info\n+import time\n+import traceback  # for formatting stack-trace\n+from codecs import getreader as cx_getreader\n+\n+import numpy as np\n+import pandas\n+\n+# global constants\n+N_A = "N/A"\n+\n+\n+# ref: https://stackoverflow.com/a/8915613/15509512\n+#   answers: "How to handle exceptions in a list comprehensions"\n+#   usage:\n+#       from math import log\n+#       eggs = [1,3,0,3,2]\n+#       print([x for x in [catch(log, egg) for egg in eggs] if x is not None])\n+#   producing:\n+#       for <built-in function log>\n+#         with args (0,)\n+#         exception: math domain error\n+#       [0.0, 1.0986122886681098, 1.0986122886681098, 0.6931471805599453]\n+def catch(func, *args, handle=lambda e: e, **kwargs):\n+\n+    try:\n+        return func(*args, **kwargs)\n+    except Exception as e:\n+        print("For %s" % str(func))\n+        print("  with args %s" % str(args))\n+        print("  caught exception: %s" % str(e))\n+        (ty, va, tb) = sys.exc_info()\n+        print("  stack trace: " + str(traceback.format_exception(ty, va, tb)))\n+        exit(-1)\n+        return None\n+\n+\n+def whine(func, *args, handle=lambda e: e, **kwargs):\n+\n+    try:\n+        return func(*args, **kwargs)\n+    except Exception as e:\n+        print("Warning: For %s" % str(func))\n+        print("  with args %s" % str(args))\n+        print("  caught exception: %s" % str(e))\n+        (ty, va, tb) = sys.exc_info()\n+        print("  stack trace: " + str(traceback.format_exception(ty, va, tb)))\n+        return None\n+\n+\n+def ppep_join(x):\n+    x = [i for i in x if N_A != i]\n+    result = "%s" % " | ".join(x)\n+    if result != "":\n+        return result\n+    else:\n+        
return N_A\n+\n+\n+def melt_join(x):\n+    tmp = {key.lower(): key for key in x}\n+    result = "%s" % " | ".join([tmp[key] for key in tmp])\n+    return result\n+\n+\n+def __main__():\n+    # Parse Command Line\n+    parser = argparse.ArgumentParser(\n+        description="Phopsphoproteomic Enrichment Pipeline Merge and Filter."\n+    )\n+\n+    # inputs:\n+    #   Phosphopeptide data for experimental results, including the intensities\n+    #   and the mapping to kinase domains, in tabular format.\n+    parser.add_argument(\n+        "--phosphopeptides",\n+        "-p",\n+        nargs=1,\n+        required=True,\n+        dest="phosphopeptides",\n+        help="Phosphopeptide data for experimental results, including the intensities and the mapping to kinase domains, in tabular format",\n+    )\n+    #   UniProtKB/SwissProt DB input, SQLite\n+    parser.add_argument(\n+        "--ppep_mapping_db",\n+        "-d",\n+        nargs=1,\n+        required=True,\n+        dest="ppep_mapping_db",\n+        help="UniProtKB/SwissProt SQLite Database",\n+    )\n+    #   species to limit records chosed from PhosPhositesPlus\n+    parser.add_argument(\n+        "--species",\n+        "-x",\n+        nargs=1,\n+        required=False,\n+        default=[],\n+        dest="species",\n+        help="limit PhosphoSitePlus records to indicated species (field may be empty)",\n+    )\n+\n+    # outputs:\n+    #   tabular output\n+    parser.add_argument(\n+        "--mrgfltr_tab",\n+        "-o",\n+        nargs=1,\n+        required=True,\n+        dest="mrgfltr_tab",\n+        help="Tabular output file for results",\n+    )\n+    #   CSV output\n+    parser.add_argument(\n+        "--mrgfltr_csv",\n+        "-c",\n+        nargs=1,\n+        required=True,\n+        dest="mrgfltr_csv",\n+        help="CSV output file for results",\n+    )\n+    #   SQLite output\n+    parser.add_argument(\n+        "--mrgfltr_sqlite",\n+        "-S",\n+        nargs=1,\n+        required=True,\n+ 
       dest="mrgfltr_sqlite",\n+        help="SQLite output file for results",\n+    )\n+\n+    # "Make it so!" (parse the arguments)\n+    optio'..b'cur.execute(\n+            CITATION_INSERT_STMT,\n+            ("mrgfltr_metadata_view", CITATION_INSERT_PSP_REF),\n+        )\n+        cur.execute(\n+            CITATION_INSERT_STMT, ("mrgfltr_metadata", CITATION_INSERT_PSP_REF)\n+        )\n+\n+        # Read ppep-to-sequence LUT\n+        ppep_lut_df = pandas.read_sql_query(PPEP_ID_SQL, conn)\n+        # write only metadata for merged/filtered records to SQLite\n+        mrgfltr_metadata_df = output_df.copy()\n+        # replace phosphopeptide seq with ppep.id\n+        mrgfltr_metadata_df = ppep_lut_df.merge(\n+            mrgfltr_metadata_df,\n+            left_on="ppep_seq",\n+            right_on=PHOSPHOPEPTIDE,\n+            how="inner",\n+        )\n+        mrgfltr_metadata_df.drop(\n+            columns=[PHOSPHOPEPTIDE, "ppep_seq"], inplace=True\n+        )\n+        # rename columns\n+        mrgfltr_metadata_df.columns = MRGFLTR_METADATA_COLUMNS\n+        mrgfltr_metadata_df.to_sql(\n+            "mrgfltr_metadata",\n+            con=conn,\n+            if_exists="append",\n+            index=False,\n+            method="multi",\n+        )\n+\n+        # Close SwissProt SQLite database\n+        conn.close()\n+        # ----------- Write merge/filter metadata to SQLite database (finish) -----------\n+\n+        output_df = output_df.merge(\n+            quant_data,\n+            how="right",\n+            left_on=PHOSPHOPEPTIDE,\n+            right_on=PHOSPHOPEPTIDE_MATCH,\n+        )\n+        output_cols = output_df.columns.tolist()\n+        output_cols = output_cols[:-1]\n+        output_df = output_df[output_cols]\n+\n+        # cosmetic changes to Upstream column\n+        output_df[PUTATIVE_UPSTREAM_DOMAINS] = output_df[\n+            PUTATIVE_UPSTREAM_DOMAINS\n+        ].fillna(\n+            ""\n+        )  # fill the NaN with "" for those 
Phosphopeptides that got a "WARNING: Failed match for " in the upstream mapping\n+        us_series = pandas.Series(output_df[PUTATIVE_UPSTREAM_DOMAINS])\n+        i = 0\n+        while i < len(us_series):\n+            # turn blanks into N_A to signify the info was searched for but cannot be found\n+            if us_series[i] == "":\n+                us_series[i] = N_A\n+            i += 1\n+        output_df[PUTATIVE_UPSTREAM_DOMAINS] = us_series\n+\n+        end_time = time.process_time()  # timer\n+        print(\n+            "%0.6f establisheed output [3]" % (end_time - start_time,),\n+            file=sys.stderr,\n+        )  # timer\n+\n+        (output_rows, output_cols) = output_df.shape\n+\n+        output_df = output_df.convert_dtypes(convert_integer=True)\n+\n+        # Output onto Final CSV file\n+        output_df.to_csv(output_filename_csv, index=False)\n+        output_df.to_csv(\n+            output_filename_tab, quoting=None, sep="\\t", index=False\n+        )\n+\n+        end_time = time.process_time()  # timer\n+        print(\n+            "%0.6f wrote output [4]" % (end_time - start_time,),\n+            file=sys.stderr,\n+        )  # timer\n+\n+        print(\n+            "{:>10} phosphopeptides written to output".format(str(output_rows))\n+        )\n+\n+        end_time = time.process_time()  # timer\n+        print(\n+            "%0.6f seconds of non-system CPU time were consumed"\n+            % (end_time - start_time,),\n+            file=sys.stderr,\n+        )  # timer\n+\n+        # Rev. 7/1/2016\n+        # Rev. 7/3/2016 : fill NaN in Upstream column to replace to N/A\'s\n+        # Rev. 7/3/2016:  renamed Upstream column to PUTATIVE_UPSTREAM_DOMAINS\n+        # Rev. 12/2/2021: Converted to Python from ipynb; use fast Aho-Corasick searching; \\\n+        #                read from SwissProt SQLite database\n+        # Rev. 
12/9/2021: Transfer code to Galaxy tool wrapper\n+\n+        #\n+        # copied from Excel Output Script.ipynb END #\n+        #\n+\n+    try:\n+        catch(\n+            mqpep_getswissprot,\n+        )\n+        exit(0)\n+    except Exception as e:\n+        exit("Internal error running mqpep_getswissprot(): %s" % (e))\n+\n+\n+if __name__ == "__main__":\n+    __main__()\n'
b
diff -r 000000000000 -r dbff53e6f75f search_ppep.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/search_ppep.py Mon Jul 11 19:22:25 2022 +0000
[
b'@@ -0,0 +1,560 @@\n+#!/usr/bin/env python\n+# Search and memoize phosphopeptides in Swiss-Prot SQLite table UniProtKB\n+\n+import argparse\n+import os.path\n+import re\n+import sqlite3\n+import sys  # import the sys module for exc_info\n+import time\n+import traceback  # import the traceback module for format_exception\n+from codecs import getreader as cx_getreader\n+\n+# For Aho-Corasick search for fixed set of substrings\n+# - add_word\n+# - make_automaton\n+# - iter\n+import ahocorasick\n+\n+\n+# ref: https://stackoverflow.com/a/8915613/15509512\n+#   answers: "How to handle exceptions in a list comprehensions"\n+#   usage:\n+#       from math import log\n+#       eggs = [1,3,0,3,2]\n+#       print([x for x in [catch(log, egg) for egg in eggs] if x is not None])\n+#   producing:\n+#       for <built-in function log>\n+#         with args (0,)\n+#         exception: math domain error\n+#       [0.0, 1.0986122886681098, 1.0986122886681098, 0.6931471805599453]\n+def catch(func, *args, handle=lambda e: e, **kwargs):\n+\n+    try:\n+        return func(*args, **kwargs)\n+    except Exception as e:\n+        print("For %s" % str(func))\n+        print("  with args %s" % str(args))\n+        print("  caught exception: %s" % str(e))\n+        (ty, va, tb) = sys.exc_info()\n+        print("  stack trace: " + str(traceback.format_exception(ty, va, tb)))\n+        # exit(-1)\n+        return None  # was handle(e)\n+\n+\n+def __main__():\n+\n+    DROP_TABLES_SQL = """\n+        DROP VIEW  IF EXISTS ppep_gene_site_view;\n+        DROP VIEW  IF EXISTS uniprot_view;\n+        DROP VIEW  IF EXISTS uniprotkb_pep_ppep_view;\n+        DROP VIEW  IF EXISTS ppep_intensity_view;\n+        DROP VIEW  IF EXISTS ppep_metadata_view;\n+\n+        DROP TABLE IF EXISTS sample;\n+        DROP TABLE IF EXISTS ppep;\n+        DROP TABLE IF EXISTS site_type;\n+        DROP TABLE IF EXISTS deppep_UniProtKB;\n+        DROP TABLE IF EXISTS deppep;\n+        DROP TABLE IF EXISTS 
ppep_gene_site;\n+        DROP TABLE IF EXISTS ppep_metadata;\n+        DROP TABLE IF EXISTS ppep_intensity;\n+    """\n+\n+    CREATE_TABLES_SQL = """\n+        CREATE TABLE deppep\n+          ( id INTEGER PRIMARY KEY\n+          , seq TEXT UNIQUE                            ON CONFLICT IGNORE\n+          )\n+          ;\n+        CREATE TABLE deppep_UniProtKB\n+          ( deppep_id    INTEGER REFERENCES deppep(id) ON DELETE CASCADE\n+          , UniProtKB_id TEXT REFERENCES UniProtKB(id) ON DELETE CASCADE\n+          , pos_start    INTEGER\n+          , pos_end      INTEGER\n+          , PRIMARY KEY (deppep_id, UniProtKB_id, pos_start, pos_end)\n+                                                       ON CONFLICT IGNORE\n+          )\n+          ;\n+        CREATE TABLE ppep\n+          ( id        INTEGER PRIMARY KEY\n+          , deppep_id INTEGER REFERENCES deppep(id)    ON DELETE CASCADE\n+          , seq       TEXT UNIQUE                      ON CONFLICT IGNORE\n+          , scrubbed  TEXT\n+          );\n+        CREATE TABLE site_type\n+          ( id        INTEGER PRIMARY KEY\n+          , type_name TEXT UNIQUE                      ON CONFLICT IGNORE\n+          );\n+        CREATE INDEX idx_ppep_scrubbed on ppep(scrubbed)\n+          ;\n+        CREATE TABLE sample\n+          ( id        INTEGER PRIMARY KEY\n+          , name      TEXT UNIQUE                      ON CONFLICT IGNORE\n+          )\n+          ;\n+        CREATE VIEW uniprot_view AS\n+          SELECT DISTINCT\n+              Uniprot_ID\n+            , Description\n+            , Organism_Name\n+            , Organism_ID\n+            , Gene_Name\n+            , PE\n+            , SV\n+            , Sequence\n+            , Description ||\n+                CASE WHEN Organism_Name = \'N/A\'\n+                     THEN \'\'\n+                     ELSE \' OS=\'|| Organism_Name\n+                     END ||\n+                CASE WHEN Organism_ID = -1\n+                     THEN \'\'\n+        
             ELSE \' OX=\'|| Organism_ID\n+                     END ||\n+                CASE WHEN Gene_Name = \'N/A\'\n+                     THEN '..b'    "\\nEach of the following sequences is associated with several accession IDs (which are listed in the first column) but the same gene ID (which is listed in the second column)."\n+            )\n+        if row[2] != old_seq:\n+            old_seq = row[2]\n+            duplicate_count += 1\n+            if options.warn_duplicates:\n+                print("\\n%s\\t%s\\t%s" % row)\n+        else:\n+            if options.warn_duplicates:\n+                print("%s\\t%s" % (row[0], row[1]))\n+    if duplicate_count > 0:\n+        print(\n+            "\\n%d sequences have duplicated accession IDs\\n" % duplicate_count\n+        )\n+\n+    print("%s accession sequences will be searched\\n" % sequence_count)\n+\n+    # print(auto.dump())\n+\n+    # Convert the trie to an automaton (a finite-state machine)\n+    auto.make_automaton()\n+\n+    # Execute query for seqs and metadata without fetching the results yet\n+    uniprot_seq_and_id = cur.execute(UNIPROT_SEQ_AND_ID_SQL)\n+    while 1:\n+        batch = uniprot_seq_and_id.fetchmany(size=50)\n+        if not batch:\n+            break\n+        for Sequence, UniProtKB_id in batch:\n+            if Sequence is not None:\n+                for end_index, (insert_order, original_value) in auto.iter(\n+                    Sequence\n+                ):\n+                    ker.execute(\n+                        """\n+                      INSERT INTO deppep_UniProtKB\n+                        (deppep_id,UniProtKB_id,pos_start,pos_end)\n+                      VALUES (?,?,?,?)\n+                      """,\n+                        (\n+                            insert_order,\n+                            UniProtKB_id,\n+                            1 + end_index - len(original_value),\n+                            end_index,\n+                        ),\n+                
    )\n+            else:\n+                raise ValueError(\n+                    "UniProtKB_id %s, but Sequence is None: Check whether SwissProt file is missing sequence for this ID"\n+                    % (UniProtKB_id,)\n+                )\n+    ker.execute(\n+        """\n+        SELECT   count(*) || \' accession-peptide-phosphopeptide combinations were found\'\n+        FROM     uniprotkb_pep_ppep_view\n+        """\n+    )\n+    for row in ker.fetchall():\n+        print(row[0])\n+\n+    ker.execute(\n+        """\n+      SELECT   count(*) || \' accession matches were found\', count(*) AS accession_count\n+      FROM     (\n+        SELECT   accession\n+        FROM     uniprotkb_pep_ppep_view\n+        GROUP BY accession\n+        )\n+      """\n+    )\n+    for row in ker.fetchall():\n+        print(row[0])\n+\n+    ker.execute(\n+        """\n+      SELECT   count(*) || \' peptide matches were found\'\n+      FROM     (\n+        SELECT   peptide\n+        FROM     uniprotkb_pep_ppep_view\n+        GROUP BY peptide\n+        )\n+      """\n+    )\n+    for row in ker.fetchall():\n+        print(row[0])\n+\n+    ker.execute(\n+        """\n+      SELECT   count(*) || \' phosphopeptide matches were found\', count(*) AS phosphopeptide_count\n+      FROM     (\n+        SELECT   phosphopeptide\n+        FROM     uniprotkb_pep_ppep_view\n+        GROUP BY phosphopeptide\n+        )\n+      """\n+    )\n+    for row in ker.fetchall():\n+        print(row[0])\n+\n+    # link peptides not found in sequence database to a dummy sequence-record\n+    ker.execute(\n+        """\n+        INSERT INTO deppep_UniProtKB(deppep_id,UniProtKB_id,pos_start,pos_end)\n+          SELECT id, \'No Uniprot_ID\', 0, 0\n+          FROM   deppep\n+          WHERE  id NOT IN (SELECT deppep_id FROM deppep_UniProtKB)\n+        """\n+    )\n+\n+    con.commit()\n+    ker.execute("vacuum")\n+    con.close()\n+\n+\n+if __name__ == "__main__":\n+    wrap_start_time = 
time.perf_counter()\n+    __main__()\n+    wrap_stop_time = time.perf_counter()\n+    # print(wrap_start_time)\n+    # print(wrap_stop_time)\n+    print(\n+        "\\nThe matching process took %d milliseconds to run.\\n"\n+        % ((wrap_stop_time - wrap_start_time) * 1000),\n+    )\n+\n+# vim: sw=4 ts=4 et ai :\n'
b
diff -r 000000000000 -r dbff53e6f75f test-data/alpha_levels.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/alpha_levels.tabular Mon Jul 11 19:22:25 2022 +0000
b
@@ -0,0 +1,3 @@
+0.05
+0.1
+0.2
b
diff -r 000000000000 -r dbff53e6f75f test-data/pSTY_motifs.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pSTY_motifs.tabular Mon Jul 11 19:22:25 2022 +0000
b
b'@@ -0,0 +1,355 @@\n+"counter"\t"pcre"\t"symbol"\t"description"\t"pubmed_id"\t"classification"\t"source"\n+"1"\t"R.R..(pS|pT)(F|L)"\t"PKB_group"\t"Akt kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=8985174"\t"kinase substrate"\t"HPRD"\n+"2"\t"R.R..(pS|pT)"\t"PKB_group"\t"Akt kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=10945990"\t"kinase substrate"\t"HPRD"\n+"3"\t"GRART(S|T)pSFAE"\t"PKB_group"\t"Akt kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=8524413"\t"kinase substrate"\t"HPRD"\n+"4"\t"(R|Q|K)(R|K|N|Q|P|H)(R|K)(R|S|T)(N|K|Q|H|D|P)pS(F|W|I|M|N|S)(S|T|H)(R|S|K)(S|T|P|Q)"\t"PKB_group"\t"Akt kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=15782149"\t"kinase substrate"\t"HPRD"\n+"5"\t"(R|K).(R|K)(S|T).pS"\t"PKB_group"\t"Akt kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=16273072"\t"kinase substrate"\t"HPRD"\n+"6"\t"(M|V|L|I|F)(R|K|H)...(pS|pT)...(M|V|L|I|F)"\t"AMPK_group"\t"AMP-activated protein kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=7902296,7698321"\t"kinase substrate"\t"HPRD"\n+"7"\t"(M|V|L|I)..(R|K|H).(pS|pT)...(M|V|L|I)"\t"AMPK_group"\t"AMP-activated protein kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=7902296"\t"kinase substrate"\t"HPRD"\n+"8"\t"(M|V|L|I|F)(R|K|H)..(pS|pT)...(M|V|L|I|F)"\t"AMPK_group"\t"AMP-activated protein kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=7698321"\t"kinase substrate"\t"HPRD"\n+"9"\t"(R|K).R..pS...(R|K)"\t"AMPK_group"\t"AMP-activated protein kinase 2 substrate motif"\t"https://pubmed.ncbi.nlm.nih.gov/?term=7698321"\t"kinase substrate"\t"HPRD"\n+"10"\t"(P|L|I|M).(L|I|D|E)pSQ"\t"ATM"\t"ATM kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=10608806"\t"kinase substrate"\t"HPRD"\n+"11"\t"LpSQE"\t"ATM"\t"ATM kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=10801797,11544175"\t"kinase substrate"\t"HPRD"\n+"12"\t"pSQ"\t"ATM"\t"ATM kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=16273072"\t"kinase substrate"\t"HPRD"\n+"13"\t"(R|K|N)R.(pS|pT)(M|L|V|I)"\t"Aurora A"\t"Aurora-A 
kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=16083426"\t"kinase substrate"\t"HPRD"\n+"14"\t"(D|E)(pS|pT)..."\t"GRK-2"\t"b-Adrenergic Receptor kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=1645191"\t"kinase substrate"\t"HPRD"\n+"15"\t"HpSTSDD"\t"BCKDK"\t"Branched chain alpha-ketoacid dehydrogenase kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=3947057"\t"kinase substrate"\t"HPRD"\n+"16"\t"YRpSVDE"\t"BCKDK"\t"Branched chain alpha-ketoacid dehydrogenase kinase"\t"https://pubmed.ncbi.nlm.nih.gov/?term=3947057"\t"kinase substrate"\t"HPRD"\n+"17"\t"(M|V|L|I|F).R..(pS|pT)...(M|V|L|I|F)"\t"CaM-KI_group"\t"Calmodulin-dependent protein kinase I substrate motif"\t"https://pubmed.ncbi.nlm.nih.gov/?term=9452427,7698321,8022798"\t"kinase substrate"\t"HPRD"\n+"18"\t"(M|I|L|V|F|Y).R..(pS|pT)(M|I|L|V|F|Y)"\t"CaM-KII_alpha"\t"Calmodulin-dependent protein kinase II alpha substrate motif"\t"https://pubmed.ncbi.nlm.nih.gov/?term=9452427"\t"kinase substrate"\t"HPRD"\n+"19"\t"R..(pS|pT)"\t"CaM-KII_group"\t"Calmodulin-dependent protein kinase II substrate motif"\t"https://pubmed.ncbi.nlm.nih.gov/?term=1956339"\t"kinase substrate"\t"HPRD"\n+"20"\t"(K|F)(R|K)(Q|M)(Q|M|K|L|F)pS(F|I|M|L|V)(D|E|I)(L|M|K|I)(F|K)"\t"CaM-KII_group"\t"Calmodulin-dependent protein kinase II substrate motif"\t"https://pubmed.ncbi.nlm.nih.gov/?term=8887677"\t"kinase substrate"\t"HPRD"\n+"21"\t"(M|V|L|I|F).(R|K)..(pS|pT).."\t"CaM-KII_group"\t"Calmodulin-dependent protein kinase II substrate motif"\t"https://pubmed.ncbi.nlm.nih.gov/?term=8280084"\t"kinase substrate"\t"HPRD"\n+"22"\t"R..pS"\t"CaM-KII_group"\t"Calmodulin-dependent protein kinase II substrate motif"\t"https://pubmed.ncbi.nlm.nih.gov/?term=16273072"\t"kinase substrate"\t"HPRD"\n+"23"\t"VPGKARKKpSSCQLL"\t"CaM-KIV"\t"Calmodulin-dependent protein kinase IV substrate motif"\t"https://pubmed.ncbi.nlm.nih.gov/?term=1901412"\t"kinase substrate"\t"HPRD"\n+"24"\t"PLARTLpSVAGLP"\t"CaM-KIV"\t"Calmodulin-dependent protein kinase IV substrate 
motif"\t"https://pubmed.ncbi.nlm.nih.gov/?term=1309765"\t"kinase substrate"\t"HPRD"\n+"25"\t"(M|I|L|V|F|Y).R..(pS|pT)"\t"CaM-KIV"\t"Calmodulin-dependent protein kinase IV substrate motif"\t"https://pubmed.ncbi.nlm.nih.gov/?term=9452427"\t"kinase substr'..b'B domain binding motif"\t"https://pubmed.ncbi.nlm.nih.gov/?term=8662772"\t"domain binding"\t"HPRD"\n+"63"\t"HN(M|L|V|I)(M|L|V|I|N)NP(S|T)pY"\t"ShcC PTB"\t"ShcC PTB domain binding motif"\t"https://pubmed.ncbi.nlm.nih.gov/?term=8662772"\t"domain binding"\t"HPRD"\n+"1"\t"R.(pS|pT)"\t"PKA_group"\t"PKA"\t"https://pubmed.ncbi.nlm.nih.gov/1956339"\t"kinase substrate"\t"Phosida"\n+"2"\t"R(R|K).(pS|pT)"\t"PKA_group"\t"PKA"\t"https://pubmed.ncbi.nlm.nih.gov/1956339"\t"kinase substrate"\t"Phosida"\n+"3"\t"KR..(pS|pT)"\t"PKA_group"\t"PKA"\t"https://pubmed.ncbi.nlm.nih.gov/1956339"\t"kinase substrate"\t"Phosida"\n+"4"\t"S..(pS|pT)"\t"CK1_group"\t"CK1"\t"https://pubmed.ncbi.nlm.nih.gov/1956339"\t"kinase substrate"\t"Phosida"\n+"5"\t"(S|T)...pS"\t"CK1_group"\t"CK1"\t"https://pubmed.ncbi.nlm.nih.gov/1956339"\t"kinase substrate"\t"Phosida"\n+"6"\t"(pS|pT)..E"\t"CK2_group"\t"CK2"\t"https://pubmed.ncbi.nlm.nih.gov/1956339"\t"kinase substrate"\t"Phosida"\n+"7"\t"pS...S"\t"GSK3"\t"GSK3"\t"https://pubmed.ncbi.nlm.nih.gov/2156841"\t"kinase substrate"\t"Phosida"\n+"8"\t"(pS|pT)P.(K|R)"\t"CDK2"\t"CDK2"\t"https://pubmed.ncbi.nlm.nih.gov/1956339"\t"kinase substrate"\t"Phosida"\n+"9"\t"R..(pS|pT)"\t"CaM-KII_group"\t"CAMK2"\t"https://pubmed.ncbi.nlm.nih.gov/1956339"\t"kinase substrate"\t"Phosida"\n+"10"\t"R..(pS|pT)V"\t"CaM-KII_group"\t"CAMK2"\t"https://pubmed.ncbi.nlm.nih.gov/1956339"\t"kinase substrate"\t"Phosida"\n+"11"\t"P.(pS|pT)P"\t"MAP2K_group"\t"ERK/MAPK"\t"https://pubmed.ncbi.nlm.nih.gov/8325833"\t"kinase substrate"\t"Phosida"\n+"12"\t"V.(pS|pT)P"\t"MAP2K_group"\t"ERK/MAPK"\t"https://pubmed.ncbi.nlm.nih.gov/8325833"\t"kinase 
substrate"\t"Phosida"\n+"13"\t"PE(pS|pT)P"\t"MAP2K_group"\t"ERK/MAPK"\t"https://pubmed.ncbi.nlm.nih.gov/8325833"\t"kinase substrate"\t"Phosida"\n+"14"\t"R(R|S|T).(pS|pT).(S|T)"\t"PKB_group"\t"PKB/AKT"\t"https://pubmed.ncbi.nlm.nih.gov/15789031"\t"kinase substrate"\t"Phosida"\n+"15"\t"R.R..(pS|pT)"\t"PKB_group"\t"PKB/AKT"\t"https://pubmed.ncbi.nlm.nih.gov/15789031"\t"kinase substrate"\t"Phosida"\n+"16"\t"R..(pS|pT).R"\t"PKC_group"\t"PKC"\t"https://pubmed.ncbi.nlm.nih.gov/15782149"\t"kinase substrate"\t"Phosida"\n+"17"\t"(L|V|I).(R|K)..(pS|pT)"\t"PKD"\t"PKD"\t"https://pubmed.ncbi.nlm.nih.gov/15782149"\t"kinase substrate"\t"Phosida"\n+"18"\t"(I|E|V)pY(E|G)(E|D|P|N)(I|V|L)"\t"Lck"\t"LCK"\t"https://pubmed.ncbi.nlm.nih.gov/7845468"\t"kinase substrate"\t"Phosida"\n+"19"\t"(I|V|L)pY..(P|F)"\t"ABL1"\t"ABL"\t"https://pubmed.ncbi.nlm.nih.gov/7845468"\t"kinase substrate"\t"Phosida"\n+"20"\t"(E|D)..pY..(D|E|A|G|S|T)"\t"SRC_group"\t"SRC"\t"https://pubmed.ncbi.nlm.nih.gov/16273072"\t"kinase substrate"\t"Phosida"\n+"21"\t"pY..(I|L|V|M)"\t"ALK"\t"ALK"\t"https://pubmed.ncbi.nlm.nih.gov/16273072"\t"kinase substrate"\t"Phosida"\n+"22"\t"(D|P|S|A|E|N).pY(V|L|D|E|I|N|P)"\t"EGFR"\t"EGFR"\t"https://pubmed.ncbi.nlm.nih.gov/16381900"\t"kinase substrate"\t"Phosida"\n+"23"\t"(pS|pT)P.(K|R)"\t"CDK1"\t"CDK1"\t"https://pubmed.ncbi.nlm.nih.gov/12501191"\t"kinase substrate"\t"Phosida"\n+"24"\t"(pS|pT)P(K|R)"\t"CDK1"\t"CDK1"\t"https://pubmed.ncbi.nlm.nih.gov/12501191"\t"kinase substrate"\t"Phosida"\n+"25"\t"(R|K).(pS|pT)(I|L|V)"\t"Aurora A"\t"AURORA"\t"https://pubmed.ncbi.nlm.nih.gov/12408861"\t"kinase substrate"\t"Phosida"\n+"26"\t"(R|K|N)R.(pS|pT)(M|L|V|I)"\t"Aurora A"\t"AURORA-A"\t"https://pubmed.ncbi.nlm.nih.gov/16083426"\t"kinase substrate"\t"Phosida"\n+"27"\t"(D|E).(pS|pT)(V|I|L|M).(D|E)"\t"PLK"\t"PLK"\t"https://pubmed.ncbi.nlm.nih.gov/12738781"\t"kinase substrate"\t"Phosida"\n+"28"\t"(E|D).(pS|pT)(F|L|I|Y|W|V|M)"\t"PLK"\t"PLK1"\t"https://pubmed.ncbi.nlm.nih.gov/12738781"\t"kinase 
substrate"\t"Phosida"\n+"29"\t"L..(pS|pT)"\t"NEK6"\t"NEK6"\t"https://pubmed.ncbi.nlm.nih.gov/12023960"\t"kinase substrate"\t"Phosida"\n+"30"\t"L.R..(pS|pT)"\t"CHK1"\t"CHK1/2"\t"https://pubmed.ncbi.nlm.nih.gov/17464182"\t"kinase substrate"\t"Phosida"\n+"31"\t"(M|I|L|V).(R|K)..(pS|pT)"\t"CHK1"\t"CHK1"\t"https://pubmed.ncbi.nlm.nih.gov/10648819"\t"kinase substrate"\t"Phosida"\n+"32"\t"F..F(pS|pT)(F|Y)"\t"PDK1"\t"PDK1"\t"https://pubmed.ncbi.nlm.nih.gov/11516946"\t"kinase substrate"\t"Phosida"\n+"33"\t"(F|L|M)(R|K)(R|K)(pS|pT)"\t"NIMA"\t"NIMA"\t"https://pubmed.ncbi.nlm.nih.gov/8887677"\t"kinase substrate"\t"Phosida"\n'
b
diff -r 000000000000 -r dbff53e6f75f test-data/test_input_for_anova.sqlite
b
Binary file test-data/test_input_for_anova.sqlite has changed
b
diff -r 000000000000 -r dbff53e6f75f test-data/test_input_for_anova.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_input_for_anova.tabular Mon Jul 11 19:22:25 2022 +0000
b
b'@@ -0,0 +1,24 @@\n+Phosphopeptide\tSequence10\tSequence7\tGene_Name\tPhosphoresidue\tUniProt_ID\tDescription\tFunction Phosphoresidue(PSP=PhosphoSitePlus.org)\tPutative Upstream Kinases(PSP=PhosphoSitePlus.org)/Phosphatases/Binding Domains\tIntensity.shL.1A\tIntensity.shL.1B\tIntensity.shL.1C\tIntensity.shR.2A\tIntensity.shR.2B\tIntensity.shR.2C\n+AAAAPDSRVpSEEENLK\tMAAAAPDSRVpSEEENLKKTPK\tAAPDSRVsEEENLKK\tRRP15\tpS11\tQ9Y3B9\tRRP15_HUMAN RRP15-like protein OS=Homo sapiens OX=9606 GN=RRP15 PE=1 SV=2\tN/A\tCK2alpha | BARD1 Q99728\t38150000\t39445000\t56305000\t55338000\t7010600\t70203000\n+AAAITDMADLEELSRLpSPLPPGpSPGSAAR\tMADLEELSRLpSPLPPGSPGSA; LSRLSPLPPGpSPGSAARGRAE\tLEELSRLsPLPPGSP | LSPLPPGsPGSAARG\tAEBP2; AEBP2\tpS18, pS24; pS18, pS24\tQ6ZN18; Q6ZN18-2\tAEBP2_HUMAN Zinc finger protein AEBP2 OS=Homo sapiens OX=9606 GN=AEBP2 PE=1 SV=2; AEBP2_HUMAN Isoform 2 of Zinc finger protein AEBP2 OS=Homo sapiens OX=9606 GN=AEBP2\tN/A\tN/A\t5416400\t7101800\t385280000\t208060000\t41426000\t352400000\n+ADALQAGASQFETpSAAK\tLQAGASQFETpSAAKLKRKYWW\tGASQFETsAAKLKRK\tVAMP2; VAMP3\tpS80; pS63\tP63027; Q15836\tVAMP2_HUMAN_Vesicle-associated membrane protein 2 OS=Homo sapiens OX=9606 GN=VAMP2 PE=1 SV=3; VAMP3_HUMAN_Vesicle-associated membrane protein 3 OS=Homo sapiens OX=9606 GN=VAMP3 PE=1 SV=3\tN/A\tPKD3 | PKCiota\t44627000\t41445000\t69094000\t42521000\t5738000\t61819000\n+DQKLpSELDDR\tDKVLERDQKLpSELDDRADALQ\tLERDQKLsELDDRAD\tVAMP1; VAMP1; VAMP1; VAMP2; VAMP3\tpS63; pS63; pS63; pS61; pS44\tP23763; P23763-2; P23763-3; P63027; Q15836\tVAMP1_HUMAN_Vesicle-associated membrane protein 1 OS=Homo sapiens OX=9606 GN=VAMP1 PE=1 SV=1; VAMP1_HUMAN_Isoform 3 of Vesicle-associated membrane protein 1 OS=Homo sapiens OX=9606 GN=VAMP1; VAMP1_HUMAN_Isoform 2 of Vesicle-associated membrane protein 1 OS=Homo sapiens OX=9606 GN=VAMP1; VAMP2_HUMAN_Vesicle-associated membrane protein 2 OS=Homo sapiens OX=9606 GN=VAMP2 PE=1 SV=3; VAMP3_HUMAN_Vesicle-associated membrane protein 3 OS=Homo sapiens OX=9606 
GN=VAMP3 PE=1 SV=3\tN/A\tCK2alpha | PKAbeta | PKAgamma | PKCiota | PDHK1\t75542000\t44814000\t32924000\t35016000\t11023000\t4669900\n+EFVpSSDESSSGENK\tSESFKSKEFVpSSDESSSGENK\tFKSKEFVsSDESSSG\tSSRP1\tpS667\tQ08945\tSSRP1_HUMAN FACT complex subunit SSRP1 OS=Homo sapiens OX=9606 GN=SSRP1 PE=1 SV=1\tN/A\tCK2alpha | CK2a2 | CDK7 | GSK3\t12562000\t16302000\t23000000\t7857800\t0\t18830000\n+EGMNPSYDEYADpSDEDQHDAYLER\tMNPSYDEYADpSDEDQHDAYLE\tSYDEYADsDEDQHDA\tSSRP1\tpS444\tQ08945\tSSRP1_HUMAN FACT complex subunit SSRP1 OS=Homo sapiens OX=9606 GN=SSRP1 PE=1 SV=1\tN/A\tCK2alpha | CK2a2 | CDK7 | CK1alpha | GRK-2 | PDHK1\t0\t0\t0\t0\t0\t0\n+IGNEEpSDLEEACILPHpSPINVDK\tDDEEKIGNEEpSDLEEACILPH; DLEEACILPHpSPINVDKRPIA\tEKIGNEEsDLEEACI | EACILPHsPINVDKR\tHERC2\tpS1577, pS1588\tO95714\tHERC2_HUMAN E3 ubiquitin-protein ligase HERC2 OS=Homo sapiens OX=9606 GN=HERC2 PE=1 SV=2\tN/A\tCK2alpha | GRK-2 | DOC_WW_Pin1_4 | NEK6\t167764000\t121218000\t155736000\t140640000\t83642000\t128468000\n+IRAEEEDLAAVPFLApSDNEEEEDEK\tEDLAAVPFLApSDNEEEEDEKG\tAAVPFLAsDNEEEED\tHERC2\tpS2928\tO95714\tHERC2_HUMAN E3 ubiquitin-protein ligase HERC2 OS=Homo sapiens OX=9606 GN=HERC2 PE=1 SV=2\tN/A\tCK2alpha\t22562000\t18225000\t9119700\t11689000\t0\t0\n+KGLLApTpSGNDGTIR\tVWCNKKGLLApTSGNDGTIRVW; WCNKKGLLATpSGNDGTIRVWN\tNKKGLLAtSGNDGTI | KKGLLATsGNDGTIR\tHERC1\tpT3445, pS3446\tQ15751\tHERC1_HUMAN Probable E3 ubiquitin-protein ligase HERC1 OS=Homo sapiens OX=9606 GN=HERC1 PE=1 SV=2\tN/A\tN/A\t7843600\t0\t241700000\t0\t0\t10042600\n+KpSSLVTSK\tPTPQDLPQRKpSSLVTSKLAGG; PTPQDLPQRKpSSLVTSKLAG\tQDLPQRKsSLVTSKL\tENSA; ENSA; ENSA; ENSA; ENSA; ENSA; ENSA; ENSA\tpS108; pS108; pS124; pS131; pS104; pS104; pS120; pS124\tO43768; O43768-2; O43768-3; O43768-4; O43768-5; O43768-6; O43768-7; O43768-9\tENSA_HUMAN Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA PE=1 SV=1; ENSA_HUMAN Isoform 2 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 3 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN 
Isoform 4 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 5 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 6 of Al'..b'ENPAEETGEEK\tMpSQKQEEENPAE\t______MsQKQEEEN\tENSA; ENSA; ENSA; ENSA; ENSA; ENSA\tpS2; pS2; pS2; pS2; pS2; pS2\tO43768; O43768-2; O43768-3; O43768-4; O43768-8; O43768-9\tENSA_HUMAN Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA PE=1 SV=1; ENSA_HUMAN Isoform 2 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 3 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 4 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 8 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 9 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA\tN/A\tN/A\t0\t0\t8765300\t0\t2355900\t14706000\n+pTYVDPFTpYEDPNQAVR\tEEKHLNQGVRpTYVDPFTYEDP; GVRTYVDPFTpYEDPNQAVREF\tHLNQGVRtYVDPFTY | TYVDPFTyEDPNQAV\tEPHA4; EPHA4\tpT595, pY602; pT544, pY551\tP54764; P54764-2\tEPHA4_HUMAN Ephrin type-A receptor 4 OS=Homo sapiens OX=9606 GN=EPHA4 PE=1 SV=1; EPHA4_HUMAN Isoform 2 of Ephrin type-A receptor 4 OS=Homo sapiens OX=9606 GN=EPHA4\tN/A\tEPHA4 | EphA1 | EphA2 | EphA3 | EphA5 | EphA7 | EphA6 | Abl | EphA8 | Fgr | Yes | BLK | HCK | EphB6 | EphB3\t725460\t0\t1651300\t655850\t646420\t0\n+QLSEpSFK\tSKSSSRQLSEpSFKSKEFVSSD\tSSRQLSEsFKSKEFV\tSSRP1\tpS659\tQ08945\tSSRP1_HUMAN FACT complex subunit SSRP1 OS=Homo sapiens OX=9606 GN=SSRP1 PE=1 SV=1\tN/A\tCK2a2 | CDK7 | PKCalpha | PKCbeta | DNAPK | NEK6\t68201000\t87774000\t138300000\t95357000\t19966000\t149110000\n+RGpSLEMSSDGEPLSR\tSSATSGGRRGpSLEMSSDGEPL\tTSGGRRGsLEMSSDG\tAEBP2; AEBP2\tpS206; pS206\tQ6ZN18; Q6ZN18-2\tAEBP2_HUMAN Zinc finger protein AEBP2 OS=Homo sapiens OX=9606 GN=AEBP2 PE=1 SV=2; AEBP2_HUMAN Isoform 2 of Zinc finger protein AEBP2 OS=Homo sapiens OX=9606 
GN=AEBP2\tN/A\tGSK3\t19262000\t11103000\t19454000\t0\t1816900\t22028000\n+SDGpSLEDGDDVHR\tIEDGGARSDGpSLEDGDDVHRA\tGGARSDGsLEDGDDV\tSERINC1\tpS364\tQ9NRX5\tSERC1_HUMAN Serine incorporator 1 OS=Homo sapiens OX=9606 GN=SERINC1 PE=1 SV=1\tN/A\tPLK1 | PDHK1\t31407000\t17665000\t20892000\t23194000\t5132400\t54893000\n+SEpSLTAESR\tEGGGLMTRSEpSLTAESRLVHT\tGLMTRSEsLTAESRL\tHERC1\tpS1491\tQ15751\tHERC1_HUMAN Probable E3 ubiquitin-protein ligase HERC1 OS=Homo sapiens OX=9606 GN=HERC1 PE=1 SV=2\tN/A\tGRK-2\t11766000\t13176000\t20540000\t16963000\t4364700\t21308000\n+STGPTAATGpSNRR\tMSTGPTAATGpSNRRLQQTQNQ\tGPTAATGsNRRLQQT\tVAMP3\tpS11\tQ15836\tVAMP3_HUMAN_Vesicle-associated membrane protein 3 OS=Homo sapiens OX=9606 GN=VAMP3 PE=1 SV=3\tN/A\tPKCalpha | PKCbeta | PKCzeta\t3057100\t4718800\t12052000\t5047700\t1070900\t8333500\n+TEDLEATpSEHFK\tRNKTEDLEATpSEHFKTTSQKV\tTEDLEATsEHFKTTS\tVAMP8\tpS55\tQ9BV40\tVAMP8_HUMAN_Vesicle-associated membrane protein 8 OS=Homo sapiens OX=9606 GN=VAMP8 PE=1 SV=1\tactivity, inhibited; abolish function in SNARE complex during mast cell secretion, reduces in vitro ensemble vesicle fusion\tN/A\t20400000\t9738500\t7862300\t0\t0\t76518000\n+TFWpSPELK\tSSMNSIKTFWpSPELKKERVLR\tNSIKTFWsPELKKER\tERC2\tpS187\tO15083\tERC2_HUMAN ERC protein 2 OS=Homo sapiens OX=9606 GN=ERC2 PE=1 SV=3\tN/A\tIKKalpha | IKKbeta | HIPK2 | DOC_WW_Pin1_4\t29764000\t20957000\t24855000\t30752000\t8304800\t23771000\n+YFDpSGDYNMAK\tCADEMQKYFDpSGDYNMAKAKM; RLQKGQKYFDpSGDYNMAKAKM; MKSVEQKYFDpSGDYNMAKAKM\tEMQKYFDsGDYNMAK | KGQKYFDsGDYNMAK | VEQKYFDsGDYNMAK\tENSA; ENSA; ENSA; ENSA; ENSA; ENSA; ENSA; ENSA\tpS67; pS67; pS83; pS90; pS63; pS63; pS79; pS83\tO43768; O43768-2; O43768-3; O43768-4; O43768-5; O43768-6; O43768-7; O43768-9\tENSA_HUMAN Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA PE=1 SV=1; ENSA_HUMAN Isoform 2 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 3 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 4 of Alpha-endosulfine 
OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 5 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 6 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 7 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA; ENSA_HUMAN Isoform 9 of Alpha-endosulfine OS=Homo sapiens OX=9606 GN=ENSA\tmolecular association, regulation; cell cycle regulation; PPP2CA(INDUCES)\tGRK-2\t323250000\t127970000\t0\t67123000\t12790000\t71378000\n'
b
diff -r 000000000000 -r dbff53e6f75f test-data/test_input_for_preproc.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_input_for_preproc.tabular Mon Jul 11 19:22:25 2022 +0000
[
b'@@ -0,0 +1,39 @@\n+Proteins\tPositions within proteins\tLeading proteins\tProtein\tFasta headers\tLocalization prob\tScore diff\tPEP\tScore\tDelta score\tScore for localization\tLocalization prob shL.1A\tScore diff shL.1A\tPEP shL.1A\tScore shL.1A\tLocalization prob shL.1B\tScore diff shL.1B\tPEP shL.1B\tScore shL.1B\tLocalization prob shL.1C\tScore diff shL.1C\tPEP shL.1C\tScore shL.1C\tLocalization prob shR.2A\tScore diff shR.2A\tPEP shR.2A\tScore shR.2A\tLocalization prob shR.2B\tScore diff shR.2B\tPEP shR.2B\tScore shR.2B\tLocalization prob shR.2C\tScore diff shR.2C\tPEP shR.2C\tScore shR.2C\tDiagnostic peak\tNumber of Phospho (STY)\tAmino acid\tSequence window\tModification window\tPeptide window coverage\tPhospho (STY) Probabilities\tPhospho (STY) Score diffs\tPosition in peptide\tCharge\tMass error [ppm]\tIdentification type shL.1A\tIdentification type shL.1B\tIdentification type shL.1C\tIdentification type shR.2A\tIdentification type shR.2B\tIdentification type shR.2C\tIntensity\tIntensity___1\tIntensity___2\tIntensity___3\tRatio mod/base\tIntensity shL.1A\tIntensity shL.1B\tIntensity shL.1C\tIntensity shR.2A\tIntensity shR.2B\tIntensity shR.2C\tRatio mod/base shL.1A\tRatio mod/base shL.1B\tRatio mod/base shL.1C\tRatio mod/base shR.2A\tRatio mod/base shR.2B\tRatio mod/base shR.2C\tIntensity shL.1A___1\tIntensity shL.1A___2\tIntensity shL.1A___3\tIntensity shL.1B___1\tIntensity shL.1B___2\tIntensity shL.1B___3\tIntensity shL.1C___1\tIntensity shL.1C___2\tIntensity shL.1C___3\tIntensity shR.2A___1\tIntensity shR.2A___2\tIntensity shR.2A___3\tIntensity shR.2B___1\tIntensity shR.2B___2\tIntensity shR.2B___3\tIntensity shR.2C___1\tIntensity shR.2C___2\tIntensity shR.2C___3\tOccupancy shL.1A\tOccupancy ratioshL.1A\tOccupancy error scale shL.1A\tOccupancy shL.1B\tOccupancy ratioshL.1B\tOccupancy error scale shL.1B\tOccupancy shL.1C\tOccupancy ratioshL.1C\tOccupancy error scale shL.1C\tOccupancy shR.2A\tOccupancy ratioshR.2A\tOccupancy error scale 
shR.2A\tOccupancy shR.2B\tOccupancy ratioshR.2B\tOccupancy error scale shR.2B\tOccupancy shR.2C\tOccupancy ratioshR.2C\tOccupancy error scale shR.2C\tReverse\tPotential contaminant\tid\tProtein group IDs\tPositions\tPosition\tPeptide IDs\tMod. peptide IDs\tEvidence IDs\tMS/MS IDs\tBest localization evidence ID\tBest localization MS/MS ID\tBest localization raw file\tBest localization scan number\tBest score evidence ID\tBest score MS/MS ID\tBest score raw file\tBest score scan number\tBest PEP evidence ID\tBest PEP MS/MS ID\tBest PEP raw file\tBest PEP scan number\n+sp|O43768-2|ENSA_HUMAN;sp|O43768|ENSA_HUMAN;sp|O43768-9|ENSA_HUMAN;sp|O43768-3|ENSA_HUMAN;sp|O43768-4|ENSA_HUMAN;sp|O43768-6|ENSA_HUMAN;sp|O43768-5|ENSA_HUMAN;sp|O43768-7|ENSA_HUMAN\t108;108;124;124;131;104;104;120\tsp|O43768-2|ENSA_HUMAN\tsp|O43768-2|ENSA_HUMAN\t\t0.877317\t8.54376\t0.001041\t110.11\t55.028\t110.11\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t1\tS\tTGDHIPTPQDLPQRKSSLVTSKLAG______\tX;X;X;X;X;X;X;X;X;X;X;X;X;X;X;Phospho (STY);X;X;X;X;X;X;X;X;X;X;X;X;X;X;X\tXXXXXXXXXXXXXXPPPPPPPPXXXXXXXXX\tKS(0.877)S(0.123)LVTSK\tKS(8.54)S(-8.54)LVT(-58.58)S(-72.01)K\t2\t2\t0.022801\t\t\tBy MS/MS\t\t\t\t18629000\t18629000\t0\t0\t\t0\t0\t18629000\t0\t0\t0\t\t\t\t\t\t\t0\t0\t0\t0\t0\t0\t18629000\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t700\t529\t108\t108\t12310;20039\t13742;22688\t99166\t91729\t99166\t91729\tQE05099\t5593\t99166\t91729\tQE05099\t5593\t99166\t91729\tQE05099\t5593\n+sp|O43768-2|ENSA_HUMAN;sp|O43768|ENSA_HUMAN;sp|O43768-9|ENSA_HUMAN;sp|O43768-3|ENSA_HUMAN;sp|O43768-4|ENSA_HUMAN;sp|O43768-6|ENSA_HUMAN;sp|O43768-5|ENSA_HUMAN;sp|O43768-7|ENSA_HUMAN\t109;109;125;125;132;105;105;121\tsp|O43768-2|ENSA_HUMAN\tsp|O43768-2|ENSA_HUMAN\t\t0.877764\t9.23011\t0.00135208\t98.182\t25.939\t55.754\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t1\tS\tGDHIPTPQDLPQRKSSLVTSKLAG_______\tX;X;X;X;X;X;X;X;X;X;X;X;X;X;X;Phospho 
(STY);X;X;X;X;X;X;X;X;X;X;X;X;X;X;X\tXXXXXXXXXXXXXPPPPPPPPXXXXXXXXXX\tKS(0.105)S(0.878)LVT(0.015)S(0.002)K\tKS(-9.23)S(9.23)LVT(-17.65)S(-25.69)K\t3\t2\t-0.061619\tBy MS/MS\tBy MS/MS\tBy matching\tBy matching\tBy matching\tBy MS/MS\t81973000\t81973000\t0\t0\t\t7090300\t8341200\t9691500\t10030000\t1675200\t9952100\t\t\t\t\t\t\t7090300\t0\t0\t8341200\t0\t0\t9691500\t0\t0\t10030000\t0\t0\t1675200\t0\t0\t99'..b'\tADALQAGAS(-49.99)QFET(-10.66)S(10.66)AAK\t14\t2\t0.23449\tBy MS/MS\tBy MS/MS\tBy MS/MS\tBy MS/MS\tBy matching\tBy MS/MS\t265240000\t265240000\t0\t0\t0.036151\t44627000\t41445000\t69094000\t42521000\t5738000\t61819000\t0.03226\t0.028442\t0.039791\t0.036967\t0.030963\t0.043392\t44627000\t0\t0\t41445000\t0\t0\t69094000\t0\t0\t42521000\t0\t0\t5738000\t0\t0\t61819000\t0\t0\t0.47624\t0.90925\t12.188\t0.51677\t1.0694\t7.2217\tNaN\tNaN\tNaN\t0.81588\t4.4311\t19.209\tNaN\tNaN\tNaN\t0.4388\t0.78189\t5.9861\t\t\t4442\t2836\t63\t63\t279\t319\t2297;2298;2299;2300;2301;2302\t1992;1993;1994;1995;1996\t2300\t1995\tQE05100\t30086\t2301\t1996\tQE05102\t30007\t2301\t1996\tQE05102\t30007\n+sp|Q15836|VAMP3_HUMAN;sp|P63027|VAMP2_HUMAN;sp|P23763-2|VAMP1_HUMAN;sp|P23763-3|VAMP1_HUMAN;sp|P23763|VAMP1_HUMAN\t44;61;63;63;63\tsp|Q15836|VAMP3_HUMAN\tsp|Q15836|VAMP3_HUMAN\t\t1\t65.4951\t2.36E-06\t126.19\t98.602\t65.495\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t1\tS\tMRVNVDKVLERDQKLSELDDRADALQAGASQ\tX;X;X;X;X;X;X;X;X;X;X;X;X;X;X;Phospho (STY);X;X;X;X;X;X;X;X;X;X;X;X;X;X;X\tXXXXXXXXXXXPPPPPPPPPPXXXXXXXXXX\tDQKLS(1)ELDDR\tDQKLS(65.5)ELDDR\t5\t3\t-0.72518\tBy MS/MS\tBy MS/MS\tBy MS/MS\tBy MS/MS\tBy matching\tBy 
MS/MS\t412950000\t412950000\t0\t0\tNaN\t75542000\t44814000\t32924000\t35016000\t11023000\t4669900\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\t75542000\t0\t0\t44814000\t0\t0\t32924000\t0\t0\t35016000\t0\t0\t11023000\t0\t0\t4669900\t0\t0\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\t\t\t4443\t2836\t44\t44\t4530\t5083\t37093;37094;37095;37096;37097;37098;37099;37100;37101;37102;37103;37104\t34712;34713;34714;34715;34716;34717;34718;34719\t37100\t34719\tQE05102\t18436\t37093\t34712\tQE05097\t18245\t37093\t34712\tQE05097\t18245\n+sp|Q15836|VAMP3_HUMAN\t11\tsp|Q15836|VAMP3_HUMAN\tsp|Q15836|VAMP3_HUMAN\t\t0.97018\t15.1316\t0.000117365\t79.652\t72.041\t79.652\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t1\tS\t_____MSTGPTAATGSNRRLQQTQNQVDEVV\tX;X;X;X;X;X;X;X;X;X;X;X;X;X;X;Phospho (STY);X;X;X;X;X;X;X;X;X;X;X;X;X;X;X\tXXXXXXPPPPPPPPPPPPPXXXXXXXXXXXX\tSTGPTAAT(0.03)GS(0.97)NRR\tS(-66.94)T(-63.48)GPT(-42.47)AAT(-15.13)GS(15.13)NRR\t10\t2\t-0.15791\tBy matching\tBy matching\tBy MS/MS\tBy matching\tBy matching\tBy MS/MS\t34280000\t34280000\t0\t0\tNaN\t3057100\t4718800\t12052000\t5047700\t1070900\t8333500\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\t3057100\t0\t0\t4718800\t0\t0\t12052000\t0\t0\t5047700\t0\t0\t1070900\t0\t0\t8333500\t0\t0\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\t\t\t4444\t2836\t11\t11\t20280\t22978\t162490;162491;162492;162493;162494;162495\t144222;144223\t162490\t144222\tQE05099\t7582\t162490\t144222\tQE05099\t7582\t162490\t144222\tQE05099\t7582\n+sp|Q9BV40|VAMP8_HUMAN\t55\tsp|Q9BV40|VAMP8_HUMAN\tsp|Q9BV40|VAMP8_HUMAN\t\t0.959784\t13.7778\t3.78E-05\t91.969\t27.98\t91.969\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t1\tS\tNLEHLRNKTEDLEATSEHFKTTSQKVARKFW\tX;X;X;X;X;X;X;X;X;X;X;X;X;X;X;Phospho (STY);X;X;X;X;X;X;X;X;X;X;X;X;X;X;X\tXXXXXXXXPPPPPPPPPPPPXXXXXXXXXXX\tTEDLEAT(0.04)S(0.96)EHFK\tT(-83.18)EDLEAT(-13.78)S(13.78)EHFK\t8\t2\t0.40785\tBy matching\tBy matching\tBy matching\t\t\tBy 
MS/MS\t114520000\t114520000\t0\t0\tNaN\t20400000\t9738500\t7862300\t0\t0\t76518000\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\t20400000\t0\t0\t9738500\t0\t0\t7862300\t0\t0\t0\t0\t0\t0\t0\t0\t76518000\t0\t0\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\t\t\t7902\t4687\t55\t55\t21013\t23827\t168874;168875;168876;168877\t150433\t168874\t150433\tQE05102\t19524\t168874\t150433\tQE05102\t19524\t168874\t150433\tQE05102\t19524\n+sp|P54764-2|EPHA4_HUMAN;sp|P54764|EPHA4_HUMAN\t551;602\tsp|P54764-2|EPHA4_HUMAN\tsp|P54764-2|EPHA4_HUMAN\t\t0.871707\t6.48916\t4.61E-08\t65.374\t58.758\t65.374\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t+\t2\tY\tKHLNQGVRTYVDPFTYEDPNQAVREFAKEID\tX;X;X;X;X;X;X;X;Phospho (STY);X;X;X;X;X;X;Phospho (STY);X;X;X;X;X;X;X;X;X;X;X;X;X;X;X\tXXXXXXXXPPPPPPPPPPPPPPPPXXXXXXX\tT(0.499)Y(0.501)VDPFT(0.128)Y(0.872)EDPNQAVR\tT(0.85)Y(-0.85)VDPFT(-6.49)Y(6.49)EDPNQAVR\t8\t3\t0.97415\tBy matching\t\tBy MS/MS\tBy matching\tBy matching\t\t3679100\t0\t3679100\t0\tNaN\t725460\t0\t1651300\t655850\t646420\t0\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\t0\t725460\t0\t0\t0\t0\t0\t1651300\t0\t0\t655850\t0\t0\t646420\t0\t0\t0\t0\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\tNaN\t\t\t242\t260\t551\t551\t972\t999\t4968;4969;4970;4971\t3421\t4968\t3421\tQE04980\t9557\t4968\t3421\tQE04980\t9557\t4968\t3421\tQE04980\t9557\n'
b
diff -r 000000000000 -r dbff53e6f75f test-data/test_kinase_substrate.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_kinase_substrate.tabular Mon Jul 11 19:22:25 2022 +0000
b
@@ -0,0 +1,5 @@
+GENE KINASE KIN_ACC_ID KIN_ORGANISM SUBSTRATE SUB_GENE_ID SUB_ACC_ID SUB_GENE SUB_ORGANISM SUB_MOD_RSD SITE_GRP_ID SITE_+/-7_AA DOMAIN IN_VIVO_RXN IN_VITRO_RXN CST_CAT#
+Csnk2a1 CK2A1 Q60737 human VAMP4 53330 O70480 Vamp4 human S30 454285 RNLLEDDsDEEEDFF   X
+EPHA2 EphA2 P29317 human EphA2 1969 P29317 EPHA2 human Y588 450859 QLkPLktyVDPHtyE EphA2_TM X X 7423; 12677
+EPHA4 EphA4 P54764 human EphA4 2043 P54764 EPHA4 human Y596 450856 LNQGVRtyVDPFtyE EphA2_TM X
+EPHA4 EphA4 P54764 human EphA4 2043 P54764 EPHA4 human Y602 450857 tyVDPFtyEDPNQAV EphA2_TM X
b
diff -r 000000000000 -r dbff53e6f75f test-data/test_networkin.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_networkin.tabular Mon Jul 11 19:22:25 2022 +0000
b
b'@@ -0,0 +1,101 @@\n+#substrate\tposition\tid\tnetworkin_score\ttree\tnetphorest_group\tnetphorest_score\tstring_identifier\tstring_score\tsubstrate_name\tsequence\tstring_path\n+VAMP4 (ENSP00000236192)\t30\tCK2alpha\t35.6396\tKIN\tCK2_group\t0.5228\tENSP00000236192\t0.85\tVAMP4\tLLEDDsDEEED\t"ENSP00000217244, 0.68 ENSP00000236192"\n+SSRP1 (ENSP00000278412)\t444\tCK2alpha\t28.6345\tKIN\tCK2_group\t0.3768\tENSP00000278412\t0.874\tSSRP1\tDEYADsDEDQH\t"ENSP00000217244, 0.6992 ENSP00000278412"\n+SSRP1 (ENSP00000278412)\t667\tCK2alpha\t22.2088\tKIN\tCK2_group\t0.3168\tENSP00000278412\t0.874\tSSRP1\tSKEFVsSDESS\t"ENSP00000217244, 0.6992 ENSP00000278412"\n+HERC2 (ENSP00000261609)\t1577\tCK2alpha\t10.7686\tKIN\tCK2_group\t0.5253\tENSP00000261609\t0.4514\tHERC2\tIGNEEsDLEEA\t"ENSP00000217244, 0.764 ENSP00000346659, 0.76 ENSP00000261609"\n+HERC2 (ENSP00000261609)\t2928\tCK2alpha\t10.7686\tKIN\tCK2_group\t0.4698\tENSP00000261609\t0.4514\tHERC2\tVPFLAsDNEEE\t"ENSP00000217244, 0.764 ENSP00000346659, 0.76 ENSP00000261609"\n+RRP15 (ENSP00000355899)\t11\tCK2alpha\t8.5484\tKIN\tCK2_group\t0.3566\tENSP00000355899\t0.461\tRRP15\tPDSRVsEEENL\t"ENSP00000217244, 0.3688 ENSP00000355899"\n+SSRP1 (ENSP00000278412)\t444\tCK2a2\t7.8435\tKIN\tCK2_group\t0.3768\tENSP00000278412\t0.615\tSSRP1\tDEYADsDEDQH\t"ENSP00000262506, 0.492 ENSP00000278412"\n+SSRP1 (ENSP00000278412)\t667\tCK2a2\t7.7757\tKIN\tCK2_group\t0.3168\tENSP00000278412\t0.615\tSSRP1\tSKEFVsSDESS\t"ENSP00000262506, 0.492 ENSP00000278412"\n+VAMP2 (ENSP00000314214)\t80\tPKD3\t6.9217\tKIN\tPKD_group\t0.0744\tENSP00000314214\t0.949\tVAMP2\tSQFETsAAKLK\t"ENSP00000234179, 0.7592 ENSP00000314214"\n+VAMP2 (ENSP00000314214)\t61\tCK2alpha\t6.3122\tKIN\tCK2_group\t0.3338\tENSP00000314214\t0.4391\tVAMP2\tRDQKLsELDDR\t"ENSP00000217244, 0.7992 ENSP00000222812, 0.7544 ENSP00000314214"\n+VAMP1 (ENSP00000380148)\t63\tCK2alpha\t6.1363\tKIN\tCK2_group\t0.3338\tENSP00000380148\t0.4364\tVAMP1\tRDQKLsELDDR\t"ENSP00000217244, 0.7944 ENSP00000222812, 
0.7544 ENSP00000380148"\n+ERC1 (ENSP00000354158)\t191\tIKKalpha\t5.3194\tKIN\tIKKalpha_IKKbeta_group\t0.031\tENSP00000354158\t0.96\tERC1\tIKTFWsPELKK\t"ENSP00000359424, 0.768 ENSP00000354158"\n+ERC1 (ENSP00000354158)\t191\tIKKalpha\t5.3194\tKIN\tIKKalpha_IKKbeta_group\t0.031\tENSP00000354158\t0.96\tERC1\tIKTFWsPELKK\t"ENSP00000359424, 0.768 ENSP00000354158"\n+VAMP2 (ENSP00000314214)\t61\tPKAbeta\t4.9293\tKIN\tPKA_group\t0.1153\tENSP00000314214\t0.8\tVAMP2\tRDQKLsELDDR\t"ENSP00000359719, 0.64 ENSP00000314214"\n+VAMP2 (ENSP00000314214)\t61\tPKAgamma\t4.9293\tKIN\tPKA_group\t0.1153\tENSP00000314214\t0.8\tVAMP2\tRDQKLsELDDR\t"ENSP00000366488, 0.64 ENSP00000314214"\n+VAMP3 (ENSP00000054666)\t44\tCK2alpha\t4.2842\tKIN\tCK2_group\t0.3338\tENSP00000054666\t0.4201\tVAMP3\tRDQKLsELDDR\t"ENSP00000217244, 0.7992 ENSP00000317714, 0.6792 ENSP00000054666"\n+VAMP2 (ENSP00000314214)\t80\tPKCiota\t3.8971\tKIN\tPKC_group\t0.0928\tENSP00000314214\t0.899\tVAMP2\tSQFETsAAKLK\t"ENSP00000295797, 0.7192 ENSP00000314214"\n+SSRP1 (ENSP00000278412)\t444\tCDK7\t3.6159\tKIN\tCDK7\t0.0186\tENSP00000278412\t0.903\tSSRP1\tDEYADsDEDQH\t"ENSP00000256443, 0.7224 ENSP00000278412"\n+SSRP1 (ENSP00000278412)\t444\tCK1alpha\t3.3573\tKIN\tCK1_group\t0.1264\tENSP00000278412\t0.404\tSSRP1\tDEYADsDEDQH\t"ENSP00000261798, 0.3232 ENSP00000278412"\n+VAMP3 (ENSP00000054666)\t11\tPKCalpha\t3.0633\tKIN\tPKC_group\t0.4633\tENSP00000054666\t0.3277\tVAMP3\tTAATGsNRRLQ\t"ENSP00000284384, 0.6232 ENSP00000359025, 0.6352 ENSP00000054666"\n+SSRP1 (ENSP00000278412)\t659\tPKCalpha\t3.0524\tKIN\tPKC_group\t0.4345\tENSP00000278412\t0.237\tSSRP1\tRQLSEsFKSKE\t"ENSP00000284384, 0.4552 ENSP00000351885, 0.76 ENSP00000278412"\n+VAMP2 (ENSP00000314214)\t61\tPKCiota\t2.7785\tKIN\tPKC_group\t0.0463\tENSP00000314214\t0.899\tVAMP2\tRDQKLsELDDR\t"ENSP00000295797, 0.7192 ENSP00000314214"\n+SSRP1 (ENSP00000278412)\t659\tCDK7\t2.5961\tKIN\tCDK7\t0.0104\tENSP00000278412\t0.903\tSSRP1\tRQLSEsFKSKE\t"ENSP00000256443, 0.7224 
ENSP00000278412"\n+SSRP1 (ENSP00000278412)\t667\tCDK7\t2.5961\tKIN\tCDK7\t0.0124\tENSP00000278412\t0.903\tSSRP1\tSKEFVsSDESS\t"ENSP00000256443, 0.7224 ENSP00000278412"\n+ERC1 (ENSP00000354158)\t191\tIKKbeta\t2.571\tKIN\tIKKalpha_IKKbeta_group\t0.031\tENSP00000354158\t0.946\tERC1\tIKTFWsPELKK\t"ENSP00000339151, 0.7568 ENSP00000354158"\n+E'..b'86829)\t928\tEphA7\t2.7878\tKIN\tEph_group\t0.0482\tENSP00000281821\t0.904\tEPHA4\tIKMDRyKDNFT\t"ENSP00000358309, 0.7232 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t779\tEphA6\t2.7874\tKIN\tEph_group\t0.0482\tENSP00000281821\t0.903\tEPHA4\tDPEAAyTTRGG\t"ENSP00000374323, 0.7224 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t798\tEphA6\t2.7874\tKIN\tEph_group\t0.0482\tENSP00000281821\t0.903\tEPHA4\tPEAIAyRKFTS\t"ENSP00000374323, 0.7224 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t928\tEphA6\t2.7874\tKIN\tEph_group\t0.0482\tENSP00000281821\t0.903\tEPHA4\tIKMDRyKDNFT\t"ENSP00000374323, 0.7224 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t596\tFgr\t2.7541\tKIN\tSrc_group\t0.036\tENSP00000281821\t0.902\tEPHA4\tQGVRTyVDPFT\t"ENSP00000363115, 0.7216 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t596\tYes\t2.7541\tKIN\tSrc_group\t0.036\tENSP00000281821\t0.902\tEPHA4\tQGVRTyVDPFT\t"ENSP00000324740, 0.7216 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t596\tBLK\t2.7532\tKIN\tSrc_group\t0.036\tENSP00000281821\t0.9\tEPHA4\tQGVRTyVDPFT\t"ENSP00000259089, 0.72 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t798\tFgr\t2.7477\tKIN\tSrc_group\t0.0263\tENSP00000281821\t0.902\tEPHA4\tPEAIAyRKFTS\t"ENSP00000363115, 0.7216 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t798\tYes\t2.7477\tKIN\tSrc_group\t0.0263\tENSP00000281821\t0.902\tEPHA4\tPEAIAyRKFTS\t"ENSP00000324740, 0.7216 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t928\tFgr\t2.7472\tKIN\tSrc_group\t0.0257\tENSP00000281821\t0.902\tEPHA4\tIKMDRyKDNFT\t"ENSP00000363115, 0.7216 ENSP00000281821"\n+EPHA4 
(ENSP00000386829)\t928\tYes\t2.7472\tKIN\tSrc_group\t0.0257\tENSP00000281821\t0.902\tEPHA4\tIKMDRyKDNFT\t"ENSP00000324740, 0.7216 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t798\tBLK\t2.7468\tKIN\tSrc_group\t0.0263\tENSP00000281821\t0.9\tEPHA4\tPEAIAyRKFTS\t"ENSP00000259089, 0.72 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t928\tBLK\t2.7463\tKIN\tSrc_group\t0.0257\tENSP00000281821\t0.9\tEPHA4\tIKMDRyKDNFT\t"ENSP00000259089, 0.72 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t596\tHCK\t2.7098\tKIN\tSrc_group\t0.036\tENSP00000281821\t0.899\tEPHA4\tQGVRTyVDPFT\t"ENSP00000365012, 0.7192 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t602\tHCK\t2.7098\tKIN\tSrc_group\t0.0705\tENSP00000281821\t0.899\tEPHA4\tVDPFTyEDPNQ\t"ENSP00000365012, 0.7192 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t779\tHCK\t2.7098\tKIN\tSrc_group\t0.0583\tENSP00000281821\t0.899\tEPHA4\tDPEAAyTTRGG\t"ENSP00000365012, 0.7192 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t798\tHCK\t2.7098\tKIN\tSrc_group\t0.0263\tENSP00000281821\t0.899\tEPHA4\tPEAIAyRKFTS\t"ENSP00000365012, 0.7192 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t928\tHCK\t2.7098\tKIN\tSrc_group\t0.0257\tENSP00000281821\t0.899\tEPHA4\tIKMDRyKDNFT\t"ENSP00000365012, 0.7192 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t780\tPKCalpha\t2.5567\tKIN\tPKC_group\t0.3699\tENSP00000281821\t0.401\tEPHA4\tPEAAYtTRGGK\t"ENSP00000284384, 0.7464 ENSP00000244007, 0.7784 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t780\tPKCbeta\t2.4948\tKIN\tPKC_group\t0.3699\tENSP00000281821\t0.3759\tEPHA4\tPEAAYtTRGGK\t"ENSP00000305355, 0.7464 ENSP00000244007, 0.7296 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t602\tAbl\t2.1653\tKIN\tAbl_group\t0.0221\tENSP00000281821\t0.806\tEPHA4\tVDPFTyEDPNQ\t"ENSP00000361423, 0.6448 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t798\tAbl\t2.1376\tKIN\tAbl_group\t0.0221\tENSP00000281821\t0.806\tEPHA4\tPEAIAyRKFTS\t"ENSP00000361423, 0.6448 ENSP00000281821"\n+EPHA4 
(ENSP00000386829)\t928\tAbl\t2.1099\tKIN\tAbl_group\t0.0221\tENSP00000281821\t0.806\tEPHA4\tIKMDRyKDNFT\t"ENSP00000361423, 0.6448 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t602\tEphB6\t2.04\tKIN\tEph_group\t0.1443\tENSP00000281821\t0.5258\tEPHA4\tVDPFTyEDPNQ\t"ENSP00000376684, 0.7976 ENSP00000226091, 0.7976 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t596\tEphB6\t2.0393\tKIN\tEph_group\t0.1442\tENSP00000281821\t0.5258\tEPHA4\tQGVRTyVDPFT\t"ENSP00000376684, 0.7976 ENSP00000226091, 0.7976 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t602\tEphB3\t2.0282\tKIN\tEph_group\t0.1443\tENSP00000281821\t0.5231\tEPHA4\tVDPFTyEDPNQ\t"ENSP00000332118, 0.7976 ENSP00000226091, 0.7936 ENSP00000281821"\n+EPHA4 (ENSP00000386829)\t596\tEphB3\t2.0276\tKIN\tEph_group\t0.1442\tENSP00000281821\t0.5231\tEPHA4\tQGVRTyVDPFT\t"ENSP00000332118, 0.7976 ENSP00000226091, 0.7936 ENSP00000281821"\n'
b
diff -r 000000000000 -r dbff53e6f75f test-data/test_regulatory_sites.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_regulatory_sites.tabular Mon Jul 11 19:22:25 2022 +0000
b
@@ -0,0 +1,9 @@
+32017
+"PhosphoSitePlus(R) (PSP) was created by Cell Signaling Technology Inc. It is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License. When using PSP data or analyses in printed publications or in online resources, the following acknowledgements must be included: (a) the words ""PhosphoSitePlus(R), www.phosphosite.org"" must be included at appropriate places in the text or webpage, and (b) the following citation must be included in the bibliography: ""Hornbeck PV, Zhang B, Murray B, Kornhauser JM, Latham V, Skrzypek E PhosphoSitePlus, 2014: mutations, PTMs and recalibrations. Nucleic Acids Res. 2015 43:D512-20. PMID: 25514926."""
+
+GENE PROTEIN PROT_TYPE ACC_ID GENE_ID HU_CHR_LOC ORGANISM MOD_RSD SITE_GRP_ID SITE_+/-7_AA DOMAIN ON_FUNCTION ON_PROCESS ON_PROT_INTERACT ON_OTHER_INTERACT PMIDs LT_LIT MS_LIT MS_CST NOTES
+ENSA ENSA "Inhibitor; Protein phosphatase, regulatory subunit" O43768 2029 1q21.3 human S109-p 477819 DLPQRKSsLVTSKLA Endosulfine "molecular association, regulation; protein conformation" SNCA(DISRUPTS) 18973346 1 34 50
+VAMP8 VAMP8 "Membrane protein, integral; Vesicle" Q9BV40 8673 2p11.2 human S55-p 12738929 TEDLEATsEHFKTTS Synaptobrevin "activity, inhibited" 27402227 1 8 0 "abolish function in SNARE complex during mast cell secretion, reduces in vitro ensemble vesicle fusion"
+ENSA ENSA "Inhibitor; Protein phosphatase, regulatory subunit" O43768 2029 1q21.3 human S67-p 455934 KGQKYFDsGDYNMAK Endosulfine "molecular association, regulation" cell cycle regulation PPP2CA(INDUCES) 27889260 3 56 47
+Vamp4 VAMP4 "Membrane protein, integral; Vesicle" O70480 53330 1 H2.1|1 70.29 cM mouse S30-p 454285 RNLLEDDsDEEEDFF "molecular association, regulation; intracellular localization" PACS-1(INDUCES) 14608369 1 64 10
+EPHA4 EphA4 "EC 2.7.10.1; KINASE; Kinase, protein; Membrane protein, integral; Protein kinase, TK; Protein kinase, tyrosine (receptor)" P54764 2043 2q36.1 human Y602-p 450857 TYVDPFTyEDPNQAV EphA2_TM "molecular association, regulation" Fyn(INDUCES) 8622893 6 16 155
b
diff -r 000000000000 -r dbff53e6f75f test-data/test_swissprot.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_swissprot.fasta Mon Jul 11 19:22:25 2022 +0000
b
b'@@ -0,0 +1,72 @@\n+>sp|Q9Y3B9|RRP15_HUMAN RRP15-like protein OS=Homo sapiens OX=9606 GN=RRP15 PE=1 SV=2\n+MAAAAPDSRVSEEENLKKTPKKKMKMVTGAVASVLEDEATDTSDSEGSCGSEKDHFYSDDDAIEADSEGDAEPCDKENENDGESSVGTNMGWADAMAKVLNKKTPESKPTILVKNKKLEKEKEKLKQERLEKIKQRDKRLEWEMMCRVKPDVVQDKETERNLQRIATRGVVQLFNAVQKHQKNVDEKVKEAGSSMRKRAKLISTVSKKDFISVLRGMDGSTNETASSRKKPKAKQTEVKSEEGPGWTILRDDFMMGASMKDWDKESDGPDDSRPESASDSDT\n+>sp|Q08945|SSRP1_HUMAN FACT complex subunit SSRP1 OS=Homo sapiens OX=9606 GN=SSRP1 PE=1 SV=1\n+MAETLEFNDVYQEVKGSMNDGRLRLSRQGIIFKNSKTGKVDNIQAGELTEGIWRRVALGHGLKLLTKNGHVYKYDGFRESEFEKLSDFFKTHYRLELMEKDLCVKGWNWGTVKFGGQLLSFDIGDQPVFEIPLSNVSQCTTGKNEVTLEFHQNDDAEVSLMEVRFYVPPTQEDGVDPVEAFAQNVLSKADVIQATGDAICIFRELQCLTPRGRYDIRIYPTFLHLHGKTFDYKIPYTTVLRLFLLPHKDQRQMFFVISLDPPIKQGQTRYHFLILLFSKDEDISLTLNMNEEEVEKRFEGRLTKNMSGSLYEMVSRVMKALVNRKITVPGNFQGHSGAQCITCSYKASSGLLYPLERGFIYVHKPPVHIRFDEISFVNFARGTTTTRSFDFEIETKQGTQYTFSSIEREEYGKLFDFVNAKKLNIKNRGLKEGMNPSYDEYADSDEDQHDAYLERMKEEGKIREENANDSSDDSGEETDESFNPGEEEEDVAEEFDSNASASSSSNEGDSDRDEKKRKQLKKAKMAKDRKSRKKPVEVKKGKDPNAPKRPMSAYMLWLNASREKIKSDHPGISITDLSKKAGEIWKGMSKEKKEEWDRKAEDARRDYEKAMKEYEGGRGESSKRDKSKKKKKVKVKMEKKSTPSRGSSSKSSSRQLSESFKSKEFVSSDESSSGENKSKKKRRRSEDSEEEELASTPPSSEDSASGSDE\n+>sp|Q96SA4|SERC2_HUMAN Serine incorporator 2 OS=Homo sapiens OX=9606 GN=SERINC2 PE=2 SV=3\n+MGACLGACSLLSCASCLCGSAPCILCSCCPASRNSTVSRLIFTFFLFLGVLVSIIMLSPGVESQLYKLPWVCEEGAGIPTVLQGHIDCGSLLGYRAVYRMCFATAAFFFFFTLLMLCVSSSRDPRAAIQNGFWFFKFLILVGLTVGAFYIPDGSFTNIWFYFGVVGSFLFILIQLVLLIDFAHSWNQRWLGKAEECDSRAWYAGLFFFTLLFYLLSIAAVALMFMYYTEPSGCHEGKVFISLNLTFCVCVSIAAVLPKVQDAQPNSGLLQASVITLYTMFVTWSALSSIPEQKCNPHLPTQLGNETVVAGPEGYETQWWDAPSIVGLIIFLLCTLFISLRSSDHRQVNSLMQTEECPPMLDATQQQQQVAACEGRAFDNEQDGVTYSYSFFHFCLVLASLHVMMTLTNWYKPGETRKMISTWTAVWVKICASWAGLLLYLWTLVAPLLLRNRDFS\n+>sp|Q96SA4-2|SERC2_HUMAN Isoform 2 of Serine incorporator 2 OS=Homo sapiens OX=9606 
GN=SERINC2\n+MGAEGAPDFLSCPRVRRASCLCGSAPCILCSCCPASRNSTVSRLIFTFFLFLGVLVSIIMLSPGVESQLYKLPWVCEEGAGIPTVLQGHIDCGSLLGYRAVYRMCFATAAFFFFFTLLMLCVSSSRDPRAAIQNGFWFFKFLILVGLTVGAFYIPDGSFTNIWFYFGVVGSFLFILIQLVLLIDFAHSWNQRWLGKAEECDSRAWYAGLFFFTLLFYLLSIAAVALMFMYYTEPSGCHEGKVFISLNLTFCVCVSIAAVLPKVQDAQPNSGLLQASVITLYTMFVTWSALSSIPEQKCNPHLPTQLGNETVVAGPEGYETQWWDAPSIVGLIIFLLCTLFISLRSSDHRQVNSLMQTEECPPMLDATQQQQQVAACEGRAFDNEQDGVTYSYSFFHFCLVLASLHVMMTLTNWYKPGETRKMISTWTAVWVKICASWAGLLLYLWTLVAPLLLRNRDFS\n+>sp|Q96SA4-3|SERC2_HUMAN Isoform 3 of Serine incorporator 2 OS=Homo sapiens OX=9606 GN=SERINC2\n+MRSMRLREEESPGPSHTASCLCGSAPCILCSCCPASRNSTVSRLIFTFFLFLGVLVSIIMLSPGVESQLYKLPWVCEEGAGIPTVLQGHIDCGSLLGYRAVYRMCFATAAFFFFFTLLMLCVSSSRDPRAAIQNGFWFFKFLILVGLTVGAFYIPDGSFTNIWFYFGVVGSFLFILIQLVLLIDFAHSWNQRWLGKAEECDSRAWYAGLFFFTLLFYLLSIAAVALMFMYYTEPSGCHEGKVFISLNLTFCVCVSIAAVLPKVQDAQPNSGLLQASVITLYTMFVTWSALSSIPEQKCNPHLPTQLGNETVVAGPEGYETQWWDAPSIVGLIIFLLCTLFISLRSSDHRQVNSLMQTEECPPMLDATQQQQQVAACEGRAFDNEQDGVTYSYSFFHFCLVLASLHVMMTLTNWYKPGETRKMISTWTAVWVKICASWAGLLLYLWTLVAPLLLRNRDFS\n+>sp|Q96SA4-4|SERC2_HUMAN Isoform 4 of Serine incorporator 2 OS=Homo sapiens OX=9606 GN=SERINC2\n+MDGRMMRSMRLREEESPGPSHTASCLCGSAPCILCSCCPASRNSTVSRLIFTFFLFLGVLVSIIMLSPGVESQLYKLPWVCEEGAGIPTVLQGHIDCGSLLGYRAVYRMCFATAAFFFFFTLLMLCVSSSRDPRAAIQNGFWFFKFLILVGLTVGAFYIPDGSFTNIWFYFGVVGSFLFILIQLVLLIDFAHSWNQRWLGKAEECDSRAWYAGLFFFTLLFYLLSIAAVALMFMYYTEPSGCHEGKVFISLNLTFCVCVSIAAVLPKVQDAQPNSGLLQASVITLYTMFVTWSALSSIPEQKCNPHLPTQLGNETVVAGPEGYETQWWDAPSIVGLIIFLLCTLFISLRSSDHRQVNSLMQTEECPPMLDATQQQQQVAACEGRAFDNEQDGVTYSYSFFHFCLVLASLHVMMTLTNWYKPGETRKMISTWTAVWVKICASWAGLLLYLWTLVAPLLLRNRDFS\n+>sp|Q9NRX5|SERC1_HUMAN Serine incorporator 1 OS=Homo sapiens OX=9606 GN=SERINC1 PE=1 
SV=1\n+MGSVLGLCSMASWIPCLCGSAPCLLCRCCPSGNNSTVTRLIYALFLLVGVCVACVMLIPGMEEQLNKIPGFCENEKGVVPCNILVGYKAVYRLCFGLAMFYLLLSLLMIKVKSSSDPRAAVHNGFWFFKFAAAIAIIIGAFFIPEGTFTTVWFYVGMAGAFCFILIQLVLLIDFAHSWNESWVEKMEEGNSRCWYAALLSATALNYLLSLVAIVLFFVYYTHPASCSENKAFISVNMLLCVGASVMSILPKIQESQPRSGLLQSSVITVYTMYLTWSAMTNEPETNCNPSLLSIIGYNTTSTVPKEGQSVQWWHAQGIIGLILFLLCVFYSSIRTSNNSQVNKLTLTSDESTLIEDGGARSDGSLEDGDDVHRAVDNERDGVTYSYSFFHFMLFLASLYIMMTLTNWYRYEPSREMKSQWTAVWVKISSSWIGIVLYVWTLVAPLVLTNRDFD\n+>sp|O43768|ENSA_HUMAN Alpha-endosulf'..b'ociated membrane protein 4 OS=Homo sapiens OX=9606 GN=VAMP4 PE=1 SV=2\n+MPPKFKRHLNDDDVTGSVKSERRNLLEDDSDEEEDFFLRGPSGPRFGPRNDKIKHVQNQVDEVIDVMQENITKVIERGERLDELQDKSESLSDNATAFSNRSKQLRRQMWWRGCKIKAIMALVAAILLLVIIILIVMKYRT\n+>sp|O75379-2|VAMP4_HUMAN_Isoform 2 of Vesicle-associated membrane protein 4 OS=Homo sapiens OX=9606 GN=VAMP4\n+MPPKFKRHLNDDDVTGSVKSERRNLLEDDSDEEEDFFLGPSGPRFGPRNDKIKHVQNQVDEVIDVMQENITKVIERGERLDELQDKSESLSDNATAFSNRSKQLRRQMWWRGCKIKAIMALVAAILLLVIIILIVMKYRT\n+>sp|O95183|VAMP5_HUMAN_Vesicle-associated membrane protein 5 OS=Homo sapiens OX=9606 GN=VAMP5 PE=1 SV=1\n+MAGIELERCQQQANEVTEIMRNNFGKVLERGVKLAELQQRSDQLLDMSSTFNKTTQNLAQKKCWENIRYRICVGLVVVGVLLIILIVLLVVFLPQSSDSSSAPRTQDAGIASGPGN\n+>sp|P51809|VAMP7_HUMAN_Vesicle-associated membrane protein 7 OS=Homo sapiens OX=9606 GN=VAMP7 PE=1 SV=3\n+MAILFAVVARGTTILAKHAWCGGNFLEVTEQILAKIPSENNKLTYSHGNYLFHYICQDRIVYLCITDDDFERSRAFNFLNEIKKRFQTTYGSRAQTALPYAMNSEFSSVLAAQLKHHSENKGLDKVMETQAQVDELKGIMVRNIDLVAQRGERLELLIDKTENLVDSSVTFKTTSRNLARAMCMKNLKLTIIIIIVSIVFIYIIVSPLCGGFTWPSCVKK\n+>sp|P51809-2|VAMP7_HUMAN_Isoform 2 of Vesicle-associated membrane protein 7 OS=Homo sapiens OX=9606 GN=VAMP7\n+MAILFAVVARGTTILAKHAWCGGNFLEVTEQILAKIPSENNKLTYSHGNYLFHYICQDRIVYLCITDDDFERSRAFNFLNEIKKRFQTTYGSRAQTALPYAMNSEFSSVLAAQLKHHSENKGLDKVMETQAQVDELKGIMVRNIVCHLQNYQQKSCSSHVYEEPQAHYYHHHRINCVHLYHCFTSLWWIYMAKLCEEIGKKKLPLTKDMREQGVKSNPCDSSLSHTDRWYLPVSSTLFSLFKILFHASRFIFVLSTSLFL\n+>sp|P51809-3|VAMP7_HUMAN_Isoform 3 of Vesicle-associated membrane protein 7 OS=Homo sapiens 
OX=9606 GN=VAMP7\n+MAILFAVVARGTTILAKHAWCGGNFLEDFERSRAFNFLNEIKKRFQTTYGSRAQTALPYAMNSEFSSVLAAQLKHHSENKGLDKVMETQAQVDELKGIMVRNIDLVAQRGERLELLIDKTENLVDSSVTFKTTSRNLARAMCMKNLKLTIIIIIVSIVFIYIIVSPLCGGFTWPSCVKK\n+>sp|Q9BV40|VAMP8_HUMAN_Vesicle-associated membrane protein 8 OS=Homo sapiens OX=9606 GN=VAMP8 PE=1 SV=1\n+MEEASEGGGNDRVRNLQSEVEGVKNIMTQNVERILARGENLEHLRNKTEDLEATSEHFKTTSQKVARKFWWKNVKMIVLICVIVFIIILFIVLFATGAFS\n+>sp|P54764|EPHA4_HUMAN Ephrin type-A receptor 4 OS=Homo sapiens OX=9606 GN=EPHA4 PE=1 SV=1\n+MAGIFYFALFSCLFGICDAVTGSRVYPANEVTLLDSRSVQGELGWIASPLEGGWEEVSIMDEKNTPIRTYQVCNVMEPSQNNWLRTDWITREGAQRVYIEIKFTLRDCNSLPGVMGTCKETFNLYYYESDNDKERFIRENQFVKIDTIAADESFTQVDIGDRIMKLNTEIRDVGPLSKKGFYLAFQDVGACIALVSVRVFYKKCPLTVRNLAQFPDTITGADTSSLVEVRGSCVNNSEEKDVPKMYCGADGEWLVPIGNCLCNAGHEERSGECQACKIGYYKALSTDATCAKCPPHSYSVWEGATSCTCDRGFFRADNDAASMPCTRPPSAPLNLISNVNETSVNLEWSSPQNTGGRQDISYNVVCKKCGAGDPSKCRPCGSGVHYTPQQNGLKTTKVSITDLLAHTNYTFEIWAVNGVSKYNPNPDQSVSVTVTTNQAAPSSIALVQAKEVTRYSVALAWLEPDRPNGVILEYEVKYYEKDQNERSYRIVRTAARNTDIKGLNPLTSYVFHVRARTAAGYGDFSEPLEVTTNTVPSRIIGDGANSTVLLVSVSGSVVLVVILIAAFVISRRRSKYSKAKQEADEEKHLNQGVRTYVDPFTYEDPNQAVREFAKEIDASCIKIEKVIGVGEFGEVCSGRLKVPGKREICVAIKTLKAGYTDKQRRDFLSEASIMGQFDHPNIIHLEGVVTKCKPVMIITEYMENGSLDAFLRKNDGRFTVIQLVGMLRGIGSGMKYLSDMSYVHRDLAARNILVNSNLVCKVSDFGMSRVLEDDPEAAYTTRGGKIPIRWTAPEAIAYRKFTSASDVWSYGIVMWEVMSYGERPYWDMSNQDVIKAIEEGYRLPPPMDCPIALHQLMLDCWQKERSDRPKFGQIVNMLDKLIRNPNSLKRTGTESSRPNTALLDPSSPEFSAVVSVGDWLQAIKMDRYKDNFTAAGYTTLEAVVHVNQEDLARIGITAITHQNKILSSVQAMRTQMQQMHGRMVPV\n+>sp|P54764-2|EPHA4_HUMAN Isoform 2 of Ephrin type-A receptor 4 OS=Homo sapiens OX=9606 
GN=EPHA4\n+MKWEEVSIMDEKNTPIRTYQVCNVMEPSQNNWLRTDWITREGAQRVYIEIKFTLRDCNSLPGVMGTCKETFNLYYYESDNDKERFIRENQFVKIDTIAADESFTQVDIGDRIMKLNTEIRDVGPLSKKGFYLAFQDVGACIALVSVRVFYKKCPLTVRNLAQFPDTITGADTSSLVEVRGSCVNNSEEKDVPKMYCGADGEWLVPIGNCLCNAGHEERSGECQACKIGYYKALSTDATCAKCPPHSYSVWEGATSCTCDRGFFRADNDAASMPCTRPPSAPLNLISNVNETSVNLEWSSPQNTGGRQDISYNVVCKKCGAGDPSKCRPCGSGVHYTPQQNGLKTTKVSITDLLAHTNYTFEIWAVNGVSKYNPNPDQSVSVTVTTNQAAPSSIALVQAKEVTRYSVALAWLEPDRPNGVILEYEVKYYEKDQNERSYRIVRTAARNTDIKGLNPLTSYVFHVRARTAAGYGDFSEPLEVTTNTVPSRIIGDGANSTVLLVSVSGSVVLVVILIAAFVISRRRSKYSKAKQEADEEKHLNQGVRTYVDPFTYEDPNQAVREFAKEIDASCIKIEKVIGVGEFGEVCSGRLKVPGKREICVAIKTLKAGYTDKQRRDFLSEASIMGQFDHPNIIHLEGVVTKCKPVMIITEYMENGSLDAFLRKNDGRFTVIQLVGMLRGIGSGMKYLSDMSYVHRDLAARNILVNSNLVCKVSDFGMSRVLEDDPEAAYTTRGGKIPIRWTAPEAIAYRKFTSASDVWSYGIVMWEVMSYGERPYWDMSNQDVIKAIEEGYRLPPPMDCPIALHQLMLDCWQKERSDRPKFGQIVNMLDKLIRNPNSLKRTGTESSRPNTALLDPSSPEFSAVVSVGDWLQAIKMDRYKDNFTAAGYTTLEAVVHVNQEDLARIGITAITHQNKILSSVQAMRTQMQQMHGRMVPV\n'
b
diff -r 000000000000 -r dbff53e6f75f workflow/ppenrich_suite_wf.ga
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/workflow/ppenrich_suite_wf.ga Mon Jul 11 19:22:25 2022 +0000
[
b'@@ -0,0 +1,904 @@\n+{\n+    "a_galaxy_workflow": "true",\n+    "annotation": "phoshpoproteomic enrichment data pre-processing and ANOVA",\n+    "creator": [\n+        {\n+            "class": "Person",\n+            "identifier": "0000-0002-2882-0508",\n+            "name": "Art Eschenlauer"\n+        }\n+    ],\n+    "format-version": "0.1",\n+    "license": "MIT",\n+    "name": "ppenrich_suite_wf",\n+    "steps": {\n+        "0": {\n+            "annotation": "The Phospho (STY)Sites.txt file produced by MaxQuant (found in the txt folder).",\n+            "content_id": null,\n+            "errors": null,\n+            "id": 0,\n+            "input_connections": {},\n+            "inputs": [\n+                {\n+                    "description": "The Phospho (STY)Sites.txt file produced by MaxQuant (found in the txt folder).",\n+                    "name": "Phospho (STY)Sites.txt"\n+                }\n+            ],\n+            "label": "Phospho (STY)Sites.txt",\n+            "name": "Input dataset",\n+            "outputs": [],\n+            "position": {\n+                "bottom": 290.16561126708984,\n+                "height": 82.1624984741211,\n+                "left": 515.090576171875,\n+                "right": 715.0874328613281,\n+                "top": 208.00311279296875,\n+                "width": 199.99685668945312,\n+                "x": 515.090576171875,\n+                "y": 208.00311279296875\n+            },\n+            "tool_id": null,\n+            "tool_state": "{\\"optional\\": false, \\"format\\": [\\"tabular\\"], \\"tag\\": \\"\\"}",\n+            "tool_version": null,\n+            "type": "data_input",\n+            "uuid": "c366566c-2a61-4918-b4ea-c1f565c4f2ca",\n+            "workflow_outputs": []\n+        },\n+        "1": {\n+            "annotation": "THIS IS pST BY DEFAULT.  
Change if your data are enriched for pY.",\n+            "content_id": null,\n+            "errors": null,\n+            "id": 1,\n+            "input_connections": {},\n+            "inputs": [\n+                {\n+                    "description": "THIS IS pST BY DEFAULT.  Change if your data are enriched for pY.",\n+                    "name": "enrichmentType"\n+                }\n+            ],\n+            "label": "enrichmentType",\n+            "name": "Input parameter",\n+            "outputs": [],\n+            "position": {\n+                "bottom": 375.7687225341797,\n+                "height": 61.76249694824219,\n+                "left": 531.1312255859375,\n+                "right": 731.1280822753906,\n+                "top": 314.0062255859375,\n+                "width": 199.99685668945312,\n+                "x": 531.1312255859375,\n+                "y": 314.0062255859375\n+            },\n+            "tool_id": null,\n+            "tool_state": "{\\"restrictions\\": [\\"pST\\", \\"pY\\"], \\"parameter_type\\": \\"text\\", \\"optional\\": false}",\n+            "tool_version": null,\n+            "type": "parameter_input",\n+            "uuid": "5f31b776-9e2b-4f3a-a9e6-886ac2062e15",\n+            "workflow_outputs": [\n+                {\n+                    "label": null,\n+                    "output_name": "output",\n+                    "uuid": "1ff7eb95-9dd3-4006-ab0b-03e4f84a1aa5"\n+                }\n+            ]\n+        },\n+        "2": {\n+            "annotation": "Pattern matching columns that have peptide intensity data (PERL-compatible regular expression matching column label)",\n+            "content_id": null,\n+            "errors": null,\n+            "id": 2,\n+            "input_connections": {},\n+            "inputs": [\n+                {\n+                    "description": "Pattern matching columns that have peptide intensity data (PERL-compatible regular expression matching column label)",\n+                    
"name": "Intensity-column pattern"\n+                }\n+            ],\n+            "label": "Intensity-column pattern",\n+            "name": "Input parameter",\n+            "outputs": [],\n+            "position": {\n+                "bottom": 576.2812118530273,\n+            '..b'              "output_name": "output"\n+                }\n+            },\n+            "inputs": [\n+                {\n+                    "description": "runtime parameter for tool MaxQuant Phosphopeptide ANOVA",\n+                    "name": "alpha_file"\n+                },\n+                {\n+                    "description": "runtime parameter for tool MaxQuant Phosphopeptide ANOVA",\n+                    "name": "input_file"\n+                }\n+            ],\n+            "label": "MaxQuant Phosphopeptide ANOVA randomly imputed",\n+            "name": "MaxQuant Phosphopeptide ANOVA",\n+            "outputs": [\n+                {\n+                    "name": "imputed_data_file",\n+                    "type": "tabular"\n+                },\n+                {\n+                    "name": "imp_qn_lt_file",\n+                    "type": "tabular"\n+                },\n+                {\n+                    "name": "report_file",\n+                    "type": "pdf"\n+                }\n+            ],\n+            "position": {\n+                "bottom": 2106.0374145507812,\n+                "height": 367.51873779296875,\n+                "left": 1399.153076171875,\n+                "right": 1599.1499328613281,\n+                "top": 1738.5186767578125,\n+                "width": 199.99685668945312,\n+                "x": 1399.153076171875,\n+                "y": 1738.5186767578125\n+            },\n+            "post_job_actions": {\n+                "RenameDatasetActionimp_qn_lt_file": {\n+                    "action_arguments": {\n+                        "newname": "#{input_file}.intensities_randomly-imputed_QN_LT"\n+                    },\n+                   
 "action_type": "RenameDatasetAction",\n+                    "output_name": "imp_qn_lt_file"\n+                },\n+                "RenameDatasetActionimputed_data_file": {\n+                    "action_arguments": {\n+                        "newname": "#{input_file}.intensities_randomly-imputed"\n+                    },\n+                    "action_type": "RenameDatasetAction",\n+                    "output_name": "imputed_data_file"\n+                },\n+                "RenameDatasetActionreport_file": {\n+                    "action_arguments": {\n+                        "newname": "#{input_file}.intensities_randomly-imputed_report"\n+                    },\n+                    "action_type": "RenameDatasetAction",\n+                    "output_name": "report_file"\n+                }\n+            },\n+            "tool_id": "mqppep_anova",\n+            "tool_state": "{\\"alpha_file\\": {\\"__class__\\": \\"RuntimeValue\\"}, \\"imputation\\": {\\"imputation_method\\": \\"random\\", \\"__current_case__\\": 3, \\"meanPercentile\\": \\"1\\", \\"sdPercentile\\": \\"1.0\\"}, \\"input_file\\": {\\"__class__\\": \\"RuntimeValue\\"}, \\"intensity_column_regex\\": \\"^Intensity[^_]\\", \\"sample_grouping_regex\\": {\\"__class__\\": \\"ConnectedValue\\"}, \\"sample_names_regex\\": {\\"__class__\\": \\"ConnectedValue\\"}, \\"__page__\\": null, \\"__rerun_remap_job_id__\\": null}",\n+            "tool_version": null,\n+            "type": "tool",\n+            "uuid": "e71562a7-c941-429d-99a8-e14721df3670",\n+            "workflow_outputs": [\n+                {\n+                    "label": "intensities_randomly-imputed",\n+                    "output_name": "imputed_data_file",\n+                    "uuid": "e27c540b-07d0-496f-8b11-b4c1472dce12"\n+                },\n+                {\n+                    "label": "intensities_randomly-imputed_report",\n+                    "output_name": "report_file",\n+                    "uuid": 
"abe2dbf4-956d-4625-a0e1-ad1c6c988a7c"\n+                },\n+                {\n+                    "label": "intensities_randomly-imputed_QN_LT",\n+                    "output_name": "imp_qn_lt_file",\n+                    "uuid": "cb5b1d8f-905b-453a-a479-507e01a8f8f7"\n+                }\n+            ]\n+        }\n+    },\n+    "tags": [\n+        "ppenrich"\n+    ],\n+    "uuid": "234db768-520c-4eaa-a5be-061e3d858682",\n+    "version": 2\n+}\n'