Repository 'galaxy_proteomics'
hg clone https://toolshed.g2.bx.psu.edu/repos/labis-app/galaxy_proteomics

Changeset 0:ba070efb6f78 (2018-07-03)
Next changeset 1:f3fa21cda5f5 (2018-08-10)
Commit message:
planemo upload commit 13e72e84c523bda22bda792bbebf4720d28542d5-dirty
added:
cat_2.pl
cat_2.xml
read_util.R
saida.csv
selectproteinids.pl
selectproteinids.xml
t-test.R
t-test.xml
test-data/Galaxy134-[Concatenate_on_data_132_and_data_131].fasta
test-data/f1.fasta
test-data/f2.fasta
test-data/proteinGroups.csv
test-data/proteinGroups_csv_filtered_with_contaminants.tabular
test-data/proteinGroups_csv_filtered_without_contaminants.tabular
write_util.R
b
diff -r 000000000000 -r ba070efb6f78 cat_2.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_2.pl Tue Jul 03 17:34:13 2018 -0400
[
@@ -0,0 +1,22 @@
+#!/usr/bin/perl -w
+# Developed by Flavia Vischi Winck and Mateus Bellomo_2014 (flaviavw[at]gmail.com)
+# usage : perl cat_2.pl <FASTA file1> <FASTA file2> <outputfile>
+#
+# Writes the contents of <FASTA file1> followed by the contents of
+# <FASTA file2> to <outputfile>. If an input does not end with a newline,
+# one is appended so that the first line of the second file can never be
+# glued onto the last sequence line of the first file.
+
+use strict;
+use warnings;
+
+die "usage: perl cat_2.pl <FASTA file1> <FASTA file2> <outputfile>\n"
+    if @ARGV < 3;
+
+my ($first_path, $second_path, $out_path) = @ARGV;
+
+# Both inputs are read completely before the output is opened, so a missing
+# or unreadable input never leaves a truncated output file behind.
+my @records = (read_fasta_lines($first_path), read_fasta_lines($second_path));
+
+open my $out_fh, '>', $out_path
+    or die "Cannot open '$out_path' for writing: $!";
+print {$out_fh} @records
+    or die "Cannot write to '$out_path': $!";
+# Buffered write errors only surface at close, so the result is checked.
+close $out_fh
+    or die "Cannot close '$out_path': $!";
+
+# Returns all lines of the file at $path. The last line is newline-terminated
+# if the file itself did not end with a newline.
+sub read_fasta_lines {
+    my ($path) = @_;
+
+    open my $in_fh, '<', $path
+        or die "Cannot open '$path' for reading: $!";
+    my @lines = <$in_fh>;
+    close $in_fh
+        or die "Cannot close '$path': $!";
+
+    $lines[-1] .= "\n" if @lines && $lines[-1] !~ /\n\z/;
+
+    return @lines;
+}
+
b
diff -r 000000000000 -r ba070efb6f78 cat_2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cat_2.xml Tue Jul 03 17:34:13 2018 -0400
[
@@ -0,0 +1,34 @@
+<tool id="cat_2" name="Concatenate" version="0.1.0">
+    <description></description>
+  <command interpreter="perl">cat_2.pl $input1 $input2 $output</command>
+  <inputs>
+    <param format="fasta" name="input1" type="data" label="FASTA file1"/>
+    <param format="fasta" name="input2" type="data" label="FASTA file2"/>
+  </inputs>
+  <outputs>
+    <data format="fasta" name="output" />
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input1" value="f1.fasta"/>
+      <param name="input2" value="f2.fasta"/>
+      <output name="output" file="Galaxy134-[Concatenate_on_data_132_and_data_131].fasta"/>
+    </test>
+  </tests>
+
+  <help>
+This tool concatenates two FASTA files in a tail-head manner. First you must upload your FASTA files using the Upload File tool in the Get Data subsection. After the upload is complete, select the FASTA files in this Concatenate tool as FASTA file1 and FASTA file2. Click on Execute to start the job. The concatenated FASTA file will be generated, and you can download it by clicking on the name of the job in the History menu at the right side of the screen and using the Download option (small disk icon).
+  </help>
+
+  <citations>
+    <citation type="bibtex">
+      @misc{Concatenate,
+        author = {Application, Labis},
+        year = 2018,
+        title = Concatenate,
+        publisher = labisapplications
+      }
+    </citation>
+  </citations>
+</tool>
b
diff -r 000000000000 -r ba070efb6f78 read_util.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_util.R Tue Jul 03 17:34:13 2018 -0400
[
@@ -0,0 +1,38 @@
+# auxiliar script to help with the read of all R scripts
+# Reads the input table named in `options$inputfile_name` and collects the
+# columns selected by `options$type` ("lfqlog2", "intensity", anything else
+# falls back to the MS/MS count columns).
+#
+# Returns a list with:
+#   table     - the data frame read from the input file
+#   regex     - the regular expression used to select the columns
+#   code      - short label of the selected column type ("LFQ", "INT", "MS")
+#   col_names - names of the columns matching `regex`
+#   ex_names  - experiment names extracted from `col_names`, ordered with
+#               mixedsort (must already be available in the calling session)
+#   diff_cat  - unique category prefixes of `ex_names` (text before the digits)
+#
+# Quits the R session with status 1 when no column matches.
+read_function <- function(options) {
+
+    input_table <- read.delim(options$inputfile_name, header=TRUE, fill=TRUE);
+
+    # pick the column pattern and its short label for the requested type
+    if (options$type == "lfqlog2") {
+      column_pattern <- "LFQ[.]intensity[.]([^[:digit:]]+)[[:digit:]]+";
+      type_label <- "LFQ";
+    } else if (options$type == "intensity") {
+      column_pattern <- "Intensity[.]([^[:digit:]]+)[[:digit:]]+";
+      type_label <- "INT";
+    } else {
+      column_pattern <- "MS[.]MS[.]Count[.]([^[:digit:]]+)[[:digit:]]+";
+      type_label <- "MS";
+    }
+
+    # columns that will be taken into account by the caller
+    matching_columns <- grep(column_pattern, colnames(input_table), value=TRUE);
+
+    if (length(matching_columns) == 0) {
+      print (sprintf("Error: No columns of type %s in input table", type_label));
+      q(1,save="no");
+    }
+
+    # experiment name = non-digit prefix plus its trailing number, ordered
+    raw_experiments <- gsub(".*[.]([^[:digit:]]+[[:digit:]]+).*", "\\1",
+                            matching_columns);
+    ordered_experiments <- mixedsort(raw_experiments);
+
+    # category = experiment name without the trailing number
+    category_prefixes <- gsub("([^[:digit:]]+).*", "\\1", ordered_experiments);
+    categories <- unique(category_prefixes);
+
+    return(list(table=input_table,
+                regex=column_pattern,
+                code=type_label,
+                col_names=matching_columns,
+                ex_names=ordered_experiments,
+                diff_cat=categories));
+}
b
diff -r 000000000000 -r ba070efb6f78 saida.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/saida.csv Tue Jul 03 17:34:13 2018 -0400
b
@@ -0,0 +1,59 @@
+A6NGA9
+P62736
+Q15154
+A8MWY0-3
+Q01638
+P16415
+B9EGE7
+Q9Y6D9
+C9K044
+P02768-1
+Q5VVM6
+E1P506
+Q00536-3
+P47712
+Q6V1P9-4
+Q12866
+F5H5P6
+J3KNF5
+F8W0W6
+Q96JB1
+H0YCF9
+H0YET9
+Q9BTM1-2
+P00738
+H3BS82
+Q6P158
+Q13045
+J3KTH2
+Q96NJ3-2
+M0QY22
+Q6ZN19-3
+O43603
+O75132
+O94889-2
+P05141
+P0C6C1
+P0C841
+P62684
+Q01668-2
+Q13315
+Q14160-3
+Q5JTZ9
+Q5T742
+Q5T7X1
+Q5TAL2
+Q9HD45
+Q6PI48
+Q7Z5H3-3
+Q86Z20
+Q8IW50
+Q8IYF3-2
+Q8ND76
+Q8WXI9
+Q96DT5
+Q96TA1-2
+Q9BVL4
+Q9H0L4
+Q9UJY1
+S4R332
b
diff -r 000000000000 -r ba070efb6f78 selectproteinids.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/selectproteinids.pl Tue Jul 03 17:34:13 2018 -0400
[
@@ -0,0 +1,38 @@
+#!/usr/bin/perl -w
+
+use strict;
+use warnings;
+use FileHandle;
+
+
+# Extracts the first protein id of the first column of every data row.
+#
+# Arguments:
+#   1. input table (first line is a header and is skipped)
+#   2. "yes" to keep contaminant ids (with the CON__ prefix removed);
+#      any other value drops them
+#   3. output file, one id per line
+die "usage: perl selectproteinids.pl <input table> <yes|no> <output file>\n"
+    if @ARGV < 3;
+
+my ($in, $maintain_contaminants, $out) = @ARGV;
+
+# read the whole table before the output is opened, then drop the header
+open my $in_fh, '<', $in
+    or die "Cannot open '$in' for reading: $!";
+my @lines = <$in_fh>;
+close $in_fh
+    or die "Cannot close '$in': $!";
+shift @lines;
+
+open my $out_fh, '>', $out
+    or die "Cannot open '$out' for writing: $!";
+
+for my $line (@lines) {
+    # first whitespace-separated field is the protein id group
+    my @fields = split ' ', $line;
+    next unless @fields;    # blank line: nothing to extract
+
+    # ids inside a group are separated by ';' - only the first one is kept
+    my ($id) = split /;/, $fields[0];
+    next unless defined $id;
+
+    if ($id =~ /^CON__/) {
+        # contaminant: keep it (without the prefix) only when requested
+        next unless $maintain_contaminants eq "yes";
+        $id =~ s/^CON__//;
+    }
+
+    print {$out_fh} $id, "\n"
+        or die "Cannot write to '$out': $!";
+}
+
+# buffered write errors only surface at close, so the result is checked
+close $out_fh
+    or die "Cannot close '$out': $!";
+
b
diff -r 000000000000 -r ba070efb6f78 selectproteinids.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/selectproteinids.xml Tue Jul 03 17:34:13 2018 -0400
b
@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<tool id="selectproteinids" name="Filter ids" version="1.0" hidden="false">
+  
+  <description></description>
+  <command interpreter="perl">
+    selectproteinids.pl $input $maintaincon $out_file
+  </command>
+
+  <inputs>
+    <param format="tabular" name="input" type="data" label="Input table"/>
+
+    <param name="maintaincon" type="select" display="radio" label="Maintain contaminants">
+      <option value="yes">Yes</option>
+      <option value="no">No</option>
+    </param>
+
+  </inputs>
+
+  <outputs>
+    <data format="tabular" name="out_file" label="${input.name} filtered"/>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="protein_ids_simple.csv"/>
+      <param name="maintaincon" value="yes"/>
+      <output name="out_file" file="proteinGroups_csv_filtered_with_contaminants.tabular"/>
+    </test>
+
+    <test>
+      <param name="input" value="protein_ids_simple.csv"/>
+      <param name="maintaincon" value="no"/>
+      <output name="out_file" file="proteinGroups_csv_filtered_without_contaminants.tabular"/>
+    </test>
+
+  </tests>
+
+  <help>
+
+    Input: Protein Groups from MaxQuant
+
+
+    Output: Table with id's got from MaxQuant Protein Groups
+
+  </help>
+
+  <citations>
+    <citation type="bibtex">
+      @misc{Concatenate,
+        author = {Application, Labis},
+        year = 2018,
+        title = Concatenate,
+        publisher = labisapplications
+      }
+    </citation>
+  </citations>
+</tool>
b
diff -r 000000000000 -r ba070efb6f78 t-test.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/t-test.R Tue Jul 03 17:34:13 2018 -0400
[
@@ -0,0 +1,80 @@
+#!/usr/bin/env Rscript
+
+# t-test.R
+# AUTHOR: Daniel Travieso
+# E-mail: danielgtravieso@gmail.com
+# LAST REVISED: April 2015
+#
+# Required packages to work: ("getopt", "gtools")
+# Laboratory of Mass Spectrometry at Brazilian Biosciences National Laboratory
+# http://lnbio.cnpem.br/
+# Copyright CC BY-NC-SA (c) 2014  Brazilian Center for Research in Energy and Materials
+# All rights reserved.
+require('gtools', quietly=TRUE);
+require('getopt', quietly=TRUE);
+
+# Load the helper scripts that live next to this script. They are plain R
+# files, not packages, so they have to be source()d. Rscript passes the
+# script path as --file=<path>, which lets us resolve them independently of
+# the current working directory.
+script_args <- commandArgs(trailingOnly=FALSE);
+script_file <- sub("^--file=", "", grep("^--file=", script_args, value=TRUE));
+script_dir <- if (length(script_file) > 0) dirname(script_file[1]) else ".";
+source(file.path(script_dir, "read_util.R"));
+source(file.path(script_dir, "write_util.R"));
+
+# define the command line options: input table, column type, output file
+opt = matrix(c(
+    'inputfile_name', 'i', 1, 'character',
+    'type', 't', 1, 'character',
+    'outputfile_name', 'o', 1, 'character'
+),byrow=TRUE, ncol=4);
+
+# parse the input
+options = getopt(opt);
+
+read_util <- read_function(options);
+
+# a t-test needs two groups of columns to compare
+if (length(read_util$diff_cat) < 2) {
+  print(sprintf("Can't calculate t-test. There is only one category for %s collumns", read_util$code));
+  q(1,save="no");
+}
+
+# columns[[k]] holds the column names of the k-th category;
+# aux is the concatenation of all of them
+columns <- list();
+aux <- c();
+for (cat in read_util$diff_cat) {
+  col <- read_util$col_names[gsub(read_util$regex, "\\1", read_util$col_names) == cat]
+  aux <- c(aux, col);
+  columns[[length(columns) + 1]] <- col;
+}
+
+# this is a filtered read_util$table to help with calculations;
+# the first row of the table is excluded from the test
+table_only_columns <- read_util$table[-1, aux]
+
+# p-values are kept numeric so that the significance threshold below is a
+# numeric comparison. Entry 1 belongs to the excluded first row and stays NA.
+ttestresult <- rep(NA_real_, nrow(read_util$table));
+
+# seq_len() yields an empty sequence when there is no data row
+for (row in seq_len(nrow(table_only_columns))) {
+  # the t-test arguments are the values of the first category, the values of
+  # the second category and some extra arguments. var.equal says it's a
+  # Student t-test with standard deviations assumed equal. mu=0 sets the
+  # null hypothesis to "no difference between the means".
+  # Only the first two categories are compared.
+  pvalue <- t.test(table_only_columns[row, columns[[1]]],
+    table_only_columns[row, columns[[2]]], var.equal=TRUE, mu=0)$p.value;
+  # an undefined p-value is reported as 1.0 (not significant)
+  ttestresult[row + 1] <- if (is.na(pvalue)) 1.0 else pvalue;
+}
+
+# this defines if the p-value returned for each row is significant;
+# rows without a p-value get an empty marker
+has_pvalue <- !is.na(ttestresult);
+ttestsignificant <- ifelse(has_pvalue & ttestresult <= 0.05, "+", "");
+
+# the p-value column is written as text, with an empty cell for the
+# excluded first row
+ttestresult_text <- ifelse(has_pvalue, as.character(ttestresult), "");
+
+# create two extra columns on the read_util$table, one for p-values and the
+# other for significance
+# TODO: place them either next to the intensity columns they refer to or as
+# the 3rd column
+read_util$table[paste0("T.test.result.", read_util$code)] <- ttestresult_text;
+read_util$table[paste0("T.test.significant.", read_util$code)] <- ttestsignificant;
+
+# write out the read_util$table
+writeout(options$outputfile_name, read_util$table);
b
diff -r 000000000000 -r ba070efb6f78 t-test.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/t-test.xml Tue Jul 03 17:34:13 2018 -0400
b
@@ -0,0 +1,45 @@
+<?xml version="1.0"?>
+<tool id="t-test" name="T-Test" version="1.0" hidden="false">
+    <description>
+    </description>
+    <command interpreter="Rscript">
+        t-test.R --inputfile_name=$input --type=$t_test_type --outputfile_name=$output
+    </command>
+
+    <inputs>
+        <param format="tabular" name="input" type="data" label="Input table"/>
+        <param format="txt" name="t_test_type" type="select" label="Type of T-test">
+          <option value="lfqlog2"> Log2 of LFQ Intensity </option>
+          <option value="intensity"> Intensity </option>
+          <option value="mscount"> MS Count </option>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data format="tabular" name="output" label="Complete table with p-values and significance on ${input.name}"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input" value="f1.fasta"/>
+            <param name="t_test_type" value="Intensity"/>
+            <output name="output" file="result_T_test.csv"/>
+        </test>
+    </tests>
+
+    <help>
+        ** What it does **
+        This tool executes an R script that adds a p-value column and a significance column to the table.
+    </help>
+
+    <citations>
+        <citation type="bibtex">
+          @misc{Concatenate,
+            author = {Application, Labis},
+            year = 2018,
+            title = Concatenate,
+            publisher = labisapplications
+          }
+        </citation>
+    </citations>
+</tool>
[
diff -r 000000000000 -r ba070efb6f78 test-data/Galaxy134-[Concatenate_on_data_132_and_data_131].fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Galaxy134-[Concatenate_on_data_132_and_data_131].fasta Tue Jul 03 17:34:13 2018 -0400
b
@@ -0,0 +1,46 @@
+
+>sp|P35579|MYH9_HUMAN Myosin-9 OS=Homo sapiens GN=MYH9 PE=1 SV=4
+MAQQAADKYLYVDKNFINNPLAQADWAAKKLVWVPSDKSGFEPASLKEEVGEEAIVELVE
+NGKKVKVNKDDIQKMNPPKFSKVEDMAELTCLNEASVLHNLKERYYSGLIYTYSGLFCVV
+INPYKNLPIYSEEIVEMYKGKKRHEMPPHIYAITDTAYRSMMQDREDQSILCTGESGAGK
+TENTKKVIQYLAYVASSHKSKKDQGELERQLLQANPILEAFGNAKTVKNDNSSRFGKFIR
+INFDVNGYIVGANIETYLLEKSRAIRQAKEERTFHIFYYLLSGAGEHLKTDLLLEPYNKY
+RFLSNGHVTIPGQQDKDMFQETMEAMRIMGIPEEEQMGLLRVISGVLQLGNIVFKKERNT
+DQASMPDNTAAQKVSHLLGINVTDFTRGILTPRIKVGRDYVQKAQTKEQADFAIEALAKA
+TYERMFRWLVLRINKALDKTKRQGASFIGILDIAGFEIFDLNSFEQLCINYTNEKLQQLF
+NHTMFILEQEEYQREGIEWNFIDFGLDLQPCIDLIEKPAGPPGILALLDEECWFPKATDK
+SFVEKVMQEQGTHPKFQKPKQLKDKADFCIIHYAGKVDYKADEWLMKNMDPLNDNIATLL
+HQSSDKFVSELWKDVDRIIGLDQVAGMSETALPGAFKTRKGMFRTVGQLYKEQLAKLMAT
+LRNTNPNFVRCIIPNHEKKAGKLDPHLVLDQLRCNGVLEGIRICRQGFPNRVVFQEFRQR
+YEILTPNSIPKGFMDGKQACVLMIKALELDSNLYRIGQSKVFFRAGVLAHLEEERDLKIT
+DVIIGFQACCRGYLARKAFAKRQQQLTAMKVLQRNCAAYLKLRNWQWWRLFTKVKPLLQV
+SRQEEEMMAKEEELVKVREKQLAAENRLTEMETLQSQLMAEKLQLQEQLQAETELCAEAE
+ELRARLTAKKQELEEICHDLEARVEEEEERCQHLQAEKKKMQQNIQELEEQLEEEESARQ
+KLQLEKVTTEAKLKKLEEEQIILEDQNCKLAKEKKLLEDRIAEFTTNLTEEEEKSKSLAK
+LKNKHEAMITDLEERLRREEKQRQELEKTRRKLEGDSTDLSDQIAELQAQIAELKMQLAK
+KEEELQAALARVEEEAAQKNMALKKIRELESQISELQEDLESERASRNKAEKQKRDLGEE
+LEALKTELEDTLDSTAAQQELRSKREQEVNILKKTLEEEAKTHEAQIQEMRQKHSQAVEE
+LAEQLEQTKRVKANLEKAKQTLENERGELANEVKVLLQGKGDSEHKRKKVEAQLQELQVK
+FNEGERVRTELADKVTKLQVELDNVTGLLSQSDSKSSKLTKDFSALESQLQDTQELLQEE
+NRQKLSLSTKLKQVEDEKNSFREQLEEEEEAKHNLEKQIATLHAQVADMKKKMEDSVGCL
+ETAEEVKRKLQKDLEGLSQRHEEKVAAYDKLEKTKTRLQQELDDLLVDLDHQRQSACNLE
+KKQKKFDQLLAEEKTISAKYAEERDRAEAEAREKETKALSLARALEEAMEQKAELERLNK
+QFRTEMEDLMSSKDDVGKSVHELEKSKRALEQQVEEMKTQLEELEDELQATEDAKLRLEV
+NLQAMKAQFERDLQGRDEQSEEKKKQLVRQVREMEAELEDERKQRSMAVAARKKLEMDLK
+DLEAHIDSANKNRDEAIKQLRKLQAQMKDCMRELDDTRASREEILAQAKENEKKLKSMEA
+EMIQLQEELAAAERAKRQAQQERDELADEIANSSGKGALALEEKRRLEARIAQLEEELEE
+EQGNTELINDRLKKANLQIDQINTDLNLERSHAQKNENARQQLERQNKELKVKLQEMEGT
+VKSKYKASITALEAKIAQLEEQLDNETKERQAACKQVRRTEKKLKDVLLQVDDERRNAEQ
+YKDQADKASTRLKQLKRQLEEAEEEAQRANASRRKLQRELEDATETADAMNREVSSLKNK
+LRRGDLPFVVPRRMARKGAGDGSDEEVDGKADGAEAKPAE
+
+>sp|Q99456|K1C12_HUMAN Keratin, type I cytoskeletal 12 OS=Homo sapiens GN=KRT12 PE=1 SV=1
+MDLSNNTMSLSVRTPGLSRRLSSQSVIGRPRGMSASSVGSGYGGSAFGFGASCGGGFSAA
+SMFGSSSGFGGGSGSSMAGGLGAGYGRALGGGSFGGLGMGFGGSPGGGSLGILSGNDGGL
+LSGSEKETMQNLNDRLASYLDKVRALEEANTELENKIREWYETRGTGTADASQSDYSKYY
+PLIEDLRNKIISASIGNAQLLLQIDNARLAAEDFRMKYENELALRQGVEADINGLRRVLD
+ELTLTRTDLEMQIESLNEELAYMKKNHEDELQSFRVGGPGEVSVEMDAAPGVDLTRLLND
+MRAQYETIAEQNRKDAEAWFIEKSGELRKEISTNTEQLQSSKSEVTDLRRAFQNLEIELQ
+SQLAMKKSLEDSLAEAEGDYCAQLSQVQQLISNLEAQLLQVRADAERQNVDHQRLLNVKA
+RLELEIETYRRLLDGEAQGDGLEESLFVTDSKSQAQSTDSSKDPTKTRKIKTVVQEMVNG
+EVVSSQVQEIEELM
b
diff -r 000000000000 -r ba070efb6f78 test-data/f1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/f1.fasta Tue Jul 03 17:34:13 2018 -0400
b
@@ -0,0 +1,35 @@
+
+>sp|P35579|MYH9_HUMAN Myosin-9 OS=Homo sapiens GN=MYH9 PE=1 SV=4
+MAQQAADKYLYVDKNFINNPLAQADWAAKKLVWVPSDKSGFEPASLKEEVGEEAIVELVE
+NGKKVKVNKDDIQKMNPPKFSKVEDMAELTCLNEASVLHNLKERYYSGLIYTYSGLFCVV
+INPYKNLPIYSEEIVEMYKGKKRHEMPPHIYAITDTAYRSMMQDREDQSILCTGESGAGK
+TENTKKVIQYLAYVASSHKSKKDQGELERQLLQANPILEAFGNAKTVKNDNSSRFGKFIR
+INFDVNGYIVGANIETYLLEKSRAIRQAKEERTFHIFYYLLSGAGEHLKTDLLLEPYNKY
+RFLSNGHVTIPGQQDKDMFQETMEAMRIMGIPEEEQMGLLRVISGVLQLGNIVFKKERNT
+DQASMPDNTAAQKVSHLLGINVTDFTRGILTPRIKVGRDYVQKAQTKEQADFAIEALAKA
+TYERMFRWLVLRINKALDKTKRQGASFIGILDIAGFEIFDLNSFEQLCINYTNEKLQQLF
+NHTMFILEQEEYQREGIEWNFIDFGLDLQPCIDLIEKPAGPPGILALLDEECWFPKATDK
+SFVEKVMQEQGTHPKFQKPKQLKDKADFCIIHYAGKVDYKADEWLMKNMDPLNDNIATLL
+HQSSDKFVSELWKDVDRIIGLDQVAGMSETALPGAFKTRKGMFRTVGQLYKEQLAKLMAT
+LRNTNPNFVRCIIPNHEKKAGKLDPHLVLDQLRCNGVLEGIRICRQGFPNRVVFQEFRQR
+YEILTPNSIPKGFMDGKQACVLMIKALELDSNLYRIGQSKVFFRAGVLAHLEEERDLKIT
+DVIIGFQACCRGYLARKAFAKRQQQLTAMKVLQRNCAAYLKLRNWQWWRLFTKVKPLLQV
+SRQEEEMMAKEEELVKVREKQLAAENRLTEMETLQSQLMAEKLQLQEQLQAETELCAEAE
+ELRARLTAKKQELEEICHDLEARVEEEEERCQHLQAEKKKMQQNIQELEEQLEEEESARQ
+KLQLEKVTTEAKLKKLEEEQIILEDQNCKLAKEKKLLEDRIAEFTTNLTEEEEKSKSLAK
+LKNKHEAMITDLEERLRREEKQRQELEKTRRKLEGDSTDLSDQIAELQAQIAELKMQLAK
+KEEELQAALARVEEEAAQKNMALKKIRELESQISELQEDLESERASRNKAEKQKRDLGEE
+LEALKTELEDTLDSTAAQQELRSKREQEVNILKKTLEEEAKTHEAQIQEMRQKHSQAVEE
+LAEQLEQTKRVKANLEKAKQTLENERGELANEVKVLLQGKGDSEHKRKKVEAQLQELQVK
+FNEGERVRTELADKVTKLQVELDNVTGLLSQSDSKSSKLTKDFSALESQLQDTQELLQEE
+NRQKLSLSTKLKQVEDEKNSFREQLEEEEEAKHNLEKQIATLHAQVADMKKKMEDSVGCL
+ETAEEVKRKLQKDLEGLSQRHEEKVAAYDKLEKTKTRLQQELDDLLVDLDHQRQSACNLE
+KKQKKFDQLLAEEKTISAKYAEERDRAEAEAREKETKALSLARALEEAMEQKAELERLNK
+QFRTEMEDLMSSKDDVGKSVHELEKSKRALEQQVEEMKTQLEELEDELQATEDAKLRLEV
+NLQAMKAQFERDLQGRDEQSEEKKKQLVRQVREMEAELEDERKQRSMAVAARKKLEMDLK
+DLEAHIDSANKNRDEAIKQLRKLQAQMKDCMRELDDTRASREEILAQAKENEKKLKSMEA
+EMIQLQEELAAAERAKRQAQQERDELADEIANSSGKGALALEEKRRLEARIAQLEEELEE
+EQGNTELINDRLKKANLQIDQINTDLNLERSHAQKNENARQQLERQNKELKVKLQEMEGT
+VKSKYKASITALEAKIAQLEEQLDNETKERQAACKQVRRTEKKLKDVLLQVDDERRNAEQ
+YKDQADKASTRLKQLKRQLEEAEEEAQRANASRRKLQRELEDATETADAMNREVSSLKNK
+LRRGDLPFVVPRRMARKGAGDGSDEEVDGKADGAEAKPAE
\ No newline at end of file
b
diff -r 000000000000 -r ba070efb6f78 test-data/f2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/f2.fasta Tue Jul 03 17:34:13 2018 -0400
b
@@ -0,0 +1,11 @@
+
+>sp|Q99456|K1C12_HUMAN Keratin, type I cytoskeletal 12 OS=Homo sapiens GN=KRT12 PE=1 SV=1
+MDLSNNTMSLSVRTPGLSRRLSSQSVIGRPRGMSASSVGSGYGGSAFGFGASCGGGFSAA
+SMFGSSSGFGGGSGSSMAGGLGAGYGRALGGGSFGGLGMGFGGSPGGGSLGILSGNDGGL
+LSGSEKETMQNLNDRLASYLDKVRALEEANTELENKIREWYETRGTGTADASQSDYSKYY
+PLIEDLRNKIISASIGNAQLLLQIDNARLAAEDFRMKYENELALRQGVEADINGLRRVLD
+ELTLTRTDLEMQIESLNEELAYMKKNHEDELQSFRVGGPGEVSVEMDAAPGVDLTRLLND
+MRAQYETIAEQNRKDAEAWFIEKSGELRKEISTNTEQLQSSKSEVTDLRRAFQNLEIELQ
+SQLAMKKSLEDSLAEAEGDYCAQLSQVQQLISNLEAQLLQVRADAERQNVDHQRLLNVKA
+RLELEIETYRRLLDGEAQGDGLEESLFVTDSKSQAQSTDSSKDPTKTRKIKTVVQEMVNG
+EVVSSQVQEIEELM
b
diff -r 000000000000 -r ba070efb6f78 test-data/proteinGroups.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteinGroups.csv Tue Jul 03 17:34:13 2018 -0400
[
b'@@ -0,0 +1,60 @@\n+Protein IDs\tMajority protein IDs\tPeptide counts (all)\tPeptide counts (razor+unique)\tPeptide counts (unique)\tProtein names\tGene names\tFasta headers\tProteins\tPeptides\tRazor + unique peptides\tUnique peptides\tPeptides test2\tRazor + unique peptides test2\tUnique peptides test2\tSequence coverage [%]\tUnique + razor sequence coverage [%]\tUnique sequence coverage [%]\tMol. weight [kDa]\tSequence length\tSequence lengths\tSlice average\tSlice 1\tExperiment test2\tPEP\tSequence coverage test2 [%]\tIntensity\tIntensity test2\tOnly identified by site\tReverse\tContaminant\tid\tPeptide IDs\tPeptide is razor\tMod. peptide IDs\tEvidence IDs\tMS/MS IDs\tBest MS/MS\tOxidation (M) site IDs\tOxidation (M) site positions\r\n+A6NGA9\tA6NGA9\t1\t1\t1\tTransmembrane protein 202\tTMEM202\t>sp|A6NGA9|TM202_HUMAN Transmembrane protein 202 OS=Homo sapiens GN=TMEM202 PE=2 SV=1\t1\t1\t1\t1\t1\t1\t1\t5.5\t5.5\t5.5\t31.352\t273\t273\t1\t1\t1\t0.021486\t5.5\t74156\t74156\t\t\t\t0\t68\tTrue\t71\t88\t100\t100\t\t\r\n+P62736;P68032;P68133;P63267;CON__P60712;P60709;P63261;Q5T8M7;P63267-2;A6NL76;Q5T8M8\tP62736;P68032;P68133;P63267;CON__P60712;P60709;P63261;Q5T8M7;P63267-2;A6NL76;Q5T8M8\t1;1;1;1;1;1;1;1;1;1;1\t1;1;1;1;1;1;1;1;1;1;1\t1;1;1;1;1;1;1;1;1;1;1\tActin, aortic smooth muscle;Actin, alpha cardiac muscle 1;Actin, alpha skeletal muscle;Actin, gamma-enteric smooth muscle;Actin, cytoplasmic 1;Actin, cytoplasmic 1, N-terminally processed;Actin, cytoplasmic 2;Actin, cytoplasmic 2, N-terminally processed\tACTA2;ACTC1;ACTA1;ACTG2;ACTB;ACTG1\t>sp|P62736|ACTA_HUMAN Actin, aortic smooth muscle OS=Homo sapiens GN=ACTA2 PE=1 SV=1;>sp|P68032|ACTC_HUMAN Actin, alpha cardiac muscle 1 OS=Homo sapiens GN=ACTC1 PE=1 SV=1;>sp|P68133|ACTS_HUMAN Actin, alpha skeletal muscle OS=Homo sapiens GN=ACTA1 PE=1 
SV=\t11\t1\t1\t1\t1\t1\t1\t2.9\t2.9\t2.9\t42.009\t377\t377;377;377;376;375;375;375;342;333;289;287\t1\t1\t1\t0.0012645\t2.9\t0\t0\t\t\t+\t1\t14\tTrue\t16\t21\t28\t28\t0\t327\r\n+Q15154;E7ETA6;Q15154-2;H0YBA1;A6NNN6\tQ15154;E7ETA6;Q15154-2;H0YBA1;A6NNN6\t1;1;1;1;1\t1;1;1;1;1\t1;1;1;1;1\tPericentriolar material 1 protein\tPCM1\t>sp|Q15154|PCM1_HUMAN Pericentriolar material 1 protein OS=Homo sapiens GN=PCM1 PE=1 SV=4;>tr|E7ETA6|E7ETA6_HUMAN Pericentriolar material 1 protein OS=Homo sapiens GN=PCM1 PE=2 SV=1;>sp|Q15154-2|PCM1_HUMAN Isoform 2 of Pericentriolar material 1 protein OS=\t5\t1\t1\t1\t1\t1\t1\t0.5\t0.5\t0.5\t228.53\t2024\t2024;2016;1969;764;723\t1\t1\t1\t0.022199\t0.5\t0\t0\t\t\t\t2\t34\tTrue\t37\t43\t51\t51\t\t\r\n+A8MWY0-3\tA8MWY0-3\t1\t1\t1\t\t\t>sp|A8MWY0-3|K132L_HUMAN Isoform 3 of UPF0577 protein KIAA1324-like OS=Homo sapiens GN=KIAA1324L\t1\t1\t1\t1\t1\t1\t1\t3.5\t3.5\t3.5\t95.749\t862\t862\t1\t1\t1\t0.0198\t3.5\t128350\t128350\t\t\t\t3\t42\tTrue\t45\t55\t63\t63\t\t\r\n+Q01638;Q01638-2;E9PC41;Q01638-3;B4E0I3\tQ01638;Q01638-2;E9PC41;Q01638-3;B4E0I3\t1;1;1;1;1\t1;1;1;1;1\t1;1;1;1;1\tInterleukin-1 receptor-like 1\tIL1RL1\t>sp|Q01638|ILRL1_HUMAN Interleukin-1 receptor-like 1 OS=Homo sapiens GN=IL1RL1 PE=1 SV=4;>sp|Q01638-2|ILRL1_HUMAN Isoform B of Interleukin-1 receptor-like 1 OS=Homo sapiens GN=IL1RL1;>tr|E9PC41|E9PC41_HUMAN Interleukin-1 receptor-like 1 OS=Homo sapiens GN=\t5\t1\t1\t1\t1\t1\t1\t2.5\t2.5\t2.5\t63.357\t556\t556;328;314;259;211\t1\t1\t1\t0.011882\t2.5\t0\t0\t\t\t\t4\t55\tTrue\t58\t71\t79\t79\t\t\r\n+P16415;B7Z8D4\tP16415;B7Z8D4\t1;1\t1;1\t1;1\tZinc finger protein 823\tZNF823\t>sp|P16415|ZN823_HUMAN Zinc finger protein 823 OS=Homo sapiens GN=ZNF823 PE=2 SV=2;>tr|B7Z8D4|B7Z8D4_HUMAN Zinc finger protein 823 OS=Homo sapiens GN=ZNF823 PE=2 
SV=1\t2\t1\t1\t1\t1\t1\t1\t2.8\t2.8\t2.8\t70.27\t610\t610;428\t1\t1\t1\t0.021702\t2.8\t2500200\t2500200\t\t\t\t5\t12\tTrue\t14\t19\t26\t26\t\t\r\n+B9EGE7;Q8TCN5;Q8TCN5-2\tB9EGE7;Q8TCN5;Q8TCN5-2\t1;1;1\t1;1;1\t1;1;1\tZinc finger protein 507\tZNF507\t>tr|B9EGE7|B9EGE7_HUMAN ZNF507 protein OS=Homo sapiens GN=ZNF507 PE=2 SV=1;>sp|Q8TCN5|ZN507_HUMAN Zinc finger protein 507 OS=Homo sapiens GN=ZNF507 PE=1 SV=2;>sp|Q8TCN5-2|ZN507_HUMAN Isoform 2 of Zinc finger protein 507 OS=Homo sapiens GN=ZNF507\t3\t1\t1\t1\t1\t1\t1\t4.2\t4.2\t4.2\t106.33\t957\t957;953;764\t1\t1\t1\t0.017482\t4.2\t112540\t112540\t\t\t\t6\t61\tTrue\t64\t77\t87\t87\t\t\r\n+Q9Y6D9;C9JJ38\tQ9Y6D9;C9JJ38\t1;1\t1;1\t1;1\tMitotic spindle assembly checkpoint protein'..b'7.887\t589\t589;254\t1\t1\t1\t0.022061\t6.6\t122910\t122910\t\t\t\t45\t18\tTrue\t21\t26\t33\t33\t\t\r\n+Q6PI48\tQ6PI48\t1\t1\t1\tAspartate--tRNA ligase, mitochondrial\tDARS2\t>sp|Q6PI48|SYDM_HUMAN Aspartate--tRNA ligase, mitochondrial OS=Homo sapiens GN=DARS2 PE=1 SV=1\t1\t1\t1\t1\t1\t1\t1\t2.3\t2.3\t2.3\t73.562\t645\t645\t1\t1\t1\t0.016652\t2.3\t45097\t45097\t\t\t\t46\t75\tTrue\t78\t98\t112\t112\t\t\r\n+Q7Z5H3-3\tQ7Z5H3-3\t1\t1\t1\t\t\t>sp|Q7Z5H3-3|RHG22_HUMAN Isoform 3 of Rho GTPase-activating protein 22 OS=Homo sapiens GN=ARHGAP22\t1\t1\t1\t1\t1\t1\t1\t1.2\t1.2\t1.2\t66.61\t608\t608\t1\t1\t1\t0.019637\t1.2\t417420\t417420\t\t\t\t47\t48\tTrue\t51\t62\t70\t70\t9\t1\r\n+Q86Z20;Q86Z20-2\tQ86Z20;Q86Z20-2\t1;1\t1;1\t1;1\tCoiled-coil domain-containing protein 125\tCCDC125\t>sp|Q86Z20|CC125_HUMAN Coiled-coil domain-containing protein 125 OS=Homo sapiens GN=CCDC125 PE=1 SV=2;>sp|Q86Z20-2|CC125_HUMAN Isoform 2 of Coiled-coil domain-containing protein 125 OS=Homo sapiens 
GN=CCDC125\t2\t1\t1\t1\t1\t1\t1\t2.9\t2.9\t2.9\t58.628\t511\t511;386\t1\t1\t1\t0.0075203\t2.9\t563670\t563670\t\t\t\t48\t27\tTrue\t30\t36\t43\t43\t\t\r\n+Q8IW50;Q8IW50-4;Q8IW50-6;Q8IW50-5;Q8IW50-2;Q8IW50-7;Q8IW50-3\tQ8IW50;Q8IW50-4;Q8IW50-6;Q8IW50-5;Q8IW50-2;Q8IW50-7;Q8IW50-3\t1;1;1;1;1;1;1\t1;1;1;1;1;1;1\t1;1;1;1;1;1;1\tProtein FAM219A\tFAM219A\t>sp|Q8IW50|F219A_HUMAN Protein FAM219A OS=Homo sapiens GN=FAM219A PE=1 SV=3;>sp|Q8IW50-4|F219A_HUMAN Isoform 4 of Protein FAM219A OS=Homo sapiens GN=FAM219A;>sp|Q8IW50-6|F219A_HUMAN Isoform 6 of Protein FAM219A OS=Homo sapiens GN=FAM219A;>sp|Q8IW50-5|F219A\t7\t1\t1\t1\t1\t1\t1\t3.8\t3.8\t3.8\t20.399\t185\t185;173;168;167;157;157;156\t1\t1\t1\t0.00067799\t3.8\t0\t0\t\t\t\t49\t45\tTrue\t48\t59\t67\t67\t\t\r\n+Q8IYF3-2\tQ8IYF3-2\t1\t1\t1\t\t\t>sp|Q8IYF3-2|TEX11_HUMAN Isoform 2 of Testis-expressed sequence 11 protein OS=Homo sapiens GN=TEX11\t1\t1\t1\t1\t1\t1\t1\t4.1\t4.1\t4.1\t71.283\t615\t615\t1\t1\t1\t0.0040995\t4.1\t0\t0\t\t\t\t50\t39\tTrue\t42\t52\t60\t60\t\t\r\n+Q8ND76;Q8ND76-2;Q8ND76-3\tQ8ND76;Q8ND76-2;Q8ND76-3\t1;1;1\t1;1;1\t1;1;1\tCyclin-Y\tCCNY\t>sp|Q8ND76|CCNY_HUMAN Cyclin-Y OS=Homo sapiens GN=CCNY PE=1 SV=2;>sp|Q8ND76-2|CCNY_HUMAN Isoform 2 of Cyclin-Y OS=Homo sapiens GN=CCNY;>sp|Q8ND76-3|CCNY_HUMAN Isoform 3 of Cyclin-Y OS=Homo sapiens GN=CCNY\t3\t1\t1\t1\t1\t1\t1\t3.2\t3.2\t3.2\t39.336\t341\t341;316;287\t1\t1\t1\t0.022359\t3.2\t332840000\t332840000\t\t\t\t51\t67\tTrue\t70\t87\t99\t99\t\t\r\n+Q8WXI9\tQ8WXI9\t1\t1\t1\tTranscriptional repressor p66-beta\tGATAD2B\t>sp|Q8WXI9|P66B_HUMAN Transcriptional repressor p66-beta OS=Homo sapiens GN=GATAD2B PE=1 SV=1\t1\t1\t1\t1\t1\t1\t1\t2\t2\t2\t65.26\t593\t593\t1\t1\t1\t0.01708\t2\t715080\t715080\t\t\t\t52\t65\tTrue\t68\t85\t97\t97\t10;11\t34;40\r\n+Q96DT5;U3KQJ8\tQ96DT5;U3KQJ8\t1;1\t1;1\t1;1\tDynein heavy chain 11, axonemal\tDNAH11\t>sp|Q96DT5|DYH11_HUMAN Dynein heavy chain 11, axonemal OS=Homo sapiens GN=DNAH11 PE=1 SV=3;>tr|U3KQJ8|U3KQJ8_HUMAN Dynein heavy chain 11, 
axonemal OS=Homo sapiens GN=DNAH11 PE=4 SV=1\t2\t1\t1\t1\t1\t1\t1\t0.2\t0.2\t0.2\t521.04\t4523\t4523;4516\t1\t1\t1\t0.022359\t0.2\t332840000\t332840000\t\t\t\t53\t4\tTrue\t4\t7\t8\t8\t\t\r\n+Q96TA1-2\tQ96TA1-2\t1\t1\t1\t\t\t>sp|Q96TA1-2|NIBL1_HUMAN Isoform 2 of Niban-like protein 1 OS=Homo sapiens GN=FAM129B\t1\t1\t1\t1\t1\t1\t1\t1.4\t1.4\t1.4\t82.682\t733\t733\t1\t1\t1\t0.019411\t1.4\t0\t0\t\t\t\t54\t41\tTrue\t44\t54\t62\t62\t\t\r\n+Q9BVL4\tQ9BVL4\t1\t1\t1\tSelenoprotein O\tSELO\t>sp|Q9BVL4|SELO_HUMAN Selenoprotein O OS=Homo sapiens GN=SELO PE=2 SV=3\t1\t1\t1\t1\t1\t1\t1\t2.2\t2.2\t2.2\t73.506\t669\t669\t1\t1\t1\t0.021901\t2.2\t30381000\t30381000\t\t\t\t55\t22\tTrue\t25\t31\t38\t38\t\t\r\n+Q9H0L4\tQ9H0L4\t1\t1\t1\tCleavage stimulation factor subunit 2 tau variant\tCSTF2T\t>sp|Q9H0L4|CSTFT_HUMAN Cleavage stimulation factor subunit 2 tau variant OS=Homo sapiens GN=CSTF2T PE=1 SV=1\t1\t1\t1\t1\t1\t1\t1\t3.2\t3.2\t3.2\t64.436\t616\t616\t1\t1\t1\t0.022136\t3.2\t309390\t309390\t\t\t\t56\t24\tTrue\t27\t33\t40\t40\t\t\r\n+Q9UJY1\tQ9UJY1\t1\t1\t1\tHeat shock protein beta-8\tHSPB8\t>sp|Q9UJY1|HSPB8_HUMAN Heat shock protein beta-8 OS=Homo sapiens GN=HSPB8 PE=1 SV=1\t1\t1\t1\t1\t1\t1\t1\t7.7\t7.7\t7.7\t21.604\t196\t196\t1\t1\t1\t0.00014865\t7.7\t0\t0\t\t\t\t57\t38\tTrue\t41\t51\t59\t59\t\t\r\n+S4R332\tS4R332\t1\t1\t1\t\t\t>tr|S4R332|S4R332_HUMAN Calcium uniporter protein, mitochondrial OS=Homo sapiens GN=MCU PE=4 SV=1\t1\t1\t1\t1\t1\t1\t1\t13.6\t13.6\t13.6\t9.4376\t88\t88\t1\t1\t1\t0.0074961\t13.6\t944210000\t944210000\t\t\t\t58\t57\tTrue\t60\t73\t81;82;83\t81\t\t\r\n'
b
diff -r 000000000000 -r ba070efb6f78 test-data/proteinGroups_csv_filtered_with_contaminants.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteinGroups_csv_filtered_with_contaminants.tabular Tue Jul 03 17:34:13 2018 -0400
b
@@ -0,0 +1,59 @@
+A6NGA9
+P62736
+Q15154
+A8MWY0-3
+Q01638
+P16415
+B9EGE7
+Q9Y6D9
+C9K044
+P02768-1
+Q5VVM6
+E1P506
+Q00536-3
+P47712
+Q6V1P9-4
+Q12866
+F5H5P6
+J3KNF5
+F8W0W6
+Q96JB1
+H0YCF9
+H0YET9
+Q9BTM1-2
+P00738
+H3BS82
+Q6P158
+Q13045
+J3KTH2
+Q96NJ3-2
+M0QY22
+Q6ZN19-3
+O43603
+O75132
+O94889-2
+P05141
+P0C6C1
+P0C841
+P62684
+Q01668-2
+Q13315
+Q14160-3
+Q5JTZ9
+Q5T742
+Q5T7X1
+Q5TAL2
+Q9HD45
+Q6PI48
+Q7Z5H3-3
+Q86Z20
+Q8IW50
+Q8IYF3-2
+Q8ND76
+Q8WXI9
+Q96DT5
+Q96TA1-2
+Q9BVL4
+Q9H0L4
+Q9UJY1
+S4R332
b
diff -r 000000000000 -r ba070efb6f78 test-data/proteinGroups_csv_filtered_without_contaminants.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteinGroups_csv_filtered_without_contaminants.tabular Tue Jul 03 17:34:13 2018 -0400
b
@@ -0,0 +1,58 @@
+A6NGA9
+P62736
+Q15154
+A8MWY0-3
+Q01638
+P16415
+B9EGE7
+Q9Y6D9
+C9K044
+Q5VVM6
+E1P506
+Q00536-3
+P47712
+Q6V1P9-4
+Q12866
+F5H5P6
+J3KNF5
+F8W0W6
+Q96JB1
+H0YCF9
+H0YET9
+Q9BTM1-2
+P00738
+H3BS82
+Q6P158
+Q13045
+J3KTH2
+Q96NJ3-2
+M0QY22
+Q6ZN19-3
+O43603
+O75132
+O94889-2
+P05141
+P0C6C1
+P0C841
+P62684
+Q01668-2
+Q13315
+Q14160-3
+Q5JTZ9
+Q5T742
+Q5T7X1
+Q5TAL2
+Q9HD45
+Q6PI48
+Q7Z5H3-3
+Q86Z20
+Q8IW50
+Q8IYF3-2
+Q8ND76
+Q8WXI9
+Q96DT5
+Q96TA1-2
+Q9BVL4
+Q9H0L4
+Q9UJY1
+S4R332
b
diff -r 000000000000 -r ba070efb6f78 write_util.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/write_util.R Tue Jul 03 17:34:13 2018 -0400
b
@@ -0,0 +1,5 @@
+# Writes `table` to `filename` as tab-separated text with a header line and
+# without row names.
+#
+# filename: path of the file to create (overwritten if it exists)
+# table:    data frame (or matrix) to write
+writeout <- function (filename, table) {
+    output_handler <- file(filename, "w")
+    # registered right after opening so the connection is closed even when
+    # write.table stops with an error
+    on.exit(close(output_handler))
+    write.table(table, file=output_handler, sep="\t", row.names=FALSE);
+    invisible(NULL)
+}