Repository 'difference_finder'
hg clone https://toolshed.g2.bx.psu.edu/repos/jfb/difference_finder

Changeset 0:23eea82f5192 (2019-01-16)
Next changeset 1:110a74dff8e1 (2019-01-16)
Commit message:
Uploaded
added:
all stuff/Commonality and Difference finderMADE 7 TO 7 1-15-2019.R
all stuff/Commonality and Difference finderMADE 7 TO 7.R
all stuff/Difference finderMADE 7 TO 7 1-15-2019.R
all stuff/difference finder for 2 overlaps proper names 7-7_1-15-2019.R
all stuff/differenceFinder.xml
all stuff/test-data/1R1 SBF.csv
all stuff/test-data/1R1 substrates.csv
all stuff/test-data/1R2 SBf.csv
all stuff/test-data/1R2 subs.csv
all stuff/test-data/R1 SBF.csv
all stuff/test-data/R1 substrates.csv
all stuff/test-data/R2 SBf.csv
all stuff/test-data/R2 subs.csv
all stuff/test-data/S1.csv
all stuff/test-data/S2.csv
all stuff/test-data/SBF1.csv
all stuff/test-data/SBF2.csv
b
diff -r 000000000000 -r 23eea82f5192 all stuff/Commonality and Difference finderMADE 7 TO 7 1-15-2019.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/Commonality and Difference finderMADE 7 TO 7 1-15-2019.R Wed Jan 16 13:55:22 2019 -0500
[
b'@@ -0,0 +1,1281 @@\n+#I should make an SOP for this.  Problems we encountered: no x in the xY motif, and the kilodemon\r\n+#the output files have both Y and xY, they shouldn\'t why is that happening?  make it not happen\r\n+#make sure that accession numbers stay locked to each motif, somehow\r\n+#output should look just like the KALIP input\r\n+\r\n+#ff you want ONLY FULL MOTIFS, put "YES" here, please use all caps\r\n+FullMotifsOnly_questionmark<-"NO"\r\n+#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps\r\n+TruncatedMotifsOnly_questionmark<-"NO"\r\n+#if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps)\r\n+Are_You_Looking_For_Commonality<-"NO"\r\n+\r\n+\r\n+#put the names of your input files here\r\n+FirstSubstrateSet<- read.csv("Galaxy63-BTK_PLUS-R1_Substrates.csv", stringsAsFactors=FALSE)\r\n+Firstsubbackfreq<- read.csv("Galaxy64-BTK_PLUS-R1_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+SecondSubstrateSet<- read.csv("Galaxy65-BTK_PLUS_R2_Substrates.csv", stringsAsFactors=FALSE)\r\n+Secondsubbackfreq<- read.csv("Galaxy66-BTK_PLUS_R2_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+ThirdSubstrateSet<- read.csv("Galaxy69-BTK_PLUS_R3_Substrates.csv", stringsAsFactors=FALSE)\r\n+Thirdsubbackfreq<- read.csv("Galaxy70-BTK_PLUS_R3_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+#then put the names of your output files here\r\n+Shared_motifs_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-substrates.csv"\r\n+Shared_subbackfreq_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-SubBackFreq.csv"\r\n+\r\n+# Shared_motifs_table<-"Shared motifs 7-27-17.csv"\r\n+# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv"\r\n+\r\n+First_unshared_motifs_table<-"R1 substrates.csv"\r\n+First_unshared_subbackfreq<-"R1 SBF.csv"\r\n+\r\n+Second_unshared_motifs_table<-"R2 
subs.csv"\r\n+Second_unshared_subbackfreq<-"R2 SBf.csv"\r\n+\r\n+Third_unshared_motifs_table<-"R3 subs.csv"\r\n+Third_unshared_subbackfreq<-"R3 SBF.csv"\r\n+\r\n+#final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles.  I think I\'ll poke around\r\n+#other languages to see if any of them can do it.\r\n+####################################################################################################################################\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+FirstxY<-rep("xY",times=nrow(FirstSubstrateSet))\r\n+FirstSubstrateSet[,11]<-FirstxY\r\n+\r\n+SecondxY<-rep("xY",times=nrow(SecondSubstrateSet))\r\n+SecondSubstrateSet[,11]<-SecondxY\r\n+\r\n+ThirdxY<-rep("xY",times=nrow(ThirdSubstrateSet))\r\n+ThirdSubstrateSet[,11]<-ThirdxY\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+# better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two \r\n+# separate proteins thus two separate accession numbers?\r\n+# It should actually output the shared motif and BOTH accession numbers.  Right now it does not, it only maps out the second\r\n+# accession number.  
So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+\r\n+#Create the motif sets, deciding w'..b'ly after I\'ve unduped them\r\n+#   D835YnondupeAccessionNumbers<-D835YnondupeAccessionNumbers[!duplicated(D835YnondupeAccessionNumbers)]\r\n+#   \r\n+#   columnalheader<-c(rep(NA,35))\r\n+#   D835YFinalMatrix<-matrix(data =columnalheader,nrow = 1)\r\n+#   \r\n+#   for (k in 1:length(D835YnondupeAccessionNumbers)) {\r\n+#     #I don\'t remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is\r\n+#     #destroyed immediately after use\r\n+#     for (m in 1:ncol(Secondsubbackfreq)) {\r\n+#       AN <- as.character(Secondsubbackfreq[1, m])\r\n+#       if (grepl(pattern = AN,\r\n+#                 x = D835YnondupeAccessionNumbers[k],\r\n+#                 fixed = TRUE) == TRUE) {\r\n+#         outputmatrix <- as.character(Secondsubbackfreq[, m])\r\n+#         outputmatrix <- matrix(outputmatrix, nrow = 1)\r\n+#         #with that accession number, find a match in the subbackfreq file and save it here\r\n+#         D835YFinalMatrix<-rbind(D835YFinalMatrix,outputmatrix)\r\n+#       }\r\n+#     }\r\n+#   }\r\n+#   \r\n+#   \r\n+# \r\n+#   # FinalFTLmotifs<-FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)]\r\n+#   # FinalFTLAccessionNumbers<-FTLnondupeAccessionNumbers[!duplicated(FTLnondupeAccessionNumbers)]\r\n+#   # 
necessaryNAs<-rep(NA,times=(length(FinalFTLmotifs)-length(FinalFTLAccessionNumbers)))\r\n+#   # FinalFTLAccessionNumbers<-c(FinalFTLAccessionNumbers,necessaryNAs)\r\n+#   # TRUEFTLoutputmatrix<-cbind(FinalFTLmotifs,FinalFTLAccessionNumbers)\r\n+#   # TRUEFTLoutputmatrix\r\n+# \r\n+#   write.table(x=FTLwtmotifsFINAL,\r\n+#               file=First_unshared_motifs_table,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1]))\r\n+#   columnalheader<-matrix(columnalheader,nrow = 1)\r\n+#   write.table(x=columnalheader,\r\n+#               file=First_unshared_subbackfreq,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   write.table(x=FTLFinalMatrix,\r\n+#               file=First_unshared_subbackfreq,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+# \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   write.table(x=D835YmotifsFINAL,\r\n+#               file=Second_unshared_motifs_table,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1]))\r\n+#   columnalheader<-matrix(columnalheader,nrow = 1)\r\n+#   write.table(x=columnalheader,\r\n+#               file=Second_unshared_subbackfreq,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   write.table(x=D835YFinalMatrix,\r\n+#               file=Second_unshared_subbackfreq,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   \r\n+#   \r\n+#   
\r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   write.table(x=ITDmotifsFINAL,\r\n+#               file=Third_unshared_motifs_table,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1]))\r\n+#   columnalheader<-matrix(columnalheader,nrow = 1)\r\n+#   write.table(x=columnalheader,\r\n+#               file=Third_unshared_subbackfreq,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   write.table(x=ITDFinalMatrix,\r\n+#               file=Third_unshared_subbackfreq,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+# }\r\n'
b
diff -r 000000000000 -r 23eea82f5192 all stuff/Commonality and Difference finderMADE 7 TO 7.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/Commonality and Difference finderMADE 7 TO 7.R Wed Jan 16 13:55:22 2019 -0500
[
b'@@ -0,0 +1,1281 @@\n+#I should make an SOP for this.  Problems we encountered: no x in the xY motif, and the kilodemon\r\n+#the output files have both Y and xY, they shouldn\'t why is that happening?  make it not happen\r\n+#make sure that accession numbers stay locked to each motif, somehow\r\n+#output should look just like the KALIP input\r\n+\r\n+#ff you want ONLY FULL MOTIFS, put "YES" here, please use all caps\r\n+FullMotifsOnly_questionmark<-"NO"\r\n+#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps\r\n+TruncatedMotifsOnly_questionmark<-"NO"\r\n+#if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps)\r\n+Are_You_Looking_For_Commonality<-"YES"\r\n+\r\n+\r\n+#put the names of your input files here\r\n+FirstSubstrateSet<- read.csv("Galaxy63-BTK_PLUS-R1_Substrates.csv", stringsAsFactors=FALSE)\r\n+Firstsubbackfreq<- read.csv("Galaxy64-BTK_PLUS-R1_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+SecondSubstrateSet<- read.csv("Galaxy65-BTK_PLUS_R2_Substrates.csv", stringsAsFactors=FALSE)\r\n+Secondsubbackfreq<- read.csv("Galaxy66-BTK_PLUS_R2_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+ThirdSubstrateSet<- read.csv("Galaxy69-BTK_PLUS_R3_Substrates.csv", stringsAsFactors=FALSE)\r\n+Thirdsubbackfreq<- read.csv("Galaxy70-BTK_PLUS_R3_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+#then put the names of your output files here\r\n+Shared_motifs_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-substrates.csv"\r\n+Shared_subbackfreq_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-SubBackFreq.csv"\r\n+\r\n+# Shared_motifs_table<-"Shared motifs 7-27-17.csv"\r\n+# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv"\r\n+\r\n+First_unshared_motifs_table<-"R1 substrates.csv"\r\n+First_unshared_subbackfreq<-"R1 SBF.csv"\r\n+\r\n+Second_unshared_motifs_table<-"R2 
subs.csv"\r\n+Second_unshared_subbackfreq<-"R2 SBf.csv"\r\n+\r\n+Third_unshared_motifs_table<-"R3 subs.csv"\r\n+Third_unshared_subbackfreq<-"R3 SBF.csv"\r\n+\r\n+#final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles.  I think I\'ll poke around\r\n+#other languages to see if any of them can do it.\r\n+####################################################################################################################################\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+FirstxY<-rep("xY",times=nrow(FirstSubstrateSet))\r\n+FirstSubstrateSet[,11]<-FirstxY\r\n+\r\n+SecondxY<-rep("xY",times=nrow(SecondSubstrateSet))\r\n+SecondSubstrateSet[,11]<-SecondxY\r\n+\r\n+ThirdxY<-rep("xY",times=nrow(ThirdSubstrateSet))\r\n+ThirdSubstrateSet[,11]<-ThirdxY\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+# better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two \r\n+# separate proteins thus two separate accession numbers?\r\n+# It should actually output the shared motif and BOTH accession numbers.  Right now it does not, it only maps out the second\r\n+# accession number.  
So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+\r\n+#Create the motif sets, deciding '..b'ly after I\'ve unduped them\r\n+#   D835YnondupeAccessionNumbers<-D835YnondupeAccessionNumbers[!duplicated(D835YnondupeAccessionNumbers)]\r\n+#   \r\n+#   columnalheader<-c(rep(NA,35))\r\n+#   D835YFinalMatrix<-matrix(data =columnalheader,nrow = 1)\r\n+#   \r\n+#   for (k in 1:length(D835YnondupeAccessionNumbers)) {\r\n+#     #I don\'t remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is\r\n+#     #destroyed immediately after use\r\n+#     for (m in 1:ncol(Secondsubbackfreq)) {\r\n+#       AN <- as.character(Secondsubbackfreq[1, m])\r\n+#       if (grepl(pattern = AN,\r\n+#                 x = D835YnondupeAccessionNumbers[k],\r\n+#                 fixed = TRUE) == TRUE) {\r\n+#         outputmatrix <- as.character(Secondsubbackfreq[, m])\r\n+#         outputmatrix <- matrix(outputmatrix, nrow = 1)\r\n+#         #with that accession number, find a match in the subbackfreq file and save it here\r\n+#         D835YFinalMatrix<-rbind(D835YFinalMatrix,outputmatrix)\r\n+#       }\r\n+#     }\r\n+#   }\r\n+#   \r\n+#   \r\n+# \r\n+#   # FinalFTLmotifs<-FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)]\r\n+#   # FinalFTLAccessionNumbers<-FTLnondupeAccessionNumbers[!duplicated(FTLnondupeAccessionNumbers)]\r\n+#   # 
necessaryNAs<-rep(NA,times=(length(FinalFTLmotifs)-length(FinalFTLAccessionNumbers)))\r\n+#   # FinalFTLAccessionNumbers<-c(FinalFTLAccessionNumbers,necessaryNAs)\r\n+#   # TRUEFTLoutputmatrix<-cbind(FinalFTLmotifs,FinalFTLAccessionNumbers)\r\n+#   # TRUEFTLoutputmatrix\r\n+# \r\n+#   write.table(x=FTLwtmotifsFINAL,\r\n+#               file=First_unshared_motifs_table,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1]))\r\n+#   columnalheader<-matrix(columnalheader,nrow = 1)\r\n+#   write.table(x=columnalheader,\r\n+#               file=First_unshared_subbackfreq,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   write.table(x=FTLFinalMatrix,\r\n+#               file=First_unshared_subbackfreq,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+# \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   write.table(x=D835YmotifsFINAL,\r\n+#               file=Second_unshared_motifs_table,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1]))\r\n+#   columnalheader<-matrix(columnalheader,nrow = 1)\r\n+#   write.table(x=columnalheader,\r\n+#               file=Second_unshared_subbackfreq,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   write.table(x=D835YFinalMatrix,\r\n+#               file=Second_unshared_subbackfreq,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   \r\n+#   \r\n+#   
\r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   write.table(x=ITDmotifsFINAL,\r\n+#               file=Third_unshared_motifs_table,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   columnalheader<-c("Accession Numbers",as.character(Thirdsubbackfreq[1:35,1]))\r\n+#   columnalheader<-matrix(columnalheader,nrow = 1)\r\n+#   write.table(x=columnalheader,\r\n+#               file=Third_unshared_subbackfreq,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   write.table(x=ITDFinalMatrix,\r\n+#               file=Third_unshared_subbackfreq,\r\n+#               quote=FALSE, sep=",",\r\n+#               row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+#   \r\n+# }\r\n'
b
diff -r 000000000000 -r 23eea82f5192 all stuff/Difference finderMADE 7 TO 7 1-15-2019.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/Difference finderMADE 7 TO 7 1-15-2019.R Wed Jan 16 13:55:22 2019 -0500
[
b'@@ -0,0 +1,348 @@\n+#I should make an SOP for this.  Problems we encountered: no x in the xY motif, and the kilodemon\r\n+#the output files have both Y and xY, they shouldn\'t why is that happening?  make it not happen\r\n+#make sure that accession numbers stay locked to each motif, somehow\r\n+#output should look just like the KALIP input\r\n+\r\n+#ff you want ONLY FULL MOTIFS, put "YES" here, please use all caps\r\n+FullMotifsOnly_questionmark<-"NO"\r\n+#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps\r\n+TruncatedMotifsOnly_questionmark<-"NO"\r\n+#if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps)\r\n+Are_You_Looking_For_Commonality<-"NO"\r\n+\r\n+\r\n+#put the names of your input files here\r\n+FirstSubstrateSet<- read.csv("S1.csv", stringsAsFactors=FALSE)\r\n+Firstsubbackfreq<- read.csv("SBF1.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+SecondSubstrateSet<- read.csv("S2.csv", stringsAsFactors=FALSE)\r\n+Secondsubbackfreq<- read.csv("SBF2.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+# ThirdSubstrateSet<- read.csv("Galaxy69-BTK_PLUS_R3_Substrates.csv", stringsAsFactors=FALSE)\r\n+# Thirdsubbackfreq<- read.csv("Galaxy70-BTK_PLUS_R3_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+#then put the names of your output files here\r\n+# Shared_motifs_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-substrates.csv"\r\n+# Shared_subbackfreq_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-SubBackFreq.csv"\r\n+\r\n+# Shared_motifs_table<-"Shared motifs 7-27-17.csv"\r\n+# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv"\r\n+\r\n+First_unshared_motifs_table<-"1R1 substrates.csv"\r\n+First_unshared_subbackfreq<-"1R1 SBF.csv"\r\n+\r\n+Second_unshared_motifs_table<-"1R2 subs.csv"\r\n+Second_unshared_subbackfreq<-"1R2 SBf.csv"\r\n+\r\n+# Third_unshared_motifs_table<-"R3 subs.csv"\r\n+# 
Third_unshared_subbackfreq<-"R3 SBF.csv"\r\n+\r\n+#final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles.  I think I\'ll poke around\r\n+#other languages to see if any of them can do it.\r\n+####################################################################################################################################\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+# grepl(pattern = "S", x=asdf, ignore.case = TRUE)\r\n+\r\n+FirstCentralLetters<-FirstSubstrateSet[,11]\r\n+SecondCentralLetters<-SecondSubstrateSet[,11]\r\n+\r\n+FirstEsses<-sapply(FirstCentralLetters, grepl, pattern="S", ignore.case=TRUE)\r\n+FirstTees<-sapply(FirstCentralLetters, grepl, pattern="T", ignore.case=TRUE)\r\n+FirstWys<-sapply(FirstCentralLetters, grepl, pattern="Y", ignore.case=TRUE)\r\n+\r\n+SecondEsses<-sapply(FirstCentralLetters, grepl, pattern="S", ignore.case=TRUE)\r\n+SecondTees<-sapply(FirstCentralLetters, grepl, pattern="T", ignore.case=TRUE)\r\n+SecondWys<-sapply(FirstCentralLetters, grepl, pattern="Y", ignore.case=TRUE)\r\n+\r\n+FirstCentralLetters<-replace(FirstCentralLetters,FirstEsses,"xS")\r\n+FirstCentralLetters<-replace(FirstCentralLetters,FirstTees,"xT")\r\n+FirstCentralLetters<-replace(FirstCentralLetters,FirstWys,"xY")\r\n+\r\n+SecondCentralLetters<-replace(SecondCentralLetters,FirstEsses,"xS")\r\n+SecondCentralLetters<-replace(SecondCentralLetters,FirstTees,"xT")\r\n+SecondCentralLetters<-replace(SecondCentralLetters,FirstWys,"xY")\r\n+\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+# better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two \r\n+# separate proteins thus two separate accession numbers?\r\n+# It should actually output the shared 
motif and BOTH accession numbers.  Right now it does not, it only maps out the second\r\n+# accession number.  So that needs to be fixed BUT you need to keep the commonality b'..b'names(FTLoutputmatrix2)<-NULL\r\n+  rownames(FTLoutputmatrix2)<-NULL\r\n+  colnames(FLTheader)<-NULL\r\n+  rownames(FLTheader)<-NULL\r\n+  \r\n+  \r\n+  FirstCentralLettersAGAIN<-FTLoutputmatrix2[,11]\r\n+  \r\n+  FirstEsses<-sapply(FirstCentralLettersAGAIN, grepl, pattern="S", ignore.case=TRUE)\r\n+  FirstTees<-sapply(FirstCentralLettersAGAIN, grepl, pattern="T", ignore.case=TRUE)\r\n+  FirstWys<-sapply(FirstCentralLettersAGAIN, grepl, pattern="Y", ignore.case=TRUE)\r\n+  \r\n+  FirstCentralLettersAGAIN<-replace(FirstCentralLettersAGAIN,FirstEsses,"xS")\r\n+  FirstCentralLettersAGAIN<-replace(FirstCentralLettersAGAIN,FirstTees,"xT")\r\n+  FirstCentralLettersAGAIN<-replace(FirstCentralLettersAGAIN,FirstWys,"xY")\r\n+  \r\n+  FirstCentralLettersAGAIN->FTLoutputmatrix2[,11]\r\n+  \r\n+  FTLoutputmatrix2<-rbind(FLTheader,FTLoutputmatrix2)\r\n+  \r\n+  write.table(x=FTLoutputmatrix2,\r\n+              file=First_unshared_motifs_table,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  columnalheader<-c("Accession Numbers",as.character(Firstsubbackfreq[1:35,1]))\r\n+  columnalheader<-matrix(columnalheader,nrow = 1)\r\n+  write.table(x=columnalheader,\r\n+              file=First_unshared_subbackfreq,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  write.table(x=FTLFinalMatrix,\r\n+              file=First_unshared_subbackfreq,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  ############################################################################################################\r\n+  \r\n+  
D835Youtputmatrix<-matrix(data=c(D835YmotifsFINAL,names(D835YmotifsFINAL)),ncol = 2)\r\n+  \r\n+  D835Yheader<-c("Substrate","Species","Reference","-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7","Phosphite")\r\n+  # D835Yheader<-unlist(D835Yheader)\r\n+  lefthandD835<-matrix(data = rep(NA,times=2*nrow(D835Youtputmatrix)),nrow=nrow(D835Youtputmatrix))\r\n+  righthandD835<-matrix(data = rep(NA,times=1*nrow(D835Youtputmatrix)),nrow=nrow(D835Youtputmatrix))\r\n+  D835Yaset<-D835Youtputmatrix[,2]\r\n+  D835meat<-sapply(D835Youtputmatrix[,1], strsplit, "")\r\n+  D835meat<-sapply(D835meat, unlist)\r\n+  colnames(D835meat)<-NULL\r\n+  D835meat<-t(D835meat)\r\n+  \r\n+  D835Youtputmatrix2<-cbind(lefthandD835,D835Yaset,D835meat,righthandD835)\r\n+  colnames(D835Youtputmatrix2)<-NULL\r\n+  rownames(D835Youtputmatrix2)<-NULL\r\n+  colnames(D835Yheader)<-NULL\r\n+  rownames(D835Yheader)<-NULL\r\n+  \r\n+  \r\n+  SecondCentralLettersAGAIN<-D835Youtputmatrix2[,11]\r\n+  \r\n+  SecondEsses<-sapply(SecondCentralLettersAGAIN, grepl, pattern="S", ignore.case=TRUE)\r\n+  SecondTees<-sapply(SecondCentralLettersAGAIN, grepl, pattern="T", ignore.case=TRUE)\r\n+  SecondWys<-sapply(SecondCentralLettersAGAIN, grepl, pattern="Y", ignore.case=TRUE)\r\n+  \r\n+  SecondCentralLettersAGAIN<-replace(SecondCentralLettersAGAIN,FirstEsses,"xS")\r\n+  SecondCentralLettersAGAIN<-replace(SecondCentralLettersAGAIN,FirstTees,"xT")\r\n+  SecondCentralLettersAGAIN<-replace(SecondCentralLettersAGAIN,FirstWys,"xY")\r\n+  \r\n+  SecondCentralLettersAGAIN->D835Youtputmatrix2[,11]\r\n+  \r\n+  D835Youtputmatrix2<-rbind(D835Yheader,D835Youtputmatrix2)\r\n+  \r\n+  write.table(x=D835Youtputmatrix2,\r\n+              file=Second_unshared_motifs_table,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  columnalheader<-c("Accession Numbers",as.character(Firstsubbackfreq[1:35,1]))\r\n+  
columnalheader<-matrix(columnalheader,nrow = 1)\r\n+  write.table(x=columnalheader,\r\n+              file=Second_unshared_subbackfreq,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  write.table(x=D835YFinalMatrix,\r\n+              file=Second_unshared_subbackfreq,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+}\r\n+\r\n'
b
diff -r 000000000000 -r 23eea82f5192 all stuff/difference finder for 2 overlaps proper names 7-7_1-15-2019.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/difference finder for 2 overlaps proper names 7-7_1-15-2019.R Wed Jan 16 13:55:22 2019 -0500
[
b'@@ -0,0 +1,547 @@\n+#Difference finder for only 2 \r\n+\r\n+#ff you want ONLY FULL MOTIFS, put "YES" here, please use all caps\r\n+FullMotifsOnly_questionmark<-"NO"\r\n+#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps\r\n+TruncatedMotifsOnly_questionmark<-"NO"\r\n+\r\n+FirstSubstrateSet<- read.csv("170922-BTK-MINUS-COMBO FILES_Substrates.csv", stringsAsFactors=FALSE)\r\n+Firstsubbackfreq<- read.csv("170922-BTK-MINUS-COMBO FILES_Substrates.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+SecondSubstrateSet<- read.csv("170922-btk-rep OVLP-plus_Substrates.csv", stringsAsFactors=FALSE)\r\n+Secondsubbackfreq<- read.csv("170922-btk-rep OVLP-plus_SubBackFreq.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+First_unshared_motifs_table<-"170922-BTK-MINUS-COMBO FILES_Substrates-unique.csv"\r\n+First_unshared_subbackfreq<-"170922-BTK-MINUS-COMBO FILES_SubBackFreq-unique.csv"\r\n+\r\n+Second_unshared_motifs_table<-"170922-btk-rep OVLP-plus_Substrates-unique.csv"\r\n+Second_unshared_subbackfreq<-"170922-btk-rep OVLP-plus_SubBackFreq-unique.csv"\r\n+\r\n+\r\n+LeftOfYLetters<-7\r\n+RightOfYLetters<-7\r\n+\r\n+if (FullMotifsOnly_questionmark=="YES"){\r\n+  FirstMotifs=rep(NA,times=nrow(FirstSubstrateSet))\r\n+  FirstAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))\r\n+  leftspaces<-c()\r\n+  rightspaces<-c()\r\n+  for (i in 1:nrow(FirstSubstrateSet)){\r\n+    FirstLetters<-FirstSubstrateSet[i,7:15]\r\n+    FirstLetters<-FirstLetters[FirstLetters !="XXXXX"]\r\n+    FirstLetters<-paste(FirstLetters, sep="", collapse="")\r\n+    \r\n+    \r\n+    YYYmotif <- unlist(strsplit(FirstLetters, split = ""))\r\n+    YYYposition <- match(x = "x", table = YYYmotif)\r\n+    #position itself tells me how much is to the left of that X by what it\'s number is.  
x at position 4 tells me that there are\r\n+    #just 3 letters to the left of x\r\n+    \r\n+    YYYLettersToTheLeft <- YYYposition - 1\r\n+    #how many letters to the right SHOULD just be length(motif)-position-1 if it\'s 5 long and x is at 3 then Y is at 4 and there is\r\n+    #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1\r\n+    YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1\r\n+    #then sanity check, we\'re currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the\r\n+    #variable the user puts in is\r\n+    \r\n+    if (YYYLettersToTheLeft > 3 && YYYLettersToTheRight > 3) {\r\n+      motif<-YYYmotif\r\n+      #save that motif, which is the Y and +/- 4 amino acids, including truncation\r\n+      motif<-motif[!motif %in% "x"]\r\n+      motif<-paste(motif, sep="", collapse="")\r\n+      FirstLetters<-motif\r\n+      FirstMotifs[i]<-FirstLetters\r\n+      FirstAccessionNumbers[i]<-FirstSubstrateSet[i,3]\r\n+    }\r\n+    \r\n+  }\r\n+  # FirstMotifs <- FirstMotifs[!is.na(FirstMotifs)]\r\n+  # FirstMotifs<-matrix(FirstMotifs,ncol = 1)\r\n+  # \r\n+  \r\n+  SecondMotifs=rep(NA,times=nrow(FirstSubstrateSet))\r\n+  SecondAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))\r\n+  \r\n+  for (i in 1:nrow(SecondSubstrateSet)){\r\n+    SecondLetters<-SecondSubstrateSet[i,7:15]\r\n+    SecondLetters<-SecondLetters[SecondLetters !="XXXXX"]\r\n+    SecondLetters<-paste(SecondLetters, sep="", collapse="")\r\n+    \r\n+    \r\n+    YYYmotif <- unlist(strsplit(SecondLetters, split = ""))\r\n+    YYYposition <- match(x = "x", table = YYYmotif)\r\n+    #position itself tells me how much is to the left of that X by what it\'s number is.  
x at position 4 tells me that there are\r\n+    #just 3 letters to the left of x\r\n+    \r\n+    YYYLettersToTheLeft <- YYYposition - 1\r\n+    #how many letters to the right SHOULD just be length(motif)-position-1 if it\'s 5 long and x is at 3 then Y is at 4 and there is\r\n+    #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1\r\n+    YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1\r\n+    #then sanity check, we\'re currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the\r\n+    #variable the user puts in is\r\n+    \r\n+    if (YYYLettersToTheLeft'..b'        outputmatrix <- as.character(Thirdsubbackfreq[, m])\r\n+        outputmatrix <- matrix(outputmatrix, nrow = 1)\r\n+        #with that accession number, find a match in the subbackfreq file and save it here\r\n+        ITDFinalMatrix<-rbind(ITDFinalMatrix,outputmatrix)\r\n+      }\r\n+    }\r\n+  }\r\n+  ITDFinalMatrix<-ITDFinalMatrix[!duplicated(ITDFinalMatrix),]\r\n+}\r\n+\r\n+columnalheader<-c(rep(NA,36))\r\n+SecondFinalMatrix<-matrix(data =columnalheader,nrow = 1)\r\n+\r\n+for (k in 1:length(SecondMotifsFINAL)) {\r\n+  #I don\'t remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is\r\n+  #destroyed immediately after use\r\n+  for (m in 1:ncol(Secondsubbackfreq)) {\r\n+    AN <- as.character(Secondsubbackfreq[1, m])\r\n+    if (grepl(pattern = AN,\r\n+              x = names(SecondMotifsFINAL[k]),\r\n+              fixed = TRUE) == TRUE) {\r\n+      outputmatrix <- as.character(Secondsubbackfreq[, m])\r\n+      outputmatrix <- matrix(outputmatrix, nrow = 1)\r\n+      #with that accession number, find a match in the subbackfreq file and save it here\r\n+      SecondFinalMatrix<-rbind(SecondFinalMatrix,outputmatrix)\r\n+    }\r\n+  
}\r\n+}\r\n+SecondFinalMatrix<-SecondFinalMatrix[!duplicated(SecondFinalMatrix),]\r\n+FTLoutputmatrix<-matrix(data=c(FirstMotifsFINAL,names(FirstMotifsFINAL)),ncol = 2)\r\n+\r\n+\r\n+write.table(x=FTLoutputmatrix,\r\n+            file=First_unshared_motifs_table,\r\n+            quote=FALSE, sep=",",\r\n+            row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+\r\n+columnalheader<-c(as.character(Firstsubbackfreq[1:36,1]))\r\n+columnalheader<-matrix(columnalheader,nrow = 1)\r\n+write.table(x=columnalheader,\r\n+            file=First_unshared_subbackfreq,\r\n+            quote=FALSE, sep=",",\r\n+            row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+FirstFinalMatrix<-FirstFinalMatrix[2:nrow(FirstFinalMatrix),]\r\n+write.table(x=FirstFinalMatrix,\r\n+            file=First_unshared_subbackfreq,\r\n+            quote=FALSE, sep=",",\r\n+            row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+\r\n+############################################################################################################\r\n+\r\n+D835Youtputmatrix<-matrix(data=c(SecondMotifsFINAL,names(SecondMotifsFINAL)),ncol = 2)\r\n+\r\n+write.table(x=D835Youtputmatrix,\r\n+            file=Second_unshared_motifs_table,\r\n+            quote=FALSE, sep=",",\r\n+            row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+\r\n+columnalheader<-c(as.character(Firstsubbackfreq[1:36,1]))\r\n+columnalheader<-matrix(columnalheader,nrow = 1)\r\n+write.table(x=columnalheader,\r\n+            file=Second_unshared_subbackfreq,\r\n+            quote=FALSE, sep=",",\r\n+            row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+SecondFinalMatrix<-SecondFinalMatrix[2:nrow(SecondFinalMatrix),]  \r\n+write.table(x=SecondFinalMatrix,\r\n+            file=Second_unshared_subbackfreq,\r\n+            quote=FALSE, sep=",",\r\n+            row.names=FALSE,col.names = FALSE, na="", 
append=TRUE)\r\n+\r\n+############################################################################################################\r\n+\r\n+# ITDoutputmatrix<-matrix(data = c(ITDmotifsFINAL,names(ITDmotifsFINAL)),ncol = 2)\r\n+# \r\n+# write.table(x=ITDoutputmatrix,\r\n+#             file=Third_unshared_motifs_table,\r\n+#             quote=FALSE, sep=",",\r\n+#             row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+# \r\n+# columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1]))\r\n+# columnalheader<-matrix(columnalheader,nrow = 1)\r\n+# write.table(x=columnalheader,\r\n+#             file=Third_unshared_subbackfreq,\r\n+#             quote=FALSE, sep=",",\r\n+#             row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+# ITDFinalMatrix<-ITDFinalMatrix[2:nrow(ITDFinalMatrix),]\r\n+# write.table(x=ITDFinalMatrix,\r\n+#             file=Third_unshared_subbackfreq,\r\n+#             quote=FALSE, sep=",",\r\n+#             row.names=FALSE,col.names = FALSE, na="", append=TRUE)\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 23eea82f5192 all stuff/differenceFinder.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/differenceFinder.xml Wed Jan 16 13:55:22 2019 -0500
[
@@ -0,0 +1,49 @@
+<tool id="commonality_r" name="difference_finder" version="1.0.0">
+    <!-- Galaxy wrapper around the difference finder R script. It takes two substrate
+         sets, each with its substrate background frequency table, and exposes four
+         CSV outputs (unshared substrates and their background frequencies, per set). -->
+    <description>finds differences</description>
+    <requirements>
+       <requirement type="package">R</requirement>
+    </requirements>
+    <!-- The R script is invoked without arguments. The four inputs are symlinked into
+         the job working directory under fixed names (S1.csv, SBF1.csv, S2.csv, SBF2.csv);
+         presumably these are the file names the script reads (script not visible here,
+         TODO confirm). Results are collected by the from_work_dir names in <outputs>. -->
+    <command><![CDATA[
+ ln -s '$input1' S1.csv &&
+ ln -s '$input2' SBF1.csv &&
+ ln -s '$input3' S2.csv &&
+ ln -s '$input4' SBF2.csv &&
+
+ Rscript '$__tool_directory__/Difference finderMADE 7 TO 7 1-15-2019.R'
+    ]]></command>
+    <inputs>
+ <param format="csv" name="input1" type="data" label="First Substrate Set"/>
+        <param format="csv" name="input2" type="data" label="First Substrate Background Frequency"/>
+ <param format="csv" name="input3" type="data" label="Second Substrate Set"/>
+ <param format="csv" name="input4" type="data" label="Second Substrate Background Frequency"/>
+ <!-- outGroup is only used to build the output dataset labels below. -->
+ <param name="outGroup" type="text" value="kinase" label="Kinase Name"/>
+    </inputs>
+    <outputs>
+        <data format="csv" name="substrates1" from_work_dir="1R1 substrates.csv" label="${outGroup}_First_Unshared_Substrates.csv"/>
+ <data format="csv" name="SBF1" from_work_dir="1R1 SBF.csv" label="${outGroup}_First_Unshared_SBF.csv"/>
+
+  <data format="csv" name="substrates2" from_work_dir="1R2 subs.csv" label="${outGroup}_Second_Unshared_Substrates.csv"/>
+ <data format="csv" name="SBF2" from_work_dir="1R2 SBf.csv" label="${outGroup}_Second_Unshared_SubstratesSBF.csv"/>
+    </outputs>
+    <tests>
+        <!-- Test parameter and output names must match the names declared in <inputs>
+             and <outputs>; the previous test referenced names this tool never declares.
+             NOTE(review): the fixture mapping below is inferred from the test-data file
+             names shipped with this tool (S1/SBF1/S2/SBF2 as inputs, the "1R1 ..." and
+             "1R2 ..." files as expected outputs). Confirm the expected files were
+             generated from these inputs before relying on this test. -->
+        <test>
+            <param name="input1" ftype="csv" value="S1.csv"/>
+            <param name="input2" ftype="csv" value="SBF1.csv"/>
+            <param name="input3" ftype="csv" value="S2.csv"/>
+            <param name="input4" ftype="csv" value="SBF2.csv"/>
+            <param name="outGroup" value="kinase"/>
+            <output name="substrates1" file="1R1 substrates.csv"/>
+            <output name="SBF1" file="1R1 SBF.csv"/>
+            <output name="substrates2" file="1R2 subs.csv"/>
+            <output name="SBF2" file="1R2 SBf.csv"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+ This tool takes two Kinamine outputs (substrates and substrate background frequency files) and finds the symmetrical differences between those sets
+
+ Let us call the two INPUT substrate files 'Substrates A' and 'Substrates B'.  Let us call the two OUTPUT substrate files 'Substrates Y' and 'Substrates Z'.  If we treat each file as a SET, then...
+
+ 'Substrates Y' consists of all 'Substrates A' that are not in 'Substrates B'.  'Substrates Z' consists of all 'Substrates B' that are not in 'Substrates A'.  
+
+ The associated Substrate Background Frequency files for each consists of all the proteins associated with that substrate file.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1074/mcp.RA118.001111</citation>
+    </citations>
+</tool>
+
b
diff -r 000000000000 -r 23eea82f5192 all stuff/test-data/1R1 SBF.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/test-data/1R1 SBF.csv Wed Jan 16 13:55:22 2019 -0500
b
b'@@ -0,0 +1,88 @@\n+Accession Numbers,Amino Acids,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,Properties,Hydrophobic,Polar,Small,Negative,Postive,Amide,Large Aliphatic,Small Aliphatic,Aromatic,Hydroxy,X,Number of ST,Number of pST\r\n+sp|P19338|NUCL_HUMAN,10.140845070422536,0.14084507042253522,10.28169014084507,14.507042253521126,3.6619718309859155,10.985915492957748,0.14084507042253522,2.3943661971830985,12.676056338028168,3.943661971830986,1.1267605633802817,2.9577464788732395,4.366197183098591,1.971830985915493,3.6619718309859155,5.070422535211268,5.352112676056338,5.211267605633803,0.28169014084507044,1.1267605633802817,,39.014084507042256,46.197183098591545,54.50704225352113,24.788732394366196,16.47887323943662,4.929577464788732,6.338028169014084,26.338028169014088,5.070422535211268,10.422535211267606,,74,13,710\r\n+sp|P09651|ROA1_HUMAN,2.956989247311828,0.5376344086021506,4.838709677419355,4.838709677419355,6.182795698924731,25.0,2.1505376344086025,2.1505376344086025,4.838709677419355,2.1505376344086025,1.6129032258064515,5.64516129032258,2.6881720430107525,3.225806451612903,6.720430107526881,11.29032258064516,3.225806451612903,4.56989247311828,0.2688172043010753,5.10752688172043,,50.537634408602145,32.25806451612903,60.752688172043015,9.67741935483871,13.70967741935484,8.870967741935484,4.301075268817205,32.52688172043011,11.559139784946236,14.516129032258064,,54,6,372\r\n+sp|Q32P51|RA1L2_HUMAN,3.4375000000000004,0.9375,5.625,5.3125,6.875000000000001,19.6875,2.5,2.5,5.9375,2.5,1.875,5.3125,3.125,3.4375000000000004,6.875000000000001,10.0,4.375,5.625,0.3125,3.75,,47.5,35.0,58.125,10.9375,15.3125,8.75,5.0,28.75,10.9375,14.375,,46,4,320\r\n+sp|P22626|ROA2_HUMAN,3.1161473087818696,0.28328611898017,4.815864022662889,6.232294617563739,5.382436260623229,24.362606232294617,1.9830028328611897,2.5495750708215295,5.382436260623229,2.8328611898017,1.9830028328611897,5.382436260623229,4.2492917847025495,2.8328611898017,7.0821529745042495,7.0821529745042495,3.39943342776204,4.5
3257790368272,0.28328611898017,6.232294617563739,,51.55807365439094,33.711048158640224,57.22379603399433,11.048158640226628,14.44759206798867,8.215297450424929,5.38243626062323,32.01133144475921,11.898016997167138,10.48158640226629,,37,9,353\r\n+sp|P60709|ACTB_HUMAN,7.733333333333333,1.6,6.133333333333333,6.933333333333333,3.4666666666666663,7.466666666666668,2.4,7.466666666666668,5.066666666666666,7.199999999999999,4.533333333333333,2.4,5.066666666666666,3.2,4.8,6.666666666666667,6.933333333333333,5.866666666666666,1.0666666666666667,4.0,,50.400000000000006,30.93333333333333,49.86666666666666,13.066666666666666,12.266666666666666,5.6,14.666666666666668,21.066666666666666,8.533333333333333,13.6,,51,15,375\r\n+sp|P63261|ACTG_HUMAN,7.733333333333333,1.6,5.333333333333334,7.733333333333333,3.4666666666666663,7.466666666666668,2.4,7.733333333333333,5.066666666666666,7.199999999999999,4.533333333333333,2.4,5.066666666666666,3.2,4.8,6.666666666666667,6.933333333333333,5.6000000000000005,1.0666666666666667,4.0,,50.400000000000006,30.93333333333333,48.8,13.066666666666666,12.266666666666666,5.6,14.933333333333334,20.8,8.533333333333333,13.6,,51,15,375\r\n+sp|P07195|LDHB_HUMAN,6.287425149700598,1.4970059880239521,5.688622754491018,6.287425149700598,1.4970059880239521,6.88622754491018,2.095808383233533,7.18562874251497,7.784431137724551,10.778443113772456,2.9940119760479043,5.089820359281437,3.293413173652695,3.293413173652695,2.3952095808383236,7.784431137724551,3.8922155688622757,11.377245508982035,1.7964071856287425,2.095808383233533,,52.395209580838326,32.634730538922156,51.796407185628745,11.976047904191617,12.275449101796408,8.383233532934131,17.964071856287426,24.550898203592816,5.389221556886227,11.676646706586826,,39,2,334\r\n+sp|O75746|CMC1_HUMAN,9.587020648967552,1.0324483775811208,4.277286135693215,5.3097345132743365,6.1946902654867255,8.259587020648967,1.9174041297935103,5.752212389380531,5.014749262536873,10.176991150442479,1.4749262536873156,3.687315634218289,5
.014749262536873,4'..b'968,,41.26984126984127,37.3015873015873,50.0,7.936507936507937,24.6031746031746,4.761904761904762,9.523809523809524,23.809523809523807,5.555555555555555,16.666666666666664,,21,2,126\r\n+sp|Q99880|H2B1L_HUMAN,10.317460317460316,0.0,2.380952380952381,5.555555555555555,1.5873015873015872,4.761904761904762,2.380952380952381,4.761904761904762,15.873015873015872,5.555555555555555,2.380952380952381,2.380952380952381,3.968253968253968,2.380952380952381,6.349206349206349,11.904761904761903,6.349206349206349,7.142857142857142,0.0,3.968253968253968,,40.476190476190474,37.3015873015873,49.2063492063492,7.936507936507937,24.6031746031746,4.761904761904762,10.317460317460316,22.22222222222222,5.555555555555555,18.253968253968253,,23,2,126\r\n+sp|P58876|H2B1D_HUMAN,9.523809523809524,0.0,2.380952380952381,5.555555555555555,1.5873015873015872,5.555555555555555,2.380952380952381,4.761904761904762,15.873015873015872,4.761904761904762,2.380952380952381,2.380952380952381,4.761904761904762,2.380952380952381,6.349206349206349,11.11111111111111,7.142857142857142,7.142857142857142,0.0,3.968253968253968,,39.68253968253968,37.3015873015873,50.0,7.936507936507937,24.6031746031746,4.761904761904762,9.523809523809524,22.22222222222222,5.555555555555555,18.253968253968253,,23,2,126\r\n+sp|Q93079|H2B1H_HUMAN,10.317460317460316,0.0,3.1746031746031744,4.761904761904762,1.5873015873015872,5.555555555555555,2.380952380952381,4.761904761904762,15.873015873015872,4.761904761904762,2.380952380952381,2.380952380952381,4.761904761904762,2.380952380952381,6.349206349206349,11.11111111111111,6.349206349206349,7.142857142857142,0.0,3.968253968253968,,40.476190476190474,37.3015873015873,50.793650793650784,7.936507936507937,24.6031746031746,4.761904761904762,9.523809523809524,23.015873015873012,5.555555555555555,17.46031746031746,,22,2,126\r\n+sp|Q5QNW6|H2B2F_HUMAN,9.523809523809524,0.0,3.1746031746031744,4.761904761904762,1.5873015873015872,5.555555555555555,2.380952380952381,4.761904761
904762,15.873015873015872,4.761904761904762,2.380952380952381,2.380952380952381,4.761904761904762,2.380952380952381,6.349206349206349,11.11111111111111,6.349206349206349,7.936507936507936,0.0,3.968253968253968,,40.476190476190474,37.3015873015873,50.79365079365079,7.936507936507937,24.6031746031746,4.761904761904762,9.523809523809524,23.015873015873016,5.555555555555555,17.46031746031746,,22,2,126\r\n+sp|Q99877|H2B1N_HUMAN,9.523809523809524,0.0,2.380952380952381,5.555555555555555,1.5873015873015872,5.555555555555555,2.380952380952381,4.761904761904762,15.873015873015872,4.761904761904762,2.380952380952381,2.380952380952381,4.761904761904762,2.380952380952381,6.349206349206349,11.904761904761903,6.349206349206349,7.142857142857142,0.0,3.968253968253968,,39.68253968253968,37.3015873015873,50.0,7.936507936507937,24.6031746031746,4.761904761904762,9.523809523809524,22.22222222222222,5.555555555555555,18.253968253968253,,23,2,126\r\n+sp|P57053|H2BFS_HUMAN,10.317460317460316,0.0,2.380952380952381,5.555555555555555,1.5873015873015872,5.555555555555555,2.380952380952381,4.761904761904762,15.079365079365079,4.761904761904762,2.380952380952381,2.380952380952381,5.555555555555555,2.380952380952381,7.142857142857142,10.317460317460316,6.349206349206349,7.142857142857142,0.0,3.968253968253968,,40.476190476190474,37.301587301587304,50.0,7.936507936507937,24.6031746031746,4.761904761904762,9.523809523809524,23.015873015873012,5.555555555555555,16.666666666666664,,21,2,126\r\n+sp|Q99879|H2B1M_HUMAN,8.73015873015873,0.0,2.380952380952381,5.555555555555555,1.5873015873015872,5.555555555555555,2.380952380952381,5.555555555555555,15.873015873015872,4.761904761904762,2.380952380952381,3.1746031746031744,4.761904761904762,2.380952380952381,6.349206349206349,11.11111111111111,5.555555555555555,7.936507936507936,0.0,3.968253968253968,,40.476190476190474,38.095238095238095,49.2063492063492,7.936507936507937,24.6031746031746,5.555555555555555,10.317460317460316,22.22222222222222,5.5555555555
55555,16.666666666666664,,21,2,126\r\n'
b
diff -r 000000000000 -r 23eea82f5192 all stuff/test-data/1R1 substrates.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/test-data/1R1 substrates.csv Wed Jan 16 13:55:22 2019 -0500
b
b'@@ -0,0 +1,293 @@\n+Substrate,Species,Reference,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,Phosphite\r\n+,,sp|Q13283|G3BP1_HUMAN, , , , , , , ,xS,S,S,P,A,P,A,D,\r\n+,,sp|Q08043|ACTN3_HUMAN; sp|P35609|ACTN2_HUMAN; sp|P12814|ACTN1_HUMAN; sp|O43707|ACTN4_HUMAN, , , , , ,T,F,xT,A,W,C,N,S,H,L,\r\n+,,sp|P19338|NUCL_HUMAN,D,E,E,E,D,D,D,xS,E,E,D,E,E,D,D,\r\n+,,sp|Q15738|NSDHL_HUMAN, , ,V,A,L,A,G,xT,F,H,Y,Y,S,C,E,\r\n+,,sp|Q9Y371|SHLB1_HUMAN, ,L,A,A,D,A,G,xT,F,L,S,R, , , ,\r\n+,,sp|P09651|ROA1_HUMAN; sp|Q32P51|RA1L2_HUMAN,S,H,F,E,Q,W,G,xT,L,T,D,C,V,V,M,\r\n+,,sp|P54136|SYRC_HUMAN, , , , , , , ,xS,T,I,I,G,E,S,I,\r\n+,,sp|P22626|ROA2_HUMAN, , ,G,F,G,F,V,xT,F,S,S,M,A,E,V,\r\n+,,sp|Q8NBS9|TXND5_HUMAN, , , , ,G,Y,P,xT,L,L,L,F,R, , ,\r\n+,,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN, ,T,V,L,S,G,G,xT,T,M,Y,P,G,I,A,\r\n+,,sp|Q96JH7|VCIP1_HUMAN, , , , , ,S,S,xT,T,F,F,E,L,Q,E,\r\n+,,sp|Q13283|G3BP1_HUMAN, , , , , , ,S,xS,S,P,A,P,A,D,I,\r\n+,,sp|P07195|LDHB_HUMAN, , , , , ,G,L,xT,S,V,I,N,Q,K, ,\r\n+,,sp|Q15007|FL2D_HUMAN, , , , , , ,S,xT,M,V,D,P,A,I,N,\r\n+,,sp|O14776|TCRG1_HUMAN, , , , , , ,A,xT,F,S,E,F,A,A,K,\r\n+,,sp|O15294|OGT1_HUMAN,L,C,P,T,H,A,D,xS,L,N,N,L,A,N,I,\r\n+,,sp|Q9UKX7|NUP50_HUMAN, , , , ,G,I,G,xT,L,H,L,K,P,T,A,\r\n+,,sp|O43312|MTSS1_HUMAN, , , ,R,P,A,S,xT,A,G,L,P,T,T,L,\r\n+,,sp|P22314|UBA1_HUMAN, , , , , ,L,Q,xT,S,S,V,L,V,S,G,\r\n+,,sp|O75746|CMC1_HUMAN, , , , , ,L,A,xT,A,T,F,A,G,I,E,\r\n+,,sp|Q15005|SPCS2_HUMAN, ,F,F,D,H,S,G,xT,L,V,M,D,A,Y,E,\r\n+,,sp|P62995|TRA2B_HUMAN, , , , ,R,P,H,xT,P,T,P,G,I,Y, ,\r\n+,,sp|Q13596|SNX1_HUMAN, , ,A,V,G,T,Q,xT,L,S,G,A,G,L,L,\r\n+,,sp|P42166|LAP2A_HUMAN,S,D,E,E,R,E,P,xT,P,V,L,G,S,G,A,\r\n+,,sp|P22626|ROA2_HUMAN, , , , , , ,V,xT,F,S,S,M,A,E,V,\r\n+,,sp|P11142|HSP7C_HUMAN; sp|P0DMV9|HS71B_HUMAN; sp|P0DMV8|HS71A_HUMAN, , , , , , ,I,xT,I,T,N,D,K, , ,\r\n+,,sp|P25789|PSA4_HUMAN, , , , , , ,T,xT,I,F,S,P,E,G,R,\r\n+,,sp|Q9Y277|VDAC3_HUMAN, , , 
,V,N,N,A,xS,L,I,G,L,G,Y,T,\r\n+,,sp|O75521|ECI2_HUMAN,P,F,S,H,L,G,Q,xS,P,E,G,C,S,S,Y,\r\n+,,sp|P04075|ALDOA_HUMAN, , , , , ,A,L,xT,F,S,Y,G,R, , ,\r\n+,,sp|Q14498|RBM39_HUMAN, , , , , ,D,K,xS,P,V,R,E,P,I,D,\r\n+,,sp|P42166|LAP2A_HUMAN, ,G,P,P,D,F,S,xS,D,E,E,R,E,P,T,\r\n+,,sp|Q9UPN3|MACF1_HUMAN, , , , ,I,S,G,xT,F,S,S,G,W,T,V,\r\n+,,sp|P12955|PEPD_HUMAN, , , , , , , ,xS,T,L,F,V,P,R, ,\r\n+,,sp|P49321|NASP_HUMAN, ,P,E,V,N,G,G,xS,G,D,A,V,P,S,G,\r\n+,,sp|Q9ULW0|TPX2_HUMAN, , , , , ,G,C,xT,I,V,K,P,F,N,L,\r\n+,,sp|Q13177|PAK2_HUMAN, , ,S,T,M,V,G,xT,P,Y,W,M,A,P,E,\r\n+,,sp|P62987|RL40_HUMAN; sp|P62979|RS27A_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG47|UBB_HUMAN; sp|P0CG47|UBB_HUMAN; sp|P0CG47|UBB_HUMAN; tr|A0A2R8Y422|A0A2R8Y422_HUMAN, , , , , , ,E,xS,T,L,H,L,V,L,R,\r\n+,,sp|P17844|DDX5_HUMAN, , , , , , ,T,xT,Y,L,V,L,D,E,A,\r\n+,,sp|P12956|XRCC6_HUMAN, , , ,T,F,N,T,xS,T,G,G,L,L,L,P,\r\n+,,sp|O15294|OGT1_HUMAN, , , , ,L,C,P,xT,H,A,D,S,L,N,N,\r\n+,,sp|P09651|ROA1_HUMAN; sp|Q32P51|RA1L2_HUMAN,K,L,F,I,G,G,L,xS,F,E,T,T,D,E,S,\r\n+,,sp|Q9UGU5|HMGX4_HUMAN, , , , , , , ,xS,P,P,T,T,M,L,L,\r\n+,,sp|Q15181|IPYR_HUMAN, ,G,I,S,C,M,N,xT,T,L,S,E,S,P,F,\r\n+,,sp|Q9NUL3|STAU2_HUMAN, , , , , ,V,I,xS,G,T,T,L,G,Y,L,\r\n+,,sp|P24752|THIL_HUMAN, , , ,T,P,I,G,xS,F,L,G,S,L,S,L,\r\n+,,sp|P60900|PSA6_HUMAN, , , , , ,H,I,xT,I,F,S,P,E,G,R,\r\n+,,sp|P63010|AP2B1_HUMAN; sp|Q10567|AP1B1_HUMAN, , ,L,S,H,A,N,xS,A,V,V,L,S,A,V,\r\n+,,sp|Q99832|TCPH_HUMAN,G,D,G,T,T,S,V,xT,L,L,A,A,E,F,L,\r\n+,,sp|P53396|ACLY_HUMAN, , , , , ,G,S,xT,F,M,D,H,V,L,R,\r\n+,,sp|A0FGR8|ESYT2_HUMAN,G,A,N,T,H,L,S,xT,F,S,F,T,K, , ,\r\n+,,sp|P49407|ARRB1_HUMAN,A,D,D,T,V,A,P,xS,S,T,F,C,K, , ,\r\n+,,sp|Q15007|FL2D_HUMAN, , , , , , , ,xS,T,M,V,D,P,A,I,\r\n+,,sp|Q96JH7|VCIP1_HUMAN, , , , 
,S,S,T,xT,F,F,E,L,Q,E,S,\r\n+,,sp|Q99832|TCPH_HUMAN,A,E,V,G,D,G,T,xT,S,V,T,L,L,A,A,\r\n+,,sp|Q96I25|SPF45_HUMAN, , , , , , , ,xS,P,T,G,P,S,N,S,\r\n+,,sp|O60506|HNRPQ_HUMAN, , ,G,Y,A,F,V,xT,F,C,T,K, , , ,\r\n+,,sp|O00148|DX39A_HUMAN; sp|Q13838|DX39B_HUMAN, , , , , , ,L,xT,L,H,G,L,Q,Q,Y,\r\n+,,sp|Q99832|TCPH_HUMAN,E,V,G,D,G'..b',K, , , ,\r\n+,,sp|P14314|GLU2B_HUMAN, , ,L,G,G,S,P,xT,S,L,G,T,W,G,S,\r\n+,,sp|P01911|2B1F_HUMAN; sp|Q30154|DRB5_HUMAN,V,E,H,P,S,V,T,xS,P,L,T,V,E,W,R,\r\n+,,sp|P52272|HNRPM_HUMAN,F,G,G,S,F,A,G,xS,F,G,G,A,G,G,H,\r\n+,,tr|A0A182DWH4|A0A182DWH4_HUMAN,V,E,H,P,S,V,T,xS,P,L,T,V,E,W,S,\r\n+,,sp|O75521|ECI2_HUMAN,A,T,F,H,T,P,F,xS,H,L,G,Q,S,P,E,\r\n+,,sp|Q9BTE3|MCMBP_HUMAN, , , , , ,N,S,xT,F,T,E,H,L,Y,R,\r\n+,,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN, , , ,A,V,F,P,xS,I,V,G,R, , , ,\r\n+,,sp|Q86VP6|CAND1_HUMAN, , , , , ,L,G,xT,L,S,A,L,D,I,L,\r\n+,,sp|Q08211|DHX9_HUMAN, , , , , ,G,M,xT,L,V,T,P,L,Q,L,\r\n+,,sp|Q9Y3I0|RTCB_HUMAN, , , , ,S,S,M,xT,F,L,T,R, , , ,\r\n+,,sp|Q9BRA2|TXD17_HUMAN, , , , , , , ,xT,I,F,A,Y,F,T,G,\r\n+,,sp|P12956|XRCC6_HUMAN, , , , ,T,F,N,xT,S,T,G,G,L,L,L,\r\n+,,sp|P43686|PRS6B_HUMAN, , , , ,L,I,F,xS,T,I,T,S,K, , ,\r\n+,,sp|P54819|KAD2_HUMAN,Q,A,Y,H,T,Q,T,xT,P,L,I,E,Y,Y,R,\r\n+,,sp|P27797|CALR_HUMAN, , , , , ,G,Q,xT,L,V,V,Q,F,T,V,\r\n+,,sp|Q14697|GANAB_HUMAN, , , , , , ,L,xS,F,Q,H,D,P,E,T,\r\n+,,sp|O60508|PRP17_HUMAN, , , , ,S,P,S,xS,K,P,S,L,A,V,A,\r\n+,,sp|P51991|ROA3_HUMAN, , , ,W,G,T,L,xT,D,C,V,V,M,R,D,\r\n+,,sp|P55884|EIF3B_HUMAN, , , , ,I,S,V,xS,F,Y,H,V,K, , ,\r\n+,,sp|Q99575|POP1_HUMAN, , , ,Y,I,T,A,xS,T,F,A,Q,A,R, ,\r\n+,,sp|P15880|RS2_HUMAN, , , , , , ,G,xT,G,I,V,S,A,P,V,\r\n+,,sp|P49588|SYAC_HUMAN, , , , , ,I,G,xT,I,Y,G,D,L,K, ,\r\n+,,sp|Q7Z5L9|I2BP2_HUMAN,R,P,A,S,V,S,S,xS,A,A,V,E,H,E,Q,\r\n+,,sp|P25789|PSA4_HUMAN, , , , , , , ,xT,T,I,F,S,P,E,G,\r\n+,,sp|Q9UIA9|XPO7_HUMAN, , , , , , ,T,xT,F,Y,T,A,L,G,R,\r\n+,,sp|P07437|TBB5_HUMAN; sp|Q13509|TBB3_HUMAN; tr|A0A0B4J269|A0A0B4J269_HUMAN,G,T,G,S,G,M,G,xT,L,L,I,S,K, , 
,\r\n+,,sp|O15042|SR140_HUMAN, , , , , ,L,Y,xS,I,L,Q,G,D,S,P,\r\n+,,sp|Q9UKM9|RALY_HUMAN, , , , ,G,R,L,xS,P,V,P,V,P,R, ,\r\n+,,sp|Q13586|STIM1_HUMAN, , ,N,T,L,F,G,xT,F,H,V,A,H,S,S,\r\n+,,sp|Q96JH7|VCIP1_HUMAN, , , , , , , ,xS,S,T,T,F,F,E,L,\r\n+,,sp|Q7Z5L9|I2BP2_HUMAN, , ,R,P,A,S,V,xS,S,S,A,A,V,E,H,\r\n+,,sp|Q8WUK0|PTPM1_HUMAN, , , , , ,D,G,xT,F,V,I,S,K, , ,\r\n+,,sp|P15498|VAV_HUMAN, , , , ,S,D,G,xT,F,L,V,R, , , ,\r\n+,,sp|P09651|ROA1_HUMAN; tr|A0A2R8Y4L2|A0A2R8Y4L2_HUMAN, , , , , ,Y,H,xT,V,N,G,H,N,C,E,\r\n+,,sp|Q14C86|GAPD1_HUMAN,L,S,V,V,S,G,I,xS,A,T,S,E,D,I,P,\r\n+,,sp|Q9H7N4|SFR19_HUMAN, , , ,Q,R,S,P,xS,P,A,P,A,P,A,P,\r\n+,,sp|Q9HCC0|MCCB_HUMAN, , , , ,K,Q,G,xT,I,F,L,A,G,P,P,\r\n+,,sp|Q9UI08|EVL_HUMAN, , , ,Y,N,Q,A,xT,P,T,F,H,Q,W,R,\r\n+,,sp|Q13283|G3BP1_HUMAN, , , , , ,S,S,xS,P,A,P,A,D,I,A,\r\n+,,sp|Q13177|PAK2_HUMAN, , , , , , , ,xS,T,M,V,G,T,P,Y,\r\n+,,sp|P20290|BTF3_HUMAN,Q,A,S,L,A,A,N,xT,F,T,I,T,G,H,A,\r\n+,,sp|P47756|CAPZB_HUMAN, , , , , , , ,xS,T,L,N,E,I,Y,F,\r\n+,,sp|Q13094|LCP2_HUMAN, , ,I,N,Q,D,G,xT,F,L,V,R, , , ,\r\n+,,sp|Q9H7N4|SFR19_HUMAN, , , , , ,Q,R,xS,P,S,P,A,P,A,P,\r\n+,,sp|Q9BWJ5|SF3B5_HUMAN, , , , , , ,Y,xT,I,H,S,Q,L,E,H,\r\n+,,sp|P30101|PDIA3_HUMAN,Y,G,V,S,G,Y,P,xT,L,K, , , , , ,\r\n+,,sp|Q6ZNL6|FGD5_HUMAN, , , ,E,R,P,V,xS,M,S,F,P,L,S,S,\r\n+,,sp|P55265|DSRAD_HUMAN, ,T,L,P,L,T,G,xS,T,F,H,D,Q,I,A,\r\n+,,sp|O60573|IF4E2_HUMAN, , , , ,Q,I,G,xT,F,A,S,V,E,Q,F,\r\n+,,sp|Q8WV74|NUDT8_HUMAN, , , ,A,R,P,A,xS,A,A,V,L,V,P,L,\r\n+,,sp|Q15181|IPYR_HUMAN, , , , , ,G,I,xS,C,M,N,T,T,L,S,\r\n+,,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN, , , ,A,V,F,P,xS,I,V,G,R,P,R, ,\r\n+,,sp|Q08945|SSRP1_HUMAN, , , , , ,Q,G,xT,Q,Y,T,F,S,S,I,\r\n+,,sp|Q14690|RRP5_HUMAN, , , , , ,A,G,xT,Y,F,S,N,Q,A,V,\r\n+,,sp|P07900|HS90A_HUMAN, , , , , , , ,xT,L,T,I,V,D,T,G,\r\n+,,sp|O14980|XPO1_HUMAN, ,E,T,L,V,Y,L,xT,H,L,D,Y,V,D,T,\r\n+,,sp|P04075|ALDOA_HUMAN,H,E,E,I,A,M,A,xT,V,T,A,L,R, , ,\r\n+,,sp|O75521|ECI2_HUMAN, , , , , , 
,A,xT,F,H,T,P,F,S,H,\r\n+,,sp|P23193|TCEA1_HUMAN,S,A,D,E,P,M,T,xT,F,V,V,C,N,E,C,\r\n+,,sp|P08865|RSSA_HUMAN,A,S,Y,V,N,L,P,xT,I,A,L,C,N,T,D,\r\n+,,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN,T,V,L,S,G,G,T,xT,M,Y,P,G,I,A,D,\r\n+,,sp|P62807|H2B1C_HUMAN; sp|P57053|H2BFS_HUMAN; sp|O60814|H2B1K_HUMAN; sp|Q99880|H2B1L_HUMAN; sp|Q99879|H2B1M_HUMAN; sp|Q99877|H2B1N_HUMAN; sp|Q93079|H2B1H_HUMAN; sp|Q5QNW6|H2B2F_HUMAN; sp|P58876|H2B1D_HUMAN, , , , ,E,S,Y,xS,V,Y,V,Y,K, , ,\r\n+,,sp|Q15005|SPCS2_HUMAN, , , , , , ,L,xT,F,I,S,G,R, , ,\r\n'
b
diff -r 000000000000 -r 23eea82f5192 all stuff/test-data/1R2 SBf.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/test-data/1R2 SBf.csv Wed Jan 16 13:55:22 2019 -0500
b
b'@@ -0,0 +1,127 @@\n+Accession Numbers,Amino Acids,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,Properties,Hydrophobic,Polar,Small,Negative,Postive,Amide,Large Aliphatic,Small Aliphatic,Aromatic,Hydroxy,X,Number of ST,Number of pST\r\n+sp|Q15738|NSDHL_HUMAN,7.238605898123325,1.3404825737265416,4.557640750670242,5.361930294906166,5.093833780160858,6.434316353887399,2.680965147453083,5.898123324396782,5.361930294906166,9.115281501340483,2.680965147453083,5.093833780160858,6.166219839142091,4.557640750670242,5.093833780160858,4.289544235924933,6.166219839142091,8.310991957104557,0.5361930294906166,4.021447721179625,,50.670241286863266,32.707774798927616,49.59785522788203,9.919571045576408,13.136729222520106,9.651474530831099,15.013404825737265,21.983914209115284,9.651474530831099,10.455764075067023,,39,1,373\r\n+sp|Q06830|PRDX1_HUMAN,6.030150753768844,2.0100502512562812,8.040201005025125,3.5175879396984926,7.537688442211055,8.040201005025125,2.0100502512562812,6.532663316582915,9.547738693467336,6.030150753768844,1.507537688442211,3.015075376884422,6.532663316582915,4.522613065326634,3.015075376884422,6.532663316582915,4.522613065326634,7.537688442211055,1.0050251256281406,2.512562814070352,,48.743718592964825,33.66834170854271,52.26130653266331,11.557788944723617,14.572864321608039,7.5376884422110555,12.56281407035176,21.608040201005025,11.055276381909547,11.055276381909549,,22,2,199\r\n+sp|Q9Y371|SHLB1_HUMAN,8.493150684931507,1.36986301369863,4.931506849315069,7.9452054794520555,3.0136986301369864,5.205479452054795,1.095890410958904,4.931506849315069,6.8493150684931505,12.054794520547945,2.4657534246575343,6.027397260273973,3.287671232876712,4.931506849315069,4.931506849315069,7.397260273972603,7.123287671232877,4.10958904109589,0.547945205479452,3.287671232876712,,45.479452054794514,36.71232876712329,47.945205479452056,12.876712328767123,12.876712328767123,10.958904109589042,16.986301369863014,17.808219178082194,6.8493150684931505,14.520547945205479,,53,1,365\r\n+sp|P08
238|HS90B_HUMAN,5.248618784530387,0.8287292817679558,7.044198895027624,13.259668508287293,3.591160220994475,4.696132596685083,1.7955801104972375,6.629834254143646,10.359116022099448,8.56353591160221,2.6243093922651934,3.867403314917127,3.1767955801104977,2.9005524861878453,4.41988950276243,6.629834254143646,4.696132596685083,5.801104972375691,0.5524861878453038,3.314917127071823,,41.85082872928177,43.646408839779006,41.988950276243095,20.303867403314918,16.574585635359114,6.767955801104972,15.193370165745858,15.745856353591162,7.458563535911601,11.32596685082873,,82,7,724\r\n+sp|O15530|PDPK1_HUMAN,5.935251798561151,1.618705035971223,5.0359712230215825,7.374100719424461,4.856115107913669,6.115107913669065,2.697841726618705,4.496402877697841,6.83453237410072,10.071942446043165,1.7985611510791366,3.237410071942446,6.83453237410072,5.0359712230215825,5.39568345323741,7.913669064748201,4.676258992805756,4.856115107913669,1.079136690647482,4.136690647482014,,44.96402877697841,35.61151079136691,46.22302158273381,12.410071942446043,14.928057553956835,8.273381294964029,14.568345323741006,16.906474820143885,10.071942446043165,12.589928057553957,,70,15,556\r\n+sp|P22626|ROA2_HUMAN,3.1161473087818696,0.28328611898017,4.815864022662889,6.232294617563739,5.382436260623229,24.362606232294617,1.9830028328611897,2.5495750708215295,5.382436260623229,2.8328611898017,1.9830028328611897,5.382436260623229,4.2492917847025495,2.8328611898017,7.0821529745042495,7.0821529745042495,3.39943342776204,4.53257790368272,0.28328611898017,6.232294617563739,,51.55807365439094,33.711048158640224,57.22379603399433,11.048158640226628,14.44759206798867,8.215297450424929,5.38243626062323,32.01133144475921,11.898016997167138,10.48158640226629,,37,9,353\r\n+sp|Q32P44|EMAL3_HUMAN,6.8080357142857135,2.1205357142857144,5.580357142857143,4.575892857142857,2.7901785714285716,12.834821428571427,2.7901785714285716,3.3482142857142856,3.125,9.151785714285714,1.0044642857142858,2.455357142857143,7.142857142857142,3.2
366071428571432,'..b'170506912442393,8.755760368663594,1.3824884792626728,6.221198156682028,8.755760368663594,8.294930875576037,2.0737327188940093,5.529953917050691,3.686635944700461,2.5345622119815667,3.9170506912442393,5.990783410138248,4.147465437788019,7.603686635944701,0.6912442396313364,2.5345622119815667,,51.38248847926267,34.7926267281106,52.995391705069125,12.672811059907833,14.055299539170505,8.064516129032258,14.516129032258064,26.267281105990786,7.142857142857142,10.138248847926267,,44,3,434\r\n+sp|P49368|TCPG_HUMAN,7.339449541284404,1.834862385321101,6.238532110091743,7.706422018348624,0.9174311926605505,6.605504587155964,1.651376146788991,9.541284403669724,6.605504587155964,8.440366972477065,4.036697247706423,3.486238532110092,3.302752293577982,4.587155963302752,6.422018348623854,5.871559633027523,6.238532110091743,6.7889908256880735,0.7339449541284404,1.651376146788991,,47.88990825688073,36.69724770642202,47.70642201834862,13.944954128440367,14.678899082568808,8.073394495412845,17.98165137614679,20.733944954128443,3.302752293577982,12.110091743119266,,66,2,545\r\n+sp|Q9BTE3|MCMBP_HUMAN,4.984423676012461,2.336448598130841,5.451713395638629,6.853582554517133,3.894080996884735,4.205607476635514,2.959501557632399,3.894080996884735,4.8286604361370715,12.77258566978193,2.0249221183800623,5.763239875389408,6.386292834890965,5.451713395638629,4.672897196261682,7.320872274143301,5.14018691588785,6.697819314641744,1.2461059190031152,3.115264797507788,,45.17133956386292,35.98130841121495,48.28660436137071,12.305295950155763,12.461059190031154,11.214953271028037,16.666666666666664,15.88785046728972,8.255451713395638,12.46105919003115,,80,2,642\r\n+sp|Q13126|MTAP_HUMAN,7.420494699646643,3.180212014134275,4.593639575971731,7.06713780918728,2.8268551236749118,7.06713780918728,3.180212014134275,7.773851590106007,6.36042402826855,7.420494699646643,3.180212014134275,2.8268551236749118,4.593639575971731,2.4734982332155475,4.946996466431095,6.007067137809187,9.1872791519
43462,6.713780918727916,1.4134275618374559,1.76678445229682,,48.76325088339223,31.448763250883392,51.590106007067135,11.660777385159012,14.48763250883392,5.30035335689046,15.194346289752652,21.20141342756184,6.007067137809187,15.19434628975265,,43,3,283\r\n+sp|P50452|SPB8_HUMAN,7.754010695187167,2.6737967914438503,5.614973262032086,9.893048128342247,7.4866310160427805,4.545454545454546,1.8716577540106951,2.941176470588235,8.288770053475936,9.62566844919786,3.4759358288770055,4.010695187165775,2.941176470588235,2.6737967914438503,4.545454545454546,6.149732620320856,5.88235294117647,6.149732620320856,0.8021390374331552,2.6737967914438503,,48.12834224598931,36.89839572192514,45.721925133689844,15.508021390374333,14.705882352941178,6.684491978609626,12.566844919786096,18.44919786096257,10.962566844919786,12.032085561497325,,45,1,374\r\n+sp|P35237|SPB6_HUMAN,6.648936170212766,1.5957446808510638,4.787234042553192,9.308510638297872,7.446808510638298,5.851063829787234,1.3297872340425532,3.4574468085106385,7.180851063829788,9.574468085106384,5.319148936170213,4.521276595744681,2.6595744680851063,3.4574468085106385,3.9893617021276597,7.446808510638298,6.117021276595745,6.382978723404255,0.7978723404255319,2.127659574468085,,49.20212765957447,34.57446808510638,46.01063829787234,14.095744680851062,12.500000000000002,7.9787234042553195,13.031914893617023,18.882978723404257,10.372340425531915,13.563829787234042,,51,1,376\r\n+tr|A0A182DWH4|A0A182DWH4_HUMAN,4.511278195488721,1.8796992481203008,2.631578947368421,7.142857142857142,6.015037593984962,9.398496240601503,3.3834586466165413,1.1278195488721803,3.007518796992481,10.902255639097744,1.5037593984962405,3.3834586466165413,4.135338345864661,5.639097744360902,7.142857142857142,8.270676691729323,5.639097744360902,9.398496240601503,1.5037593984962405,3.3834586466165413,,49.62406015037594,32.330827067669176,49.24812030075188,9.774436090225564,13.533834586466165,9.022556390977444,12.030075187969924,23.308270676691727,10.90225563909774
4,13.909774436090224,,37,4,266\r\n'
b
diff -r 000000000000 -r 23eea82f5192 all stuff/test-data/1R2 subs.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/test-data/1R2 subs.csv Wed Jan 16 13:55:22 2019 -0500
b
@@ -0,0 +1,119 @@
+Substrate,Species,Reference,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,Phosphite
+,,sp|Q15738|NSDHL_HUMAN, , , ,Q,A,T,S,xT,A,S,T,F,V,K,P,
+,,sp|Q06830|PRDX1_HUMAN, , , , , ,N,S,xT,F,S,E,I,F,K, ,
+,,sp|Q9Y371|SHLB1_HUMAN, , , , , , ,S,xS,G,P,T,S,L,F,A,
+,,sp|P08238|HS90B_HUMAN, , , , , , ,Q,xS,T,I,V,C,H,N,R,
+,,sp|O15530|PDPK1_HUMAN, , , , , , ,A,xT,F,M,V,G,S,Y,G,
+,,sp|P22626|ROA2_HUMAN, , , , , , , ,xS,T,F,A,A,F,F,T,
+,,sp|Q32P44|EMAL3_HUMAN,N,S,T,M,L,G,A,xS,G,D,Y,A,D,F,Q,
+,,sp|P41252|SYIC_HUMAN, , , , ,R,P,P,xS,A,F,F,L,F, , ,
+,,sp|Q13283|G3BP1_HUMAN,V,T,F,I,G,N,S,xT,A,I,Q,E,L,F,K,
+,,sp|P08238|HS90B_HUMAN; sp|P07900|HS90A_HUMAN, , , , , , , ,xS,P,G,S,Q,A,P,D,
+,,sp|Q9Y266|NUDC_HUMAN, ,A,I,Q,G,G,T,xS,H,H,L,G,Q,N,F,
+,,sp|Q8NHW5|RLA0L_HUMAN; sp|P05388|RLA0_HUMAN, , , , , ,V,H,xS,F,P,T,L,K, , ,
+,,sp|P22314|UBA1_HUMAN, , , , , ,A,G,xT,L,F,G,E,G,F,R,
+,,sp|Q4KMQ1|TPRN_HUMAN, , , ,D,K,E,V,xS,D,D,E,A,E,E,K,
+,,sp|O43760|SNG2_HUMAN, , , , , ,S,S,xT,P,L,P,T,I,S,S,
+,,sp|Q9Y277|VDAC3_HUMAN, , , , , , , ,xS,P,E,G,E,Q,E,D,
+,,sp|O75521|ECI2_HUMAN, , , , ,M,S,A,xT,F,I,G,N,S,T,A,
+,,sp|P68104|EF1A1_HUMAN; sp|Q5VTE0|EF1A3_HUMAN,S,P,T,G,P,S,N,xS,F,L,A,N,M,G,G,
+,,sp|O60907|TBL1X_HUMAN; sp|Q9BZK7|TBL1R_HUMAN; sp|Q9BQ87|TBL1Y_HUMAN, , , , , , ,G,xT,L,D,P,V,E,K, ,
+,,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN, , , , ,V,Y,A,xT,I,L,N,A,G,T,N,
+,,sp|P07900|HS90A_HUMAN, , , , , ,V,C,xT,L,A,I,I,D,P,G,
+,,sp|Q92499|DDX1_HUMAN, , , , , ,L,A,xT,F,W,Y,Y,A,K, ,
+,,sp|P63167|DYL1_HUMAN; sp|Q96FJ2|DYL2_HUMAN, , , , , ,T,G,xT,A,E,M,S,S,I,L,
+,,sp|P24752|THIL_HUMAN, , , , ,Y,F,P,xT,Q,A,L,N,F,A,F,
+,,sp|Q14161|GIT2_HUMAN, , , , , , ,D,xT,L,Y,E,A,V,R, ,
+,,sp|Q99832|TCPH_HUMAN, , , , , , ,Q,xT,L,M,W,S,A,T,W,
+,,sp|A0FGR8|ESYT2_HUMAN, , , , , ,G,G,xT,I,L,A,P,T,V,S,
+,,sp|Q15019|SEPT2_HUMAN, , , , ,I,Q,A,xS,T,M,A,F,K, , ,
+,,sp|Q15007|FL2D_HUMAN,S,A,T,F,I,G,N,xS,T,A,I,Q,E,L,F,
+,,sp|O95218|ZRAB2_HUMAN, , , , , ,A,G,xT,F,I,A,P,P,V,Y,
+,,sp|P22392|NDKB_HUMAN, , , , ,S,A,Q,xT,F,L,S,E,I,R, ,
+,,sp|O00148|DX39A_HUMAN; sp|Q13838|DX39B_HUMAN, , , , , , ,S,xS,F,S,H,Y,S,G,L,
+,,sp|P00558|PGK1_HUMAN, , , , ,V,N,N,xS,T,M,L,G,A,S,G,
+,,sp|Q14974|IMB1_HUMAN,Q,A,T,S,T,A,S,xT,F,V,K,P,I,F,S,
+,,sp|Q8WUA2|PPIL4_HUMAN,C,A,N,P,A,A,G,xS,V,I,L,L,E,N,L,
+,,sp|Q06830|PRDX1_HUMAN, , , ,T,G,Q,A,xT,V,A,S,G,I,P,A,
+,,sp|P31323|KAP3_HUMAN, , , , , , ,T,xT,F,L,H,I,S,K, ,
+,,sp|Q71DI3|H32_HUMAN; sp|P84243|H33_HUMAN; sp|P68431|H31_HUMAN; sp|Q16695|H31T_HUMAN; tr|Q5TEC6|Q5TEC6_HUMAN; sp|Q6NXT2|H3C_HUMAN, , , , , ,Y,H,xT,I,N,G,H,N,C,E,
+,,sp|P22626|ROA2_HUMAN, , , , ,I,S,S,xT,L,Y,Q,A,A,A,P,
+,,sp|Q13263|TIF1B_HUMAN, , , , , , , ,xS,P,I,I,N,E,S,R,
+,,sp|P67809|YBOX1_HUMAN, , , , ,T,F,T,xT,Q,E,T,I,T,N,A,
+,,sp|P23193|TCEA1_HUMAN, , , , , , , ,xS,P,A,V,K,P,A,A,
+,,sp|O15530|PDPK1_HUMAN, , , , , , , ,xS,P,T,T,V,L,F,T,
+,,sp|P62995|TRA2B_HUMAN, , , , , , ,G,xS,F,T,L,L,W,P,S,
+,,sp|Q9Y4E1|WAC2C_HUMAN; sp|Q641Q2|WAC2A_HUMAN, , , , , ,G,Y,xS,F,S,L,T,T,F,S,
+,,sp|P49915|GUAA_HUMAN, , , ,G,A,G,G,xT,I,I,T,N,F,E,R,
+,,sp|Q16666|IF16_HUMAN, , , , , , ,I,xT,V,T,S,E,V,P,F,
+,,sp|P11940|PABP1_HUMAN, , , , , ,S,R,xS,P,P,P,V,S,K, ,
+,,sp|P55884|EIF3B_HUMAN, , , ,S,S,G,P,xT,S,L,F,A,V,T,V,
+,,sp|O15530|PDPK1_HUMAN, , ,A,S,F,N,D,xT,F,V,H,V,T,D,L,
+,,sp|P50748|KNTC1_HUMAN, , , ,G,L,G,A,xT,F,I,L,R, , , ,
+,,sp|P26583|HMGB2_HUMAN;sp|P09429|HMGB1_HUMAN;, , , , , , ,A,xT,I,I,S,E,Q,Q,A,
+,,sp|O95619|YETS4_HUMAN, , , ,A,A,V,P,xS,G,A,S,T,G,I,Y,
+,,sp|P49321|NASP_HUMAN,E,A,E,A,A,H,G,xT,V,T,R, , , , ,
+,,sp|Q14C86|GAPD1_HUMAN, , , , , , ,S,xT,F,A,A,F,F,T,R,
+,,sp|P47756|CAPZB_HUMAN, , , ,E,G,T,E,xT,F,A,D,H,R, , ,
+,,sp|Q7Z5L9|I2BP2_HUMAN, , , , , , ,Q,xT,P,S,F,W,I,L,A,
+,,sp|P27824|CALX_HUMAN, , , ,V,G,A,F,xT,M,V,C,K, , , ,
+,,sp|P49407|ARRB1_HUMAN, , , , , ,F,E,xT,F,C,L,D,P,S,L,
+,,sp|Q07020|RL18_HUMAN, , , , , ,T,G,xT,L,T,T,N,Q,M,S,
+,,sp|Q71DI3|H32_HUMAN; sp|P84243|H33_HUMAN; sp|P68431|H31_HUMAN; sp|Q16695|H31T_HUMAN; tr|Q5TEC6|Q5TEC6_HUMAN; sp|Q6NXT2|H3C_HUMAN, , , , ,G,L,A,xT,F,C,L,D,K, , ,
+,,sp|Q8IY67|RAVR1_HUMAN, ,A,N,S,F,V,G,xT,A,Q,Y,V,S,P,E,
+,,sp|Q9UKW4|VAV3_HUMAN, ,G,G,Y,T,S,G,xT,F,R, , , , , ,
+,,sp|P00352|AL1A1_HUMAN, , , , , ,T,F,xT,T,Q,E,T,I,T,N,
+,,sp|Q7KZF4|SND1_HUMAN, , , , , , ,K,xT,Q,T,V,C,N,F,T,
+,,sp|P35244|RFA3_HUMAN, , , ,V,N,N,S,xT,M,L,G,A,S,G,D,
+,,sp|P54886|P5CS_HUMAN,Q,N,T,S,R,P,P,xS,M,H,V,D,D,F,V,
+,,tr|A0A096LPI6|A0A096LPI6_HUMAN; sp|P0DPI2|GAL3A_HUMAN; sp|A0A0B4J2D5|GAL3B_HUMAN,P,Q,G,E,E,E,E,xS,M,E,T,Q,E, , ,
+,,sp|Q16563|SYPL1_HUMAN, , , , , ,Y,P,xT,F,I,D,A,L,R, ,
+,,sp|E9PAV3|NACAM_HUMAN, , , ,Y,R,P,G,xT,V,A,L,R, , , ,
+,,sp|Q99547|MPH6_HUMAN, , , , , , ,G,xT,F,Y,Q,G,Y,R, ,
+,,sp|P49189|AL9A1_HUMAN, , , , , ,V,N,xT,L,I,R,P,D,G,E,
+,,sp|P54819|KAD2_HUMAN, , , , ,A,L,D,xT,M,N,F,D,V,I,K,
+,,sp|O00567|NOP56_HUMAN, , , , , , , ,xS,S,G,P,T,S,L,F,
+,,sp|P62995|TRA2B_HUMAN, , , , , ,I,S,xS,T,L,Y,Q,A,A,A,
+,,sp|O00541|PESC_HUMAN, , , , , , ,H,xT,L,T,Q,I,K, , ,
+,,sp|Q92900|RENT1_HUMAN, , , , , ,A,Y,xS,F,A,M,G,C,W,P,
+,,sp|P40926|MDHM_HUMAN, , , , , , , ,xT,F,Y,N,Q,A,I,M,
+,,tr|H7C0C1|H7C0C1_HUMAN; sp|P48047|ATPO_HUMAN, , , , , , ,S,xT,F,I,E,Y,F,G,K,
+,,sp|P78527|PRKDC_HUMAN, , , ,V,A,V,A,xT,P,A,K, , , , ,
+,,sp|Q9Y3D7|TIM16_HUMAN, , , , , , , ,xS,P,S,V,S,S,P,E,
+,,sp|Q9HC35|EMAL4_HUMAN, , , , , , ,G,xT,I,Q,V,I,T,Q,G,
+,,sp|P29590|PML_HUMAN,T,G,Q,A,T,V,A,xS,G,I,P,A,G,W,M,
+,,sp|P31943|HNRH1_HUMAN, , , , , , , ,xT,F,C,Q,L,I,L,D,
+,,sp|Q01469|FABP5_HUMAN,I,T,A,L,A,P,S,xT,M,K, , , , , ,
+,,sp|P49321|NASP_HUMAN, , , , , ,A,P,xT,I,V,G,K, , , ,
+,,sp|Q02878|RL6_HUMAN,I,F,P,N,P,E,A,xT,F,V,K, , , , ,
+,,sp|P18124|RL7_HUMAN, , , , , ,T,F,xS,Y,A,G,F,E,M,Q,
+,,sp|Q15942|ZYX_HUMAN, , , , , , , ,xT,F,T,T,Q,E,T,I,
+,,sp|P30086|PEBP1_HUMAN, , , , , ,G,T,xT,I,T,L,V,L,K, ,
+,,sp|O43491|E41L2_HUMAN,G,A,S,Q,A,G,M,xT,G,Y,G,M,P,R, ,
+,,sp|P68104|EF1A1_HUMAN; sp|Q5VTE0|EF1A3_HUMAN, , , , , , , ,xS,R,S,P,P,P,V,S,
+,,sp|Q8ND56|LS14A_HUMAN, ,V,C,V,E,H,H,xT,F,Y,R, , , , ,
+,,sp|Q99873|ANM1_HUMAN, ,V,P,S,L,V,G,xS,F,I,R, , , , ,
+,,sp|P55265|DSRAD_HUMAN, , , , , , ,A,xT,L,W,Y,V,P,L,S,
+,,sp|P53597|SUCA_HUMAN, , , , , ,S,P,xT,W,F,G,I,P,R, ,
+,,sp|O15530|PDPK1_HUMAN,L,L,Y,N,R,P,G,xT,V,S,S,L,K, , ,
+,,sp|C9JLW8|MCRI1_HUMAN, , ,S,S,G,P,T,xS,L,F,A,V,T,V,A,
+,,sp|P25098|ARBK1_HUMAN,A,G,Y,D,P,T,P,xT,M,R, , , , , ,
+,,sp|Q86U90|YRDC_HUMAN, , , , , ,G,G,xT,M,V,T,Y,G,G,M,
+,,sp|P52209|6PGD_HUMAN, , , , ,G,N,P,xT,V,E,V,D,L,F,T,
+,,sp|Q9UQ35|SRRM2_HUMAN, , , , ,R,V,A,xT,P,V,D,W,K, , ,
+,,sp|Q6PI48|SYDM_HUMAN,A,G,G,P,R,P,E,xS,P,V,P,A,G,R, ,
+,,sp|P12956|XRCC6_HUMAN, , , , , , , ,xS,K,L,Q,I,H,R, ,
+,,sp|Q9NVI7|ATD3A_HUMAN; sp|Q5T9A4|ATD3B_HUMAN, , , , ,G,Y,P,xT,L,L,W,F,R, , ,
+,,sp|Q9NZL9|MAT2B_HUMAN,G,A,P,G,Q,P,G,xT,I,L,R, , , , ,
+,,sp|Q2KHR2|RFX7_HUMAN, , , , , ,T,A,xT,I,T,E,P,R, , ,
+,,sp|P26373|RL13_HUMAN, , , , ,Q,A,T,xS,T,A,S,T,F,V,K,
+,,sp|P53396|ACLY_HUMAN, , , , , , , ,xS,S,F,S,H,Y,S,G,
+,,sp|O00148|DX39A_HUMAN; sp|Q13838|DX39B_HUMAN, , , ,E,R,S,P,xS,P,S,F,R, , , ,
+,,sp|Q71UI9|H2AV_HUMAN; sp|P0C0S5|H2AZ_HUMAN, , ,S,P,T,G,P,xS,N,S,F,L,A,N,M,
+,,sp|P06733|ENOA_HUMAN,F,L,A,G,Y,D,P,xT,P,T,M,R, , , ,
+,,sp|O60907|TBL1X_HUMAN; sp|Q9BZK7|TBL1R_HUMAN; sp|Q9BQ87|TBL1Y_HUMAN, , , , , ,S,T,xT,L,F,S,R, , , ,
+,,sp|P49368|TCPG_HUMAN, , , , , , , ,xT,G,T,L,T,T,N,Q,
+,,sp|Q9BTE3|MCMBP_HUMAN, , , ,L,A,P,G,xT,I,V,E,V,W,K, ,
+,,sp|Q13126|MTAP_HUMAN, , ,A,I,Q,G,G,xT,S,H,H,L,G,Q,N,
+,,sp|P50452|SPB8_HUMAN; sp|P35237|SPB6_HUMAN, ,V,C,N,Y,G,L,xT,F,T,Q,K, , , ,
+,,tr|A0A182DWH4|A0A182DWH4_HUMAN, , , , ,D,A,V,xT,Y,T,E,H,A,K, ,
b
diff -r 000000000000 -r 23eea82f5192 all stuff/test-data/R1 SBF.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/test-data/R1 SBF.csv Wed Jan 16 13:55:22 2019 -0500
b
b'@@ -0,0 +1,88 @@\n+Accession Numbers,Amino Acids,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,Properties,Hydrophobic,Polar,Small,Negative,Postive,Amide,Large Aliphatic,Small Aliphatic,Aromatic,Hydroxy,X,Number of ST,Number of pST\r\n+sp|P19338|NUCL_HUMAN,10.140845070422536,0.14084507042253522,10.28169014084507,14.507042253521126,3.6619718309859155,10.985915492957748,0.14084507042253522,2.3943661971830985,12.676056338028168,3.943661971830986,1.1267605633802817,2.9577464788732395,4.366197183098591,1.971830985915493,3.6619718309859155,5.070422535211268,5.352112676056338,5.211267605633803,0.28169014084507044,1.1267605633802817,,39.014084507042256,46.197183098591545,54.50704225352113,24.788732394366196,16.47887323943662,4.929577464788732,6.338028169014084,26.338028169014088,5.070422535211268,10.422535211267606,,74,13,710\r\n+sp|P09651|ROA1_HUMAN,2.956989247311828,0.5376344086021506,4.838709677419355,4.838709677419355,6.182795698924731,25.0,2.1505376344086025,2.1505376344086025,4.838709677419355,2.1505376344086025,1.6129032258064515,5.64516129032258,2.6881720430107525,3.225806451612903,6.720430107526881,11.29032258064516,3.225806451612903,4.56989247311828,0.2688172043010753,5.10752688172043,,50.537634408602145,32.25806451612903,60.752688172043015,9.67741935483871,13.70967741935484,8.870967741935484,4.301075268817205,32.52688172043011,11.559139784946236,14.516129032258064,,54,6,372\r\n+sp|Q32P51|RA1L2_HUMAN,3.4375000000000004,0.9375,5.625,5.3125,6.875000000000001,19.6875,2.5,2.5,5.9375,2.5,1.875,5.3125,3.125,3.4375000000000004,6.875000000000001,10.0,4.375,5.625,0.3125,3.75,,47.5,35.0,58.125,10.9375,15.3125,8.75,5.0,28.75,10.9375,14.375,,46,4,320\r\n+sp|P22626|ROA2_HUMAN,3.1161473087818696,0.28328611898017,4.815864022662889,6.232294617563739,5.382436260623229,24.362606232294617,1.9830028328611897,2.5495750708215295,5.382436260623229,2.8328611898017,1.9830028328611897,5.382436260623229,4.2492917847025495,2.8328611898017,7.0821529745042495,7.0821529745042495,3.39943342776204,4.5
3257790368272,0.28328611898017,6.232294617563739,,51.55807365439094,33.711048158640224,57.22379603399433,11.048158640226628,14.44759206798867,8.215297450424929,5.38243626062323,32.01133144475921,11.898016997167138,10.48158640226629,,37,9,353\r\n+sp|P60709|ACTB_HUMAN,7.733333333333333,1.6,6.133333333333333,6.933333333333333,3.4666666666666663,7.466666666666668,2.4,7.466666666666668,5.066666666666666,7.199999999999999,4.533333333333333,2.4,5.066666666666666,3.2,4.8,6.666666666666667,6.933333333333333,5.866666666666666,1.0666666666666667,4.0,,50.400000000000006,30.93333333333333,49.86666666666666,13.066666666666666,12.266666666666666,5.6,14.666666666666668,21.066666666666666,8.533333333333333,13.6,,51,15,375\r\n+sp|P63261|ACTG_HUMAN,7.733333333333333,1.6,5.333333333333334,7.733333333333333,3.4666666666666663,7.466666666666668,2.4,7.733333333333333,5.066666666666666,7.199999999999999,4.533333333333333,2.4,5.066666666666666,3.2,4.8,6.666666666666667,6.933333333333333,5.6000000000000005,1.0666666666666667,4.0,,50.400000000000006,30.93333333333333,48.8,13.066666666666666,12.266666666666666,5.6,14.933333333333334,20.8,8.533333333333333,13.6,,51,15,375\r\n+sp|P07195|LDHB_HUMAN,6.287425149700598,1.4970059880239521,5.688622754491018,6.287425149700598,1.4970059880239521,6.88622754491018,2.095808383233533,7.18562874251497,7.784431137724551,10.778443113772456,2.9940119760479043,5.089820359281437,3.293413173652695,3.293413173652695,2.3952095808383236,7.784431137724551,3.8922155688622757,11.377245508982035,1.7964071856287425,2.095808383233533,,52.395209580838326,32.634730538922156,51.796407185628745,11.976047904191617,12.275449101796408,8.383233532934131,17.964071856287426,24.550898203592816,5.389221556886227,11.676646706586826,,39,2,334\r\n+sp|O75746|CMC1_HUMAN,9.587020648967552,1.0324483775811208,4.277286135693215,5.3097345132743365,6.1946902654867255,8.259587020648967,1.9174041297935103,5.752212389380531,5.014749262536873,10.176991150442479,1.4749262536873156,3.687315634218289,5
.014749262536873,4'..b'968,,41.26984126984127,37.3015873015873,50.0,7.936507936507937,24.6031746031746,4.761904761904762,9.523809523809524,23.809523809523807,5.555555555555555,16.666666666666664,,21,2,126\r\n+sp|Q99880|H2B1L_HUMAN,10.317460317460316,0.0,2.380952380952381,5.555555555555555,1.5873015873015872,4.761904761904762,2.380952380952381,4.761904761904762,15.873015873015872,5.555555555555555,2.380952380952381,2.380952380952381,3.968253968253968,2.380952380952381,6.349206349206349,11.904761904761903,6.349206349206349,7.142857142857142,0.0,3.968253968253968,,40.476190476190474,37.3015873015873,49.2063492063492,7.936507936507937,24.6031746031746,4.761904761904762,10.317460317460316,22.22222222222222,5.555555555555555,18.253968253968253,,23,2,126\r\n+sp|P58876|H2B1D_HUMAN,9.523809523809524,0.0,2.380952380952381,5.555555555555555,1.5873015873015872,5.555555555555555,2.380952380952381,4.761904761904762,15.873015873015872,4.761904761904762,2.380952380952381,2.380952380952381,4.761904761904762,2.380952380952381,6.349206349206349,11.11111111111111,7.142857142857142,7.142857142857142,0.0,3.968253968253968,,39.68253968253968,37.3015873015873,50.0,7.936507936507937,24.6031746031746,4.761904761904762,9.523809523809524,22.22222222222222,5.555555555555555,18.253968253968253,,23,2,126\r\n+sp|Q93079|H2B1H_HUMAN,10.317460317460316,0.0,3.1746031746031744,4.761904761904762,1.5873015873015872,5.555555555555555,2.380952380952381,4.761904761904762,15.873015873015872,4.761904761904762,2.380952380952381,2.380952380952381,4.761904761904762,2.380952380952381,6.349206349206349,11.11111111111111,6.349206349206349,7.142857142857142,0.0,3.968253968253968,,40.476190476190474,37.3015873015873,50.793650793650784,7.936507936507937,24.6031746031746,4.761904761904762,9.523809523809524,23.015873015873012,5.555555555555555,17.46031746031746,,22,2,126\r\n+sp|Q5QNW6|H2B2F_HUMAN,9.523809523809524,0.0,3.1746031746031744,4.761904761904762,1.5873015873015872,5.555555555555555,2.380952380952381,4.761904761
904762,15.873015873015872,4.761904761904762,2.380952380952381,2.380952380952381,4.761904761904762,2.380952380952381,6.349206349206349,11.11111111111111,6.349206349206349,7.936507936507936,0.0,3.968253968253968,,40.476190476190474,37.3015873015873,50.79365079365079,7.936507936507937,24.6031746031746,4.761904761904762,9.523809523809524,23.015873015873016,5.555555555555555,17.46031746031746,,22,2,126\r\n+sp|Q99877|H2B1N_HUMAN,9.523809523809524,0.0,2.380952380952381,5.555555555555555,1.5873015873015872,5.555555555555555,2.380952380952381,4.761904761904762,15.873015873015872,4.761904761904762,2.380952380952381,2.380952380952381,4.761904761904762,2.380952380952381,6.349206349206349,11.904761904761903,6.349206349206349,7.142857142857142,0.0,3.968253968253968,,39.68253968253968,37.3015873015873,50.0,7.936507936507937,24.6031746031746,4.761904761904762,9.523809523809524,22.22222222222222,5.555555555555555,18.253968253968253,,23,2,126\r\n+sp|P57053|H2BFS_HUMAN,10.317460317460316,0.0,2.380952380952381,5.555555555555555,1.5873015873015872,5.555555555555555,2.380952380952381,4.761904761904762,15.079365079365079,4.761904761904762,2.380952380952381,2.380952380952381,5.555555555555555,2.380952380952381,7.142857142857142,10.317460317460316,6.349206349206349,7.142857142857142,0.0,3.968253968253968,,40.476190476190474,37.301587301587304,50.0,7.936507936507937,24.6031746031746,4.761904761904762,9.523809523809524,23.015873015873012,5.555555555555555,16.666666666666664,,21,2,126\r\n+sp|Q99879|H2B1M_HUMAN,8.73015873015873,0.0,2.380952380952381,5.555555555555555,1.5873015873015872,5.555555555555555,2.380952380952381,5.555555555555555,15.873015873015872,4.761904761904762,2.380952380952381,3.1746031746031744,4.761904761904762,2.380952380952381,6.349206349206349,11.11111111111111,5.555555555555555,7.936507936507936,0.0,3.968253968253968,,40.476190476190474,38.095238095238095,49.2063492063492,7.936507936507937,24.6031746031746,5.555555555555555,10.317460317460316,22.22222222222222,5.5555555555
55555,16.666666666666664,,21,2,126\r\n'
b
diff -r 000000000000 -r 23eea82f5192 all stuff/test-data/R1 substrates.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/test-data/R1 substrates.csv Wed Jan 16 13:55:22 2019 -0500
b
b'@@ -0,0 +1,292 @@\n+       YSSPAPAD,sp|Q13283|G3BP1_HUMAN\r\n+     TFYAWCNSHL,sp|Q08043|ACTN3_HUMAN; sp|P35609|ACTN2_HUMAN; sp|P12814|ACTN1_HUMAN; sp|O43707|ACTN4_HUMAN\r\n+DEEEDDDYEEDEEDD,sp|P19338|NUCL_HUMAN\r\n+  VALAGYFHYYSCE,sp|Q15738|NSDHL_HUMAN\r\n+ LAADAGYFLSR   ,sp|Q9Y371|SHLB1_HUMAN\r\n+SHFEQWGYLTDCVVM,sp|P09651|ROA1_HUMAN; sp|Q32P51|RA1L2_HUMAN\r\n+       YTIIGESI,sp|P54136|SYRC_HUMAN\r\n+  GFGFVYFSSMAEV,sp|P22626|ROA2_HUMAN\r\n+    GYPYLLLFR  ,sp|Q8NBS9|TXND5_HUMAN\r\n+ TVLSGGYTMYPGIA,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN\r\n+     SSYTFFELQE,sp|Q96JH7|VCIP1_HUMAN\r\n+      SYSPAPADI,sp|Q13283|G3BP1_HUMAN\r\n+     GLYSVINQK ,sp|P07195|LDHB_HUMAN\r\n+      SYMVDPAIN,sp|Q15007|FL2D_HUMAN\r\n+      AYFSEFAAK,sp|O14776|TCRG1_HUMAN\r\n+LCPTHADYLNNLANI,sp|O15294|OGT1_HUMAN\r\n+    GIGYLHLKPTA,sp|Q9UKX7|NUP50_HUMAN\r\n+   RPASYAGLPTTL,sp|O43312|MTSS1_HUMAN\r\n+     LQYSSVLVSG,sp|P22314|UBA1_HUMAN\r\n+     LAYATFAGIE,sp|O75746|CMC1_HUMAN\r\n+ FFDHSGYLVMDAYE,sp|Q15005|SPCS2_HUMAN\r\n+    RPHYPTPGIY ,sp|P62995|TRA2B_HUMAN\r\n+  AVGTQYLSGAGLL,sp|Q13596|SNX1_HUMAN\r\n+SDEEREPYPVLGSGA,sp|P42166|LAP2A_HUMAN\r\n+      VYFSSMAEV,sp|P22626|ROA2_HUMAN\r\n+      IYITNDK  ,sp|P11142|HSP7C_HUMAN; sp|P0DMV9|HS71B_HUMAN; sp|P0DMV8|HS71A_HUMAN\r\n+      TYIFSPEGR,sp|P25789|PSA4_HUMAN\r\n+   VNNAYLIGLGYT,sp|Q9Y277|VDAC3_HUMAN\r\n+PFSHLGQYPEGCSSY,sp|O75521|ECI2_HUMAN\r\n+     ALYFSYGR  ,sp|P04075|ALDOA_HUMAN\r\n+     DKYPVREPID,sp|Q14498|RBM39_HUMAN\r\n+ GPPDFSYDEEREPT,sp|P42166|LAP2A_HUMAN\r\n+    ISGYFSSGWTV,sp|Q9UPN3|MACF1_HUMAN\r\n+       YTLFVPR ,sp|P12955|PEPD_HUMAN\r\n+ PEVNGGYGDAVPSG,sp|P49321|NASP_HUMAN\r\n+     GCYIVKPFNL,sp|Q9ULW0|TPX2_HUMAN\r\n+  STMVGYPYWMAPE,sp|Q13177|PAK2_HUMAN\r\n+      EYTLHLVLR,sp|P62987|RL40_HUMAN; sp|P62979|RS27A_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG48|UBC_HUMAN; sp|P0CG47|UBB_HUMAN; 
sp|P0CG47|UBB_HUMAN; sp|P0CG47|UBB_HUMAN; tr|A0A2R8Y422|A0A2R8Y422_HUMAN\r\n+      TYYLVLDEA,sp|P17844|DDX5_HUMAN\r\n+   TFNTYTGGLLLP,sp|P12956|XRCC6_HUMAN\r\n+    LCPYHADSLNN,sp|O15294|OGT1_HUMAN\r\n+KLFIGGLYFETTDES,sp|P09651|ROA1_HUMAN; sp|Q32P51|RA1L2_HUMAN\r\n+       YPPTTMLL,sp|Q9UGU5|HMGX4_HUMAN\r\n+ GISCMNYTLSESPF,sp|Q15181|IPYR_HUMAN\r\n+     VIYGTTLGYL,sp|Q9NUL3|STAU2_HUMAN\r\n+   TPIGYFLGSLSL,sp|P24752|THIL_HUMAN\r\n+     HIYIFSPEGR,sp|P60900|PSA6_HUMAN\r\n+  LSHANYAVVLSAV,sp|P63010|AP2B1_HUMAN; sp|Q10567|AP1B1_HUMAN\r\n+GDGTTSVYLLAAEFL,sp|Q99832|TCPH_HUMAN\r\n+     GSYFMDHVLR,sp|P53396|ACLY_HUMAN\r\n+GANTHLSYFSFTK  ,sp|A0FGR8|ESYT2_HUMAN\r\n+ADDTVAPYSTFCK  ,sp|P49407|ARRB1_HUMAN\r\n+       YTMVDPAI,sp|Q15007|FL2D_HUMAN\r\n+    SSTYFFELQES,sp|Q96JH7|VCIP1_HUMAN\r\n+AEVGDGTYSVTLLAA,sp|Q99832|TCPH_HUMAN\r\n+       YPTGPSNS,sp|Q96I25|SPF45_HUMAN\r\n+  GYAFVYFCTK   ,sp|O60506|HNRPQ_HUMAN\r\n+      LYLHGLQQY,sp|O00148|DX39A_HUMAN; sp|Q13838|DX39B_HUMAN\r\n+EVGDGTTYVTLLAAE,sp|Q99832|TCPH_HUMAN\r\n+AAVENLPYFLVELSR,sp|Q14974|IMB1_HUMAN\r\n+    INEYFVDKDFV,sp|Q8WUA2|PPIL4_HUMAN\r\n+VARPEPAYGYTLEFR,sp|Q16658|FSCN1_HUMAN\r\n+     AAYITATSPG,sp|P31323|KAP3_HUMAN\r\n+GDGTTTAYVLAR   ,sp|P10809|CH60_HUMAN\r\n+    VQAYLAANTFT,sp|P20290|BTF3_HUMAN\r\n+  GCAFVYFSTR   ,sp|O95319|CELF2_HUMAN\r\n+     AEYFMFR   ,sp|Q13126|MTAP_HUMAN\r\n+LINKPDGYFLLR   ,sp|P51692|STA5B_HUMAN; sp|P42229|STA5A_HUMAN\r\n+      SYMVGTPYW,sp|Q13177|PAK2_HUMAN\r\n+ LIAHAGYLLNLAK ,sp|Q9Y2X3|NOP58_HUMAN\r\n+FGFVTFSYMAEVDAA,sp|P22626|ROA2_HUMAN\r\n+  ATVTPYPVKGK  ,sp|Q9H1E3|NUCKS_HUMAN\r\n+AASAAAAYAASGSPG,sp|Q13263|TIF1B_HUMAN\r\n+      SYIIGESIS,sp|P54136|SYRC_HUMAN\r\n+       YFYNELR ,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN\r\n+    IYPYFLHLHGK,sp|Q08945|SSRP1_HUMAN\r\n+      GYTFMDHVL,sp|P53396|ACLY_HUMAN\r\n+TQTVCNFYDGALVQH,sp|Q01469|FABP5_HUMAN\r\n+     TGYTIAGVVY,sp|Q99436|PSB7_HUMAN\r\n+    LGGYPTSLGTW,sp|P14314|GLU2B_HUMAN\r\n+       
YTIAGVVY,sp|Q99436|PSB7_HUMAN\r\n+KENQGDEYQGQQPPQ,sp|P67809|YBOX1_HUMAN\r\n+    GLGYGTLYIAE,sp|P54105|ICLN_HUMAN\r\n+KPIGMGGYFIIQK  ,tr|A0A1W2PRB8|A0A1W2PRB8_HUMAN; sp|Q9H0W9|CK054_HUMAN\r\n+DLINNLGYIAK    ,sp|P08238|HS90B_HUMAN; sp|P07900|H'..b'ATM_HUMAN\r\n+     SPYTLLPK  ,sp|P27816|MAP4_HUMAN\r\n+GFGFVTFYSMAEVDA,sp|P22626|ROA2_HUMAN\r\n+     LGYFLENR  ,sp|Q09472|EP300_HUMAN\r\n+       YTPSYVAF,sp|P11142|HSP7C_HUMAN; sp|P0DMV9|HS71B_HUMAN; sp|P0DMV8|HS71A_HUMAN\r\n+      RYFNTSTGG,sp|P12956|XRCC6_HUMAN\r\n+      NYTFTEHLY,sp|Q9BTE3|MCMBP_HUMAN\r\n+  SGEVYYCQVEHPS,tr|A0A182DWH4|A0A182DWH4_HUMAN\r\n+TLPLTGSYFHDQIAM,sp|P55265|DSRAD_HUMAN\r\n+  AMYPGYFYFQFK ,sp|Q9NRW3|ABC3C_HUMAN\r\n+   FFDHYGTLVMDA,sp|Q15005|SPCS2_HUMAN\r\n+      SYTTFFELQ,sp|Q96JH7|VCIP1_HUMAN\r\n+  DKVNCYFYFK   ,tr|M0R2N4|M0R2N4_HUMAN; sp|Q8WU68|U2AF4_HUMAN; sp|Q01081|U2AF1_HUMAN; sp|P0DN76|U2AF5_HUMAN\r\n+NIVLSGGYTMFR   ,sp|P61158|ARP3_HUMAN\r\n+      LYQTFLQLS,sp|Q92608|DOCK2_HUMAN\r\n+      EYLVYLTHL,sp|O14980|XPO1_HUMAN\r\n+     TGYFCALSTV,sp|P18433|PTPRA_HUMAN\r\n+     DMYMFVTASK,sp|Q13347|EIF3I_HUMAN\r\n+     ASYIFLSK  ,sp|Q8ND76|CCNY_HUMAN; sp|Q8N7R7|CCYL1_HUMAN\r\n+GISCMNTYLSESPFK,sp|Q15181|IPYR_HUMAN\r\n+    KTVYAMDVVYA,sp|P62805|H4_HUMAN\r\n+ADDKYNDYFWK    ,sp|P14625|ENPL_HUMAN\r\n+     ILYPIIFK  ,sp|P82650|RT22_HUMAN\r\n+    TPAYPVVHIR ,sp|P14866|HNRPL_HUMAN\r\n+ ERDHSPYPSVFNSD,sp|Q6UN15|FIP1_HUMAN\r\n+     LLYISGK   ,sp|P06748|NPM_HUMAN\r\n+  LGGSPYSLGTWGS,sp|P14314|GLU2B_HUMAN\r\n+VEHPSVTYPLTVEWR,sp|P01911|2B1F_HUMAN; sp|Q30154|DRB5_HUMAN\r\n+FGGSFAGYFGGAGGH,sp|P52272|HNRPM_HUMAN\r\n+VEHPSVTYPLTVEWS,tr|A0A182DWH4|A0A182DWH4_HUMAN\r\n+ATFHTPFYHLGQSPE,sp|O75521|ECI2_HUMAN\r\n+     NSYFTEHLYR,sp|Q9BTE3|MCMBP_HUMAN\r\n+   AVFPYIVGR   ,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN\r\n+     LGYLSALDIL,sp|Q86VP6|CAND1_HUMAN\r\n+     GMYLVTPLQL,sp|Q08211|DHX9_HUMAN\r\n+    SSMYFLTR   ,sp|Q9Y3I0|RTCB_HUMAN\r\n+       YIFAYFTG,sp|Q9BRA2|TXD17_HUMAN\r\n+    TFNYSTGGLLL,sp|P12956|XRCC6_HUMAN\r\n+    
LIFYTITSK  ,sp|P43686|PRS6B_HUMAN\r\n+QAYHTQTYPLIEYYR,sp|P54819|KAD2_HUMAN\r\n+     GQYLVVQFTV,sp|P27797|CALR_HUMAN\r\n+      LYFQHDPET,sp|Q14697|GANAB_HUMAN\r\n+    SPSYKPSLAVA,sp|O60508|PRP17_HUMAN\r\n+   WGTLYDCVVMRD,sp|P51991|ROA3_HUMAN\r\n+    ISVYFYHVK  ,sp|P55884|EIF3B_HUMAN\r\n+   YITAYTFAQAR ,sp|Q99575|POP1_HUMAN\r\n+      GYGIVSAPV,sp|P15880|RS2_HUMAN\r\n+     IGYIYGDLK ,sp|P49588|SYAC_HUMAN\r\n+RPASVSSYAAVEHEQ,sp|Q7Z5L9|I2BP2_HUMAN\r\n+       YTIFSPEG,sp|P25789|PSA4_HUMAN\r\n+      TYFYTALGR,sp|Q9UIA9|XPO7_HUMAN\r\n+GTGSGMGYLLISK  ,sp|P07437|TBB5_HUMAN; sp|Q13509|TBB3_HUMAN; tr|A0A0B4J269|A0A0B4J269_HUMAN\r\n+     LYYILQGDSP,sp|O15042|SR140_HUMAN\r\n+    GRLYPVPVPR ,sp|Q9UKM9|RALY_HUMAN\r\n+  NTLFGYFHVAHSS,sp|Q13586|STIM1_HUMAN\r\n+       YSTTFFEL,sp|Q96JH7|VCIP1_HUMAN\r\n+  RPASVYSSAAVEH,sp|Q7Z5L9|I2BP2_HUMAN\r\n+     DGYFVISK  ,sp|Q8WUK0|PTPM1_HUMAN\r\n+    SDGYFLVR   ,sp|P15498|VAV_HUMAN\r\n+     YHYVNGHNCE,sp|P09651|ROA1_HUMAN; tr|A0A2R8Y4L2|A0A2R8Y4L2_HUMAN\r\n+LSVVSGIYATSEDIP,sp|Q14C86|GAPD1_HUMAN\r\n+   QRSPYPAPAPAP,sp|Q9H7N4|SFR19_HUMAN\r\n+    KQGYIFLAGPP,sp|Q9HCC0|MCCB_HUMAN\r\n+   YNQAYPTFHQWR,sp|Q9UI08|EVL_HUMAN\r\n+     SSYPAPADIA,sp|Q13283|G3BP1_HUMAN\r\n+       YTMVGTPY,sp|Q13177|PAK2_HUMAN\r\n+QASLAANYFTITGHA,sp|P20290|BTF3_HUMAN\r\n+       YTLNEIYF,sp|P47756|CAPZB_HUMAN\r\n+  INQDGYFLVR   ,sp|Q13094|LCP2_HUMAN\r\n+     QRYPSPAPAP,sp|Q9H7N4|SFR19_HUMAN\r\n+      YYIHSQLEH,sp|Q9BWJ5|SF3B5_HUMAN\r\n+YGVSGYPYLK     ,sp|P30101|PDIA3_HUMAN\r\n+   ERPVYMSFPLSS,sp|Q6ZNL6|FGD5_HUMAN\r\n+ TLPLTGYTFHDQIA,sp|P55265|DSRAD_HUMAN\r\n+    QIGYFASVEQF,sp|O60573|IF4E2_HUMAN\r\n+   ARPAYAAVLVPL,sp|Q8WV74|NUDT8_HUMAN\r\n+     GIYCMNTTLS,sp|Q15181|IPYR_HUMAN\r\n+   AVFPYIVGRPR ,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN\r\n+     QGYQYTFSSI,sp|Q08945|SSRP1_HUMAN\r\n+     AGYYFSNQAV,sp|Q14690|RRP5_HUMAN\r\n+       YLTIVDTG,sp|P07900|HS90A_HUMAN\r\n+ ETLVYLYHLDYVDT,sp|O14980|XPO1_HUMAN\r\n+HEEIAMAYVTALR  ,sp|P04075|ALDOA_HUMAN\r\n+      
AYFHTPFSH,sp|O75521|ECI2_HUMAN\r\n+SADEPMTYFVVCNEC,sp|P23193|TCEA1_HUMAN\r\n+ASYVNLPYIALCNTD,sp|P08865|RSSA_HUMAN\r\n+TVLSGGTYMYPGIAD,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN\r\n+    ESYYVYVYK  ,sp|P62807|H2B1C_HUMAN; sp|P57053|H2BFS_HUMAN; sp|O60814|H2B1K_HUMAN; sp|Q99880|H2B1L_HUMAN; sp|Q99879|H2B1M_HUMAN; sp|Q99877|H2B1N_HUMAN; sp|Q93079|H2B1H_HUMAN; sp|Q5QNW6|H2B2F_HUMAN; sp|P58876|H2B1D_HUMAN\r\n+      LYFISGR  ,sp|Q15005|SPCS2_HUMAN\r\n'
b
diff -r 000000000000 -r 23eea82f5192 all stuff/test-data/R2 SBf.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/test-data/R2 SBf.csv Wed Jan 16 13:55:22 2019 -0500
b
b'@@ -0,0 +1,127 @@\n+Accession Numbers,Amino Acids,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,Properties,Hydrophobic,Polar,Small,Negative,Postive,Amide,Large Aliphatic,Small Aliphatic,Aromatic,Hydroxy,X,Number of ST,Number of pST\r\n+sp|Q15738|NSDHL_HUMAN,7.238605898123325,1.3404825737265416,4.557640750670242,5.361930294906166,5.093833780160858,6.434316353887399,2.680965147453083,5.898123324396782,5.361930294906166,9.115281501340483,2.680965147453083,5.093833780160858,6.166219839142091,4.557640750670242,5.093833780160858,4.289544235924933,6.166219839142091,8.310991957104557,0.5361930294906166,4.021447721179625,,50.670241286863266,32.707774798927616,49.59785522788203,9.919571045576408,13.136729222520106,9.651474530831099,15.013404825737265,21.983914209115284,9.651474530831099,10.455764075067023,,39,1,373\r\n+sp|Q06830|PRDX1_HUMAN,6.030150753768844,2.0100502512562812,8.040201005025125,3.5175879396984926,7.537688442211055,8.040201005025125,2.0100502512562812,6.532663316582915,9.547738693467336,6.030150753768844,1.507537688442211,3.015075376884422,6.532663316582915,4.522613065326634,3.015075376884422,6.532663316582915,4.522613065326634,7.537688442211055,1.0050251256281406,2.512562814070352,,48.743718592964825,33.66834170854271,52.26130653266331,11.557788944723617,14.572864321608039,7.5376884422110555,12.56281407035176,21.608040201005025,11.055276381909547,11.055276381909549,,22,2,199\r\n+sp|Q9Y371|SHLB1_HUMAN,8.493150684931507,1.36986301369863,4.931506849315069,7.9452054794520555,3.0136986301369864,5.205479452054795,1.095890410958904,4.931506849315069,6.8493150684931505,12.054794520547945,2.4657534246575343,6.027397260273973,3.287671232876712,4.931506849315069,4.931506849315069,7.397260273972603,7.123287671232877,4.10958904109589,0.547945205479452,3.287671232876712,,45.479452054794514,36.71232876712329,47.945205479452056,12.876712328767123,12.876712328767123,10.958904109589042,16.986301369863014,17.808219178082194,6.8493150684931505,14.520547945205479,,53,1,365\r\n+sp|P08
238|HS90B_HUMAN,5.248618784530387,0.8287292817679558,7.044198895027624,13.259668508287293,3.591160220994475,4.696132596685083,1.7955801104972375,6.629834254143646,10.359116022099448,8.56353591160221,2.6243093922651934,3.867403314917127,3.1767955801104977,2.9005524861878453,4.41988950276243,6.629834254143646,4.696132596685083,5.801104972375691,0.5524861878453038,3.314917127071823,,41.85082872928177,43.646408839779006,41.988950276243095,20.303867403314918,16.574585635359114,6.767955801104972,15.193370165745858,15.745856353591162,7.458563535911601,11.32596685082873,,82,7,724\r\n+sp|O15530|PDPK1_HUMAN,5.935251798561151,1.618705035971223,5.0359712230215825,7.374100719424461,4.856115107913669,6.115107913669065,2.697841726618705,4.496402877697841,6.83453237410072,10.071942446043165,1.7985611510791366,3.237410071942446,6.83453237410072,5.0359712230215825,5.39568345323741,7.913669064748201,4.676258992805756,4.856115107913669,1.079136690647482,4.136690647482014,,44.96402877697841,35.61151079136691,46.22302158273381,12.410071942446043,14.928057553956835,8.273381294964029,14.568345323741006,16.906474820143885,10.071942446043165,12.589928057553957,,70,15,556\r\n+sp|P22626|ROA2_HUMAN,3.1161473087818696,0.28328611898017,4.815864022662889,6.232294617563739,5.382436260623229,24.362606232294617,1.9830028328611897,2.5495750708215295,5.382436260623229,2.8328611898017,1.9830028328611897,5.382436260623229,4.2492917847025495,2.8328611898017,7.0821529745042495,7.0821529745042495,3.39943342776204,4.53257790368272,0.28328611898017,6.232294617563739,,51.55807365439094,33.711048158640224,57.22379603399433,11.048158640226628,14.44759206798867,8.215297450424929,5.38243626062323,32.01133144475921,11.898016997167138,10.48158640226629,,37,9,353\r\n+sp|Q32P44|EMAL3_HUMAN,6.8080357142857135,2.1205357142857144,5.580357142857143,4.575892857142857,2.7901785714285716,12.834821428571427,2.7901785714285716,3.3482142857142856,3.125,9.151785714285714,1.0044642857142858,2.455357142857143,7.142857142857142,3.2
366071428571432,'..b'170506912442393,8.755760368663594,1.3824884792626728,6.221198156682028,8.755760368663594,8.294930875576037,2.0737327188940093,5.529953917050691,3.686635944700461,2.5345622119815667,3.9170506912442393,5.990783410138248,4.147465437788019,7.603686635944701,0.6912442396313364,2.5345622119815667,,51.38248847926267,34.7926267281106,52.995391705069125,12.672811059907833,14.055299539170505,8.064516129032258,14.516129032258064,26.267281105990786,7.142857142857142,10.138248847926267,,44,3,434\r\n+sp|P49368|TCPG_HUMAN,7.339449541284404,1.834862385321101,6.238532110091743,7.706422018348624,0.9174311926605505,6.605504587155964,1.651376146788991,9.541284403669724,6.605504587155964,8.440366972477065,4.036697247706423,3.486238532110092,3.302752293577982,4.587155963302752,6.422018348623854,5.871559633027523,6.238532110091743,6.7889908256880735,0.7339449541284404,1.651376146788991,,47.88990825688073,36.69724770642202,47.70642201834862,13.944954128440367,14.678899082568808,8.073394495412845,17.98165137614679,20.733944954128443,3.302752293577982,12.110091743119266,,66,2,545\r\n+sp|Q9BTE3|MCMBP_HUMAN,4.984423676012461,2.336448598130841,5.451713395638629,6.853582554517133,3.894080996884735,4.205607476635514,2.959501557632399,3.894080996884735,4.8286604361370715,12.77258566978193,2.0249221183800623,5.763239875389408,6.386292834890965,5.451713395638629,4.672897196261682,7.320872274143301,5.14018691588785,6.697819314641744,1.2461059190031152,3.115264797507788,,45.17133956386292,35.98130841121495,48.28660436137071,12.305295950155763,12.461059190031154,11.214953271028037,16.666666666666664,15.88785046728972,8.255451713395638,12.46105919003115,,80,2,642\r\n+sp|Q13126|MTAP_HUMAN,7.420494699646643,3.180212014134275,4.593639575971731,7.06713780918728,2.8268551236749118,7.06713780918728,3.180212014134275,7.773851590106007,6.36042402826855,7.420494699646643,3.180212014134275,2.8268551236749118,4.593639575971731,2.4734982332155475,4.946996466431095,6.007067137809187,9.1872791519
43462,6.713780918727916,1.4134275618374559,1.76678445229682,,48.76325088339223,31.448763250883392,51.590106007067135,11.660777385159012,14.48763250883392,5.30035335689046,15.194346289752652,21.20141342756184,6.007067137809187,15.19434628975265,,43,3,283\r\n+sp|P50452|SPB8_HUMAN,7.754010695187167,2.6737967914438503,5.614973262032086,9.893048128342247,7.4866310160427805,4.545454545454546,1.8716577540106951,2.941176470588235,8.288770053475936,9.62566844919786,3.4759358288770055,4.010695187165775,2.941176470588235,2.6737967914438503,4.545454545454546,6.149732620320856,5.88235294117647,6.149732620320856,0.8021390374331552,2.6737967914438503,,48.12834224598931,36.89839572192514,45.721925133689844,15.508021390374333,14.705882352941178,6.684491978609626,12.566844919786096,18.44919786096257,10.962566844919786,12.032085561497325,,45,1,374\r\n+sp|P35237|SPB6_HUMAN,6.648936170212766,1.5957446808510638,4.787234042553192,9.308510638297872,7.446808510638298,5.851063829787234,1.3297872340425532,3.4574468085106385,7.180851063829788,9.574468085106384,5.319148936170213,4.521276595744681,2.6595744680851063,3.4574468085106385,3.9893617021276597,7.446808510638298,6.117021276595745,6.382978723404255,0.7978723404255319,2.127659574468085,,49.20212765957447,34.57446808510638,46.01063829787234,14.095744680851062,12.500000000000002,7.9787234042553195,13.031914893617023,18.882978723404257,10.372340425531915,13.563829787234042,,51,1,376\r\n+tr|A0A182DWH4|A0A182DWH4_HUMAN,4.511278195488721,1.8796992481203008,2.631578947368421,7.142857142857142,6.015037593984962,9.398496240601503,3.3834586466165413,1.1278195488721803,3.007518796992481,10.902255639097744,1.5037593984962405,3.3834586466165413,4.135338345864661,5.639097744360902,7.142857142857142,8.270676691729323,5.639097744360902,9.398496240601503,1.5037593984962405,3.3834586466165413,,49.62406015037594,32.330827067669176,49.24812030075188,9.774436090225564,13.533834586466165,9.022556390977444,12.030075187969924,23.308270676691727,10.90225563909774
4,13.909774436090224,,37,4,266\r\n'
b
diff -r 000000000000 -r 23eea82f5192 all stuff/test-data/R2 subs.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/test-data/R2 subs.csv Wed Jan 16 13:55:22 2019 -0500
b
@@ -0,0 +1,118 @@
+   QATSYASTFVKP,sp|Q15738|NSDHL_HUMAN
+     NSYFSEIFK ,sp|Q06830|PRDX1_HUMAN
+      SYGPTSLFA,sp|Q9Y371|SHLB1_HUMAN
+      QYTIVCHNR,sp|P08238|HS90B_HUMAN
+      AYFMVGSYG,sp|O15530|PDPK1_HUMAN
+       YTFAAFFT,sp|P22626|ROA2_HUMAN
+NSTMLGAYGDYADFQ,sp|Q32P44|EMAL3_HUMAN
+    RPPYAFFLF  ,sp|P41252|SYIC_HUMAN
+VTFIGNSYAIQELFK,sp|Q13283|G3BP1_HUMAN
+       YPGSQAPD,sp|P08238|HS90B_HUMAN; sp|P07900|HS90A_HUMAN
+ AIQGGTYHHLGQNF,sp|Q9Y266|NUDC_HUMAN
+     VHYFPTLK  ,sp|Q8NHW5|RLA0L_HUMAN; sp|P05388|RLA0_HUMAN
+     AGYLFGEGFR,sp|P22314|UBA1_HUMAN
+   DKEVYDDEAEEK,sp|Q4KMQ1|TPRN_HUMAN
+     SSYPLPTISS,sp|O43760|SNG2_HUMAN
+       YPEGEQED,sp|Q9Y277|VDAC3_HUMAN
+    MSAYFIGNSTA,sp|O75521|ECI2_HUMAN
+SPTGPSNYFLANMGG,sp|P68104|EF1A1_HUMAN; sp|Q5VTE0|EF1A3_HUMAN
+      GYLDPVEK ,sp|O60907|TBL1X_HUMAN; sp|Q9BZK7|TBL1R_HUMAN; sp|Q9BQ87|TBL1Y_HUMAN
+    VYAYILNAGTN,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN
+     VCYLAIIDPG,sp|P07900|HS90A_HUMAN
+     LAYFWYYAK ,sp|Q92499|DDX1_HUMAN
+     TGYAEMSSIL,sp|P63167|DYL1_HUMAN; sp|Q96FJ2|DYL2_HUMAN
+    YFPYQALNFAF,sp|P24752|THIL_HUMAN
+      DYLYEAVR ,sp|Q14161|GIT2_HUMAN
+      QYLMWSATW,sp|Q99832|TCPH_HUMAN
+     GGYILAPTVS,sp|A0FGR8|ESYT2_HUMAN
+    IQAYTMAFK  ,sp|Q15019|SEPT2_HUMAN
+SATFIGNYTAIQELF,sp|Q15007|FL2D_HUMAN
+     AGYFIAPPVY,sp|O95218|ZRAB2_HUMAN
+    SAQYFLSEIR ,sp|P22392|NDKB_HUMAN
+      SYFSHYSGL,sp|O00148|DX39A_HUMAN; sp|Q13838|DX39B_HUMAN
+    VNNYTMLGASG,sp|P00558|PGK1_HUMAN
+QATSTASYFVKPIFS,sp|Q14974|IMB1_HUMAN
+CANPAAGYVILLENL,sp|Q8WUA2|PPIL4_HUMAN
+   TGQAYVASGIPA,sp|Q06830|PRDX1_HUMAN
+      TYFLHISK ,sp|P31323|KAP3_HUMAN
+     YHYINGHNCE,sp|Q71DI3|H32_HUMAN; sp|P84243|H33_HUMAN; sp|P68431|H31_HUMAN; sp|Q16695|H31T_HUMAN; tr|Q5TEC6|Q5TEC6_HUMAN; sp|Q6NXT2|H3C_HUMAN
+    ISSYLYQAAAP,sp|P22626|ROA2_HUMAN
+       YPIINESR,sp|Q13263|TIF1B_HUMAN
+    TFTYQETITNA,sp|P67809|YBOX1_HUMAN
+       YPAVKPAA,sp|P23193|TCEA1_HUMAN
+       YPTTVLFT,sp|O15530|PDPK1_HUMAN
+      GYFTLLWPS,sp|P62995|TRA2B_HUMAN
+     GYYFSLTTFS,sp|Q9Y4E1|WAC2C_HUMAN; sp|Q641Q2|WAC2A_HUMAN
+   GAGGYIITNFER,sp|P49915|GUAA_HUMAN
+      IYVTSEVPF,sp|Q16666|IF16_HUMAN
+     SRYPPPVSK ,sp|P11940|PABP1_HUMAN
+   SSGPYSLFAVTV,sp|P55884|EIF3B_HUMAN
+  ASFNDYFVHVTDL,sp|O15530|PDPK1_HUMAN
+   GLGAYFILR   ,sp|P50748|KNTC1_HUMAN
+      AYIISEQQA,sp|P26583|HMGB2_HUMAN;sp|P09429|HMGB1_HUMAN;
+   AAVPYGASTGIY,sp|O95619|YETS4_HUMAN
+EAEAAHGYVTR    ,sp|P49321|NASP_HUMAN
+      SYFAAFFTR,sp|Q14C86|GAPD1_HUMAN
+   EGTEYFADHR  ,sp|P47756|CAPZB_HUMAN
+      QYPSFWILA,sp|Q7Z5L9|I2BP2_HUMAN
+   VGAFYMVCK   ,sp|P27824|CALX_HUMAN
+     FEYFCLDPSL,sp|P49407|ARRB1_HUMAN
+     TGYLTTNQMS,sp|Q07020|RL18_HUMAN
+    GLAYFCLDK  ,sp|Q71DI3|H32_HUMAN; sp|P84243|H33_HUMAN; sp|P68431|H31_HUMAN; sp|Q16695|H31T_HUMAN; tr|Q5TEC6|Q5TEC6_HUMAN; sp|Q6NXT2|H3C_HUMAN
+ ANSFVGYAQYVSPE,sp|Q8IY67|RAVR1_HUMAN
+ GGYTSGYFR     ,sp|Q9UKW4|VAV3_HUMAN
+     TFYTQETITN,sp|P00352|AL1A1_HUMAN
+      KYQTVCNFT,sp|Q7KZF4|SND1_HUMAN
+   VNNSYMLGASGD,sp|P35244|RFA3_HUMAN
+QNTSRPPYMHVDDFV,sp|P54886|P5CS_HUMAN
+PQGEEEEYMETQE  ,tr|A0A096LPI6|A0A096LPI6_HUMAN; sp|P0DPI2|GAL3A_HUMAN; sp|A0A0B4J2D5|GAL3B_HUMAN
+     YPYFIDALR ,sp|Q16563|SYPL1_HUMAN
+   YRPGYVALR   ,sp|E9PAV3|NACAM_HUMAN
+      GYFYQGYR ,sp|Q99547|MPH6_HUMAN
+     VNYLIRPDGE,sp|P49189|AL9A1_HUMAN
+    ALDYMNFDVIK,sp|P54819|KAD2_HUMAN
+       YSGPTSLF,sp|O00567|NOP56_HUMAN
+     ISYTLYQAAA,sp|P62995|TRA2B_HUMAN
+      HYLTQIK  ,sp|O00541|PESC_HUMAN
+     AYYFAMGCWP,sp|Q92900|RENT1_HUMAN
+       YFYNQAIM,sp|P40926|MDHM_HUMAN
+      SYFIEYFGK,tr|H7C0C1|H7C0C1_HUMAN; sp|P48047|ATPO_HUMAN
+   VAVAYPAK    ,sp|P78527|PRKDC_HUMAN
+       YPSVSSPE,sp|Q9Y3D7|TIM16_HUMAN
+      GYIQVITQG,sp|Q9HC35|EMAL4_HUMAN
+TGQATVAYGIPAGWM,sp|P29590|PML_HUMAN
+       YFCQLILD,sp|P31943|HNRH1_HUMAN
+ITALAPSYMK     ,sp|Q01469|FABP5_HUMAN
+     APYIVGK   ,sp|P49321|NASP_HUMAN
+IFPNPEAYFVK    ,sp|Q02878|RL6_HUMAN
+     TFYYAGFEMQ,sp|P18124|RL7_HUMAN
+       YFTTQETI,sp|Q15942|ZYX_HUMAN
+     GTYITLVLK ,sp|P30086|PEBP1_HUMAN
+GASQAGMYGYGMPR ,sp|O43491|E41L2_HUMAN
+       YRSPPPVS,sp|P68104|EF1A1_HUMAN; sp|Q5VTE0|EF1A3_HUMAN
+ VCVEHHYFYR    ,sp|Q8ND56|LS14A_HUMAN
+ VPSLVGYFIR    ,sp|Q99873|ANM1_HUMAN
+      AYLWYVPLS,sp|P55265|DSRAD_HUMAN
+     SPYWFGIPR ,sp|P53597|SUCA_HUMAN
+LLYNRPGYVSSLK  ,sp|O15530|PDPK1_HUMAN
+  SSGPTYLFAVTVA,sp|C9JLW8|MCRI1_HUMAN
+AGYDPTPYMR     ,sp|P25098|ARBK1_HUMAN
+     GGYMVTYGGM,sp|Q86U90|YRDC_HUMAN
+    GNPYVEVDLFT,sp|P52209|6PGD_HUMAN
+    RVAYPVDWK  ,sp|Q9UQ35|SRRM2_HUMAN
+AGGPRPEYPVPAGR ,sp|Q6PI48|SYDM_HUMAN
+       YKLQIHR ,sp|P12956|XRCC6_HUMAN
+    GYPYLLWFR  ,sp|Q9NVI7|ATD3A_HUMAN; sp|Q5T9A4|ATD3B_HUMAN
+GAPGQPGYILR    ,sp|Q9NZL9|MAT2B_HUMAN
+     TAYITEPR  ,sp|Q2KHR2|RFX7_HUMAN
+    QATYTASTFVK,sp|P26373|RL13_HUMAN
+       YSFSHYSG,sp|P53396|ACLY_HUMAN
+   ERSPYPSFR   ,sp|O00148|DX39A_HUMAN; sp|Q13838|DX39B_HUMAN
+  SPTGPYNSFLANM,sp|Q71UI9|H2AV_HUMAN; sp|P0C0S5|H2AZ_HUMAN
+FLAGYDPYPTMR   ,sp|P06733|ENOA_HUMAN
+     STYLFSR   ,sp|O60907|TBL1X_HUMAN; sp|Q9BZK7|TBL1R_HUMAN; sp|Q9BQ87|TBL1Y_HUMAN
+       YGTLTTNQ,sp|P49368|TCPG_HUMAN
+   LAPGYIVEVWK ,sp|Q9BTE3|MCMBP_HUMAN
+  AIQGGYSHHLGQN,sp|Q13126|MTAP_HUMAN
+ VCNYGLYFTQK   ,sp|P50452|SPB8_HUMAN; sp|P35237|SPB6_HUMAN
+    DAVYYTEHAK ,tr|A0A182DWH4|A0A182DWH4_HUMAN
b
diff -r 000000000000 -r 23eea82f5192 all stuff/test-data/S1.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/test-data/S1.csv Wed Jan 16 13:55:22 2019 -0500
b
b'@@ -0,0 +1,698 @@\n+Substrates,Species,Reference,X.7,X.6,X.5,X.4,X.3,X.2,X.1,X0,X1,X2,X3,X4,X5,X6,X7,PhosphiteA,B\n+,,sp|Q13283|G3BP1_HUMAN,,,,,,,,xS,S,S,P,A,P,A,D,,\n+,,sp|Q08043|ACTN3_HUMAN; sp|P35609|ACTN2_HUMAN; sp|P12814|ACTN1_HUMAN; sp|O43707|ACTN4_HUMAN,,,,,,T,F,xT,A,W,C,N,S,H,L,,\n+,,sp|P19338|NUCL_HUMAN,D,E,E,E,D,D,D,xS,E,E,D,E,E,D,D,,\n+,,sp|P19338|NUCL_HUMAN,,,A,A,A,P,A,xS,E,D,E,D,D,E,D,,\n+,,sp|Q15738|NSDHL_HUMAN,,,V,A,L,A,G,xT,F,H,Y,Y,S,C,E,,\n+,,sp|P13639|EF2_HUMAN,,,,,,T,G,xT,I,T,T,F,E,H,A,,\n+,,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN,L,Y,A,N,T,V,L,xS,G,G,T,T,M,Y,P,,\n+,,sp|P11021|BIP_HUMAN,,,,,,,V,xT,H,A,V,V,T,V,P,,\n+,,sp|Q9HD20|AT131_HUMAN,,,,,,T,G,xT,L,T,S,D,S,L,V,,\n+,,sp|Q9UQB8|BAIP2_HUMAN,,,,Q,R,P,Y,xS,V,A,V,P,A,F,S,,\n+,,sp|Q9UQ35|SRRM2_HUMAN,T,S,T,Q,R,P,S,xS,P,E,T,A,T,K,,,\n+,,sp|Q06830|PRDX1_HUMAN,,,,,,,,xT,I,A,Q,D,Y,G,V,,\n+,,sp|Q9Y371|SHLB1_HUMAN,,L,A,A,D,A,G,xT,F,L,S,R,,,,,\n+,,sp|P08238|HS90B_HUMAN,,,I,E,D,V,G,xS,D,E,E,D,D,S,G,,\n+,,sp|Q14974|IMB1_HUMAN,,,,,,,T,xT,L,V,I,M,E,R,,,\n+,,sp|P09651|ROA1_HUMAN; sp|Q32P51|RA1L2_HUMAN,S,H,F,E,Q,W,G,xT,L,T,D,C,V,V,M,,\n+,,sp|O15530|PDPK1_HUMAN,,,,,,,,xT,Y,Y,L,M,D,P,S,,\n+,,sp|Q14258|TRI25_HUMAN,,,,,,F,D,xT,I,Y,Q,I,L,L,K,,\n+,,sp|Q9Y5S9|RBM8A_HUMAN,,,,,,G,Y,xT,L,V,E,Y,E,T,Y,,\n+,,sp|P54136|SYRC_HUMAN,,,,,,,,xS,T,I,I,G,E,S,I,,\n+,,sp|P30086|PEBP1_HUMAN,,,G,N,D,I,S,xS,G,T,V,L,S,D,Y,,\n+,,sp|P22626|ROA2_HUMAN,,,G,F,G,F,V,xT,F,S,S,M,A,E,V,,\n+,,sp|P19338|NUCL_HUMAN,,A,A,A,A,P,A,xS,E,D,E,D,D,E,D,,\n+,,sp|Q32P44|EMAL3_HUMAN,,,,G,R,P,I,xT,M,Y,I,P,S,G,I,,\n+,,sp|Q8NBS9|TXND5_HUMAN,,,,,G,Y,P,xT,L,L,L,F,R,,,,\n+,,sp|P41252|SYIC_HUMAN,,,,,,T,G,xT,I,V,V,E,G,H,E,,\n+,,sp|P60709|ACTB_HUMAN; 
sp|P63261|ACTG_HUMAN,,T,V,L,S,G,G,xT,T,M,Y,P,G,I,A,,\n+,,sp|Q96JH7|VCIP1_HUMAN,,,,,,S,S,xT,T,F,F,E,L,Q,E,,\n+,,sp|P09211|GSTP1_HUMAN,,,,,,,,xT,V,V,Y,F,P,V,R,,\n+,,sp|Q13283|G3BP1_HUMAN,,,,,,,S,xS,S,P,A,P,A,D,I,,\n+,,sp|P04083|ANXA1_HUMAN,S,A,V,S,P,Y,P,xT,F,N,P,S,S,D,V,,\n+,,sp|P07195|LDHB_HUMAN,,,,,,G,L,xT,S,V,I,N,Q,K,,,\n+,,sp|P08238|HS90B_HUMAN; sp|P07900|HS90A_HUMAN,,,,,,S,L,xT,N,D,W,E,D,H,L,,\n+,,sp|P49321|NASP_HUMAN,G,S,G,D,A,V,P,xS,G,N,E,V,S,E,N,,\n+,,sp|P68363|TBA1B_HUMAN; sp|Q71U36|TBA1A_HUMAN; sp|Q9BQE3|TBA1C_HUMAN,D,V,N,A,A,I,A,xT,I,K,,,,,,,\n+,,sp|P14625|ENPL_HUMAN,P,E,E,E,P,E,E,xT,A,E,D,T,T,E,D,,\n+,,sp|Q14554|PDIA5_HUMAN,,,,,G,Y,P,xT,F,H,Y,Y,H,Y,G,,\n+,,sp|Q15007|FL2D_HUMAN,,,,,,,S,xT,M,V,D,P,A,I,N,,\n+,,sp|Q9Y266|NUDC_HUMAN,,L,I,T,Q,T,F,xS,H,H,N,Q,L,A,Q,,\n+,,sp|P06748|NPM_HUMAN,,,,,,T,V,xS,L,G,A,G,A,K,,,\n+,,sp|O14776|TCRG1_HUMAN,,,,,,,A,xT,F,S,E,F,A,A,K,,\n+,,sp|O75534|CSDE1_HUMAN,,,,,,L,L,xT,S,Y,G,F,I,Q,C,,\n+,,sp|O15294|OGT1_HUMAN,L,C,P,T,H,A,D,xS,L,N,N,L,A,N,I,,\n+,,sp|Q8NHW5|RLA0L_HUMAN; sp|P05388|RLA0_HUMAN,,,,,,,,xT,S,F,F,Q,A,L,G,,\n+,,sp|Q9UKX7|NUP50_HUMAN,,,,,G,I,G,xT,L,H,L,K,P,T,A,,\n+,,sp|O43312|MTSS1_HUMAN,,,,R,P,A,S,xT,A,G,L,P,T,T,L,,\n+,,sp|Q00613|HSF1_HUMAN,D,T,E,G,R,P,P,xS,P,P,P,T,S,T,P,,\n+,,sp|P47712|PA24A_HUMAN,,,,,,Y,G,xT,F,M,A,P,D,L,F,,\n+,,sp|P68363|TBA1B_HUMAN; sp|Q71U36|TBA1A_HUMAN; sp|Q9BQE3|TBA1C_HUMAN,T,I,G,G,G,D,D,xS,F,N,T,F,F,S,E,,\n+,,sp|P22314|UBA1_HUMAN,,,,,,L,Q,xT,S,S,V,L,V,S,G,,\n+,,sp|P53597|SUCA_HUMAN,,,,,,Q,G,xT,F,H,S,Q,Q,A,L,,\n+,,sp|O75746|CMC1_HUMAN,,,,,,L,A,xT,A,T,F,A,G,I,E,,\n+,,sp|Q96A26|F162A_HUMAN,,,,,,H,E,xT,L,T,S,L,N,L,E,,\n+,,sp|P51570|GALK1_HUMAN,I,Q,E,H,Y,G,G,xT,A,T,F,Y,L,S,Q,,\n+,,sp|Q15005|SPCS2_HUMAN,,F,F,D,H,S,G,xT,L,V,M,D,A,Y,E,,\n+,,sp|P30101|PDIA3_HUMAN,,,,,G,F,P,xT,I,Y,F,S,P,A,N,,\n+,,sp|P23193|TCEA1_HUMAN; 
sp|Q15560|TCEA2_HUMAN,,,,,T,G,G,xT,Q,T,D,L,F,T,C,,\n+,,sp|Q16851|UGPA_HUMAN,,,,,,I,Y,xT,F,N,Q,S,R,,,,\n+,,sp|P62995|TRA2B_HUMAN,,,,,R,P,H,xT,P,T,P,G,I,Y,,,\n+,,sp|Q13596|SNX1_HUMAN,,,A,V,G,T,Q,xT,L,S,G,A,G,L,L,,\n+,,sp|Q4KMQ1|TPRN_HUMAN,,,,W,Q,R,P,xS,S,P,P,P,F,L,P,,\n+,,sp|Q13200|PSMD2_HUMAN,,,,,,,G,xT,L,T,L,C,P,Y,H,,\n+,,sp|Q9UQ35|SRRM2_HUMAN,,,,,,,,xS,P,V,P,S,A,F,S,,\n+,,sp|P42166|LAP2A_HUMAN,S,D,E,E,R,E,P,xT,P,V,L,G,S,G,A,,\n+,,sp|O43776|SYNC_HUMAN,,,,K,E,D,G,xT,F,Y,E,F,G,E,D,,\n+,,sp|Q9HCC0|MCCB_HUMAN,,,,,,Q,G,xT,I,F,L,A,G,P,P,,\n+,,sp|P42166|LAP2A_HUMAN,,,,,,G,G,xT,L,F,'..b',\n+,,sp|P07737|PROF1_HUMAN,,,,,,,,xT,L,V,L,L,M,G,K,,\n+,,sp|Q6NYC8|PPR18_HUMAN,,,,,,,,xS,G,H,T,F,T,V,N,,\n+,,sp|Q8NBQ5|DHB11_HUMAN,,,,,,V,H,xT,F,V,V,D,C,S,N,,\n+,,sp|Q14C86|GAPD1_HUMAN,L,S,V,V,S,G,I,xS,A,T,S,E,D,I,P,,\n+,,sp|Q9H7N4|SFR19_HUMAN,,,,Q,R,S,P,xS,P,A,P,A,P,A,P,,\n+,,sp|P84090|ERH_HUMAN,,,,,,S,H,xT,I,L,L,V,Q,P,T,,\n+,,sp|P04406|G3P_HUMAN,,,,,V,I,I,xS,A,P,S,A,D,A,P,,\n+,,sp|Q9Y5K5|UCHL5_HUMAN,,,,,,L,D,xT,I,F,F,A,K,,,,\n+,,sp|Q68EM7|RHG17_HUMAN,,,,K,R,P,A,xS,M,A,V,M,E,G,D,,\n+,,sp|Q9HCC0|MCCB_HUMAN,,,,,K,Q,G,xT,I,F,L,A,G,P,P,,\n+,,sp|Q9UI08|EVL_HUMAN,,,,Y,N,Q,A,xT,P,T,F,H,Q,W,R,,\n+,,sp|Q13283|G3BP1_HUMAN,,,,,,S,S,xS,P,A,P,A,D,I,A,,\n+,,sp|Q13177|PAK2_HUMAN,,,,,,,,xS,T,M,V,G,T,P,Y,,\n+,,sp|P26641|EF1G_HUMAN,,,,,,,,xS,T,F,V,L,D,E,F,,\n+,,sp|O15530|PDPK1_HUMAN,L,G,E,G,S,F,S,xT,V,V,L,A,R,,,,\n+,,sp|P20292|AL5AP_HUMAN,,,,,,T,G,xT,L,A,F,E,R,,,,\n+,,sp|Q99460|PSMD1_HUMAN,,S,N,C,K,P,S,xT,F,A,Y,P,A,P,L,,\n+,,sp|P20290|BTF3_HUMAN,Q,A,S,L,A,A,N,xT,F,T,I,T,G,H,A,,\n+,,sp|P68363|TBA1B_HUMAN; sp|Q71U36|TBA1A_HUMAN; sp|Q9BQE3|TBA1C_HUMAN,G,G,D,D,S,F,N,xT,F,F,S,E,T,G,A,,\n+,,sp|Q01469|FABP5_HUMAN,,,,,K,T,Q,xT,V,C,N,F,T,D,G,,\n+,,sp|P47756|CAPZB_HUMAN,,,,,,,,xS,T,L,N,E,I,Y,F,,\n+,,sp|Q13094|LCP2_HUMAN,,,I,N,Q,D,G,xT,F,L,V,R,,,,,\n+,,sp|Q9H7N4|SFR19_HUMAN,,,,,,Q,R,xS,P,S,P,A,P,A,P,,\n+,,sp|P04040|CATA_HUMAN,,,,N,A,I,H,xT,F,V,Q,S,G,S,H,,\n+,,sp|P60709|ACTB_HUMAN; 
sp|P63261|ACTG_HUMAN,,,,I,W,H,H,xT,F,Y,N,E,L,R,,,\n+,,sp|Q9BWJ5|SF3B5_HUMAN,,,,,,,Y,xT,I,H,S,Q,L,E,H,,\n+,,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN,,,,,,,H,xT,F,Y,N,E,L,R,,,\n+,,sp|P30101|PDIA3_HUMAN,Y,G,V,S,G,Y,P,xT,L,K,,,,,,,\n+,,sp|Q00839|HNRPU_HUMAN,,,,,,L,N,xT,L,L,Q,R,,,,,\n+,,sp|Q6ZNL6|FGD5_HUMAN,,,,E,R,P,V,xS,M,S,F,P,L,S,S,,\n+,,sp|P55265|DSRAD_HUMAN,,T,L,P,L,T,G,xS,T,F,H,D,Q,I,A,,\n+,,sp|O60573|IF4E2_HUMAN,,,,,Q,I,G,xT,F,A,S,V,E,Q,F,,\n+,,sp|P52272|HNRPM_HUMAN,,,,,L,G,S,xT,V,F,V,A,N,L,D,,\n+,,sp|Q9UQ35|SRRM2_HUMAN,F,S,E,P,G,T,T,xS,T,Q,R,P,S,S,P,,\n+,,sp|Q9BRB3|PIGQ_HUMAN,,,,,,G,G,xT,F,W,S,C,E,A,T,,\n+,,sp|Q96A35|RM24_HUMAN,,,,,,,G,xT,M,I,P,S,E,A,P,,\n+,,sp|Q8WV74|NUDT8_HUMAN,,,,A,R,P,A,xS,A,A,V,L,V,P,L,,\n+,,sp|Q07020|RL18_HUMAN,,,,,T,N,S,xT,F,N,Q,V,V,L,K,,\n+,,sp|P22626|ROA2_HUMAN,,,G,F,G,F,V,xT,F,D,D,H,D,P,V,,\n+,,sp|Q15181|IPYR_HUMAN,,,,,,G,I,xS,C,M,N,T,T,L,S,,\n+,,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN,,,,A,V,F,P,xS,I,V,G,R,P,R,,,\n+,,sp|P23246|SFPQ_HUMAN,,,,,,F,A,xT,H,A,A,A,L,S,V,,\n+,,sp|Q15366|PCBP2_HUMAN,I,P,Y,R,P,K,P,xS,S,S,P,V,I,F,A,,\n+,,sp|P26641|EF1G_HUMAN,,,,,A,A,G,xT,L,Y,T,Y,P,E,N,,\n+,,sp|P11142|HSP7C_HUMAN; sp|P0DMV9|HS71B_HUMAN; 
sp|P0DMV8|HS71A_HUMAN,,,,,T,T,P,xS,Y,V,A,F,T,D,T,,\n+,,sp|P35269|T2FA_HUMAN,,G,N,S,R,P,G,xT,P,S,A,E,G,G,S,,\n+,,sp|P49327|FAS_HUMAN,,,,S,F,Y,G,xS,T,L,F,L,C,R,,,\n+,,sp|Q08945|SSRP1_HUMAN,,,,,,Q,G,xT,Q,Y,T,F,S,S,I,,\n+,,sp|Q14690|RRP5_HUMAN,,,,,,A,G,xT,Y,F,S,N,Q,A,V,,\n+,,sp|P07900|HS90A_HUMAN,,,,,,,,xT,L,T,I,V,D,T,G,,\n+,,sp|O14980|XPO1_HUMAN,,E,T,L,V,Y,L,xT,H,L,D,Y,V,D,T,,\n+,,sp|P04075|ALDOA_HUMAN,H,E,E,I,A,M,A,xT,V,T,A,L,R,,,,\n+,,sp|O75521|ECI2_HUMAN,,,,,,,A,xT,F,H,T,P,F,S,H,,\n+,,sp|P21796|VDAC1_HUMAN,,,,,,,L,xT,F,D,S,S,F,S,P,,\n+,,sp|Q86VP6|CAND1_HUMAN,,,,,,A,L,xT,L,I,A,G,S,P,L,,\n+,,sp|P23193|TCEA1_HUMAN,S,A,D,E,P,M,T,xT,F,V,V,C,N,E,C,,\n+,,sp|P04080|CYTB_HUMAN,,S,Q,V,V,A,G,xT,N,Y,F,I,K,,,,\n+,,sp|P06748|NPM_HUMAN,D,E,N,E,H,Q,L,xS,L,R,,,,,,,\n+,,sp|Q96C86|DCPS_HUMAN,,,,,,,,xT,T,V,V,Y,P,A,T,,\n+,,sp|P61604|CH10_HUMAN,,,,V,L,Q,A,xT,V,V,A,V,G,S,G,,\n+,,sp|P08865|RSSA_HUMAN,A,S,Y,V,N,L,P,xT,I,A,L,C,N,T,D,,\n+,,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN,T,V,L,S,G,G,T,xT,M,Y,P,G,I,A,D,,\n+,,sp|O00461|GOLI4_HUMAN,Y,G,E,N,D,E,N,xT,D,D,K,N,N,D,G,,\n+,,sp|P19338|NUCL_HUMAN,,,,,,S,I,xS,L,Y,Y,T,G,E,K,,\n+,,sp|P30086|PEBP1_HUMAN,,,,,,L,Y,xT,L,V,L,T,D,P,D,,\n+,,sp|O00299|CLIC1_HUMAN,,,,,,G,V,xT,F,N,V,T,T,V,D,,\n+,,sp|P62807|H2B1C_HUMAN; sp|P57053|H2BFS_HUMAN; sp|O60814|H2B1K_HUMAN; sp|Q99880|H2B1L_HUMAN; sp|Q99879|H2B1M_HUMAN; sp|Q99877|H2B1N_HUMAN; sp|Q93079|H2B1H_HUMAN; sp|Q5QNW6|H2B2F_HUMAN; sp|P58876|H2B1D_HUMAN,,,,,E,S,Y,xS,V,Y,V,Y,K,,,,\n+,,sp|Q15005|SPCS2_HUMAN,,,,,,,L,xT,F,I,S,G,R,,,,\n'
b
diff -r 000000000000 -r 23eea82f5192 all stuff/test-data/S2.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/test-data/S2.csv Wed Jan 16 13:55:22 2019 -0500
b
b'@@ -0,0 +1,524 @@\n+Substrates,Species,Reference,X.7,X.6,X.5,X.4,X.3,X.2,X.1,X0,X1,X2,X3,X4,X5,X6,X7,PhosphiteA,B\n+,,sp|P19338|NUCL_HUMAN,,,A,A,A,P,A,xS,E,D,E,D,D,E,D,,\n+,,sp|P13639|EF2_HUMAN,,,,,,T,G,xT,I,T,T,F,E,H,A,,\n+,,sp|P60709|ACTB_HUMAN,L,Y,A,N,T,V,L,xS,G,G,T,T,M,Y,P,,\n+,,sp|P11021|BIP_HUMAN,,,,,,,V,xT,H,A,V,V,T,V,P,,\n+,,sp|P56556|NDUA6_HUMAN,,,,Q,A,T,S,xT,A,S,T,F,V,K,P,,\n+,,sp|Q9HD20|AT131_HUMAN,,,,,,T,G,xT,L,T,S,D,S,L,V,,\n+,,sp|Q9UQB8|BAIP2_HUMAN,,,,Q,R,P,Y,xS,V,A,V,P,A,F,S,,\n+,,sp|Q9UQ35|SRRM2_HUMAN,T,S,T,Q,R,P,S,xS,P,E,T,A,T,K,,,\n+,,sp|Q06830|PRDX1_HUMAN,,,,,,,,xT,I,A,Q,D,Y,G,V,,\n+,,sp|P08238|HS90B_HUMAN,,,I,E,D,V,G,xS,D,E,E,D,D,S,G,,\n+,,sp|Q14974|IMB1_HUMAN,,,,,,,T,xT,L,V,I,M,E,R,,,\n+,,sp|P29401|TKT_HUMAN,,,,,,N,S,xT,F,S,E,I,F,K,,,\n+,,sp|Q00839|HNRPU_HUMAN,,,,,,,S,xS,G,P,T,S,L,F,A,,\n+,,sp|Q16531|DDB1_HUMAN,,,,,,,Q,xS,T,I,V,C,H,N,R,,\n+,,sp|O15530|PDPK1_HUMAN,,,,,,,,xT,Y,Y,L,M,D,P,S,,\n+,,sp|Q14258|TRI25_HUMAN,,,,,,F,D,xT,I,Y,Q,I,L,L,K,,\n+,,sp|P52566|GDIR2_HUMAN,,,,,,,A,xT,F,M,V,G,S,Y,G,,\n+,,sp|Q9Y5S9|RBM8A_HUMAN,,,,,,G,Y,xT,L,V,E,Y,E,T,Y,,\n+,,sp|P30086|PEBP1_HUMAN,,,G,N,D,I,S,xS,G,T,V,L,S,D,Y,,\n+,,sp|P19338|NUCL_HUMAN,,A,A,A,A,P,A,xS,E,D,E,D,D,E,D,,\n+,,sp|Q32P44|EMAL3_HUMAN,,,,G,R,P,I,xT,M,Y,I,P,S,G,I,,\n+,,sp|P29401|TKT_HUMAN,,,,,,,,xS,T,F,A,A,F,F,T,,\n+,,sp|P41252|SYIC_HUMAN,,,,,,T,G,xT,I,V,V,E,G,H,E,,\n+,,sp|P28070|PSB4_HUMAN,N,S,T,M,L,G,A,xS,G,D,Y,A,D,F,Q,,\n+,,sp|P09211|GSTP1_HUMAN,,,,,,,,xT,V,V,Y,F,P,V,R,,\n+,,sp|P09429|HMGB1_HUMAN; sp|P26583|HMGB2_HUMAN,,,,,R,P,P,xS,A,F,F,L,F,,,,\n+,,sp|P04083|ANXA1_HUMAN,S,A,V,S,P,Y,P,xT,F,N,P,S,S,D,V,,\n+,,sp|P07900|HS90A_HUMAN; sp|P08238|HS90B_HUMAN,,,,,,S,L,xT,N,D,W,E,D,H,L,,\n+,,sp|P49321|NASP_HUMAN,G,S,G,D,A,V,P,xS,G,N,E,V,S,E,N,,\n+,,sp|P07437|TBB5_HUMAN,V,T,F,I,G,N,S,xT,A,I,Q,E,L,F,K,,\n+,,sp|P68363|TBA1B_HUMAN; 
sp|Q71U36|TBA1A_HUMAN,D,V,N,A,A,I,A,xT,I,K,,,,,,,\n+,,sp|P14625|ENPL_HUMAN,P,E,E,E,P,E,E,xT,A,E,D,T,T,E,D,,\n+,,sp|P10075|GLI4_HUMAN,,,,,,,,xS,P,G,S,Q,A,P,D,,\n+,,sp|Q14554|PDIA5_HUMAN,,,,,G,Y,P,xT,F,H,Y,Y,H,Y,G,,\n+,,sp|Q9Y266|NUDC_HUMAN,,L,I,T,Q,T,F,xS,H,H,N,Q,L,A,Q,,\n+,,sp|P06748|NPM_HUMAN,,,,,,T,V,xS,L,G,A,G,A,K,,,\n+,,sp|O75534|CSDE1_HUMAN,,,,,,L,L,xT,S,Y,G,F,I,Q,C,,\n+,,sp|Q8NHW5|RLA0L_HUMAN; sp|P05388|RLA0_HUMAN,,,,,,,,xT,S,F,F,Q,A,L,G,,\n+,,sp|P07814|SYEP_HUMAN,,A,I,Q,G,G,T,xS,H,H,L,G,Q,N,F,,\n+,,sp|Q00613|HSF1_HUMAN,D,T,E,G,R,P,P,xS,P,P,P,T,S,T,P,,\n+,,sp|P47712|PA24A_HUMAN,,,,,,Y,G,xT,F,M,A,P,D,L,F,,\n+,,sp|P68363|TBA1B_HUMAN; sp|Q71U36|TBA1A_HUMAN,T,I,G,G,G,D,D,xS,F,N,T,F,F,S,E,,\n+,,sp|P53597|SUCA_HUMAN,,,,,,Q,G,xT,F,H,S,Q,Q,A,L,,\n+,,sp|P07237|PDIA1_HUMAN,,,,,,V,H,xS,F,P,T,L,K,,,,\n+,,sp|Q96A26|F162A_HUMAN,,,,,,H,E,xT,L,T,S,L,N,L,E,,\n+,,sp|P51570|GALK1_HUMAN,I,Q,E,H,Y,G,G,xT,A,T,F,Y,L,S,Q,,\n+,,sp|P30101|PDIA3_HUMAN,,,,,G,F,P,xT,I,Y,F,S,P,A,N,,\n+,,sp|P23193|TCEA1_HUMAN; sp|Q15560|TCEA2_HUMAN,,,,,T,G,G,xT,Q,T,D,L,F,T,C,,\n+,,sp|Q16851|UGPA_HUMAN,,,,,,I,Y,xT,F,N,Q,S,R,,,,\n+,,sp|Q9NVI7|ATD3A_HUMAN; sp|Q5T9A4|ATD3B_HUMAN,,,,,,A,G,xT,L,F,G,E,G,F,R,,\n+,,sp|Q4KMQ1|TPRN_HUMAN,,,,W,Q,R,P,xS,S,P,P,P,F,L,P,,\n+,,sp|Q13200|PSMD2_HUMAN,,,,,,,G,xT,L,T,L,C,P,Y,H,,\n+,,sp|Q9UQ35|SRRM2_HUMAN,,,,,,,,xS,P,V,P,S,A,F,S,,\n+,,sp|O43776|SYNC_HUMAN,,,,K,E,D,G,xT,F,Y,E,F,G,E,D,,\n+,,sp|Q9HCC0|MCCB_HUMAN,,,,,,Q,G,xT,I,F,L,A,G,P,P,,\n+,,sp|P42166|LAP2A_HUMAN,,,,,,G,G,xT,L,F,G,G,E,V,C,,\n+,,sp|O43760|SNG2_HUMAN,,,,,A,G,G,xS,F,D,L,R,,,,,\n+,,sp|P30086|PEBP1_HUMAN,G,N,D,I,S,S,G,xT,V,L,S,D,Y,V,G,,\n+,,sp|P19338|NUCL_HUMAN,,,,V,A,V,A,xT,P,A,K,K,,,,,\n+,,sp|Q8TD47|RS4Y2_HUMAN; 
sp|P62701|RS4X_HUMAN,,,,,,,L,xT,I,A,E,E,R,,,,\n+,,sp|P07900|HS90A_HUMAN,,,,D,K,E,V,xS,D,D,E,A,E,E,K,,\n+,,sp|Q6UN15|FIP1_HUMAN,,,,E,R,D,H,xS,P,T,P,S,V,F,N,,\n+,,sp|P40926|MDHM_HUMAN,,A,G,A,G,S,A,xT,L,S,M,A,Y,A,G,,\n+,,sp|P00387|NB5R3_HUMAN,N,L,V,V,R,P,Y,xT,P,I,S,S,D,D,D,,\n+,,sp|P07737|PROF1_HUMAN,,,,,,,S,xS,F,Y,V,N,G,L,T,,\n+,,sp|Q9NYF8|BCLF1_HUMAN,,,,,S,P,H,xS,P,S,P,I,A,T,P,,\n+,,sp|P49189|AL9A1_HUMAN,,,,,V,I,A,xT,F,T,C,S,G,E,K,,\n+,,sp|P42166|LAP2A_HUMAN,,,,,,S,S,xT,P,L,P,T,I,S,S,,\n+,,sp|P62942|FKB1A_HUMAN,,,,,R,G,Q,xT,C,V,V,H,Y,T,G,,\n+,,sp|P1102'..b'D,,\n+,,sp|Q15366|PCBP2_HUMAN,P,Y,R,P,K,P,S,xS,S,P,V,I,F,A,G,,\n+,,sp|O75436|VP26A_HUMAN,A,G,Y,D,P,T,P,xT,M,R,,,,,,,\n+,,sp|Q9BV79|MECR_HUMAN,,,,,,G,G,xT,M,V,T,Y,G,G,M,,\n+,,sp|O75643|U520_HUMAN,,,,,,S,P,xT,L,Y,G,I,S,H,D,,\n+,,sp|P06733|ENOA_HUMAN,,,,,G,N,P,xT,V,E,V,D,L,F,T,,\n+,,sp|P30041|PRDX6_HUMAN,,,,,R,V,A,xT,P,V,D,W,K,,,,\n+,,sp|Q9BVG9|PTSS2_HUMAN,A,G,G,P,R,P,E,xS,P,V,P,A,G,R,,,\n+,,sp|Q9Y6X4|F169A_HUMAN,,,,,R,P,M,xS,G,E,Y,G,P,A,S,,\n+,,sp|P22626|ROA2_HUMAN,,,,,,I,D,xT,I,E,I,I,T,D,R,,\n+,,sp|O15530|PDPK1_HUMAN,,,I,L,G,E,G,xS,F,S,T,V,V,L,A,,\n+,,sp|Q7L2R6|ZN765_HUMAN,,,,,,,,xS,K,L,Q,I,H,R,,,\n+,,sp|P62937|PPIA_HUMAN,,,,,V,N,P,xT,V,F,F,D,I,A,V,,\n+,,sp|Q9NYF8|BCLF1_HUMAN,,,,,,,,xS,P,H,S,P,S,P,I,,\n+,,sp|P20042|IF2B_HUMAN,,,,,,,,xT,S,F,V,N,F,T,D,,\n+,,sp|Q2KHR2|RFX7_HUMAN,,,,,,S,P,xT,T,V,L,F,T,S,S,,\n+,,sp|Q15365|PCBP1_HUMAN,,,,,,A,I,xT,I,A,G,V,P,Q,S,,\n+,,sp|Q8NBS9|TXND5_HUMAN,,,,,G,Y,P,xT,L,L,W,F,R,,,,\n+,,sp|Q8IYB3|SRRM1_HUMAN,,,,,,,,xS,P,S,P,A,P,P,P,,\n+,,sp|Q07020|RL18_HUMAN,,T,A,V,V,V,G,xT,I,T,D,D,V,R,,,\n+,,sp|Q13200|PSMD2_HUMAN,,,,,G,T,L,xT,L,C,P,Y,H,S,D,,\n+,,sp|P51610|HCFC1_HUMAN,G,A,P,G,Q,P,G,xT,I,L,R,,,,,,\n+,,sp|Q6NYC8|PPR18_HUMAN,,,,,S,G,H,xT,F,T,V,N,P,R,,,\n+,,sp|Q15029|U5S1_HUMAN,,,,,,T,A,xT,I,T,E,P,R,,,,\n+,,sp|Q8IYD1|ERF3B_HUMAN; 
sp|P15170|ERF3A_HUMAN,,,,,T,A,G,xT,I,C,L,E,T,F,K,,\n+,,sp|P56556|NDUA6_HUMAN,,,,,Q,A,T,xS,T,A,S,T,F,V,K,,\n+,,sp|P07737|PROF1_HUMAN,,,,,,,,xT,L,V,L,L,M,G,K,,\n+,,sp|Q8NC51|PAIRB_HUMAN,,,,,,,,xS,S,F,S,H,Y,S,G,,\n+,,sp|Q6NYC8|PPR18_HUMAN,,,,,,,,xS,G,H,T,F,T,V,N,,\n+,,sp|Q8NBQ5|DHB11_HUMAN,,,,,,V,H,xT,F,V,V,D,C,S,N,,\n+,,sp|P84090|ERH_HUMAN,,,,,,S,H,xT,I,L,L,V,Q,P,T,,\n+,,sp|P04406|G3P_HUMAN,,,,,V,I,I,xS,A,P,S,A,D,A,P,,\n+,,sp|Q9Y5K5|UCHL5_HUMAN,,,,,,L,D,xT,I,F,F,A,K,,,,\n+,,sp|P49585|PCY1A_HUMAN,,,,E,R,S,P,xS,P,S,F,R,,,,,\n+,,sp|Q68EM7|RHG17_HUMAN,,,,K,R,P,A,xS,M,A,V,M,E,G,D,,\n+,,sp|Q96I25|SPF45_HUMAN,,,S,P,T,G,P,xS,N,S,F,L,A,N,M,,\n+,,sp|O75436|VP26A_HUMAN,F,L,A,G,Y,D,P,xT,P,T,M,R,,,,,\n+,,sp|P53396|ACLY_HUMAN,,,,,,S,T,xT,L,F,S,R,,,,,\n+,,sp|P26641|EF1G_HUMAN,,,,,,,,xS,T,F,V,L,D,E,F,,\n+,,sp|O15530|PDPK1_HUMAN,L,G,E,G,S,F,S,xT,V,V,L,A,R,,,,\n+,,sp|P20292|AL5AP_HUMAN,,,,,,T,G,xT,L,A,F,E,R,,,,\n+,,sp|Q99460|PSMD1_HUMAN,,S,N,C,K,P,S,xT,F,A,Y,P,A,P,L,,\n+,,sp|P68363|TBA1B_HUMAN; sp|Q71U36|TBA1A_HUMAN,G,G,D,D,S,F,N,xT,F,F,S,E,T,G,A,,\n+,,sp|Q01469|FABP5_HUMAN,,,,,K,T,Q,xT,V,C,N,F,T,D,G,,\n+,,sp|P04040|CATA_HUMAN,,,,N,A,I,H,xT,F,V,Q,S,G,S,H,,\n+,,sp|P60709|ACTB_HUMAN,,,,I,W,H,H,xT,F,Y,N,E,L,R,,,\n+,,sp|P60709|ACTB_HUMAN,,,,,,,H,xT,F,Y,N,E,L,R,,,\n+,,sp|Q00839|HNRPU_HUMAN,,,,,,L,N,xT,L,L,Q,R,,,,,\n+,,sp|Q93084|AT2A3_HUMAN; 
sp|P16615|AT2A2_HUMAN,,,,,,,,xT,G,T,L,T,T,N,Q,,\n+,,sp|P52272|HNRPM_HUMAN,,,,,L,G,S,xT,V,F,V,A,N,L,D,,\n+,,sp|Q9UQ35|SRRM2_HUMAN,F,S,E,P,G,T,T,xS,T,Q,R,P,S,S,P,,\n+,,sp|Q9BRB3|PIGQ_HUMAN,,,,,,G,G,xT,F,W,S,C,E,A,T,,\n+,,sp|Q96A35|RM24_HUMAN,,,,,,,G,xT,M,I,P,S,E,A,P,,\n+,,sp|Q07020|RL18_HUMAN,,,,,T,N,S,xT,F,N,Q,V,V,L,K,,\n+,,sp|P22626|ROA2_HUMAN,,,G,F,G,F,V,xT,F,D,D,H,D,P,V,,\n+,,sp|P23246|SFPQ_HUMAN,,,,,,F,A,xT,H,A,A,A,L,S,V,,\n+,,sp|Q15366|PCBP2_HUMAN,I,P,Y,R,P,K,P,xS,S,S,P,V,I,F,A,,\n+,,sp|P26641|EF1G_HUMAN,,,,,A,A,G,xT,L,Y,T,Y,P,E,N,,\n+,,sp|P11142|HSP7C_HUMAN,,,,,T,T,P,xS,Y,V,A,F,T,D,T,,\n+,,sp|P35269|T2FA_HUMAN,,G,N,S,R,P,G,xT,P,S,A,E,G,G,S,,\n+,,sp|P49327|FAS_HUMAN,,,,S,F,Y,G,xS,T,L,F,L,C,R,,,\n+,,sp|P07686|HEXB_HUMAN,,,,L,A,P,G,xT,I,V,E,V,W,K,,,\n+,,sp|P07814|SYEP_HUMAN,,,A,I,Q,G,G,xT,S,H,H,L,G,Q,N,,\n+,,sp|Q9Y277|VDAC3_HUMAN,,V,C,N,Y,G,L,xT,F,T,Q,K,,,,,\n+,,sp|P62805|H4_HUMAN,,,,,D,A,V,xT,Y,T,E,H,A,K,,,\n+,,sp|P21796|VDAC1_HUMAN,,,,,,,L,xT,F,D,S,S,F,S,P,,\n+,,sp|Q86VP6|CAND1_HUMAN,,,,,,A,L,xT,L,I,A,G,S,P,L,,\n+,,sp|P04080|CYTB_HUMAN,,S,Q,V,V,A,G,xT,N,Y,F,I,K,,,,\n+,,sp|P06748|NPM_HUMAN,D,E,N,E,H,Q,L,xS,L,R,,,,,,,\n+,,sp|Q96C86|DCPS_HUMAN,,,,,,,,xT,T,V,V,Y,P,A,T,,\n+,,sp|P61604|CH10_HUMAN,,,,V,L,Q,A,xT,V,V,A,V,G,S,G,,\n+,,sp|O00461|GOLI4_HUMAN,Y,G,E,N,D,E,N,xT,D,D,K,N,N,D,G,,\n+,,sp|P19338|NUCL_HUMAN,,,,,,S,I,xS,L,Y,Y,T,G,E,K,,\n+,,sp|P30086|PEBP1_HUMAN,,,,,,L,Y,xT,L,V,L,T,D,P,D,,\n+,,sp|O00299|CLIC1_HUMAN,,,,,,G,V,xT,F,N,V,T,T,V,D,,\n'
b
diff -r 000000000000 -r 23eea82f5192 all stuff/test-data/SBF1.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/test-data/SBF1.csv Wed Jan 16 13:55:22 2019 -0500
b
b'@@ -0,0 +1,36 @@\n+Amino Acids,sp|P47756|CAPZB_HUMAN,sp|P49189|AL9A1_HUMAN,sp|P19338|NUCL_HUMAN,sp|O00567|NOP56_HUMAN,sp|Q9UQ35|SRRM2_HUMAN,sp|P13639|EF2_HUMAN,sp|P60709|ACTB_HUMAN,sp|P63261|ACTG_HUMAN,sp|P11021|BIP_HUMAN,sp|Q16698|DECR_HUMAN,sp|Q9HD20|AT131_HUMAN,sp|Q9UQB8|BAIP2_HUMAN,sp|P11142|HSP7C_HUMAN,sp|O00541|PESC_HUMAN,sp|P08238|HS90B_HUMAN,sp|Q06830|PRDX1_HUMAN,sp|P35249|RFC4_HUMAN,sp|Q14974|IMB1_HUMAN,sp|O15530|PDPK1_HUMAN,sp|Q14258|TRI25_HUMAN,sp|Q53H96|P5CR3_HUMAN,sp|Q9Y5S9|RBM8A_HUMAN,sp|P30086|PEBP1_HUMAN,sp|P62081|RS7_HUMAN,sp|Q92900|RENT1_HUMAN,sp|P05455|LA_HUMAN,sp|Q32P44|EMAL3_HUMAN,sp|P41252|SYIC_HUMAN,sp|P48047|ATPO_HUMAN,tr|H7C0C1|H7C0C1_HUMAN,sp|P26373|RL13_HUMAN,sp|Q9Y3D7|TIM16_HUMAN,sp|P00338|LDHA_HUMAN,sp|P55884|EIF3B_HUMAN,sp|Q16658|FSCN1_HUMAN,sp|Q9HC35|EMAL4_HUMAN,sp|P09211|GSTP1_HUMAN,sp|P04083|ANXA1_HUMAN,sp|P29590|PML_HUMAN,sp|P07900|HS90A_HUMAN,sp|P31943|HNRH1_HUMAN,sp|P07737|PROF1_HUMAN,sp|P49321|NASP_HUMAN,sp|P49736|MCM2_HUMAN,sp|P14625|ENPL_HUMAN,sp|Q9BQE3|TBA1C_HUMAN,sp|P68363|TBA1B_HUMAN,sp|Q71U36|TBA1A_HUMAN,sp|P62191|PRS4_HUMAN,sp|Q01469|FABP5_HUMAN,sp|Q14554|PDIA5_HUMAN,sp|Q9Y266|NUDC_HUMAN,sp|P06748|NPM_HUMAN,sp|O75534|CSDE1_HUMAN,sp|Q02878|RL6_HUMAN,sp|Q8NHW5|RLA0L_HUMAN,sp|P05388|RLA0_HUMAN,sp|Q7Z5L9|I2BP2_HUMAN,sp|Q00613|HSF1_HUMAN,sp|P47712|PA24A_HUMAN,sp|Q8IVH4|MMAA_HUMAN,sp|P53597|SUCA_HUMAN,sp|Q15942|ZYX_HUMAN,sp|Q9H0L4|CSTFT_HUMAN,sp|P33240|CSTF2_HUMAN,sp|O75937|DNJC8_HUMAN,sp|Q96A26|F162A_HUMAN,sp|P51570|GALK1_HUMAN,sp|P30101|PDIA3_HUMAN,sp|Q16851|UGPA_HUMAN,sp|P23193|TCEA1_HUMAN,sp|Q15560|TCEA2_HUMAN,sp|Q9H6F5|CCD86_HUMAN,sp|O14672|ADA10_HUMAN,sp|Q5H9R7|PP6R3_HUMAN,sp|P49411|EFTU_HUMAN,sp|P23246|SFPQ_HUMAN,sp|Q4KMQ1|TPRN_HUMAN,sp|O94826|TOM70_HUMAN,sp|O75995|SASH3_HUMAN,sp|Q13200|PSMD2_HUMAN,sp|O43776|SYNC_HUMAN,sp|Q9HCC0|MCCB_HUMAN,sp|P42166|LAP2A_HUMAN,sp|O43760|SNG2_HUMAN,sp|Q8ND56|LS14A_HUMAN,sp|P29401|TKT_HUMAN,sp|Q99832|TCPH_HUMAN,sp|P62701|RS4X_HUMAN,sp|Q8TD47|RS4Y2_HUMAN,sp|O95433|AHSA1_HUMAN,sp|Q00610|CL
H1_HUMAN,sp|Q6UN15|FIP1_HUMAN,sp|P49821|NDUV1_HUMAN,sp|P0DP23|CALM1_HUMAN,sp|P0DP25|CALM3_HUMAN,sp|P0DP24|CALM2_HUMAN,sp|P40926|MDHM_HUMAN,sp|P56556|NDUA6_HUMAN,sp|P00387|NB5R3_HUMAN,sp|P40123|CAP2_HUMAN,sp|Q01518|CAP1_HUMAN,sp|Q8NF37|PCAT1_HUMAN,sp|C9JLW8|MCRI1_HUMAN,sp|Q99829|CPNE1_HUMAN,sp|Q9NYF8|BCLF1_HUMAN,sp|Q8IYB3|SRRM1_HUMAN,sp|P62942|FKB1A_HUMAN,sp|P30041|PRDX6_HUMAN,sp|P54578|UBP14_HUMAN,sp|O60934|NBN_HUMAN,sp|O43660|PLRG1_HUMAN,sp|Q6UWD8|CP054_HUMAN,sp|Q5VTE0|EF1A3_HUMAN,sp|P68104|EF1A1_HUMAN,sp|P63244|RACK1_HUMAN,sp|Q9BZK7|TBL1R_HUMAN,sp|O60907|TBL1X_HUMAN,sp|Q9BQ87|TBL1Y_HUMAN,sp|Q16563|SYPL1_HUMAN,sp|P37198|NUP62_HUMAN,sp|P08567|PLEK_HUMAN,sp|Q6ZT62|BGIN_HUMAN,sp|Q9Y3L3|3BP1_HUMAN,sp|P25098|ARBK1_HUMAN,sp|Q86U90|YRDC_HUMAN,sp|P26641|EF1G_HUMAN,sp|Q92974|ARHG2_HUMAN,sp|P63010|AP2B1_HUMAN,sp|Q9NP66|HM20A_HUMAN,sp|Q92841|DDX17_HUMAN,sp|Q7KZF4|SND1_HUMAN,sp|P12956|XRCC6_HUMAN,sp|Q9Y617|SERC_HUMAN,sp|O94903|PLPHP_HUMAN,sp|P32519|ELF1_HUMAN,sp|Q9NVI7|ATD3A_HUMAN,sp|Q5T9A4|ATD3B_HUMAN,sp|P09651|ROA1_HUMAN,sp|Q32P51|RA1L2_HUMAN,sp|P51991|ROA3_HUMAN,sp|P55263|ADK_HUMAN,sp|Q96FV9|THOC1_HUMAN,sp|Q6L8Q7|PDE12_HUMAN,sp|Q9H0D6|XRN2_HUMAN,sp|Q92499|DDX1_HUMAN,sp|Q9NZL9|MAT2B_HUMAN,sp|Q01082|SPTB2_HUMAN,sp|Q2KHR2|RFX7_HUMAN,sp|P52701|MSH6_HUMAN,sp|P17947|SPI1_HUMAN,sp|P53396|ACLY_HUMAN,sp|P31146|COR1A_HUMAN,sp|P63167|DYL1_HUMAN,sp|Q96FJ2|DYL2_HUMAN,sp|Q14181|DPOA2_HUMAN,sp|Q96JM3|CHAP1_HUMAN,sp|Q969T9|WBP2_HUMAN,sp|P13667|PDIA4_HUMAN,sp|P07195|LDHB_HUMAN,sp|Q9Y678|COPG1_HUMAN,sp|O00148|DX39A_HUMAN,sp|Q13838|DX39B_HUMAN,sp|P07437|TBB5_HUMAN,sp|P06733|ENOA_HUMAN,sp|P48735|IDHP_HUMAN,sp|Q14161|GIT2_HUMAN,sp|P28066|PSA5_HUMAN,sp|Q9HB71|CYBP_HUMAN,sp|Q15084|PDIA6_HUMAN,sp|Q13509|TBB3_HUMAN,tr|A0A0B4J269|A0A0B4J269_HUMAN,sp|P49368|TCPG_HUMAN,sp|Q92608|DOCK2_HUMAN,sp|P62807|H2B1C_HUMAN,sp|O60814|H2B1K_HUMAN,sp|Q99880|H2B1L_HUMAN,sp|P58876|H2B1D_HUMAN,sp|Q93079|H2B1H_HUMAN,sp|Q5QNW6|H2B2F_HUMAN,sp|Q99877|H2B1N_HUMAN,sp|P57053|H2BFS_HUMAN,sp|Q99879|H2B1M_HUMAN,sp|Q9Y5B9|SP16H_
HU'..b'8479,7.551487414187642,12.105263157894736,13.829787234042554,13.664596273291925,15.08704061895551,11.386138613861386,10.035419126328216,9.297912713472485,8.121212121212121,12.265625,11.897356143079314,10.789473684210526,10.185185185185183,11.377245508982035,11.700468018720748,11.700468018720748,10.060975609756097,8.264462809917356,12.504978096375945,8.605341246290802,10.074626865671641,9.67741935483871,10.074626865671641,16.96113074204947,14.390243902439025,11.401869158878505,13.26530612244898,28.0,8.823529411764707,6.542056074766355,6.074766355140186,18.367346938775512,10.48951048951049,10.373443983402488,9.195402298850574\n+X,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n+Number of ST,34,53,74,81,814,85,51,51,77,49,145,83,82,50,82,22,47,92,70,83,34,13,24,20,137,32,138,151,34,47,19,11,38,86,71,164,19,41,113,85,53,21,100,99,104,52,52,51,47,21,48,33,36,88,30,38,37,84,95,103,52,45,65,61,50,23,23,47,50,67,46,43,43,94,149,42,60,88,50,60,95,56,49,124,27,72,74,55,27,27,46,149,97,50,16,16,16,46,14,29,67,67,60,18,65,193,190,12,23,66,119,81,30,51,52,61,81,100,88,37,135,38,95,107,63,33,51,136,104,51,88,97,71,37,29,113,64,68,54,46,37,35,90,79,79,74,30,268,304,194,31,124,61,8,8,96,139,27,59,39,123,38,35,57,44,53,131,35,32,49,59,101,66,217,22,21,23,23,22,22,23,21,21,116,100,35,30,18,108,83,51,47,17,43,45,51,90,12,42,59,41,67,12,34,139,31,66,92,52,94,86,46,16,15,16,17,16,16,112,68,57,18,18,38,40,69,77,113,103,145,19,98,8,74,46,50,108,77,42,131,98,37,102,61,19,107,134,161,74,46,29,54,38,79,47,67,291,16,21,127,158,236,37,66,115,51,189,146,19,13,80,60,38,23,48,116,121,487,76,82,49,23,41,61,51,17,12,30,128,33,46,91,22,78,69,85,49,67,157,153,82,22,57,75,75,33,10,314,29,27,27,27,48,177,61,13,112,9,14,13,5
4,30,25,24\n+Number of pST,4,5,13,1,10,6,15,15,7,3,1,1,7,1,7,2,1,2,15,1,3,1,11,1,4,3,2,1,1,1,2,2,3,3,4,1,3,3,2,6,2,6,4,1,9,4,4,4,2,5,1,3,7,2,2,2,2,6,1,1,1,3,1,1,1,1,1,2,2,3,3,1,1,1,2,4,2,2,2,2,2,1,2,4,1,2,5,9,2,2,2,3,2,1,1,1,1,3,1,1,1,1,1,3,1,5,4,1,1,1,1,1,1,7,8,1,2,2,2,2,2,3,1,1,1,1,5,2,2,1,4,2,6,1,1,1,1,1,6,4,2,1,1,1,1,1,2,1,2,2,1,8,5,1,1,3,1,2,1,2,1,2,2,7,3,1,1,2,1,3,4,4,2,2,2,2,2,2,2,2,2,2,2,1,1,3,1,1,2,3,1,1,1,3,1,1,1,1,2,4,3,1,1,2,2,1,1,2,2,1,1,1,2,2,2,2,2,2,2,5,1,1,1,1,2,2,2,1,2,1,1,1,1,2,4,1,2,1,4,2,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,5,1,1,1,1,1,1,9,2,1,1,1,1,1,1,2,1,2,3,1,1,1,2,2,1,1,2,1,1,1,3,1,1,2,1,1,1,1,2,1,1,1,1,1,1,1,1,1,4,4,1,1,1,2,1,1,1,1,2,2,1,1,1,1,1,5,1,1,2\n+Total AAs,277,494,710,594,2752,858,375,375,654,335,1204,552,646,588,724,199,363,876,556,630,274,174,187,194,1129,408,896,1262,213,242,211,125,332,814,493,981,210,346,882,732,449,140,788,904,803,449,451,451,440,135,519,331,294,798,288,317,317,587,529,749,418,346,572,616,577,253,154,392,505,508,301,299,360,748,873,452,707,711,608,380,908,548,563,694,224,463,623,543,263,263,338,1675,594,464,149,149,149,338,128,301,477,475,534,97,537,920,904,108,224,494,754,514,224,462,462,317,514,577,522,259,522,350,677,701,689,279,437,986,937,347,729,910,609,370,275,619,634,648,372,320,378,362,657,609,950,740,334,2364,1363,1360,270,1101,461,89,89,598,812,261,645,334,874,427,428,444,434,452,759,241,228,440,450,797,545,1830,126,126,126,126,126,126,126,126,126,1047,783,293,361,160,808,679,435,488,118,283,374,376,330,152,696,573,417,511,148,237,906,235,553,776,238,764,763,459,136,136,136,135,136,136,949,614,505,151,201,543,243,636,539,707,953,1312,257,770,132,595,730,417,944,585,335,785,795,333,710,589,166,1382,1086,1657,678,464,215,456,390,775,482,365,2209,215,209,1014,1034,2136,353,484,670,435,1361,1122,165,76,651,356,295,188,592,633,946,2078,613,628,499,240,300,614,624,199,104,329,881,437,380,658,161,517,606,847,527,825,1280,1286,760,216,501,641,641,328,121,2511,337,268,279,268,283,1230,535,98,400,102,214,214,294,28
6,241,261\n'
b
diff -r 000000000000 -r 23eea82f5192 all stuff/test-data/SBF2.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all stuff/test-data/SBF2.csv Wed Jan 16 13:55:22 2019 -0500
b
b'@@ -0,0 +1,36 @@\n+Amino Acids,sp|Q13283|G3BP1_HUMAN,sp|O43707|ACTN4_HUMAN,sp|P35609|ACTN2_HUMAN,sp|Q08043|ACTN3_HUMAN,sp|P12814|ACTN1_HUMAN,sp|P19338|NUCL_HUMAN,sp|Q15738|NSDHL_HUMAN,sp|P13639|EF2_HUMAN,sp|P60709|ACTB_HUMAN,sp|P63261|ACTG_HUMAN,sp|P11021|BIP_HUMAN,sp|Q9HD20|AT131_HUMAN,sp|Q9UQB8|BAIP2_HUMAN,sp|Q9UQ35|SRRM2_HUMAN,sp|Q06830|PRDX1_HUMAN,sp|Q9Y371|SHLB1_HUMAN,sp|P08238|HS90B_HUMAN,sp|Q14974|IMB1_HUMAN,sp|P09651|ROA1_HUMAN,sp|Q32P51|RA1L2_HUMAN,sp|O15530|PDPK1_HUMAN,sp|Q14258|TRI25_HUMAN,sp|Q9Y5S9|RBM8A_HUMAN,sp|P54136|SYRC_HUMAN,sp|P30086|PEBP1_HUMAN,sp|P22626|ROA2_HUMAN,sp|Q32P44|EMAL3_HUMAN,sp|Q8NBS9|TXND5_HUMAN,sp|P41252|SYIC_HUMAN,sp|Q96JH7|VCIP1_HUMAN,sp|P09211|GSTP1_HUMAN,sp|P04083|ANXA1_HUMAN,sp|P07195|LDHB_HUMAN,sp|P07900|HS90A_HUMAN,sp|P49321|NASP_HUMAN,sp|Q9BQE3|TBA1C_HUMAN,sp|P68363|TBA1B_HUMAN,sp|Q71U36|TBA1A_HUMAN,sp|P14625|ENPL_HUMAN,sp|Q14554|PDIA5_HUMAN,sp|Q15007|FL2D_HUMAN,sp|Q9Y266|NUDC_HUMAN,sp|P06748|NPM_HUMAN,sp|O14776|TCRG1_HUMAN,sp|O75534|CSDE1_HUMAN,sp|O15294|OGT1_HUMAN,sp|Q8NHW5|RLA0L_HUMAN,sp|P05388|RLA0_HUMAN,sp|Q9UKX7|NUP50_HUMAN,sp|O43312|MTSS1_HUMAN,sp|Q00613|HSF1_HUMAN,sp|P47712|PA24A_HUMAN,sp|P22314|UBA1_HUMAN,sp|P53597|SUCA_HUMAN,sp|O75746|CMC1_HUMAN,sp|Q96A26|F162A_HUMAN,sp|P51570|GALK1_HUMAN,sp|Q15005|SPCS2_HUMAN,sp|P30101|PDIA3_HUMAN,sp|P23193|TCEA1_HUMAN,sp|Q15560|TCEA2_HUMAN,sp|Q16851|UGPA_HUMAN,sp|P62995|TRA2B_HUMAN,sp|Q13596|SNX1_HUMAN,sp|Q4KMQ1|TPRN_HUMAN,sp|Q13200|PSMD2_HUMAN,sp|P42166|LAP2A_HUMAN,sp|O43776|SYNC_HUMAN,sp|Q9HCC0|MCCB_HUMAN,sp|O43760|SNG2_HUMAN,sp|P62701|RS4X_HUMAN,sp|Q8TD47|RS4Y2_HUMAN,sp|Q6UN15|FIP1_HUMAN,sp|P40926|MDHM_HUMAN,sp|P11142|HSP7C_HUMAN,sp|P0DMV8|HS71A_HUMAN,sp|P0DMV9|HS71B_HUMAN,sp|P00387|NB5R3_HUMAN,sp|P25789|PSA4_HUMAN,sp|P07737|PROF1_HUMAN,sp|Q9NYF8|BCLF1_HUMAN,sp|P49189|AL9A1_HUMAN,sp|P62942|FKB1A_HUMAN,sp|Q9Y277|VDAC3_HUMAN,sp|O75521|ECI2_HUMAN,sp|O60934|NBN_HUMAN,sp|P04075|ALDOA_HUMAN,sp|Q5VTE0|EF1A3_HUMAN,sp|P68104|EF1A1_HUMAN,sp|P63244|RACK1_HUMAN,sp|Q92900|RENT1_HUMAN,sp|
Q9BZK7|TBL1R_HUMAN,sp|O60907|TBL1X_HUMAN,sp|Q9BQ87|TBL1Y_HUMAN,sp|Q14498|RBM39_HUMAN,sp|P37198|NUP62_HUMAN,sp|Q6ZT62|BGIN_HUMAN,sp|Q9Y3L3|3BP1_HUMAN,sp|Q9UPN3|MACF1_HUMAN,sp|P12955|PEPD_HUMAN,sp|P26641|EF1G_HUMAN,sp|Q92974|ARHG2_HUMAN,sp|Q9ULW0|TPX2_HUMAN,sp|Q9NP66|HM20A_HUMAN,sp|Q13177|PAK2_HUMAN,sp|Q92841|DDX17_HUMAN,sp|Q7KZF4|SND1_HUMAN,sp|P62987|RL40_HUMAN,sp|P0CG47|UBB_HUMAN,tr|A0A2R8Y422|A0A2R8Y422_HUMAN,sp|P0CG48|UBC_HUMAN,sp|P62979|RS27A_HUMAN,sp|P51991|ROA3_HUMAN,sp|P55263|ADK_HUMAN,sp|P17844|DDX5_HUMAN,sp|Q96FV9|THOC1_HUMAN,sp|Q92499|DDX1_HUMAN,sp|Q01082|SPTB2_HUMAN,sp|P12956|XRCC6_HUMAN,sp|C9JLW8|MCRI1_HUMAN,sp|P63167|DYL1_HUMAN,sp|Q96FJ2|DYL2_HUMAN,sp|Q9UGU5|HMGX4_HUMAN,sp|Q969T9|WBP2_HUMAN,sp|Q15181|IPYR_HUMAN,sp|Q9Y678|COPG1_HUMAN,sp|P07437|TBB5_HUMAN,sp|Q9NUL3|STAU2_HUMAN,sp|P24752|THIL_HUMAN,sp|P60900|PSA6_HUMAN,sp|P63010|AP2B1_HUMAN,sp|Q10567|AP1B1_HUMAN,sp|Q14161|GIT2_HUMAN,sp|Q15084|PDIA6_HUMAN,sp|Q92608|DOCK2_HUMAN,sp|Q99832|TCPH_HUMAN,sp|P53396|ACLY_HUMAN,sp|A0FGR8|ESYT2_HUMAN,sp|Q15019|SEPT2_HUMAN,sp|Q15185|TEBP_HUMAN,sp|P49407|ARRB1_HUMAN,sp|Q15370|ELOB_HUMAN,sp|O95218|ZRAB2_HUMAN,sp|P22392|NDKB_HUMAN,sp|O00461|GOLI4_HUMAN,sp|Q96I25|SPF45_HUMAN,sp|P10809|CH60_HUMAN,sp|O60506|HNRPQ_HUMAN,sp|O00148|DX39A_HUMAN,sp|Q13838|DX39B_HUMAN,sp|P00558|PGK1_HUMAN,sp|Q8WUA2|PPIL4_HUMAN,sp|Q02878|RL6_HUMAN,sp|P46776|RL27A_HUMAN,sp|P54105|ICLN_HUMAN,sp|Q16658|FSCN1_HUMAN,sp|P31323|KAP3_HUMAN,sp|P20290|BTF3_HUMAN,sp|Q9UJ68|MSRA_HUMAN,sp|P25705|ATPA_HUMAN,sp|Q16629|SRSF7_HUMAN,sp|O95319|CELF2_HUMAN,sp|Q13126|MTAP_HUMAN,sp|P17480|UBF1_HUMAN,sp|P42229|STA5A_HUMAN,sp|P51692|STA5B_HUMAN,sp|Q00610|CLH1_HUMAN,sp|Q16695|H31T_HUMAN,sp|P68431|H31_HUMAN,sp|P84243|H33_HUMAN,sp|Q6NXT2|H3C_HUMAN,sp|Q71DI3|H32_HUMAN,tr|Q5TEC6|Q5TEC6_HUMAN,sp|P49411|EFTU_HUMAN,sp|Q9Y2X3|NOP58_HUMAN,sp|P11940|PABP1_HUMAN,sp|Q9H1E3|NUCKS_HUMAN,sp|Q8TBC3|SHKB1_HUMAN,sp|Q13263|TIF1B_HUMAN,sp|Q53H96|P5CR3_HUMAN,sp|Q08945|SSRP1_HUMAN,sp|Q01469|FABP5_HUMAN,sp|Q99436|PSB7_HUMAN,sp|P14314|GLU2B_HUMAN,
sp'..b'1,104,48,73,33,36,140,88,95,38,37,76,149,95,103,119,45,74,23,47,33,50,46,43,67,61,66,88,95,124,56,49,27,27,27,97,46,82,75,75,29,27,21,193,53,12,48,55,119,42,51,52,61,137,81,100,88,78,135,95,107,988,56,51,136,86,51,68,88,97,12,30,16,90,17,37,35,68,90,74,268,71,18,8,8,107,27,24,123,57,73,49,34,104,112,131,49,217,55,124,116,30,18,44,17,90,12,42,44,59,50,38,35,41,58,30,12,34,71,40,26,31,66,52,74,43,94,85,85,149,16,15,16,17,16,16,42,57,69,40,113,117,34,104,21,40,66,8,28,97,28,35,38,26,53,53,210,212,44,37,98,84,102,61,131,19,134,50,8,9,46,29,86,38,79,67,291,84,16,21,25,223,47,139,37,37,40,35,149,51,34,80,22,103,133,48,116,487,23,61,51,23,17,94,46,233,69,128,113,38,85,495,540,111,157,153,169,107,57,108,194,33,10,29,27,27,27,28,74,37,73,61,112,64,54,30,13,23,81,50,50,96,47,20,60,32,34,47,19,510,11,164,21,27,113,47,13,80,58,52,65,61,50,23,94,150,60,72,74,43,46,174,50,66,14,60,65,23,66,30,44,30,63,33,48,76,37,29,113,64,68,79,79,30,304,88,61,59,42,16,17,53,32,59,101,45,100,215,318,51,47,80,45,51,67,22,23,22,23,15,86,46,116,122,57,46,18,18,47,48,38,77,103,145,19,44,60,46,50,77,42,107,177,161,135,42,12,54,37,35,38,47,108,127,158,129,236,101,115,145,19,41,60,190,95,25,76,78,36,82,49,41,250,12,30,72,22,70,49,10,67,213,31,82,22,26,78,314,235,48,13,9,38,25,22,21,23,23,22,22,23,21,21\n+Number of 
pST,4,4,4,4,4,13,1,6,15,15,7,1,1,10,2,1,7,2,6,4,15,1,1,2,11,9,2,1,1,4,3,3,2,6,4,4,4,4,9,1,2,3,7,1,2,2,2,2,3,2,1,1,3,3,2,1,2,3,2,3,1,3,4,2,2,2,4,1,2,1,2,2,2,3,7,4,4,1,3,6,5,5,1,1,4,1,2,7,8,1,4,2,2,2,2,2,1,1,1,1,5,2,3,1,3,4,2,1,3,1,9,1,2,1,5,1,1,1,6,3,1,1,1,2,3,1,7,2,1,1,2,2,1,3,2,9,8,2,1,1,4,1,1,1,2,1,4,1,2,2,3,1,2,1,2,4,1,2,1,1,2,1,3,1,1,1,3,2,2,2,2,2,2,4,1,2,2,1,2,3,3,5,3,3,1,1,2,2,3,3,3,2,2,2,2,3,2,2,4,1,1,2,2,1,2,1,4,1,1,3,1,1,5,1,6,1,1,1,4,1,2,4,3,3,2,2,1,4,2,1,3,2,1,1,2,2,1,1,3,3,1,1,1,1,2,1,3,1,2,2,1,1,1,2,1,1,1,2,1,1,2,1,1,1,1,2,2,3,2,1,1,5,1,1,1,1,1,1,3,1,1,1,3,1,1,2,3,2,1,1,1,2,2,1,1,1,1,1,1,1,1,1,3,2,2,5,1,2,4,1,2,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,2,2,1,5,1,1,1,1,1,1,4,4,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,2,2,1,1,1,2,4,1,1,4,1,2,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,4,1,1,2,2,2,1,1,1,2,1,1,1,1,2,1,2,1,2,1,1,1,1,2,1,1,1,1,1,2,1,2,2,2,2,2,2,2,2,2\n+Total AAs,466,911,894,901,892,710,373,858,375,375,654,1204,552,2752,199,365,724,876,372,320,556,630,174,660,187,353,896,432,1262,1222,210,346,334,732,788,449,451,451,803,519,396,331,294,1098,798,1046,317,317,468,755,529,749,1058,346,678,154,392,226,505,301,299,508,288,522,711,908,694,548,563,224,263,263,594,338,646,641,641,301,261,140,920,494,108,283,394,754,364,462,462,317,1129,514,577,522,530,522,677,701,7388,493,437,986,747,347,524,729,910,128,229,156,685,156,378,362,614,657,740,2364,609,97,89,89,601,261,289,874,444,570,427,246,937,949,759,440,1830,543,1101,921,361,160,418,118,330,152,696,401,573,623,427,428,417,492,288,148,237,493,418,206,235,553,238,508,283,764,794,787,1675,136,136,136,135,136,136,452,529,636,243,707,835,274,709,135,277,528,132,324,838,315,293,332,239,449,379,1320,1341,434,333,795,693,710,589,785,166,1086,608,83,103,464,215,814,272,775,365,2209,587,215,209,227,1478,499,906,266,266,266,241,873,435,277,651,198,968,913,592,633,2078,240,614,624,188,199,821,380,1268,606,881,847,350,847,3053,3230,719,1280,1286,1087,677,501,582,1360,328,121,337,268,279,268,230,595,259,579,535,
400,474,294,286,160,201,594,377,588,598,363,194,483,408,213,242,211,4128,125,981,246,262,882,440,248,582,412,418,572,616,577,253,748,1005,707,463,623,371,338,1226,464,545,128,534,537,224,494,224,418,246,689,279,483,645,370,275,619,634,648,609,950,334,1363,606,461,645,404,128,128,452,228,450,797,430,783,1152,2414,435,488,642,374,376,511,190,240,220,240,97,763,459,1071,802,505,325,151,201,359,341,543,539,953,1312,257,360,589,730,417,585,335,1382,1230,1657,1270,505,123,456,418,417,390,482,944,1014,1034,1024,2136,968,670,1029,165,306,356,904,685,201,613,845,275,628,499,300,1312,104,329,416,161,533,527,86,825,1462,245,760,216,236,517,2511,1871,283,98,102,295,241,126,126,126,126,126,126,126,126,126\n'