Repository 'commonality_finder'
hg clone https://toolshed.g2.bx.psu.edu/repos/jfb/commonality_finder

Changeset 0:3e5fdf933646 (2018-05-25)
Next changeset 1:b791e2bee65c (2019-12-11)
Commit message:
Uploaded
added:
C and D finder/.shed.yml
C and D finder/CandD.R
C and D finder/CandD.xml
C and D finder/test-data/input1.csv
C and D finder/test-data/input2.csv
C and D finder/test-data/input3.csv
C and D finder/test-data/input4.csv
C and D finder/test-data/input5.csv
C and D finder/test-data/input6.csv
b
diff -r 000000000000 -r 3e5fdf933646 C and D finder/.shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/C and D finder/.shed.yml Fri May 25 10:56:10 2018 -0400
[
@@ -0,0 +1,8 @@
+categories: [Computational chemistry]
+description: Commonality_and_Difference_Finder_7_7
+homepage_url: https://pubs.acs.org/doi/abs/10.1021/ja507164a
+long_description: This tool is intended for use in conjunction with KinaMine 7-7 and
+  Kinatest 7-7. It allows comparisons between triplicate replicates of KALIP-KinaMine output.
+name: Commonality and Difference finderMADE 7 TO 7.R
+owner: blank121
+remote_repository_url: https://github.umn.edu/blank121/Commonality-and-difference-finder
b
diff -r 000000000000 -r 3e5fdf933646 C and D finder/CandD.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/C and D finder/CandD.R Fri May 25 10:56:10 2018 -0400
[
b'@@ -0,0 +1,1118 @@\n+FirstSubstrateSet<- read.csv("input1.csv", stringsAsFactors=FALSE)\r\n+Firstsubbackfreq<- read.csv("input2.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+SecondSubstrateSet<- read.csv("input3.csv", stringsAsFactors=FALSE)\r\n+Secondsubbackfreq<- read.csv("input4.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+ThirdSubstrateSet<- read.csv("input5.csv", stringsAsFactors=FALSE)\r\n+Thirdsubbackfreq<- read.csv("input6.csv", header=FALSE, stringsAsFactors=FALSE)\r\n+\r\n+\r\n+args = commandArgs(trailingOnly=TRUE)\r\n+\r\n+print(args[1])\r\n+print(args[2])\r\n+print(args[3])\r\n+\r\n+\r\n+#ff you want ONLY FULL MOTIFS, put "YES" here, please use all caps\r\n+FullMotifsOnly_questionmark<-args[1]\r\n+#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps\r\n+TruncatedMotifsOnly_questionmark<-args[2]\r\n+#if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps)\r\n+Are_You_Looking_For_Commonality<-args[3]\r\n+\r\n+\r\n+#then put the names of your output files here\r\n+Shared_motifs_table<-"sharedmotifs.csv"\r\n+Shared_subbackfreq_table<-"sharedSBF.csv"\r\n+\r\n+# Shared_motifs_table<-"Shared motifs 7-27-17.csv"\r\n+# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv"\r\n+\r\n+First_unshared_motifs_table<-"R1 substrates.csv"\r\n+First_unshared_subbackfreq<-"R1 SBF.csv"\r\n+\r\n+Second_unshared_motifs_table<-"R2 subs.csv"\r\n+Second_unshared_subbackfreq<-"R2 SBf.csv"\r\n+\r\n+Third_unshared_motifs_table<-"R3 subs.csv"\r\n+Third_unshared_subbackfreq<-"R3 SBF.csv"\r\n+\r\n+#final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles.  
I think I\'ll poke around\r\n+#other languages to see if any of them can do it.\r\n+####################################################################################################################################\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+FirstxY<-rep("xY",times=nrow(FirstSubstrateSet))\r\n+FirstSubstrateSet[,11]<-FirstxY\r\n+\r\n+SecondxY<-rep("xY",times=nrow(SecondSubstrateSet))\r\n+SecondSubstrateSet[,11]<-SecondxY\r\n+\r\n+ThirdxY<-rep("xY",times=nrow(ThirdSubstrateSet))\r\n+ThirdSubstrateSet[,11]<-ThirdxY\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+# better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two \r\n+# separate proteins thus two separate accession numbers?\r\n+# It should actually output the shared motif and BOTH accession numbers.  Right now it does not, it only maps out the second\r\n+# accession number.  
So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+####################################################################################################################################\r\n+\r\n+#Create the motif sets, deciding wether or not you\'re looking for truncated or full here\r\n+#full only\r\n+if (Are_You_Looking_For_Commonality=="YES"){\r\n+  if (FullMotifsOnly_questionmark=="YES"){\r\n+    FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)\r\n+    FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1)\r\n+    leftspaces<-c()\r\n+    rightspaces<-c()\r\n+    for (i in 1:nrow(FirstSubstrateSet)){\r\n+      FTLwtletters<-FirstSubstrateSet[i,4:18]\r\n+      FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"]\r\n+      FTLwtletters<-paste(FTLw'..b'utmatrix<-matrix(data=c(FTLwtmotifsFINAL,names(FTLwtmotifsFINAL)),ncol = 2)\r\n+  \r\n+  #another fucking for loop\r\n+  FLTreference<-FTLoutputmatrix[,2]\r\n+  \r\n+  FirstLine<-colnames(FirstSubstrateSet)\r\n+  FirstLine<-FirstLine[1:23]\r\n+  for (q in 1:nrow(FTLoutputmatrix)) {\r\n+    thismotif<-unlist(strsplit(FTLoutputmatrix[q,1],""))\r\n+    thisoutput<-c("","",FTLoutputmatrix[q,2],thismotif,"","","","","")\r\n+    FirstLine<-rbind(FirstLine,thisoutput)\r\n+  }\r\n+  \r\n+  \r\n+  \r\n+  write.table(x=FirstLine,\r\n+              file=First_unshared_motifs_table,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  
columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1]))\r\n+  columnalheader<-matrix(columnalheader,nrow = 1)\r\n+  \r\n+  # columnalheader<-rbind(columnalheader,FTLFinalMatrix)\r\n+  \r\n+  write.table(x=columnalheader,\r\n+              file=First_unshared_subbackfreq,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  write.table(x=FTLFinalMatrix[2:nrow(FTLFinalMatrix),],\r\n+              file=First_unshared_subbackfreq,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  ############################################################################################################\r\n+  \r\n+  D835Youtputmatrix<-matrix(data=c(D835YmotifsFINAL,names(D835YmotifsFINAL)),ncol = 2)\r\n+\r\n+  FLTreference<-D835Youtputmatrix[,2]\r\n+  \r\n+  FirstLine<-colnames(FirstSubstrateSet)\r\n+  FirstLine<-FirstLine[1:23]\r\n+  for (q in 1:nrow(D835Youtputmatrix)) {\r\n+    thismotif<-unlist(strsplit(D835Youtputmatrix[q,1],""))\r\n+    thisoutput<-c("","",D835Youtputmatrix[q,2],thismotif,"","","","","")\r\n+    FirstLine<-rbind(FirstLine,thisoutput)\r\n+  }\r\n+  \r\n+  \r\n+    \r\n+  write.table(x=FirstLine,\r\n+              file=Second_unshared_motifs_table,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1]))\r\n+  columnalheader<-matrix(columnalheader,nrow = 1)\r\n+  \r\n+  # columnalheader<-rbind(columnalheader,D835YFinalMatrix)\r\n+  \r\n+  write.table(x=columnalheader,\r\n+              file=Second_unshared_subbackfreq,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  write.table(x=D835YFinalMatrix[2:nrow(D835YFinalMatrix),],\r\n+              file=Second_unshared_subbackfreq,\r\n+              quote=FALSE, 
sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  ############################################################################################################\r\n+  \r\n+  ITDoutputmatrix<-matrix(data = c(ITDmotifsFINAL,names(ITDmotifsFINAL)),ncol = 2)\r\n+  \r\n+  FLTreference<-ITDoutputmatrix[,2]\r\n+  \r\n+  FirstLine<-colnames(FirstSubstrateSet)\r\n+  FirstLine<-FirstLine[1:23]\r\n+  for (q in 1:nrow(ITDoutputmatrix)) {\r\n+    thismotif<-unlist(strsplit(ITDoutputmatrix[q,1],""))\r\n+    thisoutput<-c("","",ITDoutputmatrix[q,2],thismotif,"","","","","")\r\n+    FirstLine<-rbind(FirstLine,thisoutput)\r\n+  }\r\n+  \r\n+  \r\n+  write.table(x=FirstLine,\r\n+              file=Third_unshared_motifs_table,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1]))\r\n+  columnalheader<-matrix(columnalheader,nrow = 1)\r\n+  \r\n+  # columnalheader<-rbind(columnalheader,ITDFinalMatrix)\r\n+\r\n+  write.table(x=columnalheader,\r\n+              file=Third_unshared_subbackfreq,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+  write.table(x=ITDFinalMatrix[2:nrow(ITDFinalMatrix),],\r\n+              file=Third_unshared_subbackfreq,\r\n+              quote=FALSE, sep=",",\r\n+              row.names=FALSE,col.names = FALSE, na="", append=TRUE)\r\n+  \r\n+}\r\n'
b
diff -r 000000000000 -r 3e5fdf933646 C and D finder/CandD.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/C and D finder/CandD.xml Fri May 25 10:56:10 2018 -0400
[
@@ -0,0 +1,54 @@
+<tool id="commonality_r" name="commonality_finder" version="1.0.0">
+    <description>finds commonality</description>
+    <requirements>
+       <requirement type="package">R</requirement>
+    </requirements>
+    <!-- CandD.R reads input1.csv through input6.csv from the working directory,
+         so each dataset is symlinked under that fixed name. The three positional
+         arguments are read by the script as FullMotifsOnly, TruncatedMotifsOnly
+         and Are_You_Looking_For_Commonality; "NO NO YES" selects the commonality
+         run, which is presumably the one that writes the outputs declared below. -->
+    <command><![CDATA[
+ ln -s '$input1' input1.csv &&
+ ln -s '$input2' input2.csv &&
+ ln -s '$input3' input3.csv &&
+ ln -s '$input4' input4.csv &&
+ ln -s '$input5' input5.csv &&
+ ln -s '$input6' input6.csv &&
+
+ Rscript '$__tool_directory__/CandD.R' NO NO YES
+    ]]></command>
+    <inputs>
+        <param format="csv" name="input1" type="data" label="First Substrate Set"/>
+        <param format="csv" name="input2" type="data" label="First Substrate Background Frequency"/>
+        <param format="csv" name="input3" type="data" label="Second Substrate Set"/>
+        <param format="csv" name="input4" type="data" label="Second Substrate Background Frequency"/>
+        <param format="csv" name="input5" type="data" label="Third Substrate Set"/>
+        <param format="csv" name="input6" type="data" label="Third Substrate Background Frequency"/>
+        <param name="outGroup" type="text" value="kinase" label="Kinase Name"/>
+    </inputs>
+    <outputs>
+        <data format="csv" name="substrates" from_work_dir="sharedmotifs.csv" label="${outGroup}_shared Substrates.csv"/>
+        <data format="csv" name="SBF" from_work_dir="sharedSBF.csv" label="${outGroup}_shared SBF.csv"/>
+    </outputs>
+    <tests>
+        <!-- Smoke test: runs the tool on the six bundled test-data files and checks
+             that both declared outputs are produced. Output contents are not
+             compared because no expected-output files ship in test-data. -->
+        <test expect_num_outputs="2">
+            <param name="input1" ftype="csv" value="input1.csv"/>
+            <param name="input2" ftype="csv" value="input2.csv"/>
+            <param name="input3" ftype="csv" value="input3.csv"/>
+            <param name="input4" ftype="csv" value="input4.csv"/>
+            <param name="input5" ftype="csv" value="input5.csv"/>
+            <param name="input6" ftype="csv" value="input6.csv"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+ This tool uses the output from 3 Kinamine runs to determine what motifs are shared between the three runs
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1021/ja507164a</citation>
+    </citations>
+</tool>
+
b
diff -r 000000000000 -r 3e5fdf933646 C and D finder/test-data/input1.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/C and D finder/test-data/input1.csv Fri May 25 10:56:10 2018 -0400
b
b'@@ -0,0 +1,251 @@\n+Substrates,Species,Reference,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,,,,,Phosphite,\r\n+,,sp|P13639|EF2_HUMAN,,,,K,E,D,L,Y,L,K,P,I,Q,R,,,,,KEDLYLKPIQR,PGKKEDLYLKPIQRT,\r\n+,,sp|O43390|HNRPR_HUMAN,,,,,,,D,Y,A,F,V,H,F,E,D,,,,DYAFVHFED,RVKKLKDYAFVHFED,\r\n+,,sp|P78347|GTF2I_HUMAN,,,,T,V,E,D,Y,F,C,F,C,Y,G,K,,,,TVEDYFCFCYGK,LRKTVEDYFCFCYGK,\r\n+,,sp|P62899|RL31_HUMAN,,,,,,,L,Y,T,L,V,T,Y,V,P,,,,LYTLVTYVP,EDSPNKLYTLVTYVP,\r\n+,,sp|Q08J23|NSUN2_HUMAN,,L,A,Q,E,G,I,Y,T,L,Y,P,F,I,N,,,,LAQEGIYTLYPFIN,RLAQEGIYTLYPFIN,\r\n+,,sp|P60842|IF4A1_HUMAN,,,,,D,Q,I,Y,D,I,F,Q,K,,,,,,DQIYDIFQK,RGFKDQIYDIFQKLN,\r\n+,,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN,T,V,P,I,Y,E,G,Y,A,L,P,H,A,I,L,,,,TVPIYEGYALPHAIL,TVPIYEGYALPHAIL,\r\n+,,sp|Q02790|FKBP4_HUMAN,,G,E,H,S,I,V,Y,L,K,P,S,Y,A,F,,,,GEHSIVYLKPSYAF,KGEHSIVYLKPSYAF,\r\n+,,sp|Q9NR30|DDX21_HUMAN,,,,,,S,T,Y,E,Q,V,D,L,I,G,,,,STYEQVDLIG,KKYMKSTYEQVDLIG,\r\n+,,sp|P31939|PUR9_HUMAN,,,,,N,G,N,Y,C,V,L,Q,M,D,Q,,,,NGNYCVLQMDQ,SKKKNGNYCVLQMDQ,\r\n+,,sp|P60709|ACTB_HUMAN; 
sp|P63261|ACTG_HUMAN,,,,,K,D,L,Y,A,N,T,V,L,S,G,,,,KDLYANTVLSG,VDIRKDLYANTVLSG,\r\n+,,sp|P62805|H4_HUMAN,,,I,S,G,L,I,Y,E,E,T,R,,,,,,,ISGLIYEETR,KRISGLIYEETRGVL,\r\n+,,sp|P22234|PUR6_HUMAN,,,,,,E,V,Y,E,L,L,D,S,P,G,,,,EVYELLDSPG,EGKTKEVYELLDSPG,\r\n+,,sp|P30086|PEBP1_HUMAN,,,,,,,L,Y,E,Q,L,S,G,K,,,,,LYEQLSGK,DDYVPKLYEQLSGK,\r\n+,,sp|Q08945|SSRP1_HUMAN,,N,M,S,G,S,L,Y,E,M,V,S,R,,,,,,NMSGSLYEMVSR,KNMSGSLYEMVSRVM,\r\n+,,sp|Q9Y3I0|RTCB_HUMAN,G,M,A,A,A,G,N,Y,A,W,V,N,R,,,,,,GMAAAGNYAWVNR,GMAAAGNYAWVNRSS,\r\n+,,sp|P52272|HNRPM_HUMAN,N,E,C,G,H,V,L,Y,A,D,I,K,,,,,,,NECGHVLYADIK,NECGHVLYADIKMEN,\r\n+,,sp|O75369|FLNB_HUMAN,,,,,D,G,T,Y,A,V,T,Y,I,P,D,,,,DGTYAVTYIPD,HDNKDGTYAVTYIPD,\r\n+,,sp|P38919|IF4A3_HUMAN,,,,,E,Q,I,Y,D,V,Y,R,,,,,,,EQIYDVYR,KGFKEQIYDVYRYLP,\r\n+,,sp|P10809|CH60_HUMAN,,,G,Y,I,S,P,Y,F,I,N,T,S,K,,,,,GYISPYFINTSK,DRGYISPYFINTSKG,\r\n+,,sp|P61254|RL26_HUMAN,,,,K,Y,V,I,Y,I,E,R,,,,,,,,KYVIYIER,YRKKYVIYIERVQRE,\r\n+,,sp|P62805|H4_HUMAN,V,T,A,M,D,V,V,Y,A,L,K,,,,,,,,VTAMDVVYALK,VTAMDVVYALKRQGR,\r\n+,,sp|P60842|IF4A1_HUMAN,,G,F,K,D,Q,I,Y,D,I,F,Q,K,,,,,,GFKDQIYDIFQK,RGFKDQIYDIFQKLN,\r\n+,,sp|Q9BTM1|H2AJ_HUMAN; sp|Q99878|H2A1J_HUMAN; sp|Q96KK5|H2A1H_HUMAN; sp|Q6FI13|H2A2A_HUMAN; sp|Q16777|H2A2C_HUMAN; sp|P0C0S8|H2A1_HUMAN; 
sp|Q8IUE6|H2A2B_HUMAN,,,,,K,G,N,Y,A,E,R,,,,,,,,KGNYAER,RLLRKGNYAERVGAG,\r\n+,,sp|P52565|GDIR1_HUMAN,,,I,D,K,T,D,Y,M,V,G,S,Y,G,P,,,,IDKTDYMVGSYGP,VKIDKTDYMVGSYGP,\r\n+,,sp|P08865|RSSA_HUMAN,,,,S,D,G,I,Y,I,I,N,L,K,,,,,,SDGIYIINLK,KRKSDGIYIINLKRT,\r\n+,,sp|P54727|RD23B_HUMAN,,,,,A,V,E,Y,L,L,M,G,I,P,G,,,,AVEYLLMGIPG,NPDRAVEYLLMGIPG,\r\n+,,sp|O43390|HNRPR_HUMAN,,,,,L,K,D,Y,A,F,V,H,F,E,D,,,,LKDYAFVHFED,RVKKLKDYAFVHFED,\r\n+,,sp|Q02543|RL18A_HUMAN,,S,S,G,E,I,V,Y,C,G,Q,V,F,E,K,,,,SSGEIVYCGQVFEK,KSSGEIVYCGQVFEK,\r\n+,,sp|P12956|XRCC6_HUMAN,,,,,,N,I,Y,V,L,Q,E,L,D,N,,,,NIYVLQELDN,SVNFKNIYVLQELDN,\r\n+,,sp|Q13310|PABP4_HUMAN,,,,,,,G,Y,A,F,V,H,F,E,T,,,,GYAFVHFET,DENGSKGYAFVHFET,\r\n+,,sp|Q16881|TRXR1_HUMAN,,,,,S,Y,D,Y,D,L,I,I,I,G,G,,,,SYDYDLIIIGG,DLPKSYDYDLIIIGG,\r\n+,,sp|O43390|HNRPR_HUMAN,,,,,S,T,A,Y,E,D,Y,Y,Y,H,P,,,,STAYEDYYYHP,QASRSTAYEDYYYHP,\r\n+,,sp|P84090|ERH_HUMAN,,,,,,,I,Y,V,L,L,R,,,,,,,IYVLLR,DWIKEKIYVLLRRQA,\r\n+,,sp|Q9UHA4|LTOR3_HUMAN,,,S,I,I,C,Y,Y,N,T,Y,Q,V,V,Q,,,,SIICYYNTYQVVQ,NKSIICYYNTYQVVQ,\r\n+,,sp|Q9UHA4|LTOR3_HUMAN,,,,S,I,I,C,Y,Y,N,T,Y,Q,V,V,,,,SIICYYNTYQVV,KNKSIICYYNTYQVV,\r\n+,,sp|O76094|SRP72_HUMAN,,,,,,E,L,Y,G,Q,V,L,Y,R,,,,,ELYGQVLYR,TDKLKELYGQVLYRL,\r\n+,,sp|Q16778|H2B2E_HUMAN; sp|P23527|H2B1O_HUMAN; sp|P06899|H2B1J_HUMAN; sp|P33778|H2B1B_HUMAN; sp|Q8N257|H2B3B_HUMAN; sp|Q6DRA6|H2B2D_HUMAN; sp|Q6DN03|H2B2C_HUMAN,,K,E,S,Y,S,I,Y,V,Y,K,,,,,,,,KESYSIYVYK,RKESYSIYVYKVLKQ,RKESYSIYVYKVLKR\r\n+,,sp|P22626|ROA2_HUMAN,,,,,,N,Y,Y,E,Q,W,G,K,,,,,,NYYEQWGK,EESLRNYYEQWGKLT,\r\n+,,sp|O14979|HNRDL_HUMAN,,,,D,L,T,E,Y,L,S,R,,,,,,,,DLTEYLSR,SKKDLTEYLSRFGEV,\r\n+,,sp|P09211|GSTP1_HUMAN; 
sp|GSTP1_HUMAN,,,,,,,P,Y,T,V,V,Y,F,P,V,,,,PYTVVYFPV,MPPYTVVYFPV,PPYTVVYFPV\r\n+,,sp|Q13263|TIF1B_HUMAN,P,G,S,T,T,E,D,Y,N,L,I,V,I,E,R,,,,PGSTTEDYNLIVIER,PGSTTEDYNLIVIER,\r\n+,,sp|P07948|LYN_HUMAN,,,,,,,L,Y,A,V,V,T,R,,,,,,LYAVVTR,HDKLVRL'..b'_HUMAN,D,G,G,S,D,Q,N,Y,D,I,V,T,I,G,A,,,,DGGSDQNYDIVTIGA,DGGSDQNYDIVTIGA,\r\n+,,sp|P24928|RPB1_HUMAN,,,,,,E,L,Y,H,V,I,S,F,D,G,,,,ELYHVISFDG,KALERELYHVISFDG,\r\n+,,sp|Q8TD19|NEK9_HUMAN,,,,,,E,L,Y,T,W,V,N,M,Q,G,,,,ELYTWVNMQG,VTVEKELYTWVNMQG,\r\n+,,sp|P14618|KPYM_HUMAN,,E,A,E,A,A,I,Y,H,L,Q,L,F,E,E,,,,EAEAAIYHLQLFEE,REAEAAIYHLQLFEE,\r\n+,,sp|P25789|PSA4_HUMAN,Q,S,D,P,S,G,N,Y,G,G,W,K,,,,,,,QSDPSGNYGGWK,QSDPSGNYGGWKATC,\r\n+,,sp|P09211|GSTP1_HUMAN; sp|GSTP1_HUMAN,,,,,,P,P,Y,T,V,V,Y,F,P,V,,,,PPYTVVYFPV,MPPYTVVYFPV,PPYTVVYFPV\r\n+,,sp|O43390|HNRPR_HUMAN,,,,,,,G,Y,A,F,I,T,F,C,G,,,,GYAFITFCG,LSGQNRGYAFITFCG,\r\n+,,sp|Q92835|SHIP1_HUMAN,,,,,N,Q,N,Y,M,N,I,L,R,,,,,,NQNYMNILR,KLRRNQNYMNILRFL,\r\n+,,sp|Q14166|TTL12_HUMAN,,,,,,,,Y,I,V,L,L,R,,,,,,YIVLLR,KVKFDIRYIVLLRSV,\r\n+,,sp|P60174|TPIS_HUMAN,,,,V,V,L,A,Y,E,P,V,W,A,I,G,,,,VVLAYEPVWAIG,WSKVVLAYEPVWAIG,\r\n+,,sp|Q9UKK9|NUDT5_HUMAN,,,,,T,L,H,Y,E,C,I,V,L,V,K,,,,TLHYECIVLVK,VLQRTLHYECIVLVK,\r\n+,,sp|Q9Y277|VDAC3_HUMAN,,,,,V,C,N,Y,G,L,T,F,T,Q,K,,,,VCNYGLTFTQK,TKYKVCNYGLTFTQK,\r\n+,,sp|Q16666|IF16_HUMAN,Q,A,S,G,N,I,V,Y,G,V,F,M,L,H,K,,,,QASGNIVYGVFMLHK,QASGNIVYGVFMLHK,\r\n+,,sp|Q96BQ5|CC127_HUMAN,,,,,Q,N,I,Y,C,S,L,F,L,P,R,,,,QNIYCSLFLPR,LTERQNIYCSLFLPR,\r\n+,,sp|P50395|GDIB_HUMAN,D,Y,L,D,Q,P,C,Y,E,T,I,N,R,,,,,,DYLDQPCYETINR,DYLDQPCYETINRIK,\r\n+,,sp|P62805|H4_HUMAN,,,,D,A,V,T,Y,T,E,H,A,K,,,,,,DAVTYTEHAK,VIRDAVTYTEHAKRK,\r\n+,,sp|O75369|FLNB_HUMAN,H,D,N,K,D,G,T,Y,A,V,T,Y,I,P,D,,,,HDNKDGTYAVTYIPD,HDNKDGTYAVTYIPD,\r\n+,,sp|P13639|EF2_HUMAN,,,,,R,C,L,Y,A,S,V,L,T,A,Q,,,,RCLYASVLTAQ,PTARRCLYASVLTAQ,\r\n+,,sp|P60842|IF4A1_HUMAN; sp|P38919|IF4A3_HUMAN; 
sp|Q14240|IF4A2_HUMAN,,,,,,G,I,Y,A,Y,G,F,E,K,P,,,,GIYAYGFEKP,ESLLRGIYAYGFEKP,EDLLRGIYAYGFEKP\r\n+,,sp|P0DP24|CALM2_HUMAN,F,D,K,D,G,N,G,Y,I,S,A,A,E,L,R,,,,FDKDGNGYISAAELR,FDKDGNGYISAAELR,\r\n+,,sp|E9PAV3|NACAM_HUMAN; sp|Q13765|NACA_HUMAN,,S,P,A,S,D,T,Y,I,V,F,G,E,A,K,,,,SPASDTYIVFGEAK,KSPASDTYIVFGEAK,\r\n+,,sp|P23193|TCEA1_HUMAN,,,,,N,C,T,Y,T,Q,V,Q,T,R,,,,,NCTYTQVQTR,CKKKNCTYTQVQTRS,\r\n+,,sp|Q06830|PRDX1_HUMAN,,,T,I,A,Q,D,Y,G,V,L,K,,,,,,,TIAQDYGVLK,KRTIAQDYGVLKADE,\r\n+,,sp|Q99497|PARK7_HUMAN,,,,,E,G,P,Y,D,V,V,V,L,P,G,,,,EGPYDVVVLPG,DAKKEGPYDVVVLPG,\r\n+,,sp|P84098|RL19_HUMAN,,,,,,H,M,Y,H,S,L,Y,L,K,,,,,HMYHSLYLK,KKIDRHMYHSLYLKV,\r\n+,,sp|Q92544|TM9S4_HUMAN,,T,Q,L,P,Y,E,Y,Y,S,L,P,F,C,Q,,,,TQLPYEYYSLPFCQ,RTQLPYEYYSLPFCQ,\r\n+,,sp|P11940|PABP1_HUMAN; sp|Q13310|PABP4_HUMAN,,,,,S,L,G,Y,A,Y,V,N,F,Q,Q,,,,SLGYAYVNFQQ,ITRRSLGYAYVNFQQ,\r\n+,,sp|Q07020|RL18_HUMAN,,,,S,Q,D,I,Y,L,R,,,,,,,,,SQDIYLR,EPKSQDIYLRLLVKL,\r\n+,,sp|P46783|RS10_HUMAN,,,,,I,A,I,Y,E,L,L,F,K,,,,,,IAIYELLFK,KKNRIAIYELLFKEG,\r\n+,,sp|Q9NWQ8|PHAG1_HUMAN,,,,,E,N,D,Y,E,S,I,S,D,L,Q,,,,ENDYESISDLQ,LVPKENDYESISDLQ,\r\n+,,sp|P26641|EF1G_HUMAN,,,A,A,G,T,L,Y,T,Y,P,E,N,W,R,,,,AAGTLYTYPENWR,MAAGTLYTYPENWR,\r\n+,,sp|P62906|RL10A_HUMAN,,,,,D,T,L,Y,E,A,V,R,,,,,,,DTLYEAVR,KVSRDTLYEAVREVL,\r\n+,,sp|Q9HC35|EMAL4_HUMAN,I,I,N,Q,E,G,E,Y,I,K,M,F,M,R,,,,,IINQEGEYIKMFMR,IINQEGEYIKMFMRG,\r\n+,,sp|P04040|CATA_HUMAN; 
sp|CATA_HUMAN,,,,L,G,P,N,Y,L,H,I,P,V,N,C,,,,LGPNYLHIPVNC,RHRLGPNYLHIPVNC,\r\n+,,sp|P08238|HS90B_HUMAN,E,M,T,S,L,S,E,Y,V,S,R,,,,,,,,EMTSLSEYVSR,EMTSLSEYVSRMKET,\r\n+,,sp|P42224|STAT1_HUMAN,,,,,S,Q,W,Y,E,L,Q,Q,L,D,S,,,,SQWYELQQLDS,MSQWYELQQLDS,\r\n+,,sp|P19338|NUCL_HUMAN,,,,S,I,S,L,Y,Y,T,G,E,K,,,,,,SISLYYTGEK,DGRSISLYYTGEKGQ,\r\n+,,sp|P78527|PRKDC_HUMAN,,,,,,,F,Y,Q,G,F,L,F,S,E,,,,FYQGFLFSE,VFNELKFYQGFLFSE,\r\n+,,sp|Q08211|DHX9_HUMAN,A,H,N,N,M,T,N,Y,A,T,V,W,A,S,K,,,,AHNNMTNYATVWASK,AHNNMTNYATVWASK,\r\n+,,sp|O60841|IF2P_HUMAN,,,T,S,E,V,P,Y,A,G,I,N,I,G,P,,,,TSEVPYAGINIGP,LKTSEVPYAGINIGP,\r\n+,,sp|P63244|RACK1_HUMAN,,,,D,E,T,N,Y,G,I,P,Q,R,,,,,,DETNYGIPQR,LTRDETNYGIPQRAL,\r\n+,,sp|Q99880|H2B1L_HUMAN; sp|Q99877|H2B1N_HUMAN; sp|Q93079|H2B1H_HUMAN; sp|P62807|H2B1C_HUMAN; sp|P58876|H2B1D_HUMAN; sp|O60814|H2B1K_HUMAN; sp|Q99879|H2B1M_HUMAN; sp|Q5QNW6|H2B2F_HUMAN; sp|P57053|H2BFS_HUMAN,,,,,,E,S,Y,S,V,Y,V,Y,K,,,,,ESYSVYVYK,KRSRKESYSVYVYKV,\r\n+,,sp|P62263|RS14_HUMAN,D,R,D,E,S,S,P,Y,A,A,M,L,A,A,Q,,,,DRDESSPYAAMLAAQ,DRDESSPYAAMLAAQ,\r\n+,,sp|P42704|LPPRC_HUMAN,,,,,,A,L,Y,E,H,L,T,A,K,,,,,ALYEHLTAK,VTSAKALYEHLTAKN,\r\n'
b
diff -r 000000000000 -r 3e5fdf933646 C and D finder/test-data/input2.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/C and D finder/test-data/input2.csv Fri May 25 10:56:10 2018 -0400
b
b'@@ -0,0 +1,36 @@\n+Amino Acids,sp|Q14204|DYHC1_HUMAN,sp|A5A3E0|POTEF_HUMAN,sp|P62269|RS18_HUMAN,sp|E9PAV3|NACAM_HUMAN,sp|P07195|LDHB_HUMAN,sp|Q9NR30|DDX21_HUMAN,sp|P26599|PTBP1_HUMAN,sp|Q14839|CHD4_HUMAN,sp|P78406|RAE1L_HUMAN,sp|Q9BQE3|TBA1C_HUMAN,sp|Q9NWQ8|PHAG1_HUMAN,sp|Q8WUM0|NU133_HUMAN,sp|P08238|HS90B_HUMAN,sp|P31939|PUR9_HUMAN,sp|P60174|TPIS_HUMAN,sp|P23193|TCEA1_HUMAN,sp|Q8TDN6|BRX1_HUMAN,sp|P62750|RL23A_HUMAN,sp|P11940|PABP1_HUMAN,sp|Q9UM73|ALK_HUMAN,sp|P62807|H2B1C_HUMAN,sp|P36897|TGFR1_HUMAN,sp|Q6S8J3|POTEE_HUMAN,sp|P10253|LYAG_HUMAN,sp|P52272|HNRPM_HUMAN,sp|Q9BTM1|H2AJ_HUMAN,sp|Q08211|DHX9_HUMAN,sp|P22234|PUR6_HUMAN,sp|P57053|H2BFS_HUMAN,sp|P10809|CH60_HUMAN,sp|P55884|EIF3B_HUMAN,sp|Q9H8H2|DDX31_HUMAN,sp|Q07020|RL18_HUMAN,sp|P42229|STA5A_HUMAN,sp|Q9NQ50|RM40_HUMAN,sp|P07948|LYN_HUMAN,sp|P60891|PRPS1_HUMAN,sp|Q9Y3I0|RTCB_HUMAN,sp|P60842|IF4A1_HUMAN,sp|P62917|RL8_HUMAN,sp|P63010|AP2B1_HUMAN,sp|Q9GZY6|NTAL_HUMAN,sp|P62979|RS27A_HUMAN,sp|P22626|ROA2_HUMAN,sp|P00558|PGK1_HUMAN,sp|Q5VTE0|EF1A3_HUMAN,sp|Q71U36|TBA1A_HUMAN,sp|P63244|RACK1_HUMAN,sp|P04040|CATA_HUMAN,sp|Q99497|PARK7_HUMAN,sp|P25789|PSA4_HUMAN,sp|Q16778|H2B2E_HUMAN,sp|P48444|COPD_HUMAN,sp|Q02790|FKBP4_HUMAN,sp|P78527|PRKDC_HUMAN,sp|P11310|ACADM_HUMAN,sp|P54136|SYRC_HUMAN,sp|CATA_HUMAN,sp|Q14697|GANAB_HUMAN,sp|GSTP1_HUMAN,sp|O60814|H2B1K_HUMAN,sp|Q9BXJ9|NAA15_HUMAN,sp|P62906|RL10A_HUMAN,sp|Q6FI13|H2A2A_HUMAN,sp|P18124|RL7_HUMAN,sp|P19105|ML12A_HUMAN,sp|O60841|IF2P_HUMAN,sp|Q99878|H2A1J_HUMAN,sp|Q96BQ5|CC127_HUMAN,sp|P23528|COF1_HUMAN,sp|Q07666|KHDR1_HUMAN,sp|P60709|ACTB_HUMAN,sp|P30086|PEBP1_HUMAN,sp|Q13263|TIF1B_HUMAN,sp|Q9UGN4|CLM8_HUMAN,sp|P54727|RD23B_HUMAN,sp|P38919|IF4A3_HUMAN,sp|Q13310|PABP4_HUMAN,sp|P55060|XPO2_HUMAN,sp|Q14699|RFTN1_HUMAN,sp|Q8IUE6|H2A2B_HUMAN,sp|P78347|GTF2I_HUMAN,sp|P39656|OST48_HUMAN,sp|P62987|RL40_HUMAN,sp|Q16881|TRXR1_HUMAN,sp|Q6DN03|H2B2C_HUMAN,sp|Q08J23|NSUN2_HUMAN,sp|Q9BWF3|RBM4_HUMAN,sp|Q9UIB8|SLAF5_HUMAN,sp|Q01780|EXOSX_HUMAN,sp|P19338|NUCL_HUMAN,sp|O75503|CLN5_HU
MAN,sp|P14618|KPYM_HUMAN,sp|Q13595|TRA2A_HUMAN,sp|P68363|TBA1B_HUMAN,sp|Q14240|IF4A2_HUMAN,sp|P36896|ACV1B_HUMAN,sp|Q9HC35|EMAL4_HUMAN,sp|Q02880|TOP2B_HUMAN,sp|P50395|GDIB_HUMAN,sp|O75369|FLNB_HUMAN,sp|P47756|CAPZB_HUMAN,sp|O43390|HNRPR_HUMAN,sp|Q5R372|RBG1L_HUMAN,sp|P07900|HS90A_HUMAN,sp|P14625|ENPL_HUMAN,sp|P06733|ENOA_HUMAN,sp|P68104|EF1A1_HUMAN,sp|Q99880|H2B1L_HUMAN,sp|Q2M2I8|AAK1_HUMAN,sp|P26641|EF1G_HUMAN,sp|Q15084|PDIA6_HUMAN,sp|Q96PE3|INP4A_HUMAN,sp|Q14166|TTL12_HUMAN,sp|Q92835|SHIP1_HUMAN,sp|Q16531|DDB1_HUMAN,sp|Q5JPE7|NOMO2_HUMAN,sp|P61604|CH10_HUMAN,sp|P0CG39|POTEJ_HUMAN,sp|Q9BQ04|RBM4B_HUMAN,sp|P62995|TRA2B_HUMAN,sp|P52788|SPSY_HUMAN,sp|P58876|H2B1D_HUMAN,sp|Q9BUQ8|DDX23_HUMAN,sp|P23921|RIR1_HUMAN,sp|O76094|SRP72_HUMAN,sp|Q14152|EIF3A_HUMAN,sp|Q13765|NACA_HUMAN,sp|Q13347|EIF3I_HUMAN,sp|O14602|IF1AY_HUMAN,sp|P42224|STAT1_HUMAN,sp|Q9Y2X3|NOP58_HUMAN,sp|P62195|PRS8_HUMAN,sp|P53396|ACLY_HUMAN,sp|O75368|SH3L1_HUMAN,sp|Q93079|H2B1H_HUMAN,sp|P62258|1433E_HUMAN,sp|Q8NER5|ACV1C_HUMAN,sp|Q9UHA4|LTOR3_HUMAN,sp|Q96KK5|H2A1H_HUMAN,sp|P11908|PRPS2_HUMAN,sp|P62829|RL23_HUMAN,sp|P12956|XRCC6_HUMAN,sp|P11021|BIP_HUMAN,sp|P63173|RL38_HUMAN,sp|P06899|H2B1J_HUMAN,sp|O60506|HNRPQ_HUMAN,sp|Q5QNW6|H2B2F_HUMAN,sp|Q9UKV8|AGO2_HUMAN,sp|P63261|ACTG_HUMAN,sp|P0CG38|POTEI_HUMAN,sp|P15144|AMPN_HUMAN,sp|P68366|TBA4A_HUMAN,sp|P62263|RS14_HUMAN,sp|P13639|EF2_HUMAN,sp|P0CG47|UBB_HUMAN,sp|Q15155|NOMO1_HUMAN,sp|Q9UKK9|NUDT5_HUMAN,sp|P23527|H2B1O_HUMAN,sp|Q15459|SF3A1_HUMAN,sp|P13796|PLSL_HUMAN,sp|Q6DRA6|H2B2D_HUMAN,sp|Q8TD19|NEK9_HUMAN,sp|P62805|H4_HUMAN,sp|Q9UMX5|NENF_HUMAN,sp|P24928|RPB1_HUMAN,sp|Q8N257|H2B3B_HUMAN,sp|P11586|C1TC_HUMAN,sp|O15144|ARPC2_HUMAN,sp|O14950|ML12B_HUMAN,sp|P61254|RL26_HUMAN,sp|P52565|GDIR1_HUMAN,sp|P00338|LDHA_HUMAN,sp|P25705|ATPA_HUMAN,sp|P09211|GSTP1_HUMAN,sp|P0DP24|CALM2_HUMAN,sp|Q06830|PRDX1_HUMAN,sp|P62249|RS16_HUMAN,sp|Q16666|IF16_HUMAN,sp|P33778|H2B1B_HUMAN,sp|Q7KZF4|SND1_HUMAN,sp|P0C0S8|H2A1_HUMAN,sp|Q969V3|NCLN_HUMAN,sp|P08865|RSSA_HUMAN,sp|P61158|ARP3_
HU'..b'5465116,8.551068884,8.939393939,9.315589354,11.44067797,9.090909091,16.66666667,8.198614319,9.677419355,6.923076923,5.241935484,11.11111111,9.836065574,7.8125,7.692307692,11.44578313,10.60948081,13.6,12.8342246,14.01197605,17.05685619,17.3594132,12.16545012,10.0931677,11.32852729,14.01384083,6.923076923,12.3246493,13.81578947,9.375,10.93990755,16.58031088,11.2125163,13.46153846,17.68115942,10.96045198,10.42253521,10.05586592,10.73446328,21.9858156,11.52993348,10.56511057,10.69306931,16.71763507,12.48462485,11.46067416,13.29746349,12.27436823,7.898894155,13.86503067,11.61202186,12.95143213,10.13824885,11.25541126,18.25396825,14.77627471,11.67048055,11.13636364,14.53428864,8.850931677,16.14802355,13.59649123,14.91712707,8.823529412,12.62042389,13.64902507,21.18055556,10.10928962,18.25396825,9.390243902,12.12121212,12.07153502,7.742402315,16.74418605,14.15384615,4.166666667,11.46666667,10.77504726,8.128078818,11.26248865,8.771929825,17.46031746,9.411764706,11.96754564,12.90322581,7.03125,11.63522013,8.571428571,11.65845649,11.77370031,8.571428571,16.66666667,8.025682183,17.46031746,11.40861467,13.6,12.8372093,14.68459152,11.16071429,12.58278146,9.906759907,13.10043668,14.89361702,11.87214612,17.46031746,9.457755359,8.931419458,15.85365854,13.38100102,8.737864078,9.302325581,18.98477157,18.25396825,11.3368984,11.66666667,11.04651163,9.655172414,12.25490196,11.44578313,11.93490054,9.047619048,10.73825503,11.05527638,7.534246575,16.68789809,18.25396825,10.65934066,6.923076923,11.90053286,12.88135593,10.52631579,12.42236025,13.73056995,18.25396825,9.134615385,8.108108108,11.93181818,9.6,17.18983558,7.142857143,13.13868613,10.40172166,8.673469388,15.55555556,4.166666667,11.53846154,7.751937984,12.69035533,16.66666667,11.47540984,14.66854725,12.26190476,14.89361702,10.97256858,16.96113074,6.944444444,11.83800623,6.666666667,11.82795699,11.9047619\r\n+,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\r\n+Number of Y,128,28,3,10,7,12,10,48,10,19,10,38,24,18,5,5,8,5,19,37,5,13,29,31,5,3,48,7,5,7,19,12,5,19,5,24,5,13,12,6,29,10,6,22,4,12,19,6,21,3,12,5,10,16,118,15,22,21,36,12,5,41,6,3,11,4,16,3,9,6,22,15,10,10,7,7,13,18,29,12,3,24,19,3,24,4,16,27,16,25,8,18,9,22,19,12,19,24,46,26,89,10,40,30,25,24,11,12,5,15,14,11,22,20,31,35,46,3,28,28,23,19,5,16,39,20,24,2,11,4,18,14,7,40,5,5,12,13,4,3,5,3,21,13,2,5,36,5,28,15,28,40,19,1,21,3,45,7,5,16,17,4,29,4,7,85,5,16,12,4,6,9,8,16,12,2,5,8,14,5,28,3,19,7,18,13,17,5,10,15,8,4,47,3,9,48,3,2,4,8,3,11,5,11,22,23,45,10,12,8,37,7,14,24\r\n+Number of pY,1,2,1,1,1,2,2,1,1,3,1,1,4,2,1,1,1,2,6,2,2,1,3,1,1,1,2,2,2,1,1,1,1,1,1,1,1,1,3,1,2,1,1,1,1,4,3,1,1,1,1,2,1,1,2,1,1,1,1,4,2,1,1,1,1,1,1,1,1,2,1,10,2,2,1,1,2,5,1,2,1,1,1,1,1,2,1,1,1,1,3,1,1,1,3,3,1,2,1,3,2,1,6,1,3,1,3,4,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,2,1,2,1,1,3,1,1,1,1,2,2,2,3,2,1,10,1,1,3,1,3,3,1,1,2,1,1,2,1,5,1,1,2,1,1,1,2,2,2,1,5,1,1,1,1,2,1,1,1,1,2,1,1,2,1,1,1,1,2,1,9,1,1,1,1,1,1,1,2,1,1,2,1,1,1,1,2,1,1,1\r\n+Total 
AAs,4646,1075,152,2078,334,783,531,1912,368,449,432,1156,724,592,286,301,353,156,636,1620,126,503,1075,952,730,129,1270,425,126,573,814,851,188,794,206,512,318,505,406,257,937,243,156,353,417,462,451,317,527,189,261,126,511,459,4128,421,660,526,944,209,126,866,217,130,248,171,1220,128,260,166,443,375,187,835,299,409,411,644,971,578,130,998,456,128,649,193,767,364,345,885,710,358,531,282,451,407,505,981,1626,445,2602,277,633,815,732,803,434,462,126,961,437,440,977,644,1189,1140,1267,102,1038,359,288,366,126,820,792,671,1382,215,325,144,750,529,406,1101,114,126,255,493,124,128,318,140,609,654,70,126,623,126,859,375,1075,967,448,151,858,229,1222,219,126,793,627,164,979,103,172,1970,126,935,300,172,145,204,332,553,210,149,199,146,785,126,910,130,563,295,418,483,386,126,208,592,176,125,669,56,685,1394,196,135,144,104,129,788,126,427,709,840,1222,401,283,216,642,165,558,420\r\n'
b
diff -r 000000000000 -r 3e5fdf933646 C and D finder/test-data/input3.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/C and D finder/test-data/input3.csv Fri May 25 10:56:10 2018 -0400
b
b'@@ -0,0 +1,36 @@\n+Amino Acids,sp|Q14204|DYHC1_HUMAN,sp|P62269|RS18_HUMAN,sp|E9PAV3|NACAM_HUMAN,sp|P07195|LDHB_HUMAN,sp|P26599|PTBP1_HUMAN,sp|Q9NR30|DDX21_HUMAN,sp|Q14839|CHD4_HUMAN,sp|P78406|RAE1L_HUMAN,sp|Q9NYV4|CDK12_HUMAN,sp|Q9BQE3|TBA1C_HUMAN,sp|Q9NWQ8|PHAG1_HUMAN,sp|P42167|LAP2B_HUMAN,sp|P08238|HS90B_HUMAN,sp|P60174|TPIS_HUMAN,sp|P31939|PUR9_HUMAN,sp|P23193|TCEA1_HUMAN,sp|Q8TDN6|BRX1_HUMAN,sp|P62750|RL23A_HUMAN,sp|P11940|PABP1_HUMAN,sp|Q9UM73|ALK_HUMAN,sp|P62807|H2B1C_HUMAN,sp|P52272|HNRPM_HUMAN,sp|P10253|LYAG_HUMAN,sp|Q9BTM1|H2AJ_HUMAN,sp|Q08211|DHX9_HUMAN,sp|P22234|PUR6_HUMAN,sp|Q15648|MED1_HUMAN,sp|P57053|H2BFS_HUMAN,sp|P55884|EIF3B_HUMAN,sp|Q969T4|UB2E3_HUMAN,sp|Q9H8H2|DDX31_HUMAN,sp|Q07020|RL18_HUMAN,sp|Q9NQ50|RM40_HUMAN,sp|Q96LR5|UB2E2_HUMAN,sp|Q9Y3I0|RTCB_HUMAN,sp|P60842|IF4A1_HUMAN,sp|P62917|RL8_HUMAN,sp|P63010|AP2B1_HUMAN,sp|P35268|RL22_HUMAN,sp|Q5VTE0|EF1A3_HUMAN,sp|Q71U36|TBA1A_HUMAN,sp|P63244|RACK1_HUMAN,sp|Q13200|PSMD2_HUMAN,sp|Q99497|PARK7_HUMAN,sp|P25789|PSA4_HUMAN,sp|P24941|CDK2_HUMAN,sp|Q16778|H2B2E_HUMAN,sp|Q02790|FKBP4_HUMAN,sp|P42166|LAP2A_HUMAN,sp|P78527|PRKDC_HUMAN,sp|Q9NQS7|INCE_HUMAN,sp|Q8NE71|ABCF1_HUMAN,sp|P05141|ADT2_HUMAN,sp|P54136|SYRC_HUMAN,sp|Q14697|GANAB_HUMAN,sp|GSTP1_HUMAN,sp|O60814|H2B1K_HUMAN,sp|P62906|RL10A_HUMAN,sp|Q6FI13|H2A2A_HUMAN,sp|P19105|ML12A_HUMAN,sp|O60841|IF2P_HUMAN,sp|Q99878|H2A1J_HUMAN,sp|P23528|COF1_HUMAN,sp|Q07666|KHDR1_HUMAN,sp|Q9Y617|SERC_HUMAN,sp|P60709|ACTB_HUMAN,sp|P30086|PEBP1_HUMAN,sp|Q13263|TIF1B_HUMAN,sp|Q9UGN4|CLM8_HUMAN,sp|P54727|RD23B_HUMAN,sp|P38919|IF4A3_HUMAN,sp|P55060|XPO2_HUMAN,sp|Q8IUE6|H2A2B_HUMAN,sp|P78347|GTF2I_HUMAN,sp|P39656|OST48_HUMAN,sp|Q16881|TRXR1_HUMAN,sp|Q9Y490|TLN1_HUMAN,sp|Q01130|SRSF2_HUMAN,sp|Q6DN03|H2B2C_HUMAN,sp|Q9BWF3|RBM4_HUMAN,sp|Q9UIB8|SLAF5_HUMAN,sp|Q01780|EXOSX_HUMAN,sp|P19338|NUCL_HUMAN,sp|O00571|DDX3X_HUMAN,sp|O75503|CLN5_HUMAN,sp|P14618|KPYM_HUMAN,sp|P68363|TBA1B_HUMAN,sp|Q14240|IF4A2_HUMAN,sp|Q9HC35|EMAL4_HUMAN,sp|Q02880|TOP2B_HUMAN,sp|P11142|HSP7C_HUMAN,sp|Q9UK
M9|RALY_HUMAN,sp|P50395|GDIB_HUMAN,sp|O75369|FLNB_HUMAN,sp|P47756|CAPZB_HUMAN,sp|Q96AE4|FUBP1_HUMAN,sp|O43390|HNRPR_HUMAN,sp|Q5R372|RBG1L_HUMAN,sp|P07900|HS90A_HUMAN,sp|P14625|ENPL_HUMAN,sp|P06733|ENOA_HUMAN,sp|P68104|EF1A1_HUMAN,sp|Q99880|H2B1L_HUMAN,sp|Q2M2I8|AAK1_HUMAN,sp|Q92835|SHIP1_HUMAN,sp|Q16531|DDB1_HUMAN,sp|Q5JPE7|NOMO2_HUMAN,sp|P61604|CH10_HUMAN,sp|Q9BQ04|RBM4B_HUMAN,sp|Q16629|SRSF7_HUMAN,sp|P58876|H2B1D_HUMAN,sp|Q9BUQ8|DDX23_HUMAN,sp|P06493|CDK1_HUMAN,sp|P23921|RIR1_HUMAN,sp|O76094|SRP72_HUMAN,sp|P62847|RS24_HUMAN,sp|Q13765|NACA_HUMAN,sp|Q14152|EIF3A_HUMAN,sp|Q13347|EIF3I_HUMAN,sp|P33992|MCM5_HUMAN,sp|Q9Y2X3|NOP58_HUMAN,sp|P53396|ACLY_HUMAN,sp|O75368|SH3L1_HUMAN,sp|Q93079|H2B1H_HUMAN,sp|Q9UHA4|LTOR3_HUMAN,sp|Q96KK5|H2A1H_HUMAN,sp|P12956|XRCC6_HUMAN,sp|P11021|BIP_HUMAN,sp|P63173|RL38_HUMAN,sp|P06899|H2B1J_HUMAN,sp|P21281|VATB2_HUMAN,sp|O60506|HNRPQ_HUMAN,sp|Q5QNW6|H2B2F_HUMAN,sp|Q9UKV8|AGO2_HUMAN,sp|P63261|ACTG_HUMAN,sp|O75306|NDUS2_HUMAN,sp|P57764|GSDMD_HUMAN,sp|P17844|DDX5_HUMAN,sp|P62263|RS14_HUMAN,sp|O15523|DDX3Y_HUMAN,sp|P13639|EF2_HUMAN,sp|Q15155|NOMO1_HUMAN,sp|Q9UKK9|NUDT5_HUMAN,sp|P23527|H2B1O_HUMAN,sp|Q00526|CDK3_HUMAN,sp|P13796|PLSL_HUMAN,sp|Q6DRA6|H2B2D_HUMAN,sp|P62805|H4_HUMAN,sp|Q9BY32|ITPA_HUMAN,sp|Q8N257|H2B3B_HUMAN,sp|P11586|C1TC_HUMAN,sp|P09429|HMGB1_HUMAN,sp|Q9H299|SH3L3_HUMAN,sp|O14950|ML12B_HUMAN,sp|P61254|RL26_HUMAN,sp|P52565|GDIR1_HUMAN,sp|P26583|HMGB2_HUMAN,sp|P00338|LDHA_HUMAN,sp|P25705|ATPA_HUMAN,sp|P09211|GSTP1_HUMAN,sp|P0DP24|CALM2_HUMAN,sp|Q06830|PRDX1_HUMAN,sp|P62249|RS16_HUMAN,sp|Q96JH7|VCIP1_HUMAN,sp|P49327|FAS_HUMAN,sp|O43242|PSMD3_HUMAN,sp|Q9Y5S9|RBM8A_HUMAN,sp|O95602|RPA1_HUMAN,sp|P33778|H2B1B_HUMAN,sp|P02786|TFR1_HUMAN,sp|P0C0S8|H2A1_HUMAN,sp|Q969V3|NCLN_HUMAN,sp|P08865|RSSA_HUMAN,sp|P61158|ARP3_HUMAN,sp|P26196|DDX6_HUMAN,sp|P31483|TIA1_HUMAN,sp|Q99877|H2B1N_HUMAN,sp|P27824|CALX_HUMAN,sp|P62899|RL31_HUMAN,sp|Q02543|RL18A_HUMAN,sp|Q96PK6|RBM14_HUMAN,sp|P62273|RS29_HUMAN,sp|P84090|ERH_HUMAN,sp|P84098|RL19_HUMAN,sp|Q16777|H
2A'..b'507,15.39365452,12.23404255,10.67961165,17.91044776,8.316831683,11.8226601,7.392996109,11.09925293,9.375,11.03896104,11.30820399,19.24290221,10.46255507,8.994708995,10.34482759,10.40268456,17.46031746,10.02178649,17.86743516,12.35465116,12.96296296,8.047337278,9.731543624,8.939393939,11.44067797,9.090909091,16.66666667,9.677419355,6.923076923,11.11111111,9.836065574,7.8125,11.44578313,10.60948081,10,13.6,12.8342246,14.01197605,17.05685619,17.3594132,12.16545012,11.32852729,6.923076923,12.3246493,13.81578947,10.93990755,13.14443133,24.88687783,16.58031088,13.46153846,17.68115942,10.96045198,10.42253521,14.95468278,10.05586592,10.73446328,11.52993348,10.56511057,16.71763507,12.48462485,12.69349845,13.39869281,11.46067416,13.29746349,12.27436823,7.919254658,7.898894155,13.86503067,11.61202186,12.95143213,10.13824885,11.25541126,18.25396825,14.77627471,16.14802355,13.59649123,14.91712707,8.823529412,13.64902507,21.8487395,18.25396825,9.390243902,11.44781145,12.12121212,12.07153502,11.27819549,16.74418605,7.742402315,14.15384615,11.98910082,10.77504726,11.26248865,8.771929825,17.46031746,12.90322581,7.03125,11.65845649,11.77370031,8.571428571,16.66666667,11.15459883,8.025682183,17.46031746,11.40861467,13.6,7.991360691,13.63636364,11.07491857,12.58278146,13.33333333,9.906759907,14.89361702,11.87214612,17.46031746,10.81967213,8.931419458,15.85365854,8.737864078,7.731958763,18.25396825,11.3368984,7.441860465,12.90322581,11.04651163,9.655172414,12.25490196,10.04784689,11.44578313,11.93490054,9.047619048,10.73825503,11.05527638,7.534246575,13.99345336,12.5049781,10.6741573,7.471264368,11.10465116,18.25396825,13.15789474,6.923076923,11.90053286,12.88135593,10.52631579,12.42236025,13.73056995,18.25396825,8.108108108,9.6,11.93181818,17.18983558,7.142857143,11.53846154,8.673469388,7.751937984,10.13986014,8.965517241,11.92893401,12.69035533,16.66666667,11.47540984,22.27979275,14.66854725,12.26190476,16.67820069,14.89361702,6.944444444,16.96113074,11.83800623,11.82795699,11.
9047619\r\n+,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\r\n+Number of Y,128,3,10,7,10,12,48,10,35,19,10,10,24,5,18,5,8,5,19,37,5,5,31,3,48,7,26,5,19,6,12,5,5,6,13,12,6,29,5,12,19,6,26,3,12,10,5,16,13,118,10,18,14,22,36,12,5,6,3,4,16,3,6,22,13,15,10,10,7,7,13,29,3,24,19,24,29,9,4,27,16,25,8,24,18,9,19,12,24,46,15,10,26,89,10,23,40,30,25,24,11,12,5,15,31,35,46,3,28,11,5,16,14,39,20,4,2,24,11,19,14,40,5,5,4,3,21,13,2,5,18,36,5,28,15,19,7,25,1,25,21,45,7,5,12,17,4,4,6,5,16,7,4,4,6,9,6,8,16,12,2,5,8,28,55,21,6,53,5,28,3,19,7,18,13,17,5,15,4,8,47,3,8,3,3,19,6,7,11,5,11,6,22,23,32,45,8,12,37,14,24\r\n+Number of pY,1,1,1,1,2,2,1,1,1,3,1,1,4,1,1,1,1,2,2,2,2,1,1,1,1,2,1,2,1,1,1,1,1,1,1,3,1,2,1,3,3,1,1,2,1,1,2,1,1,2,1,1,1,1,1,4,2,1,1,1,1,1,3,1,2,9,1,1,1,1,1,1,1,2,1,2,1,1,2,1,1,1,3,1,1,1,3,3,2,2,1,1,3,1,1,1,5,1,3,1,3,3,2,1,1,1,1,1,1,1,2,1,1,3,1,1,1,1,1,1,1,1,1,2,1,1,1,3,2,2,1,3,2,1,9,2,1,1,1,1,2,1,1,2,1,1,2,4,1,2,1,1,1,1,1,2,1,2,1,5,1,1,1,1,1,1,1,1,2,1,1,1,2,2,1,1,2,1,3,1,1,1,1,1,1,1,1,1,1,2,1,1,1,2,1,1,1,1,2,1,1\r\n+Total 
AAs,4646,152,2078,334,531,783,1912,368,1490,449,432,454,724,286,592,301,353,156,636,1620,126,730,952,129,1270,425,1581,126,814,207,851,188,206,201,505,406,257,937,128,462,451,317,908,189,261,298,126,459,694,4128,918,845,298,660,944,209,126,217,130,171,1220,128,166,443,370,375,187,835,299,409,411,971,130,998,456,649,2541,221,193,364,345,885,710,662,358,531,451,407,981,1626,646,306,445,2602,277,644,633,815,732,803,434,462,126,961,1189,1140,1267,102,359,238,126,820,297,792,671,133,215,1382,325,734,529,1101,114,126,124,128,609,654,70,126,511,623,126,859,375,463,484,614,151,660,858,1222,219,126,305,627,164,103,194,126,935,215,93,172,145,204,209,332,553,210,149,199,146,1222,2511,534,174,1720,126,760,130,563,295,418,483,386,126,592,125,176,669,56,104,196,129,858,145,394,788,126,427,193,709,840,1445,1222,216,283,642,558,420\r\n'
b
diff -r 000000000000 -r 3e5fdf933646 C and D finder/test-data/input4.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/C and D finder/test-data/input4.csv Fri May 25 10:56:10 2018 -0400
b
b'@@ -0,0 +1,235 @@\n+Substrates,Species,Reference,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,,,,,Phosphite,,\r\n+,,sp|Q8NE71|ABCF1_HUMAN,,,,,,G,N,Y,M,T,F,K,,,,,,,GNYMTFK,LHYYRGNYMTFKKMY,,\r\n+,,sp|Q00526|CDK3_HUMAN; sp|P24941|CDK2_HUMAN; sp|P06493|CDK1_HUMAN,,,I,G,E,G,T,Y,G,V,V,Y,K,,,,,,IGEGTYGVVYK,EKIGEGTYGVVYKAK,EKIGEGTYGVVYKAR,EKIGEGTYGVVYKGR\r\n+,,sp|P13639|EF2_HUMAN,,,,K,E,D,L,Y,L,K,P,I,Q,R,,,,,KEDLYLKPIQR,PGKKEDLYLKPIQRT,,\r\n+,,sp|O43390|HNRPR_HUMAN,,,,,,,D,Y,A,F,V,H,F,E,D,,,,DYAFVHFED,RVKKLKDYAFVHFED,,\r\n+,,sp|P62899|RL31_HUMAN,,,,,,,L,Y,T,L,V,T,Y,V,P,,,,LYTLVTYVP,EDSPNKLYTLVTYVP,,\r\n+,,sp|P78347|GTF2I_HUMAN,,,,T,V,E,D,Y,F,C,F,C,Y,G,K,,,,TVEDYFCFCYGK,LRKTVEDYFCFCYGK,,\r\n+,,sp|P60842|IF4A1_HUMAN,,,,,D,Q,I,Y,D,I,F,Q,K,,,,,,DQIYDIFQK,RGFKDQIYDIFQKLN,,\r\n+,,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN,T,V,P,I,Y,E,G,Y,A,L,P,H,A,I,L,,,,TVPIYEGYALPHAIL,TVPIYEGYALPHAIL,,\r\n+,,sp|Q02790|FKBP4_HUMAN,,G,E,H,S,I,V,Y,L,K,P,S,Y,A,F,,,,GEHSIVYLKPSYAF,KGEHSIVYLKPSYAF,,\r\n+,,sp|Q9NR30|DDX21_HUMAN,,,,,,S,T,Y,E,Q,V,D,L,I,G,,,,STYEQVDLIG,KKYMKSTYEQVDLIG,,\r\n+,,sp|P31939|PUR9_HUMAN,,,,,N,G,N,Y,C,V,L,Q,M,D,Q,,,,NGNYCVLQMDQ,SKKKNGNYCVLQMDQ,,\r\n+,,sp|P60709|ACTB_HUMAN; 
sp|P63261|ACTG_HUMAN,,,,,K,D,L,Y,A,N,T,V,L,S,G,,,,KDLYANTVLSG,VDIRKDLYANTVLSG,,\r\n+,,sp|P62805|H4_HUMAN,,,I,S,G,L,I,Y,E,E,T,R,,,,,,,ISGLIYEETR,KRISGLIYEETRGVL,,\r\n+,,sp|P22234|PUR6_HUMAN,,,,,,E,V,Y,E,L,L,D,S,P,G,,,,EVYELLDSPG,EGKTKEVYELLDSPG,,\r\n+,,sp|P30086|PEBP1_HUMAN,,,,,,,L,Y,E,Q,L,S,G,K,,,,,LYEQLSGK,DDYVPKLYEQLSGK,,\r\n+,,sp|Q08945|SSRP1_HUMAN,,N,M,S,G,S,L,Y,E,M,V,S,R,,,,,,NMSGSLYEMVSR,KNMSGSLYEMVSRVM,,\r\n+,,sp|Q9Y3I0|RTCB_HUMAN,G,M,A,A,A,G,N,Y,A,W,V,N,R,,,,,,GMAAAGNYAWVNR,GMAAAGNYAWVNRSS,,\r\n+,,sp|Q96AE4|FUBP1_HUMAN,,,Q,Q,A,A,Y,Y,A,Q,T,S,P,Q,G,,,,QQAAYYAQTSPQG,YRQQAAYYAQTSPQG,,\r\n+,,sp|P62899|RL31_HUMAN,E,D,S,P,N,K,L,Y,T,L,V,T,Y,V,P,,,,EDSPNKLYTLVTYVP,EDSPNKLYTLVTYVP,,\r\n+,,sp|P52272|HNRPM_HUMAN,N,E,C,G,H,V,L,Y,A,D,I,K,,,,,,,NECGHVLYADIK,NECGHVLYADIKMEN,,\r\n+,,sp|O75369|FLNB_HUMAN,,,,,D,G,T,Y,A,V,T,Y,I,P,D,,,,DGTYAVTYIPD,HDNKDGTYAVTYIPD,,\r\n+,,sp|P57764|GSDMD_HUMAN,,S,R,G,D,N,V,Y,V,V,T,E,V,L,Q,,,,SRGDNVYVVTEVLQ,RSRGDNVYVVTEVLQ,,\r\n+,,sp|P62805|H4_HUMAN,V,T,A,M,D,V,V,Y,A,L,K,,,,,,,,VTAMDVVYALK,VTAMDVVYALKRQGR,,\r\n+,,sp|Q9NYV4|CDK12_HUMAN,R,Q,S,V,S,P,P,Y,K,E,P,S,A,Y,Q,,,,RQSVSPPYKEPSAYQ,RQSVSPPYKEPSAYQ,,\r\n+,,sp|P60842|IF4A1_HUMAN,,G,F,K,D,Q,I,Y,D,I,F,Q,K,,,,,,GFKDQIYDIFQK,RGFKDQIYDIFQKLN,,\r\n+,,sp|Q9BTM1|H2AJ_HUMAN; sp|Q99878|H2A1J_HUMAN; sp|Q96KK5|H2A1H_HUMAN; sp|Q6FI13|H2A2A_HUMAN; sp|Q16777|H2A2C_HUMAN; sp|P0C0S8|H2A1_HUMAN; 
sp|Q8IUE6|H2A2B_HUMAN,,,,,K,G,N,Y,A,E,R,,,,,,,,KGNYAER,RLLRKGNYAERVGAG,,\r\n+,,sp|P52565|GDIR1_HUMAN,,,I,D,K,T,D,Y,M,V,G,S,Y,G,P,,,,IDKTDYMVGSYGP,VKIDKTDYMVGSYGP,,\r\n+,,sp|P08865|RSSA_HUMAN,,,,S,D,G,I,Y,I,I,N,L,K,,,,,,SDGIYIINLK,KRKSDGIYIINLKRT,,\r\n+,,sp|P54727|RD23B_HUMAN,,,,,A,V,E,Y,L,L,M,G,I,P,G,,,,AVEYLLMGIPG,NPDRAVEYLLMGIPG,,\r\n+,,sp|Q02543|RL18A_HUMAN,,S,S,G,E,I,V,Y,C,G,Q,V,F,E,K,,,,SSGEIVYCGQVFEK,KSSGEIVYCGQVFEK,,\r\n+,,sp|O43390|HNRPR_HUMAN,,,,,L,K,D,Y,A,F,V,H,F,E,D,,,,LKDYAFVHFED,RVKKLKDYAFVHFED,,\r\n+,,sp|P12956|XRCC6_HUMAN,,,,,,N,I,Y,V,L,Q,E,L,D,N,,,,NIYVLQELDN,SVNFKNIYVLQELDN,,\r\n+,,sp|Q16881|TRXR1_HUMAN,,,,,S,Y,D,Y,D,L,I,I,I,G,G,,,,SYDYDLIIIGG,DLPKSYDYDLIIIGG,,\r\n+,,sp|O43390|HNRPR_HUMAN,,,,,S,T,A,Y,E,D,Y,Y,Y,H,P,,,,STAYEDYYYHP,QASRSTAYEDYYYHP,,\r\n+,,sp|P84090|ERH_HUMAN,,,,,,,I,Y,V,L,L,R,,,,,,,IYVLLR,DWIKEKIYVLLRRQA,,\r\n+,,sp|O76094|SRP72_HUMAN,,,,,,E,L,Y,G,Q,V,L,Y,R,,,,,ELYGQVLYR,TDKLKELYGQVLYRL,,\r\n+,,sp|P62899|RL31_HUMAN,,L,Y,T,L,V,T,Y,V,P,V,T,T,F,K,,,,LYTLVTYVPVTTFK,KLYTLVTYVPVTTFK,,\r\n+,,sp|Q16778|H2B2E_HUMAN; sp|P33778|H2B1B_HUMAN; sp|P23527|H2B1O_HUMAN; sp|P06899|H2B1J_HUMAN; sp|Q8N257|H2B3B_HUMAN; sp|Q6DRA6|H2B2D_HUMAN; sp|Q6DN03|H2B2C_HUMAN,,K,E,S,Y,S,I,Y,V,Y,K,,,,,,,,KESYSIYVYK,RKESYSIYVYKVLKQ,RKESYSIYVYKVLKR,\r\n+,,sp|O14979|HNRDL_HUMAN,,,,D,L,T,E,Y,L,S,R,,,,,,,,DLTEYLSR,SKKDLTEYLSRFGEV,,\r\n+,,sp|P09211|GSTP1_HUMAN; 
sp|GSTP1_HUMAN,,,,,,,P,Y,T,V,V,Y,F,P,V,,,,PYTVVYFPV,MPPYTVVYFPV,PPYTVVYFPV,\r\n+,,sp|Q13263|TIF1B_HUMAN,P,G,S,T,T,E,D,Y,N,L,I,V,I,E,R,,,,PGSTTEDYNLIVIER,PGSTTEDYNLIVIER,,\r'..b',,,,,PPYTVVYFPVR,MPPYTVVYFPVRGRC,PPYTVVYFPVRGRC,\r\n+,,sp|O75306|NDUS2_HUMAN,,,,,T,Q,P,Y,D,V,Y,D,Q,V,E,,,,TQPYDVYDQVE,DLRKTQPYDVYDQVE,,\r\n+,,sp|Q2M2I8|AAK1_HUMAN,,,,,,G,H,Y,V,L,C,D,F,G,S,,,,GHYVLCDFGS,LLHDRGHYVLCDFGS,,\r\n+,,sp|P62847|RS24_HUMAN,T,T,G,F,G,M,I,Y,D,S,L,D,Y,A,K,,,,TTGFGMIYDSLDYAK,TTGFGMIYDSLDYAK,,\r\n+,,sp|Q14240|IF4A2_HUMAN,,G,F,K,D,Q,I,Y,E,I,F,Q,K,,,,,,GFKDQIYEIFQK,RGFKDQIYEIFQKLN,,\r\n+,,sp|O43390|HNRPR_HUMAN,,S,T,A,Y,E,D,Y,Y,Y,H,P,P,P,R,,,,STAYEDYYYHPPPR,RSTAYEDYYYHPPPR,,\r\n+,,sp|P14618|KPYM_HUMAN,,E,A,E,A,A,I,Y,H,L,Q,L,F,E,E,,,,EAEAAIYHLQLFEE,REAEAAIYHLQLFEE,,\r\n+,,sp|P25789|PSA4_HUMAN,Q,S,D,P,S,G,N,Y,G,G,W,K,,,,,,,QSDPSGNYGGWK,QSDPSGNYGGWKATC,,\r\n+,,sp|P09211|GSTP1_HUMAN; sp|GSTP1_HUMAN,,,,,,P,P,Y,T,V,V,Y,F,P,V,,,,PPYTVVYFPV,MPPYTVVYFPV,PPYTVVYFPV,\r\n+,,sp|O43390|HNRPR_HUMAN,,,,,,,G,Y,A,F,I,T,F,C,G,,,,GYAFITFCG,LSGQNRGYAFITFCG,,\r\n+,,sp|Q92835|SHIP1_HUMAN,,,,,N,Q,N,Y,M,N,I,L,R,,,,,,NQNYMNILR,KLRRNQNYMNILRFL,,\r\n+,,sp|P60174|TPIS_HUMAN,,,,V,V,L,A,Y,E,P,V,W,A,I,G,,,,VVLAYEPVWAIG,WSKVVLAYEPVWAIG,,\r\n+,,sp|Q9UKK9|NUDT5_HUMAN,,,,,T,L,H,Y,E,C,I,V,L,V,K,,,,TLHYECIVLVK,VLQRTLHYECIVLVK,,\r\n+,,sp|Q9Y277|VDAC3_HUMAN,,,,,V,C,N,Y,G,L,T,F,T,Q,K,,,,VCNYGLTFTQK,TKYKVCNYGLTFTQK,,\r\n+,,sp|P50395|GDIB_HUMAN,D,Y,L,D,Q,P,C,Y,E,T,I,N,R,,,,,,DYLDQPCYETINR,DYLDQPCYETINRIK,,\r\n+,,sp|P62805|H4_HUMAN,,,,D,A,V,T,Y,T,E,H,A,K,,,,,,DAVTYTEHAK,VIRDAVTYTEHAKRK,,\r\n+,,sp|P60842|IF4A1_HUMAN; sp|Q14240|IF4A2_HUMAN; 
sp|P38919|IF4A3_HUMAN,,,,,,G,I,Y,A,Y,G,F,E,K,P,,,,GIYAYGFEKP,ESLLRGIYAYGFEKP,EDLLRGIYAYGFEKP,\r\n+,,sp|Q9Y490|TLN1_HUMAN,G,I,T,N,H,D,E,Y,S,L,V,R,,,,,,,GITNHDEYSLVR,GITNHDEYSLVRELM,,\r\n+,,sp|P0DP24|CALM2_HUMAN,F,D,K,D,G,N,G,Y,I,S,A,A,E,L,R,,,,FDKDGNGYISAAELR,FDKDGNGYISAAELR,,\r\n+,,sp|P11142|HSP7C_HUMAN,G,I,D,L,G,T,T,Y,S,C,V,G,V,F,Q,,,,GIDLGTTYSCVGVFQ,GIDLGTTYSCVGVFQ,,\r\n+,,sp|E9PAV3|NACAM_HUMAN; sp|Q13765|NACA_HUMAN,,S,P,A,S,D,T,Y,I,V,F,G,E,A,K,,,,SPASDTYIVFGEAK,KSPASDTYIVFGEAK,,\r\n+,,sp|Q06830|PRDX1_HUMAN,,,T,I,A,Q,D,Y,G,V,L,K,,,,,,,TIAQDYGVLK,KRTIAQDYGVLKADE,,\r\n+,,sp|P23193|TCEA1_HUMAN,,,,,N,C,T,Y,T,Q,V,Q,T,R,,,,,NCTYTQVQTR,CKKKNCTYTQVQTRS,,\r\n+,,sp|Q99497|PARK7_HUMAN,,,,,E,G,P,Y,D,V,V,V,L,P,G,,,,EGPYDVVVLPG,DAKKEGPYDVVVLPG,,\r\n+,,sp|P21281|VATB2_HUMAN,A,D,V,S,N,Q,L,Y,A,C,Y,A,I,G,K,,,,ADVSNQLYACYAIGK,ADVSNQLYACYAIGK,,\r\n+,,sp|Q969T4|UB2E3_HUMAN; sp|P51965|UB2E1_HUMAN; sp|Q96LR5|UB2E2_HUMAN,,,,G,D,N,I,Y,E,W,R,,,,,,,,GDNIYEWR,GPKGDNIYEWRSTIL,,\r\n+,,sp|P84098|RL19_HUMAN,,,,,,H,M,Y,H,S,L,Y,L,K,,,,,HMYHSLYLK,KKIDRHMYHSLYLKV,,\r\n+,,sp|Q92544|TM9S4_HUMAN,,T,Q,L,P,Y,E,Y,Y,S,L,P,F,C,Q,,,,TQLPYEYYSLPFCQ,RTQLPYEYYSLPFCQ,,\r\n+,,sp|P11940|PABP1_HUMAN,,,,,S,L,G,Y,A,Y,V,N,F,Q,Q,,,,SLGYAYVNFQQ,ITRRSLGYAYVNFQQ,,\r\n+,,sp|Q07020|RL18_HUMAN,,,,S,Q,D,I,Y,L,R,,,,,,,,,SQDIYLR,EPKSQDIYLRLLVKL,,\r\n+,,sp|P62906|RL10A_HUMAN,,,,,D,T,L,Y,E,A,V,R,,,,,,,DTLYEAVR,KVSRDTLYEAVREVL,,\r\n+,,sp|Q9HC35|EMAL4_HUMAN,I,I,N,Q,E,G,E,Y,I,K,M,F,M,R,,,,,IINQEGEYIKMFMR,IINQEGEYIKMFMRG,,\r\n+,,sp|O00571|DDX3X_HUMAN; 
sp|O15523|DDX3Y_HUMAN,,,,D,K,D,A,Y,S,S,F,G,S,R,,,,,DKDAYSSFGSR,SSKDKDAYSSFGSRS,CSKDKDAYSSFGSRD,\r\n+,,sp|P08238|HS90B_HUMAN,E,M,T,S,L,S,E,Y,V,S,R,,,,,,,,EMTSLSEYVSR,EMTSLSEYVSRMKET,,\r\n+,,sp|P19338|NUCL_HUMAN,,,,S,I,S,L,Y,Y,T,G,E,K,,,,,,SISLYYTGEK,DGRSISLYYTGEKGQ,,\r\n+,,sp|P78527|PRKDC_HUMAN,,,,,,,F,Y,Q,G,F,L,F,S,E,,,,FYQGFLFSE,VFNELKFYQGFLFSE,,\r\n+,,sp|O60841|IF2P_HUMAN,,,T,S,E,V,P,Y,A,G,I,N,I,G,P,,,,TSEVPYAGINIGP,LKTSEVPYAGINIGP,,\r\n+,,sp|P63244|RACK1_HUMAN,,,,D,E,T,N,Y,G,I,P,Q,R,,,,,,DETNYGIPQR,LTRDETNYGIPQRAL,,\r\n+,,sp|P58876|H2B1D_HUMAN; sp|Q99879|H2B1M_HUMAN; sp|Q99880|H2B1L_HUMAN; sp|Q99877|H2B1N_HUMAN; sp|Q93079|H2B1H_HUMAN; sp|Q5QNW6|H2B2F_HUMAN; sp|P62807|H2B1C_HUMAN; sp|O60814|H2B1K_HUMAN; sp|P57053|H2BFS_HUMAN,,,E,S,Y,S,V,Y,V,Y,K,,,,,,,,ESYSVYVYK,RKESYSVYVYKVLKQ,,\r\n+,,sp|P62263|RS14_HUMAN,D,R,D,E,S,S,P,Y,A,A,M,L,A,A,Q,,,,DRDESSPYAAMLAAQ,DRDESSPYAAMLAAQ,,\r\n+,,sp|Q9NQS7|INCE_HUMAN,,,,,,,S,Y,K,Q,A,V,S,E,L,,,,SYKQAVSEL,SARRKRSYKQAVSEL,,\r\n+,,sp|Q9UQ80|PA2G4_HUMAN,,,,S,D,Q,D,Y,I,L,K,,,,,,,,SDQDYILK,PLKSDQDYILKEGDL,,\r\n+,,sp|Q99497|PARK7_HUMAN,,,,K,E,G,P,Y,D,V,V,V,L,P,G,,,,KEGPYDVVVLPG,DAKKEGPYDVVVLPG,,\r\n'
b
diff -r 000000000000 -r 3e5fdf933646 C and D finder/test-data/input5.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/C and D finder/test-data/input5.csv Fri May 25 10:56:10 2018 -0400
b
b'@@ -0,0 +1,36 @@\n+Amino Acids,sp|Q14204|DYHC1_HUMAN,sp|P62269|RS18_HUMAN,sp|Q14258|TRI25_HUMAN,sp|E9PAV3|NACAM_HUMAN,sp|P07195|LDHB_HUMAN,sp|Q9NR30|DDX21_HUMAN,sp|P26599|PTBP1_HUMAN,sp|P78406|RAE1L_HUMAN,sp|Q9BQE3|TBA1C_HUMAN,sp|O75165|DJC13_HUMAN,sp|Q92841|DDX17_HUMAN,sp|P42167|LAP2B_HUMAN,sp|Q14566|MCM6_HUMAN,sp|P08238|HS90B_HUMAN,sp|P60174|TPIS_HUMAN,sp|P31939|PUR9_HUMAN,sp|Q8TDN6|BRX1_HUMAN,sp|P62750|RL23A_HUMAN,sp|Q96QV6|H2A1A_HUMAN,sp|P11940|PABP1_HUMAN,sp|Q9UM73|ALK_HUMAN,sp|P62807|H2B1C_HUMAN,sp|P52272|HNRPM_HUMAN,sp|P10253|LYAG_HUMAN,sp|Q9BTM1|H2AJ_HUMAN,sp|Q08211|DHX9_HUMAN,sp|P22234|PUR6_HUMAN,sp|P41250|GARS_HUMAN,sp|P57053|H2BFS_HUMAN,sp|Q92769|HDAC2_HUMAN,sp|P55884|EIF3B_HUMAN,sp|Q4VC31|CCD58_HUMAN,sp|Q969T4|UB2E3_HUMAN,sp|Q9H8H2|DDX31_HUMAN,sp|Q07020|RL18_HUMAN,sp|Q9Y6E2|BZW2_HUMAN,sp|Q9NQ50|RM40_HUMAN,sp|P07948|LYN_HUMAN,sp|P60891|PRPS1_HUMAN,sp|Q9Y3I0|RTCB_HUMAN,sp|Q93077|H2A1C_HUMAN,sp|P60842|IF4A1_HUMAN,sp|P62917|RL8_HUMAN,sp|P63010|AP2B1_HUMAN,sp|P62979|RS27A_HUMAN,sp|Q71U36|TBA1A_HUMAN,sp|P63244|RACK1_HUMAN,sp|P04040|CATA_HUMAN,sp|Q99497|PARK7_HUMAN,sp|Q16778|H2B2E_HUMAN,sp|P48444|COPD_HUMAN,sp|Q02790|FKBP4_HUMAN,sp|P42166|LAP2A_HUMAN,sp|P78527|PRKDC_HUMAN,sp|Q8NE71|ABCF1_HUMAN,sp|P54136|SYRC_HUMAN,sp|P51858|HDGF_HUMAN,sp|P22061|PIMT_HUMAN,sp|CATA_HUMAN,sp|Q14697|GANAB_HUMAN,sp|O60814|H2B1K_HUMAN,sp|GSTP1_HUMAN,sp|P62906|RL10A_HUMAN,sp|Q6FI13|H2A2A_HUMAN,sp|P18124|RL7_HUMAN,sp|P19105|ML12A_HUMAN,sp|O60841|IF2P_HUMAN,sp|Q99878|H2A1J_HUMAN,sp|P23528|COF1_HUMAN,sp|Q07666|KHDR1_HUMAN,sp|Q9Y617|SERC_HUMAN,sp|Q5JSH3|WDR44_HUMAN,sp|P60709|ACTB_HUMAN,sp|P30086|PEBP1_HUMAN,sp|Q13263|TIF1B_HUMAN,sp|Q96QK1|VPS35_HUMAN,sp|P54727|RD23B_HUMAN,sp|P38919|IF4A3_HUMAN,sp|Q13242|SRSF9_HUMAN,sp|P55060|XPO2_HUMAN,sp|Q15393|SF3B3_HUMAN,sp|Q14699|RFTN1_HUMAN,sp|P78347|GTF2I_HUMAN,sp|P39656|OST48_HUMAN,sp|P62987|RL40_HUMAN,sp|Q16881|TRXR1_HUMAN,sp|Q6DN03|H2B2C_HUMAN,sp|Q08J23|NSUN2_HUMAN,sp|Q9BWF3|RBM4_HUMAN,sp|P52597|HNRPF_HUMAN,sp|Q01780|EXOSX_HUMAN,sp|P19338|NUCL_
HUMAN,sp|P49368|TCPG_HUMAN,sp|O00571|DDX3X_HUMAN,sp|O75503|CLN5_HUMAN,sp|P14618|KPYM_HUMAN,sp|Q13595|TRA2A_HUMAN,sp|P68363|TBA1B_HUMAN,sp|Q9HC35|EMAL4_HUMAN,sp|Q02880|TOP2B_HUMAN,sp|Q9UKM9|RALY_HUMAN,sp|P50395|GDIB_HUMAN,sp|O43390|HNRPR_HUMAN,sp|Q5R372|RBG1L_HUMAN,sp|P07900|HS90A_HUMAN,sp|P14625|ENPL_HUMAN,sp|P06733|ENOA_HUMAN,sp|P68104|EF1A1_HUMAN,sp|Q99880|H2B1L_HUMAN,sp|P26641|EF1G_HUMAN,sp|Q15084|PDIA6_HUMAN,sp|Q96PE3|INP4A_HUMAN,sp|Q92835|SHIP1_HUMAN,sp|Q16531|DDB1_HUMAN,sp|Q5JPE7|NOMO2_HUMAN,sp|P61604|CH10_HUMAN,sp|Q9BQ04|RBM4B_HUMAN,sp|P62995|TRA2B_HUMAN,sp|P52788|SPSY_HUMAN,sp|P58876|H2B1D_HUMAN,sp|Q9BUQ8|DDX23_HUMAN,sp|P49411|EFTU_HUMAN,sp|O76094|SRP72_HUMAN,sp|P62847|RS24_HUMAN,sp|Q13765|NACA_HUMAN,sp|Q14152|EIF3A_HUMAN,sp|Q13347|EIF3I_HUMAN,sp|P42224|STAT1_HUMAN,sp|Q9Y2X3|NOP58_HUMAN,sp|P62195|PRS8_HUMAN,sp|P53396|ACLY_HUMAN,sp|O75368|SH3L1_HUMAN,sp|Q93079|H2B1H_HUMAN,sp|Q9UHA4|LTOR3_HUMAN,sp|Q96KK5|H2A1H_HUMAN,sp|P11908|PRPS2_HUMAN,sp|P12956|XRCC6_HUMAN,sp|P11021|BIP_HUMAN,sp|P21108|PRPS3_HUMAN,sp|P63173|RL38_HUMAN,sp|P06899|H2B1J_HUMAN,sp|P67775|PP2AA_HUMAN,sp|O60506|HNRPQ_HUMAN,sp|P62714|PP2AB_HUMAN,sp|Q5QNW6|H2B2F_HUMAN,sp|Q9UKV8|AGO2_HUMAN,sp|P63261|ACTG_HUMAN,sp|O75306|NDUS2_HUMAN,sp|P57764|GSDMD_HUMAN,sp|P00491|PNPH_HUMAN,sp|P62263|RS14_HUMAN,sp|O15523|DDX3Y_HUMAN,sp|P13639|EF2_HUMAN,sp|P0CG47|UBB_HUMAN,sp|Q15155|NOMO1_HUMAN,sp|Q9UKK9|NUDT5_HUMAN,sp|P23527|H2B1O_HUMAN,sp|P04406|G3P_HUMAN,sp|P13796|PLSL_HUMAN,sp|Q9UNN5|FAF1_HUMAN,sp|Q6DRA6|H2B2D_HUMAN,sp|P62805|H4_HUMAN,sp|P24928|RPB1_HUMAN,sp|Q8N257|H2B3B_HUMAN,sp|Q9UKJ3|GPTC8_HUMAN,sp|P54105|ICLN_HUMAN,sp|P11586|C1TC_HUMAN,sp|O15144|ARPC2_HUMAN,sp|Q9H299|SH3L3_HUMAN,sp|O14950|ML12B_HUMAN,sp|P52565|GDIR1_HUMAN,sp|P61254|RL26_HUMAN,sp|P00338|LDHA_HUMAN,sp|P25705|ATPA_HUMAN,sp|P09211|GSTP1_HUMAN,sp|P0DP24|CALM2_HUMAN,sp|Q06830|PRDX1_HUMAN,sp|P62249|RS16_HUMAN,sp|Q96JH7|VCIP1_HUMAN,sp|Q9Y5S9|RBM8A_HUMAN,sp|P33778|H2B1B_HUMAN,sp|P02786|TFR1_HUMAN,sp|P0C0S8|H2A1_HUMAN,sp|Q969V3|NCLN_HUMAN,sp|P07437|TBB5
_H'..b'46031746,13.11154599,10.02178649,17.86743516,12.35465116,8.047337278,8.939393939,9.583333333,9.251101322,9.315589354,11.44067797,16.66666667,9.090909091,9.677419355,6.923076923,5.241935484,11.11111111,9.836065574,7.8125,11.44578313,10.60948081,10,16.75794085,13.6,12.8342246,14.01197605,9.924623116,17.3594132,12.16545012,10.40723982,11.32852729,11.91454396,14.01384083,12.3246493,13.81578947,9.375,10.93990755,16.58031088,11.2125163,13.46153846,13.4939759,10.96045198,10.42253521,12.11009174,14.95468278,10.05586592,10.73446328,21.9858156,11.52993348,16.71763507,12.48462485,13.39869281,11.46067416,7.898894155,13.86503067,11.61202186,12.95143213,10.13824885,11.25541126,18.25396825,11.67048055,11.13636364,14.53428864,16.14802355,13.59649123,14.91712707,8.823529412,13.64902507,21.18055556,10.10928962,18.25396825,9.390243902,9.292035398,12.07153502,11.27819549,16.74418605,7.742402315,14.15384615,11.46666667,10.77504726,8.128078818,11.26248865,8.771929825,17.46031746,12.90322581,7.03125,11.63522013,11.65845649,11.77370031,12.26415094,8.571428571,16.66666667,10.67961165,8.025682183,10.03236246,17.46031746,11.40861467,13.6,7.991360691,13.63636364,9.688581315,12.58278146,13.33333333,9.906759907,13.10043668,14.89361702,11.87214612,17.46031746,12.53731343,8.931419458,13.53846154,15.85365854,8.737864078,18.98477157,18.25396825,18.9081225,14.34599156,11.3368984,11.66666667,12.90322581,11.04651163,12.25490196,9.655172414,11.44578313,11.93490054,9.047619048,10.73825503,11.05527638,7.534246575,13.99345336,7.471264368,18.25396825,13.15789474,6.923076923,11.90053286,12.83783784,12.88135593,12.42236025,13.73056995,18.25396825,9.6,11.93181818,17.18983558,7.142857143,13.13868613,15.55555556,10.40172166,11.53846154,8.673469388,7.751937984,11.80400891,12.80898876,12.77173913,8.965517241,16.66666667,11.47540984,14.66854725,12.26190476,14.89361702,16.96113074,11.83800623,14.38415159,11.9047619\r\n+,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\r\n+Number of Y,128,3,17,10,7,12,10,10,19,72,29,10,22,24,5,18,8,5,3,19,37,5,5,31,3,48,7,21,5,29,19,1,6,12,5,12,5,24,5,13,3,12,6,29,6,19,6,21,3,5,10,16,13,118,18,22,5,7,21,36,5,12,6,3,11,4,16,3,6,22,13,23,15,10,10,28,7,13,17,29,38,12,24,19,3,24,4,16,27,22,25,8,9,24,18,9,22,19,24,46,10,26,40,30,25,24,11,12,5,14,11,22,31,35,46,3,28,23,19,5,16,10,20,4,2,24,11,18,14,7,40,5,5,4,3,5,21,13,5,2,5,16,36,16,5,28,15,19,7,9,1,25,21,3,45,7,5,9,17,12,4,4,85,5,31,9,16,12,4,4,9,6,8,16,12,2,5,8,28,6,5,28,3,19,16,7,13,17,5,4,8,47,3,9,2,48,8,3,3,24,16,21,6,5,11,22,23,45,12,37,23,24\r\n+Number of pY,2,1,1,1,1,2,1,1,2,1,1,1,1,4,1,2,1,2,1,2,1,2,1,1,1,2,2,1,2,2,1,1,1,1,1,1,1,1,1,1,1,4,1,3,1,2,1,1,2,2,1,1,1,2,1,1,1,2,1,1,2,5,1,1,1,1,1,1,2,3,1,1,9,3,1,1,1,2,1,1,1,1,1,1,1,2,2,1,1,1,1,3,1,1,1,1,1,2,1,1,1,3,3,1,3,1,3,4,2,2,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,2,2,2,3,1,1,1,2,1,2,2,1,3,1,2,1,9,2,1,1,1,1,2,3,1,1,2,2,3,1,2,4,1,2,1,1,1,1,1,1,1,1,2,1,6,1,1,1,1,1,2,1,1,1,1,2,1,1,2,2,1,1,1,9,1,1,1,1,1,1,1,1,1,2,1,1,2,1,1,2,1,1\r\n+Total 
AAs,4646,152,630,2078,334,783,531,368,449,2243,729,454,821,724,286,592,353,156,131,636,1620,126,730,952,129,1270,425,739,126,488,814,144,207,851,188,419,206,512,318,505,130,406,257,937,156,451,317,527,189,126,511,459,694,4128,845,660,240,227,526,944,126,209,217,130,248,171,1220,128,166,443,370,913,375,187,835,796,409,411,221,971,1217,578,998,456,128,649,193,767,364,415,885,710,545,662,358,531,282,451,981,1626,306,445,633,815,732,803,434,462,126,437,440,977,1189,1140,1267,102,359,288,366,126,820,452,671,133,215,1382,325,750,529,406,1101,114,126,124,128,318,609,654,318,70,126,309,623,309,126,859,375,463,484,289,151,660,858,229,1222,219,126,335,627,650,164,103,1970,126,1502,237,935,300,93,172,204,145,332,553,210,149,199,146,1222,174,126,760,130,563,444,295,483,386,126,125,176,669,56,685,135,1394,104,196,129,449,445,1104,145,126,427,709,840,1222,283,642,1161,420\r\n'
b
diff -r 000000000000 -r 3e5fdf933646 C and D finder/test-data/input6.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/C and D finder/test-data/input6.csv Fri May 25 10:56:10 2018 -0400
b
b'@@ -0,0 +1,246 @@\n+Substrates,Species,Reference,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,,,,,Phosphite,\r\n+,,sp|Q8NE71|ABCF1_HUMAN,,,,,,G,N,Y,M,T,F,K,,,,,,,GNYMTFK,LHYYRGNYMTFKKMY,\r\n+,,sp|P13639|EF2_HUMAN,,,,K,E,D,L,Y,L,K,P,I,Q,R,,,,,KEDLYLKPIQR,PGKKEDLYLKPIQRT,\r\n+,,sp|O43390|HNRPR_HUMAN,,,,,,,D,Y,A,F,V,H,F,E,D,,,,DYAFVHFED,RVKKLKDYAFVHFED,\r\n+,,sp|P26641|EF1G_HUMAN,,,,,,,E,Y,F,S,W,E,G,A,F,,,,EYFSWEGAF,TQTLVREYFSWEGAF,\r\n+,,sp|P09211|GSTP1_HUMAN; sp|GSTP1_HUMAN,,,Y,I,S,L,I,Y,T,N,Y,E,A,G,K,,,,YISLIYTNYEAGK,CKYISLIYTNYEAGK,\r\n+,,sp|P62899|RL31_HUMAN,,,,,,,L,Y,T,L,V,T,Y,V,P,,,,LYTLVTYVP,EDSPNKLYTLVTYVP,\r\n+,,sp|P78347|GTF2I_HUMAN,,,,T,V,E,D,Y,F,C,F,C,Y,G,K,,,,TVEDYFCFCYGK,LRKTVEDYFCFCYGK,\r\n+,,sp|Q08J23|NSUN2_HUMAN,,L,A,Q,E,G,I,Y,T,L,Y,P,F,I,N,,,,LAQEGIYTLYPFIN,RLAQEGIYTLYPFIN,\r\n+,,sp|P60842|IF4A1_HUMAN,,,,,D,Q,I,Y,D,I,F,Q,K,,,,,,DQIYDIFQK,RGFKDQIYDIFQKLN,\r\n+,,sp|P60709|ACTB_HUMAN; sp|P63261|ACTG_HUMAN,T,V,P,I,Y,E,G,Y,A,L,P,H,A,I,L,,,,TVPIYEGYALPHAIL,TVPIYEGYALPHAIL,\r\n+,,sp|Q02790|FKBP4_HUMAN,,G,E,H,S,I,V,Y,L,K,P,S,Y,A,F,,,,GEHSIVYLKPSYAF,KGEHSIVYLKPSYAF,\r\n+,,sp|Q9NR30|DDX21_HUMAN,,,,,,S,T,Y,E,Q,V,D,L,I,G,,,,STYEQVDLIG,KKYMKSTYEQVDLIG,\r\n+,,sp|P31939|PUR9_HUMAN,,,,,N,G,N,Y,C,V,L,Q,M,D,Q,,,,NGNYCVLQMDQ,SKKKNGNYCVLQMDQ,\r\n+,,sp|P60709|ACTB_HUMAN; 
sp|P63261|ACTG_HUMAN,,,,,K,D,L,Y,A,N,T,V,L,S,G,,,,KDLYANTVLSG,VDIRKDLYANTVLSG,\r\n+,,sp|P62805|H4_HUMAN,,,I,S,G,L,I,Y,E,E,T,R,,,,,,,ISGLIYEETR,KRISGLIYEETRGVL,\r\n+,,sp|P22234|PUR6_HUMAN,,,,,,E,V,Y,E,L,L,D,S,P,G,,,,EVYELLDSPG,EGKTKEVYELLDSPG,\r\n+,,sp|P30086|PEBP1_HUMAN,,,,,,,L,Y,E,Q,L,S,G,K,,,,,LYEQLSGK,DDYVPKLYEQLSGK,\r\n+,,sp|Q08945|SSRP1_HUMAN,,N,M,S,G,S,L,Y,E,M,V,S,R,,,,,,NMSGSLYEMVSR,KNMSGSLYEMVSRVM,\r\n+,,sp|Q9Y3I0|RTCB_HUMAN,G,M,A,A,A,G,N,Y,A,W,V,N,R,,,,,,GMAAAGNYAWVNR,GMAAAGNYAWVNRSS,\r\n+,,sp|P52272|HNRPM_HUMAN,N,E,C,G,H,V,L,Y,A,D,I,K,,,,,,,NECGHVLYADIK,NECGHVLYADIKMEN,\r\n+,,sp|P38919|IF4A3_HUMAN,,,,,E,Q,I,Y,D,V,Y,R,,,,,,,EQIYDVYR,KGFKEQIYDVYRYLP,\r\n+,,sp|Q14258|TRI25_HUMAN,,,,F,D,T,I,Y,Q,I,L,L,K,,,,,,FDTIYQILLK,NSKFDTIYQILLKKK,\r\n+,,sp|P57764|GSDMD_HUMAN,,S,R,G,D,N,V,Y,V,V,T,E,V,L,Q,,,,SRGDNVYVVTEVLQ,RSRGDNVYVVTEVLQ,\r\n+,,sp|P41250|GARS_HUMAN,,,,,,T,S,Y,G,W,I,E,I,V,G,,,,TSYGWIEIVG,DAESKTSYGWIEIVG,\r\n+,,sp|P62805|H4_HUMAN,V,T,A,M,D,V,V,Y,A,L,K,,,,,,,,VTAMDVVYALK,VTAMDVVYALKRQGR,\r\n+,,sp|Q15393|SF3B3_HUMAN,,,,,,,D,Y,I,V,V,G,S,D,S,,,,DYIVVGSDS,LTGGTKDYIVVGSDS,\r\n+,,sp|P60842|IF4A1_HUMAN,,G,F,K,D,Q,I,Y,D,I,F,Q,K,,,,,,GFKDQIYDIFQK,RGFKDQIYDIFQKLN,\r\n+,,sp|Q9BTM1|H2AJ_HUMAN; sp|Q99878|H2A1J_HUMAN; sp|Q96KK5|H2A1H_HUMAN; sp|Q6FI13|H2A2A_HUMAN; sp|Q16777|H2A2C_HUMAN; sp|P0C0S8|H2A1_HUMAN; sp|Q96QV6|H2A1A_HUMAN; 
sp|Q93077|H2A1C_HUMAN,,,,,K,G,N,Y,A,E,R,,,,,,,,KGNYAER,RLLRKGNYAERVGAG,RLLRKGNYAERIGAG\r\n+,,sp|P52565|GDIR1_HUMAN,,,I,D,K,T,D,Y,M,V,G,S,Y,G,P,,,,IDKTDYMVGSYGP,VKIDKTDYMVGSYGP,\r\n+,,sp|P08865|RSSA_HUMAN,,,,S,D,G,I,Y,I,I,N,L,K,,,,,,SDGIYIINLK,KRKSDGIYIINLKRT,\r\n+,,sp|Q07666|KHDR1_HUMAN,K,K,D,D,E,E,N,Y,L,D,L,F,S,H,K,,,,KKDDEENYLDLFSHK,KKDDEENYLDLFSHK,\r\n+,,sp|P54727|RD23B_HUMAN,,,,,A,V,E,Y,L,L,M,G,I,P,G,,,,AVEYLLMGIPG,NPDRAVEYLLMGIPG,\r\n+,,sp|O43390|HNRPR_HUMAN,,,,,L,K,D,Y,A,F,V,H,F,E,D,,,,LKDYAFVHFED,RVKKLKDYAFVHFED,\r\n+,,sp|Q02543|RL18A_HUMAN,,S,S,G,E,I,V,Y,C,G,Q,V,F,E,K,,,,SSGEIVYCGQVFEK,KSSGEIVYCGQVFEK,\r\n+,,sp|P12956|XRCC6_HUMAN,,,,,,N,I,Y,V,L,Q,E,L,D,N,,,,NIYVLQELDN,SVNFKNIYVLQELDN,\r\n+,,sp|P49368|TCPG_HUMAN,I,S,D,L,A,Q,H,Y,L,M,R,,,,,,,,ISDLAQHYLMR,ISDLAQHYLMRANIT,\r\n+,,sp|Q16881|TRXR1_HUMAN,,,,,S,Y,D,Y,D,L,I,I,I,G,G,,,,SYDYDLIIIGG,DLPKSYDYDLIIIGG,\r\n+,,sp|P84090|ERH_HUMAN,,,,,,,I,Y,V,L,L,R,,,,,,,IYVLLR,DWIKEKIYVLLRRQA,\r\n+,,sp|Q9UHA4|LTOR3_HUMAN,,,S,I,I,C,Y,Y,N,T,Y,Q,V,V,Q,,,,SIICYYNTYQVVQ,NKSIICYYNTYQVVQ,\r\n+,,sp|Q14204|DYHC1_HUMAN,M,D,L,E,K,P,N,Y,I,V,P,D,Y,M,P,,,,MDLEKPNYIVPDYMP,MDLEKPNYIVPDYMP,\r\n+,,sp|Q9UHA4|LTOR3_HUMAN,,,,S,I,I,C,Y,Y,N,T,Y,Q,V,V,,,,SIICYYNTYQVV,KNKSIICYYNTYQVV,\r\n+,,sp|O76094|SRP72_HUMAN,,,,,,E,L,Y,G,Q,V,L,Y,R,,,,,ELYGQVLYR,TDKLKELYGQVLYRL,\r\n+,,sp|Q96QK1|VPS35_HUMAN,S,E,D,P,D,Q,Q,Y,L,I,L,N,T,A,R,,,,SEDPDQQYLILNTAR,SEDPDQQYLILNTAR,\r\n+,,sp|P62899|RL31_HUMAN,,L,Y,T,L,V,T,Y,V,P,V,T,T,F,K,,,,LYTLVTYVPVT'..b'AN,,,,,,,,Y,Q,V,F,F,F,G,T,,,,YQVFFFGT,VKSTANKYQVFFFGT,\r\n+,,sp|Q96PE3|INP4A_HUMAN,D,G,G,S,D,Q,N,Y,D,I,V,T,I,G,A,,,,DGGSDQNYDIVTIGA,DGGSDQNYDIVTIGA,\r\n+,,sp|P24928|RPB1_HUMAN,,,,,,E,L,Y,H,V,I,S,F,D,G,,,,ELYHVISFDG,KALERELYHVISFDG,\r\n+,,sp|P14618|KPYM_HUMAN,,E,A,E,A,A,I,Y,H,L,Q,L,F,E,E,,,,EAEAAIYHLQLFEE,REAEAAIYHLQLFEE,\r\n+,,sp|Q9BSJ8|ESYT1_HUMAN,,,,H,L,S,P,Y,A,T,L,T,V,G,D,,,,HLSPYATLTVGD,GTKHLSPYATLTVGD,\r\n+,,sp|Q92769|HDAC2_HUMAN,,,,,,,Y,Y,A,V,N,F,P,M,R,,,,YYAVNFPMR,GAGKGKYYAVNFPMR,\r\n+,,sp|P09211|GSTP1_HUMAN; 
sp|GSTP1_HUMAN,,,,,,P,P,Y,T,V,V,Y,F,P,V,,,,PPYTVVYFPV,MPPYTVVYFPV,PPYTVVYFPV\r\n+,,sp|O43390|HNRPR_HUMAN,,,,,,,G,Y,A,F,I,T,F,C,G,,,,GYAFITFCG,LSGQNRGYAFITFCG,\r\n+,,sp|Q92835|SHIP1_HUMAN,,,,,N,Q,N,Y,M,N,I,L,R,,,,,,NQNYMNILR,KLRRNQNYMNILRFL,\r\n+,,sp|P60174|TPIS_HUMAN,,,,V,V,L,A,Y,E,P,V,W,A,I,G,,,,VVLAYEPVWAIG,WSKVVLAYEPVWAIG,\r\n+,,sp|Q9UKK9|NUDT5_HUMAN,,,,,T,L,H,Y,E,C,I,V,L,V,K,,,,TLHYECIVLVK,VLQRTLHYECIVLVK,\r\n+,,sp|Q9Y277|VDAC3_HUMAN,,,,,V,C,N,Y,G,L,T,F,T,Q,K,,,,VCNYGLTFTQK,TKYKVCNYGLTFTQK,\r\n+,,sp|P50395|GDIB_HUMAN,D,Y,L,D,Q,P,C,Y,E,T,I,N,R,,,,,,DYLDQPCYETINR,DYLDQPCYETINRIK,\r\n+,,sp|P13796|PLSL_HUMAN,S,Y,S,E,E,E,K,Y,A,F,V,N,W,I,N,,,,SYSEEEKYAFVNWIN,SYSEEEKYAFVNWIN,\r\n+,,sp|P62805|H4_HUMAN,,,,D,A,V,T,Y,T,E,H,A,K,,,,,,DAVTYTEHAK,VIRDAVTYTEHAKRK,\r\n+,,sp|P60842|IF4A1_HUMAN; sp|P38919|IF4A3_HUMAN,,,,,,G,I,Y,A,Y,G,F,E,K,P,,,,GIYAYGFEKP,ESLLRGIYAYGFEKP,EDLLRGIYAYGFEKP\r\n+,,sp|P0DP24|CALM2_HUMAN,F,D,K,D,G,N,G,Y,I,S,A,A,E,L,R,,,,FDKDGNGYISAAELR,FDKDGNGYISAAELR,\r\n+,,sp|E9PAV3|NACAM_HUMAN; 
sp|Q13765|NACA_HUMAN,,S,P,A,S,D,T,Y,I,V,F,G,E,A,K,,,,SPASDTYIVFGEAK,KSPASDTYIVFGEAK,\r\n+,,sp|Q06830|PRDX1_HUMAN,,,T,I,A,Q,D,Y,G,V,L,K,,,,,,,TIAQDYGVLK,KRTIAQDYGVLKADE,\r\n+,,sp|Q99497|PARK7_HUMAN,,,,,E,G,P,Y,D,V,V,V,L,P,G,,,,EGPYDVVVLPG,DAKKEGPYDVVVLPG,\r\n+,,sp|Q969T4|UB2E3_HUMAN,,,,G,D,N,I,Y,E,W,R,,,,,,,,GDNIYEWR,GPKGDNIYEWRSTIL,\r\n+,,sp|Q92841|DDX17_HUMAN,,,,,G,T,A,Y,T,F,F,T,P,G,N,,,,GTAYTFFTPGN,STNKGTAYTFFTPGN,\r\n+,,sp|P84098|RL19_HUMAN,,,,,,H,M,Y,H,S,L,Y,L,K,,,,,HMYHSLYLK,KKIDRHMYHSLYLKV,\r\n+,,sp|Q92544|TM9S4_HUMAN,,T,Q,L,P,Y,E,Y,Y,S,L,P,F,C,Q,,,,TQLPYEYYSLPFCQ,RTQLPYEYYSLPFCQ,\r\n+,,sp|P11940|PABP1_HUMAN,,,,,S,L,G,Y,A,Y,V,N,F,Q,Q,,,,SLGYAYVNFQQ,ITRRSLGYAYVNFQQ,\r\n+,,sp|Q07020|RL18_HUMAN,,,,S,Q,D,I,Y,L,R,,,,,,,,,SQDIYLR,EPKSQDIYLRLLVKL,\r\n+,,sp|P26641|EF1G_HUMAN,,,A,A,G,T,L,Y,T,Y,P,E,N,W,R,,,,AAGTLYTYPENWR,MAAGTLYTYPENWR,\r\n+,,sp|P62906|RL10A_HUMAN,,,,,D,T,L,Y,E,A,V,R,,,,,,,DTLYEAVR,KVSRDTLYEAVREVL,\r\n+,,sp|Q13242|SRSF9_HUMAN,,,,G,S,P,H,Y,F,S,P,F,R,P,Y,,,,GSPHYFSPFRPY,QSRGSPHYFSPFRPY,\r\n+,,sp|O75165|DJC13_HUMAN,M,M,S,I,D,D,A,Y,E,V,L,N,L,P,Q,,,,MMSIDDAYEVLNLPQ,MMSIDDAYEVLNLPQ,\r\n+,,sp|Q9HC35|EMAL4_HUMAN,I,I,N,Q,E,G,E,Y,I,K,M,F,M,R,,,,,IINQEGEYIKMFMR,IINQEGEYIKMFMRG,\r\n+,,sp|O00571|DDX3X_HUMAN; sp|O15523|DDX3Y_HUMAN,,,,D,K,D,A,Y,S,S,F,G,S,R,,,,,DKDAYSSFGSR,SSKDKDAYSSFGSRS,CSKDKDAYSSFGSRD\r\n+,,sp|P04040|CATA_HUMAN; 
sp|CATA_HUMAN,,,,L,G,P,N,Y,L,H,I,P,V,N,C,,,,LGPNYLHIPVNC,RHRLGPNYLHIPVNC,\r\n+,,sp|P08238|HS90B_HUMAN,E,M,T,S,L,S,E,Y,V,S,R,,,,,,,,EMTSLSEYVSR,EMTSLSEYVSRMKET,\r\n+,,sp|P42224|STAT1_HUMAN,,,,,S,Q,W,Y,E,L,Q,Q,L,D,S,,,,SQWYELQQLDS,MSQWYELQQLDS,\r\n+,,sp|P19338|NUCL_HUMAN,,,,S,I,S,L,Y,Y,T,G,E,K,,,,,,SISLYYTGEK,DGRSISLYYTGEKGQ,\r\n+,,sp|P78527|PRKDC_HUMAN,,,,,,,F,Y,Q,G,F,L,F,S,E,,,,FYQGFLFSE,VFNELKFYQGFLFSE,\r\n+,,sp|Q08211|DHX9_HUMAN,A,H,N,N,M,T,N,Y,A,T,V,W,A,S,K,,,,AHNNMTNYATVWASK,AHNNMTNYATVWASK,\r\n+,,sp|O60841|IF2P_HUMAN,,,T,S,E,V,P,Y,A,G,I,N,I,G,P,,,,TSEVPYAGINIGP,LKTSEVPYAGINIGP,\r\n+,,sp|P63244|RACK1_HUMAN,,,,D,E,T,N,Y,G,I,P,Q,R,,,,,,DETNYGIPQR,LTRDETNYGIPQRAL,\r\n+,,sp|Q99880|H2B1L_HUMAN; sp|Q99879|H2B1M_HUMAN; sp|Q99877|H2B1N_HUMAN; sp|Q93079|H2B1H_HUMAN; sp|Q5QNW6|H2B2F_HUMAN; sp|P62807|H2B1C_HUMAN; sp|P58876|H2B1D_HUMAN; sp|O60814|H2B1K_HUMAN; sp|P57053|H2BFS_HUMAN,,,E,S,Y,S,V,Y,V,Y,K,,,,,,,,ESYSVYVYK,RKESYSVYVYKVLKQ,\r\n+,,sp|P62263|RS14_HUMAN,D,R,D,E,S,S,P,Y,A,A,M,L,A,A,Q,,,,DRDESSPYAAMLAAQ,DRDESSPYAAMLAAQ,\r\n+,,sp|Q99497|PARK7_HUMAN,,,,K,E,G,P,Y,D,V,V,V,L,P,G,,,,KEGPYDVVVLPG,DAKKEGPYDVVVLPG,\r\n+,,sp|P42704|LPPRC_HUMAN,,,,,,A,L,Y,E,H,L,T,A,K,,,,,ALYEHLTAK,VTSAKALYEHLTAKN,\r\n'