| Next changeset 1:155b796033b6 (2012-12-21) |
|
Commit message:
Uploaded |
|
added:
command.R references/ClassIWithoutNQex2-3.plus75.1.ebwt references/ClassIWithoutNQex2-3.plus75.2.ebwt references/ClassIWithoutNQex2-3.plus75.3.ebwt references/ClassIWithoutNQex2-3.plus75.4.ebwt references/ClassIWithoutNQex2-3.plus75.fasta references/ClassIWithoutNQex2-3.plus75.rev.1.ebwt references/ClassIWithoutNQex2-3.plus75.rev.2.ebwt references/HLA2.ex2.plus75.1.ebwt references/HLA2.ex2.plus75.2.ebwt references/HLA2.ex2.plus75.3.ebwt references/HLA2.ex2.plus75.4.ebwt references/HLA2.ex2.plus75.fasta references/HLA2.ex2.plus75.rev.1.ebwt references/HLA2.ex2.plus75.rev.2.ebwt seq2HLA.py seq2HLA.xml |
| b |
| diff -r 000000000000 -r 913ea6991ee4 command.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/command.R Thu Dec 20 10:37:01 2012 -0500 |
| [ |
| @@ -0,0 +1,24 @@ +args <- commandArgs(trailingOnly = TRUE) + +x<-unlist(strsplit(args[2],split=",")) +x<-as.numeric(x) +args[3] +paril<-1-pnorm(as.numeric(args[1]),mean(x),sd(x)) +poutlier<-pbinom(0,length(x),paril) +1-poutlier + +x<-unlist(strsplit(args[5],split=",")) +x<-as.numeric(x) +args[6] +#1-exp(log(pnorm(as.numeric(args[4]),mean(x),sd(x)))*length(x)) +paril<-1-pnorm(as.numeric(args[4]),mean(x),sd(x)) +poutlier<-pbinom(0,length(x),paril) +1-poutlier + +x<-unlist(strsplit(args[8],split=",")) +x<-as.numeric(x) +args[9] +#1-exp(log(pnorm(as.numeric(args[7]),mean(x),sd(x)))*length(x)) +paril<-1-pnorm(as.numeric(args[7]),mean(x),sd(x)) +poutlier<-pbinom(0,length(x),paril) +1-poutlier |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/ClassIWithoutNQex2-3.plus75.1.ebwt |
| b |
| Binary file references/ClassIWithoutNQex2-3.plus75.1.ebwt has changed |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/ClassIWithoutNQex2-3.plus75.2.ebwt |
| b |
| Binary file references/ClassIWithoutNQex2-3.plus75.2.ebwt has changed |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/ClassIWithoutNQex2-3.plus75.3.ebwt |
| b |
| Binary file references/ClassIWithoutNQex2-3.plus75.3.ebwt has changed |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/ClassIWithoutNQex2-3.plus75.4.ebwt |
| b |
| Binary file references/ClassIWithoutNQex2-3.plus75.4.ebwt has changed |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/ClassIWithoutNQex2-3.plus75.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/references/ClassIWithoutNQex2-3.plus75.fasta Thu Dec 20 10:37:01 2012 -0500 |
| b |
| b'@@ -0,0 +1,10260 @@\n+>HLA:HLA00001 A*01:01:01:01 1098 bp\n+ATGGCCGTCATGGCGCCCCGAACCCTCCTCCTGCTACTCTCGGGGGCCCTGGCCCTGACCCAGACCTGGGCGGGCTCCCACTCCATGAGGTATTTCTTCACATCCGTGTCCCGGCCCGGCCGCGGGGAGCCCCGCTTCATCGCCGTGGGCTACGTGGACGACACGCAGTTCGTGCGGTTCGACAGCGACGCCGCGAGCCAGAAGATGGAGCCGCGGGCGCCGTGGATAGAGCAGGAGGGGCCGGAGTATTGGGACCAGGAGACACGGAATATGAAGGCCCACTCACAGACTGACCGAGCGAACCTGGGGACCCTGCGCGGCTACTACAACCAGAGCGAGGACGGTTCTCACACCATCCAGATAATGTATGGCTGCGACGTGGGGCCGGACGGGCGCTTCCTCCGCGGGTACCGGCAGGACGCCTACGACGGCAAGGATTACATCGCCCTGAACGAGGACCTGCGCTCTTGGACCGCGGCGGACATGGCAGCTCAGATCACCAAGCGCAAGTGGGAGGCGGTCCATGCGGCGGAGCAGCGGAGAGTCTACCTGGAGGGCCGGTGCGTGGACGGGCTCCGCAGATACCTGGAGAACGGGAAGGAGACGCTGCAGCGCACGGACCCCCCCAAGACACATATGACCCACCACCCCATCTCTGACCATGAGGCCACCCTGAGGTGCTGGGCCCTGGGCT\n+>HLA:HLA01244 A*01:01:02 546 bp\n+ATGGCCGTCATGGCGCCCCGAACCCTCCTCCTGCTACTCTCGGGGGCCCTGGCCCTGACCCAGACCTGGGCGGGCTCCCACTCCATGAGGTATTTCTTCACATCCGTGTCCCGGCCCGGCCGCGGGGAGCCCCGCTTCATCGCCGTGGGCTACGTGGACGACACGCAGTTCGTGCGGTTCGACAGCGACGCCGCGAGCCAGAAGATGGAGCCGCGGGCGCCGTGGATAGAGCAGGAGGGGCCGGAGTATTGGGACCAGGAGACACGGAATATGAAGGCCCACTCACAGACTGACCGAGCGAACCTGGGGACCCTGCGCGGCTACTACAACCAGAGCGAGGACGGTTCTCACACCATCCAGATAATGTATGGCTGCGACGTGGGGCCGGACGGGCGCTTCCTCCGCGGGTACCGGCAGGACGCCTACGACGGCAAGGATTACATCGCCCTGAACGAGGACCTGCGCTCTTGGACCGCGGCGGACATGGCAGCTCAGATTACCAAGCGCAAGTGGGAGGCGGTCCATGCGGCGGAGCAGCGGAGAGTCTACCTGGAGGGCCGGTGCGTGGACGGGCTCCGCAGATACCTGGAGAACGGGAAGGAGACGCTGCAGCGCACGGACCCCCCCAAGACACATATGACCCACCACCCCATCTCTGACCATGAGGCCACCCTGAGGTGCTGGGCCCTGGGCT\n+>HLA:HLA01971 A*01:01:03 895 bp\n+ATGGCCGTCATGGCGCCCCGAACCCTCCTCCTGCTACTCTCGGGGGCCCTGGCCCTGACCCAGACCTGGGCGGGCTCCCACTCCATGAGGTATTTCTTCACATCCGTGTCCCGGCCCGGCCGCGGGGAGCCCCGCTTCATCGCCGTGGGCTACGTGGACGACACGCAGTTCGTGCGGTTCGACAGCGACGCCGCGAGCCAGAAGATGGAGCCGCGGGCGCCGTGGATAGAGCAGGAGGGGCCGGAGTATTGGGACCAGGAGACACGGAATATGAAGGCCCACTCACAGACTGACCGAGCGAACCTGGGGACCCTGCGCGGCTACTACAACCAGAGCGAGGACGGTTCTCACACCATCCAGATAATGTATGGCTGCGACGTGGGGCCGGACGGGCGCTTCCTCCGCGGGTACCGGCAGGACGCCTACGACGGCAAGGATTACATCGCCCTGAACGAGGACCTGCGCTCTTGGACCGCGGCGGACATGGCAGCTCAGATCACCAAGCGCAAGTGGGAGGCGGTCCATGCGGCGGAGCAGCGGAGAGTCTACCTGGAGGGCCGGTGCGTGGACGGGCTCCGCAGATACCTGGAGAACGGGAAGGAGACGCTGCAGCGCACTGACCCCCCCAAGACACATATGACCCACCACCCCATCTCTGACCATGAGGCCACCCTGAGGTGCTGGGCCCTGGGCT\n+>HLA:HLA02540 A*01:01:04 546 bp\n+ATGGCCGTCATGGCGCCCCGAACCCTCCTCCTGCTACTCTCGGGGGCCCTGGCCCTGACCCAGACCTGGGCGGGCTCCCACTCCATGAGGTATTTCTTCACATCCGTGTCCCGGCCCGGCCGCGGGGAGCCCCGCTTCATCGCCGTGGGCTACGTGGACGACACGCAGTTCGTGCGGTTCGACAGCGACGCCGCGAGCCAGAAGATGGAGCCGCGGGCGCCGTGGATAGAGCAGGAGGGGCCGGAGTATTGGGACCAGGAGACACGGAATATGAAGGCCCACTCACAGACTGACCGAGCGAATCTGGGGACCCTGCGCGGCTACTACAACCAGAGCGAGGACGGTTCTCACACCATCCAGATAATGTATGGCTGCGACGTGGGGCCGGACGGGCGCTTCCTCCGCGGGTACCGGCAGGACGCCTACGACGGCAAGGATTACATCGCCCTGAACGAGGACCTGCGCTCTTGGACCGCGGCGGACATGGCAGCTCAGATCACCAAGCGCAAGTGGGAGGCGGTCCATGCGGCGGAGCAGCGGAGAGTCTACCTGGAGGGCCGGTGCGTGGACGGGCTCCGCAGATACCTGGAGAACGGGAAGGAGACGCTGCAGCGCACGGACCCCCCCAAGACACATATGACCCACCACCCCATCTCTGACCATGAGGCCACCCTGAGGTGCTGGGCCCTGGGCT\n+>HLA:HLA03131 A*01:01:05 822 bp\n+ATGGCCGTCATGGCGCCCCGAACCCTCCTCCTGCTACTCTCGGGGGCCCTGGCCCTGACCCAGACCTGGGCGGGCTCCCACTCCATGAGGTATTTCTTCACATCCGTGTCCCGGCCCGGCCGCGGGGAGCCCCGCTTCATCGCGGTGGGCTACGTGGACGACACGCAGTTCGTGCGGTTCGACAGCGACGCCGCGAGCCAGAAGATGGAGCCGCGGGCGCCGTGGATAGAGCAGGAGGGGCCGGAGTATTGGGACCAGGAGACACGGAATATGAAGGCCCACTCACAGACTGACCGAGCGAACCTGGGGACCCTGCGCGGCTACTACAACCAGAGCGAGGACGGTTCTCACACCATCCAGATAATGTATGGCTGCGACGTGGGGCCGGACGGGCGCTTCCTCCGCGGGTACCGGCAGGACGCCTACGACGGCAAGGATTACATCGCCCTGAACGAGGACCTGCGCTCTTGGACCGCGGCGGACATGGCAGCTCAGATCACCAAGCGCAAGTGGGAGGCGGTCCATGCGGCGGAGCAGCGGAGAGTCTACCTGGAGGGCCGGTGCGTGGACGGGCTCCGCAGATACCTGGAGAACGGGAAGGAGACGCTGCAGCGCACGGACCCCCCCAAGACACATATGACCCACCACCCCATCTCTGACCATGAGGCCACCCTGAGGTGCTGGGCCCTGGGCT\n+>HLA:HLA03742 A*01:01:06 546 bp\n+ATGGCCGTCATGGCGCCCCGAACCCTCCTCCTGCTACTCTCGGGGGCCCTGGCCCTGACCCAGACCTGGGCGGGCTCCCACTCCATGAGGTATTTCTTCACATCCGTGTCCCGGCCCGGCCGCGGGGAGCCCCGCTTCATCGCCGTGGGCTACGTGGACGACACGCAGTTCGTGCGGTTCGACAGCGACGCCGCGAGCCAGAAGATGGAGCCGCGGGCGCCGTGGATAGAGCAGGAGGGGCCGGAGTATTGGGACCAGGAGACACGGAATATGAAGGCCCACTCACAGACTGACCG'..b'ACCAGAGCGAGGACGGTTCTCACACCATCCAGAGGATGTATGGCTGCGACCTGGGGCCCGACGGGCGCCTCCTCCGCGGGTATAACCAGTTCGCCTACGACGGCAAGGATTACATCGCCCTGAACGAGGACCTGCGCTCCTGGACCGCGGCGGACACGGCGGCTCAGATCTCCCAGCGCAAGTTGGAGGCGGCCCGTGAGGCGGAGCAGCTGAGAGCCTACCTGGAGGGCGAGTGCGTGGAGTGGCTCCGCGGATACCTGGAGAACGGGAAGGAGACGCTGCAGCGCGCGGAACACCCAAAGACACACGTGACCCACCATCCCGTCTCTGACCATGAGGCCACCCTGAGGTGCTGGGCCCTGGGCT\n+>HLA:HLA00483 C*18:01 1101 bp\n+ATGCGGGTCATGGCGCCCCGAACCCTCATCCTGCTGCTCTCGGGAGCCCTGGCCCTGACCGAGACCTGGGCCTGCTCCCACTCCATGAGGTATTTCGACACCGCCGTGTCCCGGCCCGGCCGCGGAGAGCCCCGCTTCATCTCAGTGGGCTACGTGGACGACACGCAGTTCGTGCGGTTCGACAGCGACGCCGCGAGTCCGAGAGGGGAGCCCCGGGCGCCGTGGGTGGAGCAGGAGGGGCCGGAGTATTGGGACCGGGAGACACAGAAGTACAAGCGCCAGGCACAGGCTGACCGAGTGAACCTGCGGAAACTGCGCGGCTACTACAACCAGAGCGAGGACGGGTCTCACACCCTCCAGAGGATGTTTGGCTGCGACCTGGGGCCGGACGGGCGCCTCCTCCGCGGGTATAACCAGTTCGCCTACGACGGCAAGGATTACATCGCCCTGAACGAGGATCTGCGCTCCTGGACCGCCGCGGACACGGCGGCTCAGATCACCCAGCGCAAGTGGGAGGCGGCCCGTGAGGCGGAGCAGCGGAGAGCCTACCTGGAGGGCACGTGCGTGGAGTGGCTCCGCAGATACCTGGAGAACGGGAAGGAGACGCTGCAGCGCGCGGAACACCCAAAGACACACGTGACCCACCATCCCGTCTCTGACCATGAGGCCACCCTGAGGTGCTGGGCCCTGGGCT\n+>HLA:HLA00484 C*18:02 1101 bp\n+ATGCGGGTCATGGCGCCCCGAACCCTCATCCTGCTGCTCTCGGGAGCCCTGGCCCTGACCGAGACCTGGGCCTGCTCCCACTCCATGAGGTATTTCGACACCGCCGTGTCCCGGCCCGGCCGCGGAGAGCCCCGCTTCATCTCAGTGGGCTACGTGGACGACACGCAGTTCGTGCGGTTCGACAGCGACGCCGCGAGTCCGAGAGGGGAGCCCCGGGCGCCGTGGGTGGAGCAGGAGGGGCCGGAGTATTGGGACCGGGAGACACAGAAGTACAAGCGCCAGGCACAGGCTGACCGAGTGAACCTGCGGAAACTGCGCGGCTACTACAACCAGAGCGAGGACGGGTCTCACACCCTCCAGAGGATGTTTGGCTGCGACCTGGGGCCGGACGGGCGCCTCCTCCGCGGGTATAACCAGTTCGCCTACGACGGCAAGGATTACATCGCCCTGAACGAGGATCTGCGCTCCTGGACCGCCGCGGACACGGCGGCTCAGATCACCCAGCGCAAGTGGGAGGCGGCCCGTGAGGCGGAGCAGCGGAGAGCCTACCTGGAGGGCACGTGCGTGGAGTGGCTCCGCAGATACCTGGAGAACGGGAAGGAGACGCTGCAGCGCGCGGAACACCCAAAGACACACGTGACCCACCATCCCGTCTCTGACCATGAGGCCACCCTGAGGTGCTGGGCCCTGGGCT\n+>HLA:HLA02839 C*18:03 546 bp\n+ATGCGGGTCATGGCGCCCCGAACCCTCATCCTGCTGCTCTCGGGAGCCCTGGCCCTGACCGAGACCTGGGCCTGCTCCCACTCCATGAGGTATTTCGACACCGCCGTGTCCCGGCCCGGCCGCGGAGAGCCCCGCTTCATCTCAGTGGGCTACGTGGACGACACGCAGTTCGTGCGGTTCGACAGCGACGCCGCGAGTCCGAGAGGGGAGCCCCGGGCGCCGTGGGTGGAGCAGGAGGGGCCGGAGTATTGGGACCGGGAGACACAGAAGTACAAGCGCCAGGCACAGGCTGACCGAGTGAACCTGCGGAAACTGCGCGGCTACTACAACCAGAGCGAGGACGGGTCTCACACCCTCCAGAGGATGTTTGGCTGCGACCTGGGGCCGGACGGGCGCCTCCTCCGCGGGTATAACCAGTTCGCCTACGACGGCAAGGATTACATCGCCCTGAACGAGGATCTGCGCTCCTGGACCGCCGCGGACACGGCGGCTCAGATCACCCAGCGCAAGTGGGAGGCGGCCCGTGAGGCGGAGCAGTGGAGAGCCTACCTGGAGGGCGAGTGCGTGGAGTGGCTCCGCAGATACCTGGAGAACGGGAAGGAGACGCTGCAGCGCGCGGAACACCCAAAGACACACGTGACCCACCATCCCGTCTCTGACCATGAGGCCACCCTGAGGTGCTGGGCCCTGGGCT\n+>HLA:HLA05601 C*18:04 546 bp\n+ATGCGGGTCATGGCGCCCCGAACCCTCATCCTGCTGCTCTCGGGAGCCCTGGCCCTGACCGAGACCTGGGCCTGCTCCCACTCCATGAGGTATTTCGACACCGCCGTGTCCCGGCCCGGCCGCGGAGAGCCCCGCTTCATCGCAGTGGGCTACGTGGACGACACGCAGTTCGTGCGGTTCGACAGCGACGCCGCGAGTCCGAGAGGGGAGCCCCGGGCGCCGTGGGTGGAGCAGGAGGGGCCGGAGTATTGGGACCGGGAGACACAGAAGTACAAGCGCCAGGCACAGGCTGACCGAGTGAACCTGCGGAAACTGCGCGGCTACTACAACCAGAGCGAGGACGGGTCTCACACCCTCCAGAGGATGTTTGGCTGCGACCTGGGGCCGGACGGGCGCCTCCTCCGCGGGTATAACCAGTTCGCCTACGACGGCAAGGATTACATCGCCCTGAACGAGGATCTGCGCTCCTGGACCGCCGCGGACACGGCGGCTCAGATCACCCAGCGCAAGTGGGAGGCGGCCCGTGAGGCGGAGCAGCGGAGAGCCTACCTGGAGGGCACGTGCGTGGAGTGGCTCCGCAGATACCTGGAGAACGGGAAGGAGACGCTGCAGCGCGCGGAACACCCAAAGACACACGTGACCCACCATCCCGTCTCTGACCATGAGGCCACCCTGAGGTGCTGGGCCCTGGGCT\n+>HLA:HLA06443 C*18:05 546 bp\n+ATGCGGGTCATGGCGCCCCGAACCCTCATCCTGCTGCTCTCGGGAGCCCTGGCCCTGACCGAGACCTGGGCCTGCTCCCACTCCATGAGGTATTTCGACACCGCCGTGTCCCGGCCCGGCCGCGGAGAGCCCCGCTTCATCTCAGTGGGCTACGTGGACGACACGCAGTTCGTGCGGTTCGACAGCGACGCCGCGAGTCCGAGAGGGGAGCCGCGGGCGCCGTGGGTGGAGCAGGAGGGGCCGGAGTATTGGGACCGGGAGACACAGAACTACAAGCGCCAGGCACAGGCTGACCGAGTGAACCTGCGGAAACTGCGCGGCTACTACAACCAGAGCGAGGACGGGTCTCACACCCTCCAGAGGATGTTTGGCTGCGACCTGGGGCCGGACGGGCGCCTCCTCCGCGGGTATAACCAGTTCGCCTACGACGGCAAGGATTACATCGCCCTGAACGAGGATCTGCGCTCCTGGACCGCCGCGGACACGGCGGCTCAGATCACCCAGCGCAAGTGGGAGGCGGCCCGTGAGGCGGAGCAGCGGAGAGCCTACCTGGAGGGCACGTGCGTGGAGTGGCTCCGCAGATACCTGGAGAACGGGAAGGAGACGCTGCAGCGCGCGGAACACCCAAAGACACACGTGACCCACCATCCCGTCTCTGACCATGAGGCCACCCTGAGGTGCTGGGCCCTGGGCT\n' |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/ClassIWithoutNQex2-3.plus75.rev.1.ebwt |
| b |
| Binary file references/ClassIWithoutNQex2-3.plus75.rev.1.ebwt has changed |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/ClassIWithoutNQex2-3.plus75.rev.2.ebwt |
| b |
| Binary file references/ClassIWithoutNQex2-3.plus75.rev.2.ebwt has changed |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/HLA2.ex2.plus75.1.ebwt |
| b |
| Binary file references/HLA2.ex2.plus75.1.ebwt has changed |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/HLA2.ex2.plus75.2.ebwt |
| b |
| Binary file references/HLA2.ex2.plus75.2.ebwt has changed |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/HLA2.ex2.plus75.3.ebwt |
| b |
| Binary file references/HLA2.ex2.plus75.3.ebwt has changed |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/HLA2.ex2.plus75.4.ebwt |
| b |
| Binary file references/HLA2.ex2.plus75.4.ebwt has changed |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/HLA2.ex2.plus75.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/references/HLA2.ex2.plus75.fasta Thu Dec 20 10:37:01 2012 -0500 |
| b |
| b'@@ -0,0 +1,2546 @@\n+>HLA:HLA00601 DQA1*01:01:01 768 bp\n+TAAACAAAGCTCTGCTGCTGGGGGCCCTCGCTCTGACCACCGTGATGAGCCCCTGTGGAGGTGAAGACATTGTGGCTGACCACGTTGCCTCTTGTGGTGTAAACTTGTACCAGTTTTACGGTCCCTCTGGCCAGTACACCCATGAATTTGATGGAGATGAGGAGTTCTACGTGGACCTGGAGAGGAAGGAGACTGCCTGGCGGTGGCCTGAGTTCAGCAAATTTGGAGGTTTTGACCCGCAGGGTGCACTGAGAAACATGGCTGTGGCAAAACACAACTTGAACATCATGATTAAACGCTACAACTCTACCGCTGCTACCAATGAGGTTCCTGAGGTCACAGTGTTTTCCAAGTCTCCCGTGACACTGGGTCAGCCCAACACCCTCATTTGTCTTGTGG\n+>HLA:HLA01409 DQA1*01:01:02 768 bp\n+TAAACAAAGCTCTGCTGCTGGGGGCCCTCGCTCTGACCACCGTGATGAGCCCCTGTGGAGGTGAAGACATTGTGGCTGACCACGTTGCCTCTTGTGGTGTAAACTTGTACCAGTTTTACGGTCCCTCTGGCCAGTACACCCATGAATTTGATGGAGATGAGGAGTTCTACGTGGACCTGGAGAGGAAGGAGACTGCCTGGCGGTGGCCTGAGTTCAGCAAATTTGGAGGTTTTGACCCGCAGGGTGCACTGAGAAACATGGCTGTGGCAAAACACAACTTGAACATCATGATTAAACGCTACAACTCTACCGCTGCTACCAATGAGGTTCCTGAGGTCACAGTGTTTTCCAAGTCTCCCGTGACACTGGGTCAGCCCAACACCCTCATTTGTCTTGTGG\n+>HLA:HLA00602 DQA1*01:02:01:01 768 bp\n+TAAACAAAGCTCTGCTGCTGGGGGCCCTCGCTCTGACCACCGTGATGAGCCCCTGTGGAGGTGAAGACATTGTGGCTGACCACGTTGCCTCTTGTGGTGTAAACTTGTACCAGTTTTACGGTCCCTCTGGCCAGTACACCCATGAATTTGATGGAGATGAGCAGTTCTACGTGGACCTGGAGAGGAAGGAGACTGCCTGGCGGTGGCCTGAGTTCAGCAAATTTGGAGGTTTTGACCCGCAGGGTGCACTGAGAAACATGGCTGTGGCAAAACACAACTTGAACATCATGATTAAACGCTACAACTCTACCGCTGCTACCAATGAGGTTCCTGAGGTCACAGTGTTTTCCAAGTCTCCCGTGACACTGGGTCAGCCCAACACCCTCATTTGTCTTGTGG\n+>HLA:HLA06598 DQA1*01:02:01:02 768 bp\n+TAAACAAAGCTCTGCTGCTGGGGGCCCTCGCTCTGACCACCGTGATGAGCCCCTGTGGAGGTGAAGACATTGTGGCTGACCACGTTGCCTCTTGTGGTGTAAACTTGTACCAGTTTTACGGTCCCTCTGGCCAGTACACCCATGAATTTGATGGAGATGAGCAGTTCTACGTGGACCTGGAGAGGAAGGAGACTGCCTGGCGGTGGCCTGAGTTCAGCAAATTTGGAGGTTTTGACCCGCAGGGTGCACTGAGAAACATGGCTGTGGCAAAACACAACTTGAACATCATGATTAAACGCTACAACTCTACCGCTGCTACCAATGAGGTTCCTGAGGTCACAGTGTTTTCCAAGTCTCCCGTGACACTGGGTCAGCCCAACACCCTCATTTGTCTTGTGG\n+>HLA:HLA06594 DQA1*01:02:01:03 768 bp\n+TAAACAAAGCTCTGCTGCTGGGGGCCCTCGCTCTGACCACCGTGATGAGCCCCTGTGGAGGTGAAGACATTGTGGCTGACCACGTTGCCTCTTGTGGTGTAAACTTGTACCAGTTTTACGGTCCCTCTGGCCAGTACACCCATGAATTTGATGGAGATGAGCAGTTCTACGTGGACCTGGAGAGGAAGGAGACTGCCTGGCGGTGGCCTGAGTTCAGCAAATTTGGAGGTTTTGACCCGCAGGGTGCACTGAGAAACATGGCTGTGGCAAAACACAACTTGAACATCATGATTAAACGCTACAACTCTACCGCTGCTACCAATGAGGTTCCTGAGGTCACAGTGTTTTCCAAGTCTCCCGTGACACTGGGTCAGCCCAACACCCTCATTTGTCTTGTGG\n+>HLA:HLA06599 DQA1*01:02:01:04 768 bp\n+TAAACAAAGCTCTGCTGCTGGGGGCCCTCGCTCTGACCACCGTGATGAGCCCCTGTGGAGGTGAAGACATTGTGGCTGACCACGTTGCCTCTTGTGGTGTAAACTTGTACCAGTTTTACGGTCCCTCTGGCCAGTACACCCATGAATTTGATGGAGATGAGCAGTTCTACGTGGACCTGGAGAGGAAGGAGACTGCCTGGCGGTGGCCTGAGTTCAGCAAATTTGGAGGTTTTGACCCGCAGGGTGCACTGAGAAACATGGCTGTGGCAAAACACAACTTGAACATCATGATTAAACGCTACAACTCTACCGCTGCTACCAATGAGGTTCCTGAGGTCACAGTGTTTTCCAAGTCTCCCGTGACACTGGGTCAGCCCAACACCCTCATTTGTCTTGTGG\n+>HLA:HLA00603 DQA1*01:02:02 768 bp\n+TAAACAAAGCTCTGCTGCTGGGGGCCCTCGCTCTGACCACCGTGATGAGCCCCTGTGGAGGTGAAGACATTGTGGCTGACCACGTTGCCTCTTGTGGTGTAAACTTGTACCAGTTTTACGGTCCCTCTGGCCAGTACACCCATGAATTTGATGGAGATGAGCAGTTCTACGTGGACCTGGAGAGGAAGGAGACTGCCTGGCGGTGGCCTGAGTTCAGCAAATTTGGAGGTTTTGACCCGCAGGGTGCACTGAGAAACATGGCTGTGGCAAAACACAACTTGAACATCATGATTAAACGCTACAACTCTACCGCTGCTACCAATGAGGTTCCTGAGGTCACAGTGTTTTCCAAGTCTCCCGTGACACTGGGTCAGCCCAACACCCTCATCTGTCTTGTGG\n+>HLA:HLA02338 DQA1*01:02:03 768 bp\n+TAAACAAAGCTCTGCTGCTGGGGGCCCTCGCTCTGACCACCGTGATGAGCCCCTGTGGAGGTGAAGACATTGTGGCTGACCACGTTGCCTCTTGTGGTGTAAACTTGTACCAGTTTTACGGTCCCTCTGGCCAGTACACCCATGAATTTGATGGAGATGAGCAGTTCTACGTGGACCTGGAGAGGAAGGAGACTGCCTGGCGGTGGCCTGAGTTCAGCAAATTTGGAGGTTTTGACCCGCAGGGTGCACTGAGAAACATGGCTGTGGCAAAACACAACTTGAACATCATGATTAAACGCTACAACTCTACCGCTGCTACCAATGAGGTTCCTGAGGTCACAGTGTTTTCCAAGTCTCCCGTGACACTGGGTCAGCCCAACACCCTCATTTGTCTTGTGG\n+>HLA:HLA02432 DQA1*01:02:04 768 bp\n+TAAACAAAGCTCTGCTGCTGGGGGCCCTCGCTCTGACCACCGTGATGAGCCCCTGTGGAGGTGAAGACATTGTGGCTGACCACGTTGCCTCTTGTGGTGTAAACTTGTACCAGTTTTACGGTCCCTCTGGCCAGTACACCCATGAATTTGATGGAGATGAGCAGTTCTACGTGGACCTGGAGAGGAAGGAGACTGCCTGGCGGTGGCCTGAGTTCAGCAAATTTGGAGGTTTTGACCCGCAGGGTGCACTGAGAAACATGGCTGTGGCAAAACACAACTTGAACATCATGATTAAACGCTACAACTCTACCGCTGCTACCAATGAGGTTCCTGAGGTCACAGTGTTTTCCAAGTCTCCCGTGACACTGGGTCAGCCCAACACCCTCATCTGTCTTGTGG\n+>HLA:HLA00604 DQA1*01:03:01:01 768'..b'GGGGACACCCGACCACGTTTCCTGTGGCAGCCTAAGAGGGAGTGTCATTTCTTCAATGGGACGGAGCGGGTGCGGTTCCTGGACAGATACTTCTATAACCAGGAGGAGTCCGTGCGCTTCGACAGCGACGTGGGGGAGTTCCGGGCGGTGACGGAGCTGGGGCGGCCTGACGCTGAGTACTGGAACAGCCAGAAGGACCTCCTGGAAGACAGGCGCGCCGCGGTGGACACCTACTGCAGACACAACTACGGGGTTGGTGAGAGCTTCACAGTGCAGCGGCGAGTCCAACCTAAGGTGACTGTATATCCTTCAAAGACCCAGCCCCTGCAGCACCACAACCTCCTGGTCTGCTCTGTGA\n+>HLA:HLA02623 DRB1*16:11 270 bp\n+GCTCCTGCATGACAGCGCTGACAGTGACACTGATGGTGCTGAGCTCCCCACTGGCTTTGGCTGGGGACACCCGACCACGTTTCCTGTGGCAGCCTAAGAGGAAGTGTCATTTCTTCAATGGGACGGAGCGGGTGCGGTTCCTGGACAGATACTTCTATAACCAGGAGGAGTCCGTGCGCTTCGACAGCGACGTGGGGGAGTACCGGGCGGTGACGGAGCTGGGGCGGCCTGACGCTGAGTACTGGAACAGCCAGAAGGACCTCCTGGAAGACAGGCGCGCCGCGGTGGACACCTACTGCAGACACAACTACGGGGTTGGTGAGAGCTTCACAGTGCAGCGGCGAGTCCAACCTAAGGTGACTGTATATCCTTCAAAGACCCAGCCCCTGCAGCACCACAACCTCCTGGTCTGCTCTGTGA\n+>HLA:HLA03043 DRB1*16:12 270 bp\n+GCTCCTGCATGACAGCGCTGACAGTGACACTGATGGTGCTGAGCTCCCCACTGGCTTTGGCTGGGGACACCCGACCACGTTTCCTGTGGCAGCCTAAGAAGGAGTGTCATTTCTTCAATGGGACGGAGCGGGTGCGGTTCCTGGACAGATACTTCTATAACCAGGAGGAGTCCGTGCGCTTCGACAGCGACGTGGGGGAGTACCGGGCGGTGACGGAGCTGGGGCGGCCTGACGCTGAGTACTGGAACAGCCAGAAGGACCTCCTGGAAGACAGGCGCGCCGCGGTGGACACCTACTGCAGACACAACTACGGGGTTGGTGAGAGCTTCACAGTGCAGCGGCGAGTCCAACCTAAGGTGACTGTATATCCTTCAAAGACCCAGCCCCTGCAGCACCACAACCTCCTGGTCTGCTCTGTGA\n+>HLA:HLA03995 DRB1*16:14 270 bp\n+GCTCCTGCATGACAGCGCTGACAGTGACACTGATGGTGCTGAGCTCCCCACTGGCTTTGGCTGGGGACACCCGACCACGTTTCCTGTGGCAGCCTAAGAGGGAGTGTCATTTCTTCAATGGGACGGAGCGGGTGCGGTTCCTGGACAGATACTTCTATAACCAGGAGGAGTCCGTGCGCTTCGACAGCGACGTGGGGGAGTACCGGGCGGTGACGGAGCTGGGGCGGCCTGAAGCTGAGTACTGGAACAGCCAGAAGGACCTCCTGGAAGACAGGCGCGCCGCGGTGGACACCTACTGCAGACACAACTACGGGGTTGGTGAGAGCTTCACAGTGCAGCGGCGAGTCCAACCTAAGGTGACTGTATATCCTTCAAAGACCCAGCCCCTGCAGCACCACAACCTCCTGGTCTGCTCTGTGA\n+>HLA:HLA04359 DRB1*16:15 270 bp\n+GCTCCTGCATGACAGCGCTGACAGTGACACTGATGGTGCTGAGCTCCCCACTGGCTTTGGCTGGGGACACCCGACCACGTTTCCTGTGGCAGCCTAAGAGGGAGTGTCATTTCTTCAATGGGACGGAGCGGGTGCGGTTCCTGGACAGATACTTCTATAACCAGGAGGAGTCCGTGCGCTTCGACAGCGACGTGGGGGAGTACCGGGCGGTGACGGAGCTGGGGCGGCCTGACGCTGAGTACTGGAACAGCCAGAAGGACTTCCTGGAAGACAGGCGCGCCGCGGTGGACACCTACTGCAGACACAACTACGGGGTTGTGGAGAGCTTCACAGTGCAGCGGCGAGTCCAACCTAAGGTGACTGTATATCCTTCAAAGACCCAGCCCCTGCAGCACCACAACCTCCTGGTCTGCTCTGTGA\n+>HLA:HLA04747 DRB1*16:16 270 bp\n+GCTCCTGCATGACAGCGCTGACAGTGACACTGATGGTGCTGAGCTCCCCACTGGCTTTGGCTGGGGACACCCGACCACGTTTCCTGTGGCAGCCTAAGAGGGAGTGTCATTTCTTCAATGGGACGGAGCGGGTGCGGTTCCTGGACAGATACTTCTATAACCAGGAGGAGTCCGTGCGCTTCGACAGCGACGTGGGGGAGTACCGGGCGGTGACGGAGCTGGGGCGGCCTGATGAGGAGTACTGGAACAGCCAGAAGGACCTCCTGGAAGACAGGCGCGCCGCGGTGGACACCTACTGCAGACACAACTACGGGGTTGGTGAGAGCTTCACAGTGCAGCGGCGAGTCCAACCTAAGGTGACTGTATATCCTTCAAAGACCCAGCCCCTGCAGCACCACAACCTCCTGGTCTGCTCTGTGA\n+>HLA:HLA05791 DRB1*16:17 270 bp\n+GCTCCTGCATGACAGCGCTGACAGTGACACTGATGGTGCTGAGCTCCCCACTGGCTTTGGCTGGGGACACCCGACCACGTTTCCTGTGGCAGCCTAAGAGGGAGTGTAATTTCTTCAATGGGACGGAGCGGGTGCGGTTCCTGGACAGATACTTCTATAACCAGGAGGAGTCCGTGCGCTTCGACAGCGACGTGGGGGAGTACCGGGCGGTGACGGAGCTGGGGCGGCCTGACGCTGAGTACTGGAACAGCCAGAAGGACCTCCTGGAAGACAGGCGCGCCGCGGTGGACACCTACTGCAGACACAACTACGGGGTTGGTGAGAGCTTCACAGTGCAGCGGCGAGTCCAACCTAAGGTGACTGTATATCCTTCAAAGACCCAGCCCCTGCAGCACCACAACCTCCTGGTCTGCTCTGTGA\n+>HLA:HLA06295 DRB1*16:18 270 bp\n+GCTCCTGCATGACAGCGCTGACAGTGACACTGATGGTGCTGAGCTCCCCACTGGCTTTGGCTGGGGACACCCGACCACGTTTCCTGTGGCAGCCTAAGAGGGAGTGTCATTTCTTCAATGGGACGGAGCGGGTGCGGTTCCTGGACAGATACTTCTATAACCAGGAGGAGTCCGTGCGCTTCGACAGCGACGTGGGGGAGTACCGGGCGGTGACGGAGCTGGGGCGGCCTGACGCTGAGTACTGGAACAGCCAGAAGGACCTCCTGGAAGACAGGCGGGCCCTGGTGGACACCTACTGCAGACACAACTACGGGGTTGGTGAGAGCTTCACAGTGCAGCGGCGAGTCCAACCTAAGGTGACTGTATATCCTTCAAAGACCCAGCCCCTGCAGCACCACAACCTCCTGGTCTGCTCTGTGA\n+>HLA:HLA07407 DRB1*16:19 270 bp\n+GCTCCTGCATGACAGCGCTGACAGTGACACTGATGGTGCTGAGCTCCCCACTGGCTTTGGCTGGGGACACCCGACCACGTTTCCTGTGGCAGCCTAAGAGGGAGTGTCATTTCTTCAATGGGACGGAGCGGGTGCGGTTCCTGGACAGATACTTCTATAACCAGGAGGAGTCCGTGCACTTCGACAGCGACGTGGGGGAGTACCGGGCGGTGACGGAGCTGGGGCGGCCTGACGCTGAGTACTGGAACAGCCAGAAGGACCTCCTGGAAGACAGGCGCGCCGCGGTGGACACCTACTGCAGACACAACTACGGGGTTGGTGAGAGCTTCACAGTGCAGCGGCGAGTCCAACCTAAGGTGACTGTATATCCTTCAAAGACCCAGCCCCTGCAGCACCACAACCTCCTGGTCTGCTCTGTGA\n' |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/HLA2.ex2.plus75.rev.1.ebwt |
| b |
| Binary file references/HLA2.ex2.plus75.rev.1.ebwt has changed |
| b |
| diff -r 000000000000 -r 913ea6991ee4 references/HLA2.ex2.plus75.rev.2.ebwt |
| b |
| Binary file references/HLA2.ex2.plus75.rev.2.ebwt has changed |
| b |
| diff -r 000000000000 -r 913ea6991ee4 seq2HLA.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seq2HLA.py Thu Dec 20 10:37:01 2012 -0500 |
| [ |
| b'@@ -0,0 +1,801 @@\n+##########################################################################################################\n+#Title:\n+#seq2HLA - HLA genotyping from RNA-Seq sequence reads\n+#\n+#Release: 1.0\n+#\n+#Author:\n+#Sebastian Boegel, 2012 (c)\n+#TRON - Translational Oncology at the University Medical Center Mainz, 55131 Mainz, Germany\n+#University Medical Center of the Johannes Gutenberg-University Mainz, III. Medical Department, Mainz, Germany\n+#\n+#Contact:\n+#boegels@uni-mainz.de\n+#\n+#Synopsis:\n+#We developed an in-silico method "Seq2HLA", written in python and R, which takes standard RNA-Seq sequence reads in fastq format \n+#as input, uses a bowtie index comprising all HLA alleles and outputs the most likely HLA class I and class II genotypes, \n+#a p-value for each call, and the expression of each class \n+#\n+#Usage: \n+#python seq2HLA.py -1 <readfile1> -2 <readfile2> -r "<runname>" -l <readlength> [-3 <int>]*\n+#*optional (Default:0)\n+#\n+#Dependencies:\n+#0.) seq2HLA is a python script, developed with Python 2.6.8\n+#1.) bowtie must be reachable by the command "bowtie". seq2HLA was developed and tested with bowtie version 0.12.7 (64-bit). The call to bowtie is invoked with 6 CPUs. You can change that in the function "mapping".\n+#2.) R must be installed, seq2HLA.py was developed and tested with R version 2.12.2 (2011-02-25)\n+#3.) Input must be paired-end reads in fastq-format\n+#4.) Index files must be located in the folder "references".\n+#5.) Packages: biopython (developed with V1.58), numpy (1.3.0)\n+###########################################################################################################\n+\n+from operator import itemgetter\n+import sys\n+import linecache\n+import ast\n+import os\n+import shutil\n+import tempfile\n+import subprocess\n+from Bio import SeqIO\n+import numpy\n+import operator\n+from optparse import OptionParser\n+\n+log_stderr = sys.stderr\n+log_file = open("err.log","w")\n+sys.stderr = log_file\n+\n+import pysam\n+\n+#These variables need to be global, as they are filled and used by different modules\n+readcount={}\n+readspergroup={}\n+allelesPerLocus={}\n+\n+def main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped):\n+\tlog = open(logfile,"w")\n+#\tmapopt="-a -v"+str(mismatch)\n+\tmapopt="-a -v"+str(mismatch)\n+\t#call HLA typing for Class I\n+\tmainClassI(runName+"-ClassI",readFile1,readFile2,bowtiebuildClassI,fastaClassI,mapopt,trim3,output1,log,gzipped)\n+\t#call HLA typing for Class II\n+\tmainClassII(runName+"-ClassII",readFile1,readFile2,bowtiebuildClassII,fastaClassII,mapopt,trim3,output2,log,gzipped)\n+\n+#---------------Class I-------------------------------\n+def mainClassI(runName,readFile1,readFile2,bowtiebuild,hla1fasta,mapopt,trim3,finaloutput,log,gzipped):\n+\t#-------1st iteration-----------------------------------\n+\tlog.write("----------HLA class I------------\\n")\n+\tsam1=runName+"-iteration1.bam"\n+\titeration=1\n+\tlog.write("First iteration starts....\\nMapping ......\\n")\n+\tmapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped)\n+\tmedians=[]\n+\tmedians.extend([0,0,0])\n+\tmedianflag=False\n+\t#Calculation of first digital haplototype .....\n+\toutput1=runName+".digitalhaplotype1"\t\n+\tlog.write("Calculation of first digital haplototype .....\\n")\n+\tmap=createRefDict(hla1fasta,"A","B","C")\n+\treadMapping(map,sam1)\n+\tpredictHLA1(sam1,medians,output1,medianflag,log)\n+\tlog.write("1st iteration done.\\nNow removing reads that mapped to the three top-scoring groups .......\\n")\n+\tremoveReads(runName,createRemoveList(runName,map))\n+\t\n+\t#------2nd iteration------------------------------------------\n+\tlog.write("Second iterations starts .....\\n Mapping ......\\n")\n+\tmedians=[]\n+\titeration=2\n+\tsam2=runName+"-iteration2.bam"\n+\tnewReadFile1=runName+"-2nditeration_1.fq"\n+\tnewReadFile2=runName+"-2nditeration_2.fq"\n+\tmapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped)\n+\tmedianfile=runName+".digitalhap'..b'n, i).split(\'\\t\')[3])\n+\t\n+\t#create read dictionary\n+\treads={}\n+\taligned_handle1=open(aligned1,"r")\n+\tfor record in SeqIO.parse(aligned_handle1, "fastq"):\n+\t\tilluminaid=record.id.split(\'/\')[0].split(\' \')[0]#find exact id, which also appears in the mapping file\n+\t\treads[illuminaid]={}\n+\t\treads[illuminaid][locus1]=0\n+\t\treads[illuminaid][locus2]=0\n+\t\treads[illuminaid][locus3]=0\n+\tsamhandle = pysam.Samfile(sam,"rb")\n+\tfor read in samhandle.fetch():\n+\t\tilluminaid = read.qname\n+\t\thlapseudoname = samhandle.getrname(read.tid)\n+\t\tif map[hlapseudoname].split(\':\')[0] in alleles:\n+\t\t\treads[illuminaid][map[hlapseudoname].split(\'*\')[0]]+=1\n+\n+\tcount={}\n+\tcount[locus1]=0\n+\tcount[locus2]=0\n+\tcount[locus3]=0\n+\tfor key in reads:\n+\t\tn=0\n+\t\tfor locus in reads[key]:\n+\t\t\tif reads[key][locus] > 0:\n+\t\t\t\tn+=1\n+\t\tfor locus in reads[key]:\n+\t\t\tif reads[key][locus] > 0:\n+\t\t\t\tcount[locus]+=float(1.0/float(n))\n+\t\n+\toutfile.write("\\n")\n+\t#Calculate RPKM and print expression values for each locus to stdout\n+\tfor locus in count:\n+\t\tif locus==locus1:\n+\t\t\toutfile.write(locus+": "+str(round(float((1000.0/length1))*float((1000000.0/totalreads))*count[locus],2))+" RPKM\\n")\n+\t\tif locus==locus2:\n+\t\t\toutfile.write(locus+": "+str(round(float((1000.0/length2))*float((1000000.0/totalreads))*count[locus],2))+" RPKM\\n")\n+\t\tif locus==locus3:\n+\t\t\toutfile.write(locus+": "+str(round(float((1000.0/length3))*float((1000000.0/totalreads))*count[locus],2))+" RPKM\\n")\n+\n+\n+if __name__ == \'__main__\':\n+\tparser = OptionParser(usage="usage: %prog -1 readFile1 -2 readFile2 -r runName -l readlength [-3 <int>]", version="%prog 1.0")\n+\tparser.add_option("-z","--gzipped",action="store",dest="gzipped",help="Select if input reads are in gzip format")\n+\tparser.add_option("-1",\n+\t\t\taction="store", \n+\t\t\tdest="readFile1",\n+\t\t\thelp="File name of #1 mates ")\n+\tparser.add_option("-2",\n+\t\t\taction="store", \n+\t\t\tdest="readFile2",\n+\t\t\thelp="File name of #2 mates")\n+\tparser.add_option("-r", "--runName",\n+\t\t\taction="store", \n+\t\t\tdest="runName",\n+\t\t\thelp="Name of this HLA typing run. Wil be used throughout this process as part of the name of the newly created files.")\n+\tparser.add_option("-l", "--length",\n+\t\t\taction="store",\n+\t\t\tdest="length",\n+\t\t\thelp="Readlength")\n+\tparser.add_option("-3", "--trim3",\n+\t\t\taction="store",\n+\t\t\tdest="trim3",\n+\t\t\tdefault="0",\n+\t\t\thelp="Bowtie option: -3 <int> trims <int> bases from the low quality 3\' end of each read. Default: 0")\n+\tparser.add_option("-o", "--output1",\n+\t\t\taction="store",\n+\t\t\tdest="output1",\n+\t\t\thelp="Output file 1")\n+\tparser.add_option("-p", "--output2",\n+\t\t\taction="store",\n+\t\t\tdest="output2",\n+\t\t\thelp="Output file 2")\n+\tparser.add_option("-g","--log",action="store",dest="logfile",help="Output Log File")\n+\n+\n+\t(options, args) = parser.parse_args()\n+\tif not options.readFile1: \n+\t\tparser.error(\'File name #1 pair not given.\')\n+\tif not options.readFile2: \n+\t\tparser.error(\'File name #2 pair not given.\')\n+\tif not options.runName: \n+\t\tparser.error(\'Run name not given.\')\n+\tif not options.length: \n+\t\tparser.error(\'Readlength not given.\')\n+\tgzipped = options.gzipped\n+\treadFile1=options.readFile1\n+\treadFile2=options.readFile2\n+\trunName=options.runName\n+\toutput1 = options.output1\n+\toutput2 = options.output2\n+\tlogfile = options.logfile\n+\tbowtiebuildClassI=sys.path[0]+"/references/ClassIWithoutNQex2-3.plus75"\n+\tbowtiebuildClassII=sys.path[0]+"/references/HLA2.ex2.plus75"\n+\tfastaClassI=sys.path[0]+"/references/ClassIWithoutNQex2-3.plus75.fasta"\n+\tfastaClassII=sys.path[0]+"/references/HLA2.ex2.plus75.fasta"\n+\t#as shown in the publication HLA typing with RNA-Seq works best by allowing as less mismatches as necessary\n+\tif int(options.length)<=50:\n+\t\tmismatch=1\n+\telif int(options.length)>50 and int(options.length)<=100:\n+\t\tmismatch=2\n+\telse:\n+\t\tmismatch=3\n+\ttrim3=str(options.trim3)\n+\tmain(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped)\n+\tsys.stderr = log_stderr\n+\tlog_file.close()\n' |
| b |
| diff -r 000000000000 -r 913ea6991ee4 seq2HLA.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seq2HLA.xml Thu Dec 20 10:37:01 2012 -0500 |
| b |
| @@ -0,0 +1,79 @@ +<tool id="seqhla" name="seq2HLA" version="1.0.0"> + <description>HLA typing from RNA-Seq sequence read</description> + <command interpreter="python"> + seq2HLA.py + -z $compressed + -1 $readFile1 + -2 $readFile2 + -r $runName + -l $readLength + -3 $trim + -o $out1 + -p $out2 + -g $logfile + </command> + <inputs> + <param name="compressed" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Select if your input files are compressed gzipped fastq files" help="Leave default if you didn't upload the files or don't know"/> + <param format="fastq,fastqsanger" name="readFile1" type="data" label="Forward FASTQ File" help="FASTQ File with forward reads" /> + <param format="fastq,fastqsanger" name="readFile2" type="data" label="Reverse FASTQ File" help="FASTQ File with reverse reads" /> + <param name="runName" type="text" value="Run1" label="Run Name" help="Name of the Run" /> + <param name="readLength" type="integer" value="50" label="Read Length" help="Length of the input reads" /> + <param name="trim" type="integer" value="0" label="Trim x bases from the low quality 3' end" help="Trim x bases from the low quality 3' end. Default: 0" /> + </inputs> + <outputs> + <data format="txt" name="out1" label="${tool.name} on ${on_string}: Output 1" /> + <data format="txt" name="out2" label="${tool.name} on ${on_string}: Output 2" /> + <data format="txt" name="logfile" label="${tool.name} on ${on_string}: Log" /> + </outputs> + <tests> + <test> + <param name="compressed" value="True"/> + <param name="readFile1" ftype="fastq" value="input1.fq"/> + <param name="readFile2" ftype="fastq" value="input2.fq"/> + <param name="runName" value="Run1"/> + <param name="readLength" value="37"/> + <param name="trim" value="0"/> + <output name="out1" file="output1.txt"/> + <output name="out2" file="output2.txt"/> + <output name="logfile" file="log.txt"/> + </test> + </tests> +<help> +.. class:: infomark + +**What it does** + +We developed an in-silico method "seq2HLA", written in python and R, which takes standard paired end RNA-Seq sequence reads in fastq format +as input, uses a bowtie index comprising all HLA alleles and outputs the most likely HLA class I and class II genotypes, a p-value for each call, and the expression of each class. + + +----- + +.. class:: infomark + +**Input** + +Input files in FASTQ format + +----- + +.. class:: infomark + +**Output** + +Output file(s) in TXT format + +----- + +.. class:: infomark + +**Authors** + +Sebastian Boegel + +----- + +.. image:: ./static/images/tron_logo.png + +</help> +</tool> |