# HG changeset patch
# User sebastian-boegel
# Date 1356079575 18000
# Node ID 155b796033b67297b97a9a123b694a2115f5788f
# Parent 913ea6991ee4647dc5c8bfd3a2c8352312686e89
Uploaded
diff -r 913ea6991ee4 -r 155b796033b6 README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Fri Dec 21 03:46:15 2012 -0500
@@ -0,0 +1,62 @@
+=============================================================================
+We developed an in-silico method "seq2HLA", written in python and R, which
+takes standard paired end RNA-Seq sequence reads in fastq format
+as input, uses a bowtie index comprising all HLA alleles and outputs the most
+likely HLA class I and class II genotypes, a p-value for each call, and the
+expression of each class.
+=============================================================================
+Author:
+Sebastian Boegel
+=============================================================================
+
+Tool dependencies:
+
+- bowtie
+- R
+
+=============================================================================
+To implement this script into Galaxy you have to copy the scripts folder in
+your "$GALAXYHOME/tools/" folder and add the path of seq2HLA.xml
+to your "$GALAXYHOME/tool_conf.xml"
+
+Example:
+#
+#
+#
+#
+
+#
+#
+#
+#
+#
+#
+...
+==============================================================================
+The script uses in its subprocesses 4 cores, so you will also have to add a
+setting to your "$GALAXYHOME/universe_wsgi.ini" to configure the queue for that
+script
+
+...
+[galaxy:tool_runners]
+
+#biomart = local:///
+#encode_db1 = local:///
+#hbvar = local:///
+#microbial_import1 = local:///
+ucsc_table_direct1 = local:///
+#ucsc_table_direct_archaea1 = local:///
+#ucsc_table_direct_test1 = local:///
+upload1 = local:///
+bowtie_wrapper = pbs:////-l nodes=1:ppn=4
+bowtie2_wrapper = pbs:////-l nodes=1:ppn=4
+tophat = pbs:////-l nodes=1:ppn=4
+tophat_color = pbs:////-l nodes=1:ppn=4
+cuffdiff = pbs:////-l nodes=1:ppn=4
+cufflinks = pbs:////-l nodes=1:ppn=4
+cuffmerge = pbs:////-l nodes=1:ppn=4
+seqhla = pbs:////-l nodes=1:ppn=6 (this line should be added)
+...
+=============================================================================
diff -r 913ea6991ee4 -r 155b796033b6 seq2HLA.py
--- a/seq2HLA.py Thu Dec 20 10:37:01 2012 -0500
+++ b/seq2HLA.py Fri Dec 21 03:46:15 2012 -0500
@@ -54,23 +54,23 @@
readspergroup={}
allelesPerLocus={}
-def main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped):
+def main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped,num_threads):
log = open(logfile,"w")
# mapopt="-a -v"+str(mismatch)
mapopt="-a -v"+str(mismatch)
#call HLA typing for Class I
- mainClassI(runName+"-ClassI",readFile1,readFile2,bowtiebuildClassI,fastaClassI,mapopt,trim3,output1,log,gzipped)
+ mainClassI(runName+"-ClassI",readFile1,readFile2,bowtiebuildClassI,fastaClassI,mapopt,trim3,output1,log,gzipped,num_threads)
#call HLA typing for Class II
- mainClassII(runName+"-ClassII",readFile1,readFile2,bowtiebuildClassII,fastaClassII,mapopt,trim3,output2,log,gzipped)
+ mainClassII(runName+"-ClassII",readFile1,readFile2,bowtiebuildClassII,fastaClassII,mapopt,trim3,output2,log,gzipped,num_threads)
#---------------Class I-------------------------------
-def mainClassI(runName,readFile1,readFile2,bowtiebuild,hla1fasta,mapopt,trim3,finaloutput,log,gzipped):
+def mainClassI(runName,readFile1,readFile2,bowtiebuild,hla1fasta,mapopt,trim3,finaloutput,log,gzipped,num_threads):
#-------1st iteration-----------------------------------
log.write("----------HLA class I------------\n")
sam1=runName+"-iteration1.bam"
iteration=1
log.write("First iteration starts....\nMapping ......\n")
- mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped)
+ mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped,num_threads)
medians=[]
medians.extend([0,0,0])
medianflag=False
@@ -90,7 +90,7 @@
sam2=runName+"-iteration2.bam"
newReadFile1=runName+"-2nditeration_1.fq"
newReadFile2=runName+"-2nditeration_2.fq"
- mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped)
+ mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped,num_threads)
medianfile=runName+".digitalhaplotype1"
for i in range(2,5):
medians.append(linecache.getline(medianfile, i).split('\t', 3)[2])
@@ -107,13 +107,13 @@
expression("A","B","C",694,694,694,map,runName,readFile1,finaloutput,gzipped)
#--------------Class II----------------------------------------
-def mainClassII(runName,readFile1,readFile2,bowtiebuild,hla2fasta,mapopt,trim3,finaloutput,log,gzipped):
+def mainClassII(runName,readFile1,readFile2,bowtiebuild,hla2fasta,mapopt,trim3,finaloutput,log,gzipped,num_threads):
#-------1st iteration----------------------------------------
log.write("----------HLA class II------------\n")
sam1=runName+"-iteration1.bam"
iteration=1
log.write("ClassII: first iteration starts....\nMapping ......\n")
- mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped)
+ mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped,num_threads)
medians=[]
medians.extend([0,0,0])
medianflag=False
@@ -132,7 +132,7 @@
sam2=runName+"-iteration2.bam"
newReadFile1=runName+"-2nditeration_1.fq"
newReadFile2=runName+"-2nditeration_2.fq"
- mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped)
+ mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped,num_threads)
medianfile=runName+".digitalhaplotype1"
for i in range(2,5):
medians.append(float(linecache.getline(medianfile, i).split('\t', 3)[2])/2.0)
@@ -149,18 +149,18 @@
expression("DQA1","DQB1","DRB1",400,421,421,map,runName,readFile1,finaloutput,gzipped)
#performs the bowtie mapping for the 2 iterations using the given parameters
-def mapping(sam,runName,readFile1,readFile2,bowtiebuild,iteration,mapopt,trim3,log,gzipped):
+def mapping(sam,runName,readFile1,readFile2,bowtiebuild,iteration,mapopt,trim3,log,gzipped,num_threads):
if iteration==1:
if gzipped == "True":
sam = sam.rsplit(".",1)[0]
- cmd1 = "bowtie --threads 4 -3 %s --quiet -S %s --al %s.aligned %s -1 <(zcat %s) -2 <(zcat %s) | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam)
+ cmd1 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s --al %s.aligned %s -1 <(zcat %s) -2 <(zcat %s) | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam)
mappingOutput = execBowtie(cmd1)
cmd2 = "samtools index %s.bam" % sam
createBAMIndex(cmd2)
else:
sam = sam.rsplit(".",1)[0]
- cmd1 = "bowtie --threads 4 -3 %s --quiet -S %s --al %s.aligned %s -1 %s -2 %s | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam)
+ cmd1 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s --al %s.aligned %s -1 %s -2 %s | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam)
mappingOutput = execBowtie(cmd1)
cmd2 = "samtools index %s.bam" % sam
@@ -169,7 +169,7 @@
else:
sam = sam.rsplit(".",1)[0]
- cmd2 = "bowtie --threads 4 -3 %s --quiet -S %s %s -1 %s -2 %s | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,bowtiebuild,readFile1,readFile2,sam)
+ cmd2 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s %s -1 %s -2 %s | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,bowtiebuild,readFile1,readFile2,sam)
mappingOutput = execBowtie(cmd2)
cmd3 = "samtools index %s.bam" % sam
@@ -766,6 +766,7 @@
dest="output2",
help="Output file 2")
parser.add_option("-g","--log",action="store",dest="logfile",help="Output Log File")
+ parser.add_option("-t","--threads",type="int",dest="num_threads",help="Define amount of threads to use for Bowtie in this script")
(options, args) = parser.parse_args()
@@ -796,6 +797,6 @@
else:
mismatch=3
trim3=str(options.trim3)
- main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped)
+ main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped,options.num_threads)
sys.stderr = log_stderr
log_file.close()
diff -r 913ea6991ee4 -r 155b796033b6 seq2HLA.xml
--- a/seq2HLA.xml Thu Dec 20 10:37:01 2012 -0500
+++ b/seq2HLA.xml Fri Dec 21 03:46:15 2012 -0500
@@ -2,6 +2,7 @@
HLA typing from RNA-Seq sequence read
seq2HLA.py
+ -t $threads
-z $compressed
-1 $readFile1
-2 $readFile2
@@ -13,12 +14,13 @@
-g $logfile
-
+
+
@@ -33,6 +35,7 @@
+
@@ -51,6 +54,11 @@
.. class:: infomark
+**Dependencies**
+
+- bowtie
+- R
+
**Input**
Input files in FASTQ format
@@ -73,7 +81,7 @@
-----
-.. image:: ./static/images/tron_logo.png
+.. image:: http://tron-mainz.de/wp-content/themes/tron/images/page_logo.png