Mercurial > repos > sebastian-boegel > seq2hla
changeset 1:155b796033b6 draft default tip
Uploaded
author | sebastian-boegel |
---|---|
date | Fri, 21 Dec 2012 03:46:15 -0500 |
parents | 913ea6991ee4 |
children | |
files | README seq2HLA.py seq2HLA.xml |
diffstat | 3 files changed, 87 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Fri Dec 21 03:46:15 2012 -0500 @@ -0,0 +1,62 @@ +============================================================================= +We developed an in-silico method "seq2HLA", written in python and R, which +takes standard paired end RNA-Seq sequence reads in fastq format +as input, uses a bowtie index comprising all HLA alleles and outputs the most +likely HLA class I and class II genotypes, a p-value for each call, and the +expression of each class. +============================================================================= +Author: +Sebastian Boegel +============================================================================= + +Tool dependencies: + +- bowtie +- R + +============================================================================= +To implement this script into Galaxy you have to copy the scripts folder in +your "$GALAXYHOME/tools/" folder and add the path of seq2HLA.xml +to your "$GALAXYHOME/tool_conf.xml" + +Example: +# <tool file="rna_seq/mapping_counter.xml" /> +# <tool file="rna_seq/count_genes_exons_junctions.xml" /> +# <tool file="rna_seq_auto/get_RNA-seq_data.xml" /> +# </section> + <section name="HLA Typing" id="hla_type"> + <tool file="seq2HLA/seq2HLA.xml"/> + </section> +# <section name="RSeQC" id="rseqc"> +# <tool file="RSeQC/bam2wig.xml" /> +# <tool file="RSeQC/bam_stat.xml" /> +# <tool file="RSeQC/clipping_profile.xml" /> +# <tool file="RSeQC/geneBody_coverage.xml" /> +# <tool file="RSeQC/infer_experiment.xml" /> +... +============================================================================== +The script uses in its subprocesses 4 cores, so you will also have to add a +setting to your "$GALAXYHOME/universe_wsgi.ini" to configure the queue for that +script + +... +[galaxy:tool_runners] + +#biomart = local:/// +#encode_db1 = local:/// +#hbvar = local:/// +#microbial_import1 = local:/// +ucsc_table_direct1 = local:/// +#ucsc_table_direct_archaea1 = local:/// +#ucsc_table_direct_test1 = local:/// +upload1 = local:/// +bowtie_wrapper = pbs:////-l nodes=1:ppn=4 +bowtie2_wrapper = pbs:////-l nodes=1:ppn=4 +tophat = pbs:////-l nodes=1:ppn=4 +tophat_color = pbs:////-l nodes=1:ppn=4 +cuffdiff = pbs:////-l nodes=1:ppn=4 +cufflinks = pbs:////-l nodes=1:ppn=4 +cuffmerge = pbs:////-l nodes=1:ppn=4 +seqhla = pbs:////-l nodes=1:ppn=6 (this line should be added) +... +=============================================================================
--- a/seq2HLA.py Thu Dec 20 10:37:01 2012 -0500 +++ b/seq2HLA.py Fri Dec 21 03:46:15 2012 -0500 @@ -54,23 +54,23 @@ readspergroup={} allelesPerLocus={} -def main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped): +def main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped,num_threads): log = open(logfile,"w") # mapopt="-a -v"+str(mismatch) mapopt="-a -v"+str(mismatch) #call HLA typing for Class I - mainClassI(runName+"-ClassI",readFile1,readFile2,bowtiebuildClassI,fastaClassI,mapopt,trim3,output1,log,gzipped) + mainClassI(runName+"-ClassI",readFile1,readFile2,bowtiebuildClassI,fastaClassI,mapopt,trim3,output1,log,gzipped,num_threads) #call HLA typing for Class II - mainClassII(runName+"-ClassII",readFile1,readFile2,bowtiebuildClassII,fastaClassII,mapopt,trim3,output2,log,gzipped) + mainClassII(runName+"-ClassII",readFile1,readFile2,bowtiebuildClassII,fastaClassII,mapopt,trim3,output2,log,gzipped,num_threads) #---------------Class I------------------------------- -def mainClassI(runName,readFile1,readFile2,bowtiebuild,hla1fasta,mapopt,trim3,finaloutput,log,gzipped): +def mainClassI(runName,readFile1,readFile2,bowtiebuild,hla1fasta,mapopt,trim3,finaloutput,log,gzipped,num_threads): #-------1st iteration----------------------------------- log.write("----------HLA class I------------\n") sam1=runName+"-iteration1.bam" iteration=1 log.write("First iteration starts....\nMapping ......\n") - mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped) + mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped,num_threads) medians=[] medians.extend([0,0,0]) medianflag=False @@ -90,7 +90,7 @@ sam2=runName+"-iteration2.bam" newReadFile1=runName+"-2nditeration_1.fq" newReadFile2=runName+"-2nditeration_2.fq" - mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped) + mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped,num_threads) medianfile=runName+".digitalhaplotype1" for i in range(2,5): medians.append(linecache.getline(medianfile, i).split('\t', 3)[2]) @@ -107,13 +107,13 @@ expression("A","B","C",694,694,694,map,runName,readFile1,finaloutput,gzipped) #--------------Class II---------------------------------------- -def mainClassII(runName,readFile1,readFile2,bowtiebuild,hla2fasta,mapopt,trim3,finaloutput,log,gzipped): +def mainClassII(runName,readFile1,readFile2,bowtiebuild,hla2fasta,mapopt,trim3,finaloutput,log,gzipped,num_threads): #-------1st iteration---------------------------------------- log.write("----------HLA class II------------\n") sam1=runName+"-iteration1.bam" iteration=1 log.write("ClassII: first iteration starts....\nMapping ......\n") - mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped) + mapping(sam1,runName,readFile1,readFile2,bowtiebuild,1,mapopt,trim3,log,gzipped,num_threads) medians=[] medians.extend([0,0,0]) medianflag=False @@ -132,7 +132,7 @@ sam2=runName+"-iteration2.bam" newReadFile1=runName+"-2nditeration_1.fq" newReadFile2=runName+"-2nditeration_2.fq" - mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped) + mapping(sam2,runName,newReadFile1,newReadFile2,bowtiebuild,2,mapopt,trim3,log,gzipped,num_threads) medianfile=runName+".digitalhaplotype1" for i in range(2,5): medians.append(float(linecache.getline(medianfile, i).split('\t', 3)[2])/2.0) @@ -149,18 +149,18 @@ expression("DQA1","DQB1","DRB1",400,421,421,map,runName,readFile1,finaloutput,gzipped) #performs the bowtie mapping for the 2 iterations using the given parameters -def mapping(sam,runName,readFile1,readFile2,bowtiebuild,iteration,mapopt,trim3,log,gzipped): +def mapping(sam,runName,readFile1,readFile2,bowtiebuild,iteration,mapopt,trim3,log,gzipped,num_threads): if iteration==1: if gzipped == "True": sam = sam.rsplit(".",1)[0] - cmd1 = "bowtie --threads 4 -3 %s --quiet -S %s --al %s.aligned %s -1 <(zcat %s) -2 <(zcat %s) | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam) + cmd1 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s --al %s.aligned %s -1 <(zcat %s) -2 <(zcat %s) | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam) mappingOutput = execBowtie(cmd1) cmd2 = "samtools index %s.bam" % sam createBAMIndex(cmd2) else: sam = sam.rsplit(".",1)[0] - cmd1 = "bowtie --threads 4 -3 %s --quiet -S %s --al %s.aligned %s -1 %s -2 %s | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam) + cmd1 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s --al %s.aligned %s -1 %s -2 %s | awk -F '\t' '$3 != \"*\"{ print $0 }' | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,runName,bowtiebuild,readFile1,readFile2,sam) mappingOutput = execBowtie(cmd1) cmd2 = "samtools index %s.bam" % sam @@ -169,7 +169,7 @@ else: sam = sam.rsplit(".",1)[0] - cmd2 = "bowtie --threads 4 -3 %s --quiet -S %s %s -1 %s -2 %s | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,bowtiebuild,readFile1,readFile2,sam) + cmd2 = "bowtie --threads "+num_threads+" -3 %s --quiet -S %s %s -1 %s -2 %s | samtools view -bS - | samtools sort - %s" % (trim3,mapopt,bowtiebuild,readFile1,readFile2,sam) mappingOutput = execBowtie(cmd2) cmd3 = "samtools index %s.bam" % sam @@ -766,6 +766,7 @@ dest="output2", help="Output file 2") parser.add_option("-g","--log",action="store",dest="logfile",help="Output Log File") + parser.add_option("-t","--threads",type="int",dest="num_threads",help="Define amount of threads to use for Bowtie in this script") (options, args) = parser.parse_args() @@ -796,6 +797,6 @@ else: mismatch=3 trim3=str(options.trim3) - main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped) + main(runName,readFile1,readFile2,fastaClassI,fastaClassII,bowtiebuildClassI,bowtiebuildClassII,mismatch,trim3,output1,output2,logfile,gzipped,options.num_threads) sys.stderr = log_stderr log_file.close()
--- a/seq2HLA.xml Thu Dec 20 10:37:01 2012 -0500 +++ b/seq2HLA.xml Fri Dec 21 03:46:15 2012 -0500 @@ -2,6 +2,7 @@ <description>HLA typing from RNA-Seq sequence read</description> <command interpreter="python"> seq2HLA.py + -t $threads -z $compressed -1 $readFile1 -2 $readFile2 @@ -13,12 +14,13 @@ -g $logfile </command> <inputs> - <param name="compressed" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Select if your input files are compressed gzipped fastq files" help="Leave default if you didn't upload the files or don't know"/> + <param name="compressed" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Select if your input files are compressed gzipped fastq files" help="Leave default (not checked) if you are unsure"/> <param format="fastq,fastqsanger" name="readFile1" type="data" label="Forward FASTQ File" help="FASTQ File with forward reads" /> <param format="fastq,fastqsanger" name="readFile2" type="data" label="Reverse FASTQ File" help="FASTQ File with reverse reads" /> <param name="runName" type="text" value="Run1" label="Run Name" help="Name of the Run" /> <param name="readLength" type="integer" value="50" label="Read Length" help="Length of the input reads" /> <param name="trim" type="integer" value="0" label="Trim x bases from the low quality 3' end" help="Trim x bases from the low quality 3' end. Default: 0" /> + <param name="threads" type="integer" value="4" label="Number of threads to use for Bowtie" help="Choose a plausible amount of threads to use for Bowtie in this tool" /> </inputs> <outputs> <data format="txt" name="out1" label="${tool.name} on ${on_string}: Output 1" /> @@ -33,6 +35,7 @@ <param name="runName" value="Run1"/> <param name="readLength" value="37"/> <param name="trim" value="0"/> + <param name="threads" value="4"/> <output name="out1" file="output1.txt"/> <output name="out2" file="output2.txt"/> <output name="logfile" file="log.txt"/> @@ -51,6 +54,11 @@ .. class:: infomark +**Dependencies** + +- bowtie +- R + **Input** Input files in FASTQ format @@ -73,7 +81,7 @@ ----- -.. image:: ./static/images/tron_logo.png +.. image:: http://tron-mainz.de/wp-content/themes/tron/images/page_logo.png </help> </tool>