Mercurial > repos > geert-vandeweyer > vcf_to_variantdb
view VCF_to_VariantDB.pl @ 9:3b27cae9b359 draft
New Version. VariantDB checks for Genome Build matching using the Galaxy dbkey variable.
author | geert-vandeweyer |
---|---|
date | Mon, 14 Jul 2014 03:24:28 -0400 |
parents | db44ff975de8 |
children |
line wrap: on
line source
#!/usr/bin/perl

# Send a VCF file (and optionally a BAM file with its index) from a Galaxy
# server to a VariantDB server.
#
# The script:
#   1. checks that the VariantDB server answers and accepts the user/dbkey,
#   2. symlinks the data files into a fresh directory under <galaxy>/static/
#      and posts the matching URLs to VariantDB,
#   3. polls VariantDB until the import has finished,
#   4. writes a short report to the output file and removes the directory.

use strict;
use warnings;

# load modules
use Getopt::Std;
use File::Path qw(remove_tree);
use LWP::UserAgent;

##########################
# COMMAND LINE ARGUMENTS #
##########################
# v = (v)cf file to load
# V = (V)CF file encoded id
# u = (u)ser email from galaxy
# n = sample (n)ame
# a = sample (a)nnotation
# g = sample (g)ender
# o = (o)utput file (simple text file)
# b = (b)am file (optional)
# B = (B)am index , needed if b is specified
# c = encoded id of bam file (optional)
# C = encoded id of Bam index , needed if b is specified => NOT POSSIBLE YET, NEEDS INDEXING ON VARIANTDB SERVER !
# S = (S)erver address to send data to.
# R = (r)oot of galaxy web server (/home/galaxyuser/galaxy-dist)
# H = (H)ost of the galaxy web server (http://my.galaxy.server/galaxy/)
# G = Genome build (dbkey)
# F = (F)ormat of input file
#
# Note: the a, V and c options are accepted but not read by this script.
my %opts;
getopts('v:u:n:a:g:o:b:B:V:c:S:R:H:G:F:', \%opts);    # options are in %opts
$| = 1;

# Seconds to wait between two status checks while VariantDB is importing.
my $POLL_INTERVAL = 10;

# Upper bound on attempts to find an unused working directory name.
my $MAX_DIR_ATTEMPTS = 1000;

# Post a form to the VariantDB server and return the decoded response body.
# Returns '' when the response has no decodable body, so callers can always
# compare or interpolate the result.
sub post_form {
    my ($agent, $target, $fields) = @_;
    my $reply = $agent->post($target, $fields);
    my $body  = $reply->decoded_content();
    return defined $body ? $body : '';
}

# Return the given value, or '' when it is undefined (optional options).
sub or_empty {
    my ($value) = @_;
    return defined $value ? $value : '';
}

#################
## CHECK INPUT ##
#################
if (!exists($opts{'v'})) {
    die('No VCF File Specified');
}
if (!-e $opts{'v'}) {
    die('VCF File not found');
}
if (!exists($opts{'u'})) {
    die('No user specified');
}
if (!exists($opts{'S'})) {
    die('No VariantDB server specified');
}
if (!exists($opts{'H'})) {
    die('The Galaxy source-server is not specified');
}
if (!exists($opts{'F'})) {
    die('VCF format not specified.');
}
# The working directory is created below the galaxy root, so it is required.
if (!exists($opts{'R'})) {
    die('The Galaxy root directory is not specified');
}
if (!exists($opts{'o'})) {
    die('No output file specified');
}
# A BAM file can only be linked together with its index.
if (exists($opts{'b'}) && !exists($opts{'B'})) {
    die('BAM file specified without BAM index');
}

################
# open outfile #
################
open my $out, '>', $opts{'o'}
    or die("Could not open output file '$opts{'o'}': $!");

###############################
## TEST CONNECTION TO SERVER ##
###############################
my $url = $opts{'S'} . "/";
$url =~ s/\/\/$/\//;    # avoid a double slash when -S already ends in '/'
$url .= "cgi-bin/galaxy_communication.cgi";

my $conn = LWP::UserAgent->new();
$conn->timeout(1800);

my $content = post_form($conn, $url, { 'HelloWorld' => 1 });
if ($content eq 'HelloGalaxy') {
    print {$out} "Testing connection to $opts{'S'} : OK.\n";
}
else {
    die("Could not connect to the specified server : $content");
}

##################
## TEST USER ID ##
##################
my $email = $opts{'u'};
my $dbkey = $opts{'G'};
$content = post_form($conn, $url, { 'CheckUser' => $email, 'dbkey' => $dbkey });
if ($content eq 'OK') {
    print {$out} "Testing User-existence and Genome Build: OK.\n";
}
else {
    die("ERROR: $content");
}
print $opts{'H'} . " was specified as galaxy host\n";

###############################################
## SEND THE VCF AND BAM FILES FOR PROCESSING ##
###############################################
# filepaths
my $vcfpath = $opts{'v'};
my $bampath = $opts{'b'};
my $baipath = $opts{'B'};

# input VCF format
my $format = $opts{'F'};

# Make the output directory in the (galaxy/static/) working dir.
# $wd is the directory on disk, $dd the URL under which it is reachable.
# mkdir itself detects name collisions, so there is no gap between testing
# for the directory and creating it.
my ($wd, $dd);
my $created = 0;
for my $attempt (1 .. $MAX_DIR_ATTEMPTS) {
    my $rand = int(rand(1000));
    $wd = $opts{'R'} . "/static/VCF_parser." . $rand;
    $dd = $opts{'H'} . "/static/VCF_parser." . $rand;
    if (mkdir $wd) {
        $created = 1;
        last;
    }
    # Only a name collision is worth retrying with another random suffix.
    if (!$!{EEXIST}) {
        die("Could not create working directory '$wd': $!");
    }
}
if (!$created) {
    die("Could not find an unused working directory under '$opts{'R'}/static'");
}

## link files
my $vcfurl = "$dd/data.vcf";
$vcfurl =~ s/\s//g;
symlink($vcfpath, "$wd/data.vcf")
    or die("Could not link '$vcfpath' into '$wd': $!");

my ($bamurl, $bamidxurl);
if (exists($opts{'b'})) {
    $bamurl = "$dd/data.bam";
    $bamurl =~ s/\s//g;
    $bamidxurl = "$dd/data.bai";
    $bamidxurl =~ s/\s//g;
    symlink($bampath, "$wd/data.bam")
        or die("Could not link '$bampath' into '$wd': $!");
    symlink($baipath, "$wd/data.bai")
        or die("Could not link '$baipath' into '$wd': $!");
}

# Sample name and gender are optional; send them as empty strings if absent.
my $sample = or_empty($opts{'n'});
my $gender = or_empty($opts{'g'});

# post form to the variantDB host.
my %form = (
    'VCFurl1'      => $vcfurl,
    'name1'        => $sample,
    'gender1'      => $gender,
    'User'         => $email,
    'GalaxyUpload' => 1,
    'Format1'      => $format,
);
if (exists($opts{'b'})) {
    $form{'BAMurl1'}    = $bamurl;
    $form{'BAIurl1'}    = $bamidxurl;
    $form{'storedata1'} = 1;
}
$content = post_form($conn, $url, \%form);
chomp($content);

## check if upload went ok.
if (substr($content, 0, 2) ne 'OK') {
    die("ERROR: $content");
}
print {$out} "Uploading datafiles to VariantDB : OK.\n";

## extract the remote working directory from content ("OK-<rwd>").
my ($rwd) = $content =~ m/OK-(.+)$/;
if (!defined($rwd) || $rwd eq '') {
    die("ERROR : No remote working directory provided to check status.");
}

## now wait for the import to finish.
# Replies look like "OK-<status>-<content>"; the character at offset 3 is the
# status. Polling continues while it is 0 (or not a non-zero digit).
while (1) {
    $content = post_form($conn, $url, { 'CheckStatus' => 1, 'rwd' => $rwd });
    chomp($content);
    if (substr($content, 0, 2) ne 'OK') {
        die("ERROR: $content");
    }
    my $status = length($content) > 3 ? substr($content, 3, 1) : '';
    last if $status =~ /\A[1-9]\z/;
    sleep $POLL_INTERVAL;    # only wait when another check is needed
}

## Loading OK
# latest response : OK-1-Content
my $report = length($content) >= 5 ? substr($content, 5) : '';
print {$out} "Processing Datafiles : OK.\n";
print {$out} "\n$report\n";
close $out or die("Could not write output file '$opts{'o'}': $!");

# clean up
remove_tree($wd);