Mercurial > repos > geert-vandeweyer > vcf_to_variantdb
annotate VCF_to_VariantDB.pl @ 8:db44ff975de8 draft
Corrected macro.xml handling of GALAXY_URL
author | geert-vandeweyer |
---|---|
date | Tue, 10 Jun 2014 06:01:18 -0400 |
parents | 04e3bba317f4 |
children | 3b27cae9b359 |
rev | line source |
---|---|
1 | 1 #!/usr/bin/perl |
2 | |
3 # load modules | |
4 use Getopt::Std; | |
5 | |
6 ########################## | |
7 # COMMAND LINE ARGUMENTS # | |
8 ########################## | |
9 # v = (v)cf file to load | |
10 # V = (V)CF file encoded id | |
11 # u = (u)ser email from galaxy | |
12 # n = sample (n)ame | |
13 # a = sample (a)nnotation | |
14 # g = sample (g)ender | |
15 # o = (o)utput file (simple text file) | |
16 # b = (b)am file (optional) | |
17 # B = (B)am index , needed if b is specified | |
18 # c = encoded id of bam file (optional) | |
19 # C = encoded id of Bam index , needed if b is specified => NOT POSSIBLE YET, NEEDS INDEXING ON VARIANTDB SERVER ! | |
20 # S = (S)erver address to send data to. | |
21 # R = (R)oot of galaxy web server (/home/galaxyuser/galaxy-dist) | |
22 # H = (H)ost of the galaxy web server (http://my.galaxy.server/galaxy/) | |
7
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
23 # F = (F)ormat of input file |
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
24 getopts('v:u:n:a:g:o:b:B:V:c:S:R:H:F:', \%opts); # option are in %opts |
1 | 25 |
2
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
26 $|++; |
1 | 27 ################# |
28 ## CHECK INPUT ## | |
29 ################# | |
30 if (!exists($opts{'v'})) { | |
31 die('No VCF File Specified'); | |
32 } | |
33 if (!-e $opts{'v'}) { | |
34 die('VCF File not found'); | |
35 } | |
36 if (!exists($opts{'u'})) { | |
37 die('No user specified'); | |
38 } | |
39 if (!exists($opts{'S'})) { | |
40 die('No VariantDB server specified'); | |
41 } | |
42 if (!exists($opts{'H'})) { | |
43 die('The Galaxy source-server is not specified'); | |
44 } | |
7
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
45 if (!exists($opts{'F'})) { |
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
46 die('VCF format not specified.'); |
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
47 } |
1 | 48 |
49 ################ | |
50 # open outfile # | |
51 ################ | |
52 open OUT, ">$opts{'o'}"; | |
53 | |
54 ############################### | |
55 ## TEST CONNECTION TO SERVER ## | |
56 ############################### | |
57 use LWP::UserAgent; | |
58 my $url = $opts{'S'}."/"; | |
59 $url =~ s/\/\/$/\//; | |
60 $url .= "cgi-bin/galaxy_communication.cgi"; | |
61 my $conn = LWP::UserAgent->new(); | |
2
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
62 $conn->timeout(1800); |
1 | 63 my $response = $conn->post( $url, {'HelloWorld' => 1} ); |
64 my $content = $response->decoded_content(); | |
65 | |
66 if ($content eq 'HelloGalaxy') { | |
67 print OUT "Testing connection to $opts{'S'} : OK.\n"; | |
68 } | |
69 else { | |
70 die("Could not connect to the specified server : $content"); | |
71 } | |
72 | |
73 | |
74 ################## | |
75 ## TEST USER ID ## | |
76 ################## | |
77 $email = $opts{'u'}; | |
78 my $response = $conn->post( $url, {'CheckUser' => $email} ); | |
79 my $content = $response->decoded_content(); | |
80 | |
81 if ($content eq 'OK') { | |
82 print OUT "Testing User-existence : OK.\n"; | |
83 } | |
84 else { | |
85 die("ERROR: $content"); | |
86 } | |
87 | |
5 | 88 print $opts{'H'}. " was specified as galaxy host\n"; |
89 | |
1 | 90 ############################################### |
91 ## SEND THE VCF AND BAM FILES FOR PROCESSING ## | |
92 ############################################### | |
93 # filepaths | |
94 my $vcfpath = $opts{'v'}; | |
95 my $bampath = $opts{'b'}; | |
96 my $baipath = $opts{'B'}; | |
7
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
97 # input VCF format |
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
98 $format = $opts{'F'}; |
1 | 99 # make output directory in (galaxy/static/) working dir |
100 my $rand = int(rand(1000)); | |
101 our $wd = $opts{'R'}."/static/VCF_parser.".$rand; #int(rand(1000)); | |
102 our $dd = $opts{'H'}."/static/VCF_parser.".$rand; | |
103 while (-d $wd) { | |
104 my $rand = int(rand(1000)); | |
105 $wd = $opts{'R'}."/static/VCF_parser.".$rand;#int(rand(1000)); | |
106 $dd = $opts{'H'}."/static/VCF_parser.".$rand; | |
107 | |
108 } | |
109 $result = system("mkdir $wd"); | |
110 | |
111 | |
112 ## link files | |
113 $vcfurl = "$dd/data.vcf"; | |
8 | 114 $vcfurl =~ s/\s//g; |
1 | 115 system ("ln -s $vcfpath $wd/data.vcf"); |
116 if (exists($opts{'b'})) { | |
117 $bamurl = "$dd/data.bam"; | |
8 | 118 $bamurl =~ s/\s//g; |
1 | 119 $bamidxurl = "$dd/data.bai"; |
8 | 120 $bamidxurl =~ s/\s//g; |
1 | 121 system ("ln -s $bampath $wd/data.bam"); |
122 system ("ln -s $baipath $wd/data.bai"); | |
123 } | |
124 $sample = $opts{'n'}; | |
125 $gender = $opts{'g'}; | |
126 # post form to the variantDB host. | |
127 if (exists($opts{'b'})) { | |
7
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
128 $response = $conn->post( $url, {'VCFurl1' => "$vcfurl", 'BAMurl1' => "$bamurl", 'BAIurl1' => "$bamidxurl",'storedata1' => 1, 'name1' => "$sample", 'gender1' => "$gender", 'User' => $email, 'GalaxyUpload' => 1,'Format1' => $format} ); |
1 | 129 } |
130 else { | |
7
04e3bba317f4
Added selection to differentiate in VCF format
geert-vandeweyer
parents:
5
diff
changeset
|
131 $response = $conn->post( $url, {'VCFurl1' => "$vcfurl", 'name1' => "$sample", 'gender1' => "$gender", 'User' => $email, 'GalaxyUpload' =>1, 'Format1' => $format } ); |
1 | 132 } |
133 my $content = $response->decoded_content(); | |
134 chomp($content); | |
2
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
135 ## check if upload went ok. |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
136 if (substr($content,0,2) ne 'OK') { |
1 | 137 die("ERROR: $content"); |
138 } | |
2
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
139 ## extract wd from content. |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
140 print OUT "Uploading datafiles to VariantDB : OK.\n"; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
141 $content =~ m/OK-(.+)$/; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
142 $rwd = $1; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
143 if ($rwd eq '') { |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
144 die("ERROR : No remote working directory provided to check status."); |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
145 } |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
146 ## now wait for the import to finish. |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
147 $status = 0; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
148 while ($status == 0) { |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
149 my $response = $conn->post( $url, {'CheckStatus' => 1,'rwd' => $rwd}) ; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
150 $content = $response->decoded_content(); |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
151 chomp($content); |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
152 if (substr($content,0,2) ne 'OK') { |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
153 die("ERROR: $content"); |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
154 } |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
155 $status = substr($content,3,1); |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
156 sleep 10; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
157 } |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
158 |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
159 ## Loading OK |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
160 # latest response : OK-1-Content |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
161 $content = substr($content,5); |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
162 print OUT "Processing Datafiles : OK.\n"; |
d03a63a57e82
New version, better following configuration guidelines
geert-vandeweyer
parents:
1
diff
changeset
|
163 print OUT "\n$content\n"; |
1 | 164 close OUT; |
165 | |
166 # clean up | |
167 system("rm -Rf '$wd'"); | |
168 |