Mercurial > repos > bgruening > bismark
comparison bismark_genome_preparation @ 3:91f07ff056ca draft
Uploaded
author | bgruening |
---|---|
date | Mon, 14 Apr 2014 16:43:14 -0400 |
parents | 62c6da72dd4a |
children |
comparison
equal
deleted
inserted
replaced
2:82814a8a2395 | 3:91f07ff056ca |
---|---|
31 my $path_to_bowtie; | 31 my $path_to_bowtie; |
32 my $multi_fasta; | 32 my $multi_fasta; |
33 my $single_fasta; | 33 my $single_fasta; |
34 my $bowtie2; | 34 my $bowtie2; |
35 | 35 |
36 my $bismark_version = 'v0.7.12'; | 36 my $bismark_version = 'v0.10.0'; |
37 | 37 |
38 GetOptions ('verbose' => \$verbose, | 38 GetOptions ('verbose' => \$verbose, |
39 'help' => \$help, | 39 'help' => \$help, |
40 'man' => \$man, | 40 'man' => \$man, |
41 'version' => \$version, | 41 'version' => \$version, |
42 'path_to_bowtie:s' => \$path_to_bowtie, | 42 'path_to_bowtie:s' => \$path_to_bowtie, |
43 'single_fasta' => \$single_fasta, | 43 'single_fasta' => \$single_fasta, |
44 'bowtie2' => \$bowtie2, | 44 'bowtie2' => \$bowtie2, |
45 ); | 45 ); |
46 | 46 |
47 my $genome_folder = shift @ARGV; # mandatory | |
48 my $CT_dir; | |
49 my $GA_dir; | |
50 | |
51 if ($help or $man){ | 47 if ($help or $man){ |
52 print_helpfile(); | 48 print_helpfile(); |
53 exit; | 49 exit; |
54 } | 50 } |
55 | 51 |
63 www.bioinformatics.babraham.ac.uk/projects/ | 59 www.bioinformatics.babraham.ac.uk/projects/ |
64 | 60 |
65 VERSION | 61 VERSION |
66 exit; | 62 exit; |
67 } | 63 } |
64 | |
65 my $genome_folder = shift @ARGV; # mandatory | |
66 | |
67 # Ensuring a genome folder has been specified | |
68 if ($genome_folder){ | |
69 unless ($genome_folder =~ /\/$/){ | |
70 $genome_folder =~ s/$/\//; | |
71 } | |
72 $verbose and print "Path to genome folder specified as: $genome_folder\n"; | |
73 chdir $genome_folder or die "Could't move to directory $genome_folder. Make sure the directory exists! $!"; | |
74 | |
75 # making the genome folder path abolsolute so it won't break if the path was specified relative | |
76 $genome_folder = getcwd; | |
77 unless ($genome_folder =~ /\/$/){ | |
78 $genome_folder =~ s/$/\//; | |
79 } | |
80 } | |
81 else{ | |
82 die "Please specify a genome folder to be used for bisulfite conversion\n\n"; | |
83 } | |
84 | |
85 | |
86 my $CT_dir; | |
87 my $GA_dir; | |
88 | |
68 | 89 |
69 if ($single_fasta){ | 90 if ($single_fasta){ |
70 print "Writing individual genomes out into single-entry fasta files (one per chromosome)\n\n"; | 91 print "Writing individual genomes out into single-entry fasta files (one per chromosome)\n\n"; |
71 $multi_fasta = 0; | 92 $multi_fasta = 0; |
72 } | 93 } |
307 | 328 |
308 sub create_bisulfite_genome_folders{ | 329 sub create_bisulfite_genome_folders{ |
309 | 330 |
310 $verbose and print "Bismark Genome Preparation - Step I: Preparing folders\n\n"; | 331 $verbose and print "Bismark Genome Preparation - Step I: Preparing folders\n\n"; |
311 | 332 |
312 # Ensuring a genome folder has been specified | |
313 if ($genome_folder){ | |
314 unless ($genome_folder =~ /\/$/){ | |
315 $genome_folder =~ s/$/\//; | |
316 } | |
317 $verbose and print "Path to genome folder specified: $genome_folder\n"; | |
318 chdir $genome_folder or die "Could't move to directory $genome_folder. Make sure the directory exists! $!"; | |
319 | |
320 # making the genome folder path abolsolute so it won't break if the path was specified relative | |
321 $genome_folder = getcwd; | |
322 unless ($genome_folder =~ /\/$/){ | |
323 $genome_folder =~ s/$/\//; | |
324 } | |
325 } | |
326 | |
327 else{ | |
328 $verbose and print "Genome folder was not provided as argument "; | |
329 while (1){ | |
330 print "Please specify a genome folder to be bisulfite converted:\n"; | |
331 $genome_folder = <STDIN>; | |
332 chomp $genome_folder; | |
333 | |
334 # adding a trailing slash unless already present | |
335 unless ($genome_folder =~ /\/$/){ | |
336 $genome_folder =~ s/$/\//; | |
337 } | |
338 if (chdir $genome_folder){ | |
339 last; | |
340 } | |
341 else{ | |
342 warn "Could't move to directory $genome_folder! $!"; | |
343 } | |
344 } | |
345 } | |
346 | |
347 if ($path_to_bowtie){ | 333 if ($path_to_bowtie){ |
348 unless ($path_to_bowtie =~ /\/$/){ | 334 unless ($path_to_bowtie =~ /\/$/){ |
349 $path_to_bowtie =~ s/$/\//; | 335 $path_to_bowtie =~ s/$/\//; |
350 } | 336 } |
351 if (chdir $path_to_bowtie){ | 337 if (chdir $path_to_bowtie){ |
374 | 360 |
375 unless (@filenames){ | 361 unless (@filenames){ |
376 die "The specified genome folder $genome_folder does not contain any sequence files in FastA format (with .fa or .fasta file extensions\n"; | 362 die "The specified genome folder $genome_folder does not contain any sequence files in FastA format (with .fa or .fasta file extensions\n"; |
377 } | 363 } |
378 | 364 |
379 warn "Bisulfite Genome Indexer version $bismark_version (last modified 17 Nov 2011)\n\n"; | 365 warn "Bisulfite Genome Indexer version $bismark_version (last modified 19 Sept 2013)\n\n"; |
380 sleep (3); | 366 sleep (3); |
381 | 367 |
382 # creating a directory inside the genome folder to store the bisfulfite genomes unless it already exists | 368 # creating a directory inside the genome folder to store the bisfulfite genomes unless it already exists |
383 my $bisulfite_dir = "${genome_folder}Bisulfite_Genome/"; | 369 my $bisulfite_dir = "${genome_folder}Bisulfite_Genome/"; |
384 unless (-d $bisulfite_dir){ | 370 unless (-d $bisulfite_dir){ |
385 mkdir $bisulfite_dir or die "Unable to create directory $bisulfite_dir $!\n"; | 371 mkdir $bisulfite_dir or die "Unable to create directory $bisulfite_dir $!\n"; |
386 $verbose and print "Created Bisulfite Genome folder $bisulfite_dir\n"; | 372 $verbose and print "Created Bisulfite Genome folder $bisulfite_dir\n"; |
387 } | 373 } |
388 else{ | 374 else{ |
389 while (1){ | 375 print "\nA directory called $bisulfite_dir already exists. Bisulfite converted sequences and/or already existing Bowtie (1 or 2) indices will be overwritten!\n\n"; |
390 print "\nA directory called $bisulfite_dir already exists. Bisulfite converted sequences and/or already existing Bowtie (1 or 2) indexes might be overwritten!\nDo you want to continue anyway?\t"; | 376 sleep(5); |
391 my $proceed = <STDIN>; | 377 } |
392 chomp $proceed; | |
393 if ($proceed =~ /^y/i ){ | |
394 last; | |
395 } | |
396 elsif ($proceed =~ /^n/i){ | |
397 die "Terminated by user\n\n"; | |
398 } | |
399 } | |
400 } | |
401 | |
402 ### as of version 0.6.0 the Bismark indexer will no longer delete the Bisulfite_Genome directory if it was present already, since it could store the Bowtie 1 or 2 indexes already | |
403 # removing any existing files and subfolders in the bisulfite directory (the specified directory won't be deleted) | |
404 # rmtree($bisulfite_dir, {verbose => 1,keep_root => 1}); | |
405 # unless (-d $bisulfite_dir){ # had to add this after changing remove_tree to rmtree // suggested by Samantha Cooper @ Illumina | |
406 # mkdir $bisulfite_dir or die "Unable to create directory $bisulfite_dir $!\n"; | |
407 # } | |
408 # } | |
409 | 378 |
410 chdir $bisulfite_dir or die "Unable to move to $bisulfite_dir\n"; | 379 chdir $bisulfite_dir or die "Unable to move to $bisulfite_dir\n"; |
411 $CT_dir = "${bisulfite_dir}CT_conversion/"; | 380 $CT_dir = "${bisulfite_dir}CT_conversion/"; |
412 $GA_dir = "${bisulfite_dir}GA_conversion/"; | 381 $GA_dir = "${bisulfite_dir}GA_conversion/"; |
413 | 382 |
438 This script is supposed to convert a specified reference genome into two different bisulfite | 407 This script is supposed to convert a specified reference genome into two different bisulfite |
439 converted versions and index them for alignments with Bowtie 1 (default), or Bowtie 2. The first | 408 converted versions and index them for alignments with Bowtie 1 (default), or Bowtie 2. The first |
440 bisulfite genome will have all Cs converted to Ts (C->T), and the other one will have all Gs | 409 bisulfite genome will have all Cs converted to Ts (C->T), and the other one will have all Gs |
441 converted to As (G->A). Both bisulfite genomes will be stored in subfolders within the reference | 410 converted to As (G->A). Both bisulfite genomes will be stored in subfolders within the reference |
442 genome folder. Once the bisulfite conversion has been completed the program will fork and launch | 411 genome folder. Once the bisulfite conversion has been completed the program will fork and launch |
443 two simultaneous instances of the bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). Be aware | 412 two simultaneous instances of the Bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). Be aware |
444 that the indexing process can take up to several hours; this will mainly depend on genome size | 413 that the indexing process can take up to several hours; this will mainly depend on genome size |
445 and system resources. | 414 and system resources. |
446 | 415 |
447 | 416 |
448 | 417 |
449 | |
450 The following is a brief description of command line options and arguments to control the | 418 The following is a brief description of command line options and arguments to control the |
451 Bismark Genome Preparation script: | 419 Bismark Genome Preparation: |
452 | 420 |
453 | 421 |
454 USAGE: bismark_genome_preparation [options] <arguments> | 422 USAGE: bismark_genome_preparation [options] <arguments> |
455 | 423 |
456 | 424 |
460 | 428 |
461 --version Displays version information and exits. | 429 --version Displays version information and exits. |
462 | 430 |
463 --verbose Print verbose output for more details or debugging. | 431 --verbose Print verbose output for more details or debugging. |
464 | 432 |
465 --path_to_bowtie The full path to the Bowtie 1 or Bowtie 2 installation on your system.If | 433 --path_to_bowtie </../> The full path to the Bowtie 1 or Bowtie 2 installation on your system |
466 the path </../../> is not provided as an option you will be prompted for it. | 434 (depending on which aligner/indexer you intend to use). Unless this path |
435 is specified it is assumed that Bowtie is in the PATH. | |
467 | 436 |
468 --bowtie2 This will create bisulfite indexes for Bowtie 2. (Default: Bowtie 1). | 437 --bowtie2 This will create bisulfite indexes for Bowtie 2. (Default: Bowtie 1). |
469 | 438 |
470 --single_fasta Instruct the Bismark Indexer to write the converted genomes into | 439 --single_fasta Instruct the Bismark Indexer to write the converted genomes into |
471 single-entry FastA files instead of making one multi-FastA file (MFA) | 440 single-entry FastA files instead of making one multi-FastA file (MFA) |
479 | 448 |
480 | 449 |
481 ARGUMENTS: | 450 ARGUMENTS: |
482 | 451 |
483 <path_to_genome_folder> The path to the folder containing the genome to be bisulfite converted. | 452 <path_to_genome_folder> The path to the folder containing the genome to be bisulfite converted. |
484 At the current time Bismark Genome Preparation expects one or more fastA | 453 The Bismark Genome Preparation expects one or more fastA files in the folder |
485 files in the folder (with the file extension: .fa or .fasta). If the path | 454 (with the file extension: .fa or .fasta). Specifying this path is mandatory. |
486 is not provided as an argument you will be prompted for it. | 455 |
487 | 456 |
488 | 457 This script was last modified on 19 Sept 2013. |
489 | |
490 This script was last modified on 18 Nov 2011. | |
491 HOW_TO | 458 HOW_TO |
492 } | 459 } |