Mercurial > repos > bgruening > bismark
diff bismark_genome_preparation @ 3:91f07ff056ca draft
Uploaded
author | bgruening |
---|---|
date | Mon, 14 Apr 2014 16:43:14 -0400 |
parents | 62c6da72dd4a |
children |
line wrap: on
line diff
--- a/bismark_genome_preparation Wed Aug 21 05:19:54 2013 -0400 +++ b/bismark_genome_preparation Mon Apr 14 16:43:14 2014 -0400 @@ -33,7 +33,7 @@ my $single_fasta; my $bowtie2; -my $bismark_version = 'v0.7.12'; +my $bismark_version = 'v0.10.0'; GetOptions ('verbose' => \$verbose, 'help' => \$help, @@ -44,10 +44,6 @@ 'bowtie2' => \$bowtie2, ); -my $genome_folder = shift @ARGV; # mandatory -my $CT_dir; -my $GA_dir; - if ($help or $man){ print_helpfile(); exit; @@ -66,6 +62,31 @@ exit; } +my $genome_folder = shift @ARGV; # mandatory + +# Ensuring a genome folder has been specified +if ($genome_folder){ + unless ($genome_folder =~ /\/$/){ + $genome_folder =~ s/$/\//; + } + $verbose and print "Path to genome folder specified as: $genome_folder\n"; + chdir $genome_folder or die "Could't move to directory $genome_folder. Make sure the directory exists! $!"; + + # making the genome folder path abolsolute so it won't break if the path was specified relative + $genome_folder = getcwd; + unless ($genome_folder =~ /\/$/){ + $genome_folder =~ s/$/\//; + } +} +else{ + die "Please specify a genome folder to be used for bisulfite conversion\n\n"; +} + + +my $CT_dir; +my $GA_dir; + + if ($single_fasta){ print "Writing individual genomes out into single-entry fasta files (one per chromosome)\n\n"; $multi_fasta = 0; @@ -309,41 +330,6 @@ $verbose and print "Bismark Genome Preparation - Step I: Preparing folders\n\n"; - # Ensuring a genome folder has been specified - if ($genome_folder){ - unless ($genome_folder =~ /\/$/){ - $genome_folder =~ s/$/\//; - } - $verbose and print "Path to genome folder specified: $genome_folder\n"; - chdir $genome_folder or die "Could't move to directory $genome_folder. Make sure the directory exists! $!"; - - # making the genome folder path abolsolute so it won't break if the path was specified relative - $genome_folder = getcwd; - unless ($genome_folder =~ /\/$/){ - $genome_folder =~ s/$/\//; - } - } - - else{ - $verbose and print "Genome folder was not provided as argument "; - while (1){ - print "Please specify a genome folder to be bisulfite converted:\n"; - $genome_folder = <STDIN>; - chomp $genome_folder; - - # adding a trailing slash unless already present - unless ($genome_folder =~ /\/$/){ - $genome_folder =~ s/$/\//; - } - if (chdir $genome_folder){ - last; - } - else{ - warn "Could't move to directory $genome_folder! $!"; - } - } - } - if ($path_to_bowtie){ unless ($path_to_bowtie =~ /\/$/){ $path_to_bowtie =~ s/$/\//; @@ -376,7 +362,7 @@ die "The specified genome folder $genome_folder does not contain any sequence files in FastA format (with .fa or .fasta file extensions\n"; } - warn "Bisulfite Genome Indexer version $bismark_version (last modified 17 Nov 2011)\n\n"; + warn "Bisulfite Genome Indexer version $bismark_version (last modified 19 Sept 2013)\n\n"; sleep (3); # creating a directory inside the genome folder to store the bisfulfite genomes unless it already exists @@ -386,27 +372,10 @@ $verbose and print "Created Bisulfite Genome folder $bisulfite_dir\n"; } else{ - while (1){ - print "\nA directory called $bisulfite_dir already exists. Bisulfite converted sequences and/or already existing Bowtie (1 or 2) indexes might be overwritten!\nDo you want to continue anyway?\t"; - my $proceed = <STDIN>; - chomp $proceed; - if ($proceed =~ /^y/i ){ - last; - } - elsif ($proceed =~ /^n/i){ - die "Terminated by user\n\n"; - } - } + print "\nA directory called $bisulfite_dir already exists. Bisulfite converted sequences and/or already existing Bowtie (1 or 2) indices will be overwritten!\n\n"; + sleep(5); } - ### as of version 0.6.0 the Bismark indexer will no longer delete the Bisulfite_Genome directory if it was present already, since it could store the Bowtie 1 or 2 indexes already - # removing any existing files and subfolders in the bisulfite directory (the specified directory won't be deleted) - # rmtree($bisulfite_dir, {verbose => 1,keep_root => 1}); - # unless (-d $bisulfite_dir){ # had to add this after changing remove_tree to rmtree // suggested by Samantha Cooper @ Illumina - # mkdir $bisulfite_dir or die "Unable to create directory $bisulfite_dir $!\n"; - # } - # } - chdir $bisulfite_dir or die "Unable to move to $bisulfite_dir\n"; $CT_dir = "${bisulfite_dir}CT_conversion/"; $GA_dir = "${bisulfite_dir}GA_conversion/"; @@ -440,15 +409,14 @@ bisulfite genome will have all Cs converted to Ts (C->T), and the other one will have all Gs converted to As (G->A). Both bisulfite genomes will be stored in subfolders within the reference genome folder. Once the bisulfite conversion has been completed the program will fork and launch -two simultaneous instances of the bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). Be aware +two simultaneous instances of the Bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). Be aware that the indexing process can take up to several hours; this will mainly depend on genome size and system resources. - The following is a brief description of command line options and arguments to control the -Bismark Genome Preparation script: +Bismark Genome Preparation: USAGE: bismark_genome_preparation [options] <arguments> @@ -462,8 +430,9 @@ --verbose Print verbose output for more details or debugging. ---path_to_bowtie The full path to the Bowtie 1 or Bowtie 2 installation on your system.If - the path </../../> is not provided as an option you will be prompted for it. +--path_to_bowtie </../> The full path to the Bowtie 1 or Bowtie 2 installation on your system + (depending on which aligner/indexer you intend to use). Unless this path + is specified it is assumed that Bowtie is in the PATH. --bowtie2 This will create bisulfite indexes for Bowtie 2. (Default: Bowtie 1). @@ -481,12 +450,10 @@ ARGUMENTS: <path_to_genome_folder> The path to the folder containing the genome to be bisulfite converted. - At the current time Bismark Genome Preparation expects one or more fastA - files in the folder (with the file extension: .fa or .fasta). If the path - is not provided as an argument you will be prompted for it. + The Bismark Genome Preparation expects one or more fastA files in the folder + (with the file extension: .fa or .fasta). Specifying this path is mandatory. - -This script was last modified on 18 Nov 2011. +This script was last modified on 19 Sept 2013. HOW_TO }