annotate Roary/lib/Bio/Roary/CommandLine/ParallelAllAgainstAllBlastp.pm @ 3:e95344f6dfc5 draft default tip

Uploaded
author dereeper
date Fri, 12 Nov 2021 16:32:26 +0000
parents c47a5f61bc9f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
1 undef $VERSION;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
2 package Bio::Roary::CommandLine::ParallelAllAgainstAllBlastp;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
3
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
4 # ABSTRACT: Take in a FASTA file of proteins and blast against itself
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
5
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
6 =head1 SYNOPSIS
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
7
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
8 Take in a FASTA file of proteins and blast against itself
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
9
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
10 =cut
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
11
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
12 use Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
13 use Getopt::Long qw(GetOptionsFromArray);
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
14 use Bio::Roary::ParallelAllAgainstAllBlast;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
15 use Bio::Roary::CombinedProteome;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
16 use Bio::Roary::PrepareInputFiles;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
17 extends 'Bio::Roary::CommandLine::Common';
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
18
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Constructor inputs: the raw command-line arguments (parsed in BUILD) and the
# name of the invoking script. Both must be supplied by the caller.
has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );

# Set when -h/--help is given; run() then prints the usage text and stops.
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 );

# Settings filled in from the command line by BUILD. The defaults below apply
# whenever the matching switch is not supplied.
has 'fasta_files'      => ( is => 'rw', isa => 'ArrayRef' );
has 'output_filename'  => ( is => 'rw', isa => 'Str',  default => 'blast_results' );
has 'job_runner'       => ( is => 'rw', isa => 'Str',  default => 'Local' );
has 'cpus'             => ( is => 'rw', isa => 'Int',  default => 1 );
has 'makeblastdb_exec' => ( is => 'rw', isa => 'Str',  default => 'makeblastdb' );
has 'blastp_exec'      => ( is => 'rw', isa => 'Str',  default => 'blastp' );
has 'verbose'          => ( is => 'rw', isa => 'Bool', default => 0 );

# Problem detected while parsing arguments in BUILD; left unset when the
# arguments are fine. run() reports it and aborts with the usage text.
has '_error_message' => ( is => 'rw', isa => 'Str' );
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
32
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Moose post-construction hook.
#
# Parses the command-line switches held in 'args', copies every switch that
# was actually supplied onto the matching attribute, and treats whatever is
# left in 'args' afterwards as the list of input FASTA files.
#
# Nothing is thrown from here. Any problem (unparseable switches, no input
# file, an input file that does not exist) is stored in '_error_message' so
# that run() can print it together with the usage text.
sub BUILD {
    my ($self) = @_;

    my ( $output_filename, $job_runner, $makeblastdb_exec, $blastp_exec, $help, $cpus, $verbose );

    # GetOptionsFromArray removes the switches it recognises from the array
    # and returns false when it meets an unknown or malformed one.
    my $options_parsed = GetOptionsFromArray(
        $self->args,
        'o|output=s'           => \$output_filename,
        'j|job_runner=s'       => \$job_runner,
        'm|makeblastdb_exec=s' => \$makeblastdb_exec,
        'b|blastp_exec=s'      => \$blastp_exec,
        'p|processors=i'       => \$cpus,
        'v|verbose'            => \$verbose,
        'h|help'               => \$help,
    );

    if ( !$options_parsed ) {
        # Previously a bad switch was only warned about by Getopt::Long and
        # the command carried on; now it stops run() with the usage text.
        $self->_error_message("Error: Invalid command line options");
    }
    elsif ( @{ $self->args } == 0 ) {
        $self->_error_message("Error: You need to provide a FASTA file");
    }

    if ( defined($verbose) ) {
        $self->verbose($verbose);

        # NOTE(review): 'logger' is not defined in this file (presumably it
        # comes from the parent class) and 10000 looks like a "most verbose"
        # log level - confirm against the logger in use.
        $self->logger->level(10000);
    }

    # Only override an attribute's default when the switch was really given.
    $self->help($help)                         if ( defined($help) );
    $self->output_filename($output_filename)   if ( defined($output_filename) );
    $self->makeblastdb_exec($makeblastdb_exec) if ( defined($makeblastdb_exec) );
    $self->blastp_exec($blastp_exec)           if ( defined($blastp_exec) );
    $self->job_runner($job_runner)             if ( defined($job_runner) );
    $self->cpus($cpus)                         if ( defined($cpus) );

    # More than one CPU always selects the 'Parallel' job runner, even when a
    # different runner was requested with -j.
    if ( $self->cpus > 1 ) {
        $self->job_runner('Parallel');
    }

    # Report the first input file that is missing.
    for my $filename ( @{ $self->args } ) {
        if ( !-e $filename ) {
            $self->_error_message("Error: Cant access file $filename");
            last;
        }
    }
    $self->fasta_files( $self->args );

}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
76
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Entry point of the command.
#
# Dies with the usage text when help was requested or when BUILD recorded an
# argument error (the error itself is printed first). Otherwise, when more
# than one FASTA file was given, the files are merged into
# 'combined_files.fa'; a single file is used as it is. The resulting file is
# then handed to Bio::Roary::ParallelAllAgainstAllBlast.
sub run {
    my $self = shift;

    die $self->usage_text if $self->help;

    if ( defined $self->_error_message ) {
        print $self->_error_message . "\n";
        die $self->usage_text;
    }

    my $input_preparer = Bio::Roary::PrepareInputFiles->new( input_files => $self->fasta_files, );

    # A single input file is blasted directly; several are combined first.
    my $blast_input = $self->fasta_files->[0];
    if ( scalar @{ $self->fasta_files } > 1 ) {
        $blast_input = 'combined_files.fa';
        $self->logger->info("Combining protein files");

        my %combine_args = (
            proteome_files                 => $input_preparer->fasta_files,
            output_filename                => $blast_input,
            maximum_percentage_of_unknowns => 5.0,
            apply_unknowns_filter          => 0,
        );
        Bio::Roary::CombinedProteome->new(%combine_args)->create_combined_proteome_file;
    }

    $self->logger->info("Beginning all against all blast");

    my %blast_args = (
        fasta_file              => $blast_input,
        blast_results_file_name => $self->output_filename,
        job_runner              => $self->job_runner,
        cpus                    => $self->cpus,
        makeblastdb_exec        => $self->makeblastdb_exec,
        blastp_exec             => $self->blastp_exec,
        logger                  => $self->logger,
    );
    my $blast_job = Bio::Roary::ParallelAllAgainstAllBlast->new(%blast_args);
    return $blast_job->run();
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
120
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
# Returns the help text for the command as a single string.
#
# The option table lists every switch that BUILD parses, including
# -j (job runner), which BUILD accepts but the text did not mention before.
sub usage_text {
    my ($self) = @_;

    return <<USAGE;
Usage: parallel_all_against_all_blastp [options] file.faa
Take in a FASTA file of proteins and blast against itself

Options: -p INT    number of threads [1]
         -o STR    output filename for blast results [blast_results]
         -j STR    job runner [Local], always Parallel when -p is above 1
         -m STR    makeblastdb executable [makeblastdb]
         -b STR    blastp executable [blastp]
         -v        verbose output to STDOUT
         -h        this help message

For further info see: http://sanger-pathogens.github.io/Roary/
USAGE
}
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
138
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
139 __PACKAGE__->meta->make_immutable;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
140 no Moose;
c47a5f61bc9f Uploaded
dereeper
parents:
diff changeset
141 1;