Mercurial > repos > dereeper > roary_plots
comparison Roary/lib/Bio/Roary.pm @ 0:c47a5f61bc9f draft
Uploaded
author | dereeper |
---|---|
date | Fri, 14 May 2021 20:27:06 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c47a5f61bc9f |
---|---|
package Bio::Roary;

# ABSTRACT: Create a pan genome

=head1 SYNOPSIS

Create a pan genome

   use Bio::Roary;

   my $obj = Bio::Roary->new(
       fasta_files => \@fasta_files,
       input_files => \@input_files,
   );
   $obj->run();

=cut

use Moose;
use File::Copy;
use Bio::Perl;
use Bio::Roary::ParallelAllAgainstAllBlast;
use Bio::Roary::CombinedProteome;
use Bio::Roary::External::Cdhit;
use Bio::Roary::External::Mcl;
use Bio::Roary::InflateClusters;
use Bio::Roary::AnalyseGroups;
use Bio::Roary::GroupLabels;
use Bio::Roary::AnnotateGroups;
use Bio::Roary::GroupStatistics;
use Bio::Roary::Output::GroupsMultifastasNucleotide;
use Bio::Roary::External::PostAnalysis;
use Bio::Roary::FilterFullClusters;
use Bio::Roary::External::IterativeCdhit;
use Bio::Roary::Output::BlastIdentityFrequency;

# Required inputs. Both are handed on unchanged to the later pipeline stages;
# the number of input_files is also passed to the iterative cd-hit step.
has 'fasta_files'                 => ( is => 'rw', isa => 'ArrayRef', required => 1 );
has 'input_files'                 => ( is => 'rw', isa => 'ArrayRef', required => 1 );

# Output file names, forwarded to the post-analysis stage.
# NOTE: 'output_pan_geneome_filename' is misspelled, but the name is part of
# the public interface (and of PostAnalysis' constructor), so it must stay.
has 'output_filename'             => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str', default => 'pan_genome.fa' );
has 'output_statistics_filename'  => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );

# Execution settings and external executables.
has 'job_runner'                  => ( is => 'rw', isa => 'Str', default => 'Local' );
has 'cpus'                        => ( is => 'ro', isa => 'Int', default => 1 );
has 'makeblastdb_exec'            => ( is => 'rw', isa => 'Str', default => 'makeblastdb' );
has 'blastp_exec'                 => ( is => 'rw', isa => 'Str', default => 'blastp' );
has 'mcxdeblast_exec'             => ( is => 'ro', isa => 'Str', default => 'mcxdeblast' );
has 'mcl_exec'                    => ( is => 'ro', isa => 'Str', default => 'mcl' );

# Analysis parameters and behaviour switches.
has 'perc_identity'               => ( is => 'ro', isa => 'Num',  default => 98 );
has 'dont_delete_files'           => ( is => 'ro', isa => 'Bool', default => 0 );
has 'dont_create_rplots'          => ( is => 'rw', isa => 'Bool', default => 0 );
has 'dont_split_groups'           => ( is => 'ro', isa => 'Bool', default => 0 );
has 'verbose_stats'               => ( is => 'rw', isa => 'Bool', default => 0 );
has 'translation_table'           => ( is => 'rw', isa => 'Int',  default => 11 );
has 'group_limit'                 => ( is => 'rw', isa => 'Num',  default => 50000 );
has 'core_definition'             => ( is => 'rw', isa => 'Num',  default => 1.0 );
has 'verbose'                     => ( is => 'rw', isa => 'Bool', default => 0 );
has 'mafft'                       => ( is => 'ro', isa => 'Bool', default => 0 );
has 'inflation_value'             => ( is => 'rw', isa => 'Num',  default => 1.5 );
has 'allow_paralogs'              => ( is => 'rw', isa => 'Bool', default => 0 );

has 'output_multifasta_files'     => ( is => 'ro', isa => 'Bool', default => 0 );

# run()
#
# Drives the whole pipeline, in order: combine the proteomes into one file,
# run iterative cd-hit, run the all-against-all blast, write the blast
# identity frequency output, cluster with MCL and finally start the
# post-analysis stage.
#
# Intermediate files use fixed names (all starting with '_') that are
# relative paths, i.e. resolved against the current working directory.
#
# Returns whatever Bio::Roary::External::PostAnalysis->run() returns.
sub run {
    my ($self) = @_;

    # Fixed names of the intermediate files shared between the stages below.
    my $output_combined_filename        = '_combined_files';
    my $output_cd_hit_filename          = '_clustered';
    my $output_blast_results_filename   = '_blast_results';
    my $output_mcl_filename             = '_uninflated_mcl_groups';
    my $output_filtered_clustered_fasta = '_clustered_filtered.fa';
    my $cdhit_groups                    = $output_combined_filename . '.groups';

    # Remove a groups file left over from an earlier run before starting.
    # The flag is a Moose Bool, so it is tested for truth rather than with
    # '== 1': undef and '' are valid false values and would trigger warnings
    # in a numeric comparison.
    unlink($cdhit_groups) unless $self->dont_delete_files;

    print "Combine proteins into a single file\n" if ( $self->verbose );
    my $combine_fasta_files = Bio::Roary::CombinedProteome->new(
        proteome_files  => $self->fasta_files,
        output_filename => $output_combined_filename,
    );
    $combine_fasta_files->create_combined_proteome_file;

    # Scalar assignment: the count of input files, not the list itself.
    my $number_of_input_files = @{ $self->input_files };

    print "Iteratively run cd-hit\n" if ( $self->verbose );
    my $iterative_cdhit = Bio::Roary::External::IterativeCdhit->new(
        output_cd_hit_filename          => $output_cd_hit_filename,
        output_combined_filename        => $output_combined_filename,
        number_of_input_files           => $number_of_input_files,
        output_filtered_clustered_fasta => $output_filtered_clustered_fasta,
        job_runner                      => $self->job_runner,
        cpus                            => $self->cpus
    );
    $iterative_cdhit->run();

    print "Parallel all against all blast\n" if ( $self->verbose );
    my $blast_obj = Bio::Roary::ParallelAllAgainstAllBlast->new(
        fasta_file              => $output_cd_hit_filename,
        blast_results_file_name => $output_blast_results_filename,
        job_runner              => $self->job_runner,
        cpus                    => $self->cpus,
        makeblastdb_exec        => $self->makeblastdb_exec,
        blastp_exec             => $self->blastp_exec,
        perc_identity           => $self->perc_identity
    );
    $blast_obj->run();

    # Must happen before the blast results file is deleted further down.
    my $blast_identity_frequency_obj = Bio::Roary::Output::BlastIdentityFrequency->new(
        input_filename => $output_blast_results_filename,
    );
    $blast_identity_frequency_obj->create_file();

    print "Cluster with MCL\n" if ( $self->verbose );
    my $mcl = Bio::Roary::External::Mcl->new(
        blast_results   => $output_blast_results_filename,
        mcxdeblast_exec => $self->mcxdeblast_exec,
        mcl_exec        => $self->mcl_exec,
        job_runner      => $self->job_runner,
        cpus            => $self->cpus,
        inflation_value => $self->inflation_value,
        output_file     => $output_mcl_filename
    );
    $mcl->run();

    # Both consumers of the blast results (identity frequency, MCL) are done.
    unlink($output_blast_results_filename) unless $self->dont_delete_files;

    # NOTE(review): the post-analysis job runner is hard-coded to 'Local'
    # instead of using $self->job_runner - presumably deliberate; confirm.
    my $post_analysis = Bio::Roary::External::PostAnalysis->new(
        job_runner                  => 'Local',
        cpus                        => $self->cpus,
        fasta_files                 => $self->fasta_files,
        input_files                 => $self->input_files,
        output_filename             => $self->output_filename,
        output_pan_geneome_filename => $self->output_pan_geneome_filename,
        output_statistics_filename  => $self->output_statistics_filename,
        clusters_filename           => $output_cd_hit_filename . '.clstr',
        dont_wait                   => 1,
        output_multifasta_files     => $self->output_multifasta_files,
        dont_delete_files           => $self->dont_delete_files,
        dont_create_rplots          => $self->dont_create_rplots,
        dont_split_groups           => $self->dont_split_groups,
        verbose_stats               => $self->verbose_stats,
        translation_table           => $self->translation_table,
        group_limit                 => $self->group_limit,
        core_definition             => $self->core_definition,
        verbose                     => $self->verbose,
        mafft                       => $self->mafft,
        allow_paralogs              => $self->allow_paralogs,
    );

    # Explicit return of what used to be the implicit last-statement value.
    return $post_analysis->run();
}


no Moose;
__PACKAGE__->meta->make_immutable;

1;