annotate pfamScan/pfam_scan.pl @ 0:68a3648c7d91 draft default tip

Uploaded
author matteoc
date Thu, 22 Dec 2016 04:45:31 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1 #!/usr/bin/env perl
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
2
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
3 # $Id: pfam_scan.pl 9045 2015-05-26 09:09:52Z rdf $
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
4
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
5 use strict;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
6 use warnings;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
7
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
8 BEGIN {push @INC,"/home/inmare/galaxy/tools/pfamScan"}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
9 use Bio::Pfam::Scan::PfamScan;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
10 use Getopt::Long;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
11
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
12 my $VERSION = "1.5";
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
13
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
14 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
15
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
16 # get the user options
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
17 my ( $outfile, $e_seq, $e_dom, $b_seq, $b_dom, $dir,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
18 $clan_overlap, $fasta, $align, $help, $as, $pfamB,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
19 $json, $only_pfamB, $cpu, $translate );
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
20 GetOptions( 'help' => \$help,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
21 'outfile=s' => \$outfile,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
22 'e_seq=f' => \$e_seq,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
23 'e_dom=f' => \$e_dom,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
24 'b_seq=f' => \$b_seq,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
25 'b_dom=f' => \$b_dom,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
26 'dir=s' => \$dir,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
27 'clan_overlap' => \$clan_overlap,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
28 'fasta=s' => \$fasta,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
29 'align' => \$align,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
30 'h' => \$help,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
31 'as' => \$as,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
32 'pfamB' => \$pfamB,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
33 'only_pfamB' => \$only_pfamB,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
34 'json:s' => \$json,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
35 'cpu=i' => \$cpu,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
36 'translate:s' => \$translate
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
37 );
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
38
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
39 help() if $help;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
40 help() unless ( $dir and $fasta ); # required options
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
41
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
42 my $pfamA;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
43 if ( $only_pfamB or $pfamB ) {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
44 die qq(FATAL: As of release 28.0, Pfam no longer produces Pfam-B. The -pfamB and -only_pfamB options are now obsolete.\n);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
45 $pfamB=1;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
46 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
47 else {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
48 $pfamA=1;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
49 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
50
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
51 my @hmmlib;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
52 push @hmmlib, 'Pfam-A.hmm' if $pfamA;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
53 push @hmmlib, 'Pfam-B.hmm' if $pfamB;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
54
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
55 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
56
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
57 # check the input parameters
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
58
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
59 die qq(FATAL: must specify both "-dir" and "-fasta")
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
60 unless ( defined $dir and defined $fasta );
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
61
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
62 die qq(FATAL: can't find directory "$dir")
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
63 unless -d $dir;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
64
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
65 die qq(FATAL: can't find file "$fasta")
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
66 unless -s $fasta;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
67
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
68 foreach my $hmmlib ( @hmmlib ) {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
69 die qq(FATAL: can't find "$hmmlib" and/or "$hmmlib" binaries and/or "$hmmlib.dat" file in "$dir")
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
70 unless ( -s "$dir/$hmmlib" and
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
71 -s "$dir/$hmmlib.h3f" and
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
72 -s "$dir/$hmmlib.h3i" and
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
73 -s "$dir/$hmmlib.h3m" and
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
74 -s "$dir/$hmmlib.h3p" and
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
75 -s "$dir/$hmmlib.dat" );
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
76 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
77
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
78 die qq(FATAL: can't use E-value or bit score threshold with Pfam-B searches; Pfam-B searches use a default cut_off of 0.001)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
79 if ( ( $e_seq or $e_dom or $b_seq or $b_dom ) and not $pfamA );
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
80
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
81 die qq(FATAL: can't use E-value and bit score threshold together)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
82 if ( ( $e_seq and ( $b_seq or $b_dom ) ) or
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
83 ( $b_seq and ( $e_seq or $e_dom ) ) or
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
84 ( $b_dom and $e_dom ) );
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
85
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
86 die qq(FATAL: output file "$outfile" already exists)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
87 if ( $outfile and -s $outfile );
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
88
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
89 if ( $as ) {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
90 die qq(FATAL: "-as" option only works on Pfam-A families)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
91 unless $pfamA;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
92
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
93 die qq(FATAL: can't find "active_site.dat" in "$dir")
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
94 unless -s "$dir/active_site.dat";
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
95 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
96
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
97 if ( defined $translate ) {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
98 if ( $translate eq "" ) {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
99 # no argument to "-translate" was given, so make "orf" the default
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
100 $translate = 'orf';
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
101 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
102 else {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
103 # there was an argument to "-translate", so make sure it's valid
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
104 unless ( $translate eq "all" or $translate eq "orf" ) {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
105 die qq(FATAL: "-translate" option accepts only "all" and "orf");
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
106 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
107 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
108 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
109
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
110 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
111
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
112 # build the object
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
113 my $ps = Bio::Pfam::Scan::PfamScan->new(
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
114 -e_seq => $e_seq,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
115 -e_dom => $e_dom,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
116 -b_seq => $b_seq,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
117 -b_dom => $b_dom,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
118 -dir => $dir,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
119 -clan_overlap => $clan_overlap,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
120 -fasta => $fasta,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
121 -align => $align,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
122 -as => $as,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
123 -hmmlib => \@hmmlib,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
124 -version => $VERSION,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
125 -cpu => $cpu,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
126 -translate => $translate
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
127 );
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
128
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
129 # run the search
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
130 $ps->search;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
131
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
132 # print the results
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
133 if ( defined $json ) {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
134
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
135 my $json_object;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
136 eval {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
137 require JSON;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
138 $json_object = new JSON;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
139 };
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
140 if ( $@ ) {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
141 die qq(FATAL: can't load JSON module; can't write JSON-format output);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
142 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
143
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
144 if ( $json eq 'pretty' ) {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
145 $json_object->pretty( 1 ) ;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
146 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
147 print $json_object->encode( $ps->results );
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
148
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
149 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
150 else {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
151 $ps->write_results( $outfile, $e_seq, $e_dom, $b_seq, $b_dom );
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
152 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
153
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
154 exit;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
155
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
156 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
157
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
158 sub help {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
159 print STDERR <<EOF;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
160
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
161 pfam_scan.pl: search a FASTA file against a library of Pfam HMMs
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
162
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
163 Usage: pfam_scan.pl -fasta <fasta_file> -dir <directory location of Pfam files>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
164
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
165 Additonal options:
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
166
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
167 -h : show this help
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
168 -outfile <file> : output file, otherwise send to STDOUT
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
169 -clan_overlap : show overlapping hits within clan member families (applies to Pfam-A families only)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
170 -align : show the HMM-sequence alignment for each match
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
171 -e_seq <n> : specify hmmscan evalue sequence cutoff for Pfam-A searches (default Pfam defined)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
172 -e_dom <n> : specify hmmscan evalue domain cutoff for Pfam-A searches (default Pfam defined)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
173 -b_seq <n> : specify hmmscan bit score sequence cutoff for Pfam-A searches (default Pfam defined)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
174 -b_dom <n> : specify hmmscan bit score domain cutoff for Pfam-A searches (default Pfam defined)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
175 -as : predict active site residues for Pfam-A matches
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
176 -json [pretty] : write results in JSON format. If the optional value "pretty" is given,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
177 the JSON output will be formatted using the "pretty" option in the JSON
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
178 module
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
179 -cpu <n> : number of parallel CPU workers to use for multithreads (default all)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
180 -translate [mode] : treat sequence as DNA and perform six-frame translation before searching. If the
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
181 optional value "mode" is given it must be either "all", to translate everything
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
182 and produce no individual ORFs, or "orf", to report only ORFs with length greater
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
183 than 20. If "-translate" is used without a "mode" value, the default is to
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
184 report ORFs (default no translation)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
185
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
186 For more help, check the perldoc:
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
187
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
188 shell\% perldoc pfam_scan.pl
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
189
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
190 EOF
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
191 exit;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
192
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
193 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
194
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
195 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
196
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
197 =head1 NAME
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
198
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
199 pfam_scan.pl -- Search protein sequences against the Pfam HMM library
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
200
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
201 =head1 SYNOPSIS
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
202
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
203 pfam_scan.pl [options] -fasta <fasta_file> -dir <Pfam_data_file_dir>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
204
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
205 =head1 OPTIONS
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
206
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
207 =over
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
208
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
209 =item B<-dir> I<Pfam_data_file_dir>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
210
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
211 Directory containing Pfam data files [required]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
212
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
213 =item B<-fasta> I<fasta_file>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
214
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
215 Filename of input file containing sequence(s) [required]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
216
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
217 =item B<-outfile> I<output_file>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
218
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
219 Write output to C<output_file> [default: STDOUT]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
220
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
221 =item B<-e_seq>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
222
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
223 Sequence E-value cut-off [default: use Pfam GA cutoff]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
224
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
225 =item B<-e_dom>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
226
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
227 Domain E-value cut-off [default: use Pfam GA cutoff]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
228
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
229 =item B<-b_seq>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
230
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
231 Sequence bit score cut-off [default: use Pfam GA cutoff]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
232
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
233 =item B<-b_dom>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
234
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
235 Domain bit score cut-off [default: use Pfam GA cutoff]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
236
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
237 =item B<-clan_overlap>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
238
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
239 Show overlapping hits within clan member families; applies to Pfam-A families only [default: false]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
240
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
241 =item B<-align>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
242
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
243 Show alignment snippets in results [default: false]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
244
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
245 =item B<-as>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
246
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
247 Search for active sites on Pfam-A matches [default: false]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
248
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
249 =item B<-json> [I<pretty>]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
250
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
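251 Write the results in JSON format. If the optional value I<pretty> is given, the output is formatted using the "pretty" option of the JSON module [default: false]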
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
252
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
253 =item B<-cpu>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
254
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
255 Number of parallel CPU workers to use for multithreads [default: all]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
256
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
257 =item B<-translate> [I<mode>]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
258
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
259 Treat the input sequence as DNA and perform a six-frame translation before
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
260 searching, using the "translate" program from the HMMER v2.3.2 package. If the
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
261 optional value I<mode> is given, it must be either "all" or "orf": "all" means
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
262 translate in full, with stops, and produce no individual ORFs; "orf" means
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
263 translate and report only ORFs of length greater than 20. If B<-translate> is
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
264 used but I<mode> is omitted, the default is to translate using the "orf"
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
265 method [default: off (no translation)]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
266
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
267 =item B<-h>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
268
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
269 Display help message
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
270
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
271 =back
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
272
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
273 The input must be a FASTA-format file. The C<-fasta> and C<-dir> options are
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
274 mandatory. You cannot specify both an E-value threshold and a bit score threshold.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
275
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
276 =head1 OVERVIEW
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
277
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
278 C<pfam_scan.pl> is a script for searching one or more protein sequences against the
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
279 library of HMMs from Pfam. It requires a local copy of the Pfam data files, which
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
280 can be obtained from the Pfam FTP area:
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
281
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
282 ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
283
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
284 You must also have the HMMER3 binaries installed and their locations given by your
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
285 C<PATH> environment variable. You can download the HMMER3 package at:
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
286
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
287 ftp://selab.janelia.org/pub/software/hmmer3/
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
288
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
289 =head1 OUTPUT
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
290
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
291 The output format is:
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
292 <seq id> <alignment start> <alignment end> <envelope start> <envelope end> <hmm acc> <hmm name> <type> <hmm start> <hmm end> <hmm length> <bit score> <E-value> <significance> <clan> <predicted_active_site_residues>
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
293 Example output (-as option):
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
294
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
295 O65039.1 38 93 38 93 PF08246 Inhibitor_I29 Domain 1 58 58 45.9 2.8e-12 1 No_clan
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
296 O65039.1 126 342 126 342 PF00112 Peptidase_C1 Domain 1 216 216 296.0 1.1e-88 1 CL0125 predicted_active_site[150,285,307]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
297
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
298 Most of these values are derived from the output of I<hmmscan> (see HMMER3
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
299 documentation for details). The significance value is 1 if the bit score for a
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
300 hit is greater than or equal to the curated gathering threshold for the
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
301 matching family, 0 otherwise.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
302
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
303 =head1 REFERENCES
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
304
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
305 Active site residues are predicted using the method described in the publication:
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
306
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
307 Mistry J., Bateman A., Finn R.D. "Predicting active site residue annotations in
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
308 the Pfam database." BMC Bioinformatics. 2007;8:298. PMID:17688688.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
309
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
310 =head1 AUTHORS
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
311
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
312 Jaina Mistry (jaina@ebi.ac.uk), Rob Finn (rdf@ebi.ac.uk)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
313
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
314 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
315
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
316 =head1 COPYRIGHT
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
317
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
318 Copyright (c) 2009: Genome Research Ltd.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
319
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
320 Authors: Jaina Mistry (jaina@ebi.ac.uk), rdf (rdf@ebi.ac.uk)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
321
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
322 This is free software; you can redistribute it and/or
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
323 modify it under the terms of the GNU General Public License
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
324 as published by the Free Software Foundation; either version 2
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
325 of the License, or (at your option) any later version.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
326
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
327 This program is distributed in the hope that it will be useful,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
328 but WITHOUT ANY WARRANTY; without even the implied warranty of
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
329 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
330 GNU General Public License for more details.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
331
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
332 You should have received a copy of the GNU General Public License
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
333 along with this program; if not, write to the Free Software
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
334 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
335 or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
336
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
337 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
338