annotate filter_spades_repeats.pl @ 1:0e3d2c8b1b23 draft default tip

planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 3f9ae719338c7c8db81d645b8ee09727e2d9ce23
author nml
date Tue, 07 Nov 2017 11:52:52 -0500
parents 90957420cc07
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
1 #!/usr/bin/env perl
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
2
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
3 use strict;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
4 use Getopt::Long;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
5 use Bio::SeqIO;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
6 use Pod::Usage;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
7
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
8 my ($fasta_file, $tab_file, $coverage_co, $length_co, $repeat_co, $out_filtered, $out_repeats, $out_norepeats,$coverage_length_co, $summary_out, $filtered_repeats, $help);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
9
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
10 GetOptions(
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
11 'c|coverage-cutoff=s' => \$coverage_co,
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
12 'l|length-cutoff=s' => \$length_co,
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
13 'e|coverage-length-cutoff=s' => \$coverage_length_co,
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
14 'r|repeat_cutoff=s' => \$repeat_co,
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
15 'i|input=s' => \$fasta_file,
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
16 't|tab=s' => \$tab_file,
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
17 'f|filtered-out=s' => \$out_filtered,
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
18 'o|output-repeats=s' => \$out_repeats,
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
19 'u|output-norepeats=s' => \$out_norepeats,
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
20 'n|filtered-repeats=s' => \$filtered_repeats,
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
21 's|summary=s' => \$summary_out,
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
22 'h|help' => \$help
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
23 );
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
24
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
25 pod2usage(-verbose => 2) if ($help);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
26 print "A fasta file is required. Please enter a fasta file using the -i flag.\n" if (!$fasta_file);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
27 print "A spades tabs file is required. Please enter a tabs file using the -t flag\n" if (!$tab_file);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
28 pod2usage(1) unless $fasta_file && $tab_file;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
29
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
30 if (!$coverage_co)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
31 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
32 $coverage_co = 0.33;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
33 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
34 if (!$length_co)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
35 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
36 $length_co = 1000;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
37 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
38 if (!$coverage_length_co)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
39 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
40 $coverage_length_co = 5000;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
41 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
42 if (!$repeat_co)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
43 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
44 $repeat_co = 1.75;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
45 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
46 if (!$out_filtered)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
47 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
48 $out_filtered = "Discarded_sequences.fasta";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
49 print "Discarded sequences will be printed out to $out_filtered\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
50 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
51 if (!$out_repeats)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
52 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
53 $out_repeats = "Filtered_sequences_with_repeats.fasta";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
54 print "Filtered sequences with repeats will be printed out to $out_repeats\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
55 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
56 if (!$out_norepeats)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
57 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
58 $out_norepeats = "Filtered_sequences_no_repeats.fasta";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
59 print "Filtered sequences without repeats will be printed out to $out_norepeats\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
60 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
61 if (!$filtered_repeats)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
62 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
63 $filtered_repeats = "Repeat_sequences.fasta";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
64 print "Repeat sequences will be printed out to $filtered_repeats\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
65 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
66
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
67 die ("No tab file specified") unless ($tab_file);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
68 die ("No fasta file specified") unless ($fasta_file);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
69
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
70 ##Read tab file and discard rows with comments
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
71 open TAB, '<', $tab_file or die "Could not open tab file '$tab_file': $!"; # $! holds the OS error from open; $? is only a child-process exit status
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
72 open SEQIN, '<', $fasta_file or die "Could not open fasta file '$fasta_file': $!"; # report the fasta input (not the tab file) together with the OS error in $!
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
73 open SEQOUT_REP, '>', $out_repeats or die "Could not open file '$out_repeats' for writing: $!"; # $! (not $?) carries the reason open failed
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
74 open SEQOUT_NOREP, '>', $out_norepeats or die "Could not open file '$out_norepeats' for writing: $!"; # $! (not $?) carries the reason open failed
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
75 open SEQOUT_FILT, '>', $out_filtered or die "Could not open file '$out_filtered' for writing: $!" if ($out_filtered); # check the open so a failure is not silently ignored by later prints
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
76 open SEQOUT_FILT_REP, '>', $filtered_repeats or die "Could not open file '$filtered_repeats' for writing: $!"; # $! (not $?) carries the reason open failed
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
77 open SUMMARY, '>', $summary_out or die "Could not open summary file '$summary_out' for writing: $!" if ($summary_out); # summary is optional, but when requested a failed open must be fatal
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
78
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
79
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
80 my $avg_coverage = 0;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
81 my $num_contigs = 0;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
82 my $cutoff_coverage;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
83 my $cutoff_repeats;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
84 my @stats;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
85
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
86
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
87 while (<TAB>)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
88 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
89 chomp;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
90 push @stats, $_ unless (/^#/);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
91 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
92
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
93 #Calculate average coverage.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
94 foreach my $stat(@stats)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
95 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
96 my ($length, $coverage);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
97 (undef,$length, $coverage) = split(/\t+/, $stat);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
98 die "length or coverage not defined at $stat\n" unless ($length && ($coverage ne '' && $coverage >= 0));
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
99 if ($length >= $coverage_length_co)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
100 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
101 $avg_coverage = $avg_coverage + $coverage;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
102 $num_contigs++;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
103 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
104 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
105
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
106 die "No contigs with length >= $coverage_length_co found in tab file; cannot calculate average coverage\n" unless ($num_contigs); $avg_coverage = $avg_coverage / $num_contigs; # guard first: $num_contigs stays 0 when no row reaches the coverage-length cutoff
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
107 $cutoff_coverage = $avg_coverage * $coverage_co;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
108 $cutoff_repeats = $avg_coverage * $repeat_co;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
109
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
110 print SUMMARY "Filter SPAdes repeats Results Summary\n======================================\n\n" if ($summary_out);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
111 print SUMMARY "Paramaters used:\nLength cutoff for calcularing average cutoff: $coverage_length_co\nCoverage cutoff ratio: $coverage_co\nRepeat cutoff ratio: $repeat_co\nLength cutoff: $length_co\n\n" if ($summary_out);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
112
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
113 print SUMMARY "Calculations:\nAverage coverage: $avg_coverage\nCoverage cutoff: $cutoff_coverage\nRepeat cutoff: $cutoff_repeats\n\nFile headers:\n" if ($summary_out);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
114
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
115 my ($header, $seq_id, $seq);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
116 my $repeated = 0;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
117 my $valid = 0;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
118
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
119 #Summary strings:
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
120 my $discarded = "";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
121 my $repeats = "";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
122 my $filtered_rep = "";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
123 my $filtered_norep = "";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
124
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
125 while (my $line = <SEQIN>)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
126 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
127 if ($line =~ />/)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
128 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
129 chomp $line;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
130 #Get the sequence name to compare against tab file
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
131 $header = $line;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
132 $seq_id = $line =~ /(\w+)_length/;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
133 $seq = "";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
134
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
135 my $stat = shift @stats;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
136 die "Less rows in tab than sequences in seq file" unless $stat;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
137 my($name, $length, $coverage) = split(/\t+/, $stat);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
138 die "name or length not defined at $stat\n" unless ($name && $length);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
139 die "coverage is not defined at $stat\n" unless ($coverage ne '' && $coverage >= 0);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
140 die "Unmatched names $header and $name\n" unless ($header =~ /$name/i);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
141
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
142 #Entry passes the length and coverage cutoffs?
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
143 if ($length >= $length_co && $coverage >= $cutoff_coverage)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
144 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
145 $valid = 1;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
146 #Repeats
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
147 if ($coverage >= $cutoff_repeats)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
148 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
149 my $num_repeats = int($coverage/$avg_coverage);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
150 $header = $header."(".$num_repeats." copies)";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
151 print SEQOUT_REP $header,"\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
152 $filtered_rep = $filtered_rep.$header."\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
153 print SEQOUT_FILT_REP $header, "\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
154 $repeats = $repeats.$header."\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
155 $repeated = 1;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
156 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
157 else
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
158 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
159 print SEQOUT_REP $header, "\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
160 $filtered_rep = $filtered_rep.$header."\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
161 print SEQOUT_NOREP $header, "\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
162 $filtered_norep = $filtered_norep.$header."\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
163 $repeated = 0;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
164 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
165 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
166 elsif ($out_filtered)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
167 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
168 $valid = 0;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
169 print SEQOUT_FILT $header,"\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
170 $discarded = $discarded.$header."\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
171 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
172 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
173 else
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
174 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
175 if ($valid)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
176 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
177 print SEQOUT_REP $line;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
178 if (!$repeated)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
179 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
180 print SEQOUT_NOREP $line;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
181 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
182 else
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
183 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
184 print SEQOUT_FILT_REP $line;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
185 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
186 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
187 elsif ($out_filtered)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
188 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
189 print SEQOUT_FILT $line;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
190 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
191 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
192
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
193 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
194
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
195 close TAB;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
196 close SEQIN;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
197 close SEQOUT_REP;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
198 close SEQOUT_NOREP;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
199 close SEQOUT_FILT;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
200 close SEQOUT_FILT_REP;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
201
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
202
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
203 #Get summary info:
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
204 if ($summary_out)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
205 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
206 print SUMMARY "Filtered sequences (with repeats):\n$filtered_rep\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
207 print SUMMARY "Filtered sequences (no repeats):\n$filtered_norep\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
208 print SUMMARY "Repeat sequences:\n$repeats\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
209 if ($out_filtered)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
210 {
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
211 print SUMMARY "Discarded sequences:\n$discarded\n";
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
212 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
213
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
214 close SUMMARY;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
215 }
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
216
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
217 die "More rows in stats file than sequences in the fasta file\n" if (scalar(@stats) > 0);
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
218 exit 0;
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
219
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
220
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
221 __END__
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
222
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
223
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
224
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
225 =head1 NAME
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
226
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
227 filter_spades_repeats.pl - Filters contigs or scaffolds based on contig length and detects contigs/scaffolds with very high coverage.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
228
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
229
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
230
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
231 =head1 USAGE
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
232
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
233 filter_spades_repeats.pl -i <contigs/scaffolds input>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
234 -t <stats input>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
235 -o <output fasta with repeats>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
236 -u <output fasta without repeats>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
237
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
238 Optional:
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
239 -c <coverage cutoff ratio> (default 0.33)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
240 -l <length cutoff> (default: 1000)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
241 -e <length cutoff for average coverage calculation> (default: 5000)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
242 -r <repeat cutoff ratio> (default: 1.75)
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
243 -n <filtered repeated sequences>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
244 -f <discarded sequences>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
245 -s <output summary file>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
246
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
247 For more information:
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
248 -h
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
249
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
250
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
251 =head1 INPUT
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
252
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
253 =over 8
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
254
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
255 =item B<-i>B<--input>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
256
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
257 Contigs/Scaffolds fasta file.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
258
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
259 =item B<-t>B<--tab>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
260
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
261 The tabular output file from SPAdes. This file should have the following format:
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
262
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
263 #name length coverage
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
264
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
265 NODE_1 31438 24.5116
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
266
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
267 NODE_2 31354 2316.96
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
268
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
269 NODE_3 26948 82.3294
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
270
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
271 =item B<-o>B<--output-repeats>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
272
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
273 Output fasta file including the contigs marked as repeated.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
274
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
275 =item B<-u>B<--output-norepeats>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
276
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
277 Output fasta file excluding the contigs marked as repeated.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
278
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
279 =item B<-c>B<--coverage-cutoff>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
280
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
281 Minimum coverage ratio.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
282
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
283 coverage_threshold = average_coverage * minimum_coverage_ratio.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
284
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
285 Any contigs/scaffolds with coverage below the coverage_threshold will be eliminated.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
286
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
287 =item B<-l>B<--length-cutoff>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
288
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
289 Minimum length. Contigs below this length will be eliminated.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
290
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
291 =item B<-e>B<--coverage-length-cutoff>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
292
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
293 Minimum length to use for average coverage calculations.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
294
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
295 =item B<-r>B<--repeat-cutoff>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
296
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
297 Minimum repeats ratio.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
298
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
299 repeat_threshold = average_coverage * repeat_ratio.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
300
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
301 Any contigs with coverage above this threshold will be considered to be repeated.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
302
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
303
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
304 =item B<-f>B<--filtered-out>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
305
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
306 If specified, filtered out sequences will be written to this file.
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
307
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
308 =item B<-s>B<--summary>
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
309
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
310 A summary of results
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
311
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
312 =back
90957420cc07 planemo upload for repository https://github.com/phac-nml/galaxy_tools/ commit 8ea19b9db8a5d861466adf3bf4e01928d3d1ca38
nml
parents:
diff changeset
313 =cut