annotate t_coffee_to_cigar.pl @ 0:304d1a82708f draft default tip

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee_to_cigar commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
author earlhaminst
date Fri, 11 Nov 2016 06:57:26 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
304d1a82708f planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee_to_cigar commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
earlhaminst
parents:
diff changeset
1 #!/usr/bin/perl
304d1a82708f planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee_to_cigar commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
earlhaminst
parents:
diff changeset
2 #
304d1a82708f planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee_to_cigar commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
earlhaminst
parents:
diff changeset
3 use strict;
304d1a82708f planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee_to_cigar commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
earlhaminst
parents:
diff changeset
4 use warnings;
304d1a82708f planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee_to_cigar commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
earlhaminst
parents:
diff changeset
5
304d1a82708f planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee_to_cigar commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
earlhaminst
parents:
diff changeset
6 # A simple Perl script to convert FASTA sequence alignments into 2-column output where first column is FASTA id and second is CIGAR line
304d1a82708f planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee_to_cigar commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
earlhaminst
parents:
diff changeset
7 # Usage: t_coffee_to_cigar.pl <file>
304d1a82708f planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee_to_cigar commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
earlhaminst
parents:
diff changeset
8
304d1a82708f planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee_to_cigar commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
earlhaminst
parents:
diff changeset
# Convert one aligned sequence into a CIGAR string and print it.
#
# Arguments:
#   $header   - FASTA identifier (the text after '>'), printed as column 1
#   $sequence - aligned sequence; '-' marks a gap, every other
#               non-whitespace character counts as a match
#
# Output:
#   Prints "<header>\t<cigar>\n" to the currently selected filehandle.
#   Matches become M and gaps become D; a run of length 1 is written
#   without a count (e.g. "M" rather than "1M").
sub convert_and_print {
    my ($header, $sequence) = @_;

    # Remove whitespace first (e.g. a "\r" left over from CRLF line endings),
    # otherwise the "anything but '-'" rule below would count it as a match.
    $sequence =~ s/\s+//g;

    # Converts each match into M and each gap into D
    $sequence =~ s/[^-]/M/g;
    $sequence =~ tr/-/D/;

    # The string now contains only M and D: walk it run by run and condense
    # each run, e.g. DDDD into 4D.
    my $cigar = '';
    while ($sequence =~ m/(M+|D+)/g) {
        my $run = $1;
        $cigar .= length($run) if length($run) > 1;
        $cigar .= substr($run, 0, 1);
    }

    print "$header\t$cigar\n";
    return;
}
304d1a82708f planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee_to_cigar commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
earlhaminst
parents:
diff changeset
30
304d1a82708f planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee_to_cigar commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
earlhaminst
parents:
diff changeset
# Main: read the FASTA alignment named by the first command-line argument
# and print one "<id>\t<cigar>" line per record via convert_and_print().
my $file1 = $ARGV[0];
die "Usage: $0 <file>\n" unless defined $file1;
open my $fh1, '<', $file1 or die "Cannot open '$file1': $!\n";

# $header stays undef until the first '>' line is seen, so that an id which
# is false in boolean context (e.g. "0") is still treated as a real record.
my $header;
my $sequence = '';
while (my $line = <$fh1>) {
    # Strip LF as well as CRLF line endings (chomp would leave the "\r").
    $line =~ s/[\r\n]+\z//;
    if (substr($line, 0, 1) eq '>') {
        # A new record starts: flush the previous one, if any.
        convert_and_print($header, $sequence) if defined $header;
        $header = substr($line, 1);
        $sequence = '';
    }
    elsif (defined $header) {
        # Sequence data may be wrapped over several lines; lines that appear
        # before the first header belong to no record and are ignored.
        $sequence .= $line;
    }
}
close $fh1;

# Flush the last record; print nothing when the input held no record at all.
convert_and_print($header, $sequence) if defined $header;