Mercurial > repos > earlhaminst > t_coffee
annotate t_coffee_to_cigar.pl @ 0:794a6e864a96 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
author | earlhaminst |
---|---|
date | Thu, 15 Dec 2016 11:04:25 -0500 |
parents | |
children |
rev | line source |
---|---|
0
794a6e864a96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff
changeset
|
1 #!/usr/bin/perl |
794a6e864a96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff
changeset
|
2 # |
794a6e864a96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff
changeset
|
3 use strict; |
794a6e864a96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff
changeset
|
4 use warnings; |
794a6e864a96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff
changeset
|
5 |
794a6e864a96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff
changeset
|
6 # A simple Perl script that converts a FASTA sequence alignment into tab-separated 2-column output, where the first column is the FASTA id and the second is the CIGAR line |
794a6e864a96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff
changeset
|
7 # Usage: t_coffee_to_cigar.pl <file> |
794a6e864a96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff
changeset
|
8 |
794a6e864a96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff
changeset
|
# Print one alignment row as "<header>\t<CIGAR>\n" on the currently selected
# output handle (STDOUT by default).
#
# Arguments:
#   $header   - text for the first column (the FASTA header without '>')
#   $sequence - the aligned sequence; '-' marks a gap column
#
# Every non-gap character becomes M and every '-' becomes D. Runs of the same
# operation are condensed to <count><op>, and the count is omitted for runs of
# length 1, e.g. "AC--G" -> "2M2DM". An empty sequence yields an empty CIGAR.
sub convert_and_print {
    my ($header, $sequence) = @_;
    $sequence = '' unless defined $sequence;

    # Whitespace (including a stray "\r" left over from CRLF input) is not an
    # alignment column; drop it so it is not counted as a match.
    $sequence =~ s/\s+//g;

    # Converts each match into M and each gap into D
    $sequence =~ s/[^-]/M/g;
    $sequence =~ tr/-/D/;

    # Walk over each run of identical letters and condense it, e.g. DDDD in 4D
    my $cigar = '';
    while ($sequence =~ /(M+|D+)/g) {
        my $run_length = length $1;
        $cigar .= $run_length if $run_length > 1;
        $cigar .= substr($1, 0, 1);
    }
    print "$header\t$cigar\n";
}
794a6e864a96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff
changeset
|
30 |
794a6e864a96
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
earlhaminst
parents:
diff
changeset
|
# Main program: read the FASTA alignment named by the first command-line
# argument and print one "<header>\t<CIGAR>" row per record.
my $file1 = $ARGV[0];
die "Usage: $0 <file>\n" unless defined $file1;
open my $fh1, '<', $file1 or die "Cannot open '$file1': $!\n";

# $header stays undef until the first '>' line is seen. Testing definedness
# rather than truthiness keeps records whose header is "0" or empty.
my $header;
my $sequence = '';
while (my $line = <$fh1>) {
    # Strip the line ending, tolerating CRLF files
    $line =~ s/\r?\n\z//;
    if (substr($line, 0, 1) eq '>') {
        # A new record starts: flush the previous one, if any
        convert_and_print($header, $sequence) if defined $header;
        $header = substr($line, 1);
        $sequence = '';
    } else {
        # Sequences may be wrapped over several lines; join them
        $sequence .= $line;
    }
}
close $fh1;

# Flush the last record. Nothing is printed when the file has no header line
# at all (e.g. an empty file).
convert_and_print($header, $sequence) if defined $header;