Mercurial > repos > earlhaminst > t_coffee_to_cigar
comparison t_coffee_to_cigar.pl @ 0:304d1a82708f draft default tip
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee_to_cigar commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
author | earlhaminst |
---|---|
date | Fri, 11 Nov 2016 06:57:26 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:304d1a82708f |
---|---|
1 #!/usr/bin/perl | |
2 # | |
3 use strict; | |
4 use warnings; | |
5 | |
6 # A simple Perl script that converts a FASTA sequence alignment into 2-column, tab-separated output: the first column is the FASTA id and the second is the CIGAR line | |
7 # Usage: t_coffee_to_cigar.pl <file> | |
8 | |
# Print one "<header>\t<cigar>\n" line for an aligned sequence.
#
# Arguments:
#   $header   - text written in the first output column
#   $sequence - aligned sequence; every '-' is reported as D (gap) and
#               every other character as M (match)
#
# Consecutive identical operations are run-length encoded; a run of
# length 1 is written without a count (e.g. "AC--GT-A" gives "2M2D2MDM").
# Nothing is returned; the line goes to the currently selected handle.
sub convert_and_print {
    my ($header, $sequence) = @_;

    my @ops;
    # Walk the sequence one maximal run at a time: either a stretch of
    # gaps or a stretch of non-gap characters.
    while ($sequence =~ /(-+|[^-]+)/g) {
        my $run    = $1;
        my $count  = length $run;
        my $symbol = substr($run, 0, 1) eq '-' ? 'D' : 'M';

        # The count is omitted for runs of a single column.
        push @ops, ($count > 1 ? $count : '') . $symbol;
    }

    print "$header\t" . join('', @ops) . "\n";
}
30 | |
# Main program: read the FASTA alignment named by the first command-line
# argument and print one "<id>\t<CIGAR>" line per record via
# convert_and_print().
my $file1 = $ARGV[0];
defined $file1 or die "Usage: $0 <file>\n";

# Fail loudly instead of silently producing an empty result when the
# input cannot be read.
open my $fh1, '<', $file1 or die "Cannot open '$file1': $!\n";

# $header stays undef until the first '>' line is seen, so records whose
# id is "0" or empty are still recognised as records.
my $header;
my $sequence = '';
while (my $line = <$fh1>) {
    chomp $line;
    # Drop the carriage return left by CRLF line endings; otherwise it
    # would be counted as an extra match column.
    $line =~ s/\r\z//;
    if (substr($line, 0, 1) eq '>') {
        # A new record starts: flush the previous one, if any.
        if (defined $header) {
            convert_and_print($header, $sequence);
        }
        $header = substr($line, 1);
        $sequence = '';
    } else {
        # Sequence data may span several lines; concatenate them.
        $sequence .= $line;
    }
}
close $fh1;

# Flush the last record; print nothing when the input had no records.
if (defined $header) {
    convert_and_print($header, $sequence);
}