comparison t_coffee_to_cigar.pl @ 0:794a6e864a96 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty
author earlhaminst
date Thu, 15 Dec 2016 11:04:25 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:794a6e864a96
1 #!/usr/bin/perl
2 #
3 use strict;
4 use warnings;
5
6 # A simple Perl script to convert a FASTA sequence alignment into 2-column output, where the first column is the FASTA id and the second is the CIGAR line
7 # Usage: t_coffee_to_cigar.pl <file>
8
# Print one tab-separated line "<header>\t<cigar>\n" on the currently selected
# output handle for a single aligned sequence.
#
# Parameters:
#   $header   - text printed verbatim in the first column
#   $sequence - aligned sequence; every '-' is a gap (D), any other
#               character is a match (M)
#
# The CIGAR string is built from maximal runs of identical operations; a run
# of length 1 is written without a count (e.g. "AC--G" gives "2M2DM").
sub convert_and_print {
    my ($header, $sequence) = @_;

    my @ops;
    # Each iteration consumes one maximal run of gaps or of non-gap characters.
    while ($sequence =~ /(-+|[^-]+)/g) {
        my $run    = $1;
        my $op     = substr($run, 0, 1) eq '-' ? 'D' : 'M';
        my $length = length $run;
        # The count is omitted for single-character runs.
        push @ops, ($length > 1 ? $length : '') . $op;
    }

    my $cigar = join('', @ops);
    print "$header\t$cigar\n";
}
30
# Main program: read the aligned FASTA file named by the first command-line
# argument and call convert_and_print once per record.
#
# Dies with a usage message when no file is given and with the system error
# when the file cannot be opened, instead of silently printing an empty row.
my $file1 = $ARGV[0];
defined $file1 or die "Usage: $0 <file>\n";
open my $fh1, '<', $file1 or die "Cannot open '$file1': $!\n";

# $header stays undef until the first '>' line is seen. Testing definedness
# (rather than truth) keeps records whose id is "0" or empty.
my $header;
my $sequence = '';
while (my $line = <$fh1>) {
    chomp $line;
    # Tolerate CRLF line endings: a trailing carriage return would otherwise
    # be counted as a match column and leak into the id.
    $line =~ s/\r\z//;
    if (substr($line, 0, 1) eq '>') {
        # A new record starts: flush the previous one, if any.
        convert_and_print($header, $sequence) if defined $header;
        $header = substr($line, 1);
        $sequence = '';
    } elsif (defined $header) {
        # Sequence data may span several lines; text before the first
        # header belongs to no record and is ignored.
        $sequence .= $line;
    }
}
close $fh1;

# Flush the last record; print nothing for input without any header.
convert_and_print($header, $sequence) if defined $header;