t_coffee: t_coffee_to_cigar.pl comparison

comparison t_coffee_to_cigar.pl @ 0:794a6e864a96 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 230ae552ddeb1bfdef3a09becaa5c6d373529a05-dirty

author	earlhaminst
date	Thu, 15 Dec 2016 11:04:25 -0500
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:794a6e864a96
+#!/usr/bin/perl
+#
+use strict;
+use warnings;
+# A simple Perl script to convert FASTA sequence alignments into 2-column, tab-separated output, where the first column is the FASTA id and the second is the CIGAR line
+# Usage: t_coffee_to_cigar.pl <file>
+# Print one output row for an aligned sequence: the FASTA id, a tab, and the
+# CIGAR string describing that alignment row.
+#   $header   - FASTA id (the header line without its leading '>')
+#   $sequence - aligned sequence; '-' is a gap, any other character a match
+# Writes "<id>\t<cigar>\n" to STDOUT.
+sub convert_and_print {
+    my ($header, $sequence) = @_;
+    # Reduce the row to its operations: every non-gap becomes M, every gap D
+    $sequence =~ s/[^-]/M/g;
+    $sequence =~ s/-/D/g;
+    # Walk the runs of identical operations and emit "<length><op>" for each,
+    # omitting the length for runs of one (e.g. DDDD -> 4D, M -> M)
+    my @ops;
+    while ($sequence =~ /(M+|D+)/g) {
+        my $run   = $1;
+        my $count = length($run) > 1 ? length($run) : '';
+        push @ops, $count . substr($run, 0, 1);
+    }
+    my $cigar = join('', @ops);
+    print "$header\t$cigar\n";
+}
+# Main program: read the FASTA alignment named by the first command-line
+# argument and print one "<id>\t<cigar>" row per record.
+my $file1 = $ARGV[0];
+defined $file1 or die "Usage: $0 <file>\n";
+# Fail loudly instead of silently reading from an unopened handle
+open my $fh1, '<', $file1 or die "Cannot open '$file1': $!\n";
+# $header stays undef until the first '>' line is seen, so ids that are false
+# in Perl (such as "0" or the empty string) still count as records
+my $header;
+my $sequence = '';
+while (my $line = <$fh1>) {
+    # Strip LF as well as CRLF line endings so that a trailing carriage
+    # return is not counted as an extra match
+    $line =~ s/[\r\n]+\z//;
+    if (substr($line, 0, 1) eq '>') {
+        # A new record starts: flush the one collected so far
+        convert_and_print($header, $sequence) if defined $header;
+        $header = substr($line, 1);
+        $sequence = '';
+    } else {
+        $sequence .= $line;
+    }
+}
+close $fh1;
+# Flush the last record. Input without any header still yields a row with an
+# empty id, but an empty file no longer produces a blank "\t" row.
+if (defined $header) {
+    convert_and_print($header, $sequence);
+} elsif (length $sequence) {
+    convert_and_print('', $sequence);
+}

Mercurial > repos > earlhaminst > t_coffee

comparison t_coffee_to_cigar.pl @ 0:794a6e864a96 draft