Mercurial > repos > earlhaminst > t_coffee

--- a/filter_by_fasta_ids.py	Thu Dec 15 11:04:25 2016 -0500
+++ b/filter_by_fasta_ids.py	Mon Dec 19 17:47:31 2016 -0500
@@ -82,10 +82,9 @@
     work_summary = {'wanted': 0, 'found': 0, 'duplicates': 0}
     targets = []

-    f_target = open(sys.argv[1])
-    for line in f_target.readlines():
-        targets.append(">%s" % line.strip().upper())
-    f_target.close()
+    with open(sys.argv[1]) as f_target:
+        for line in f_target.readlines():
+            targets.append(">%s" % line.strip().upper())

     work_summary['wanted'] = len(targets)
     homd_db = FASTAReader(sys.argv[2])
--- a/t_coffee.xml	Thu Dec 15 11:04:25 2016 -0500
+++ b/t_coffee.xml	Mon Dec 19 17:47:31 2016 -0500
@@ -51,7 +51,7 @@
         t_coffee '$input' $method_opt $output_opt -n_core \${GALAXY_SLOTS:-1} -run_name t_coffee_out -quiet

         #if 'cigar' in $outputs_arr
-            && perl '$__tool_directory__/t_coffee_to_cigar.pl' t_coffee_out.fasta_aln > '$cigar'
+            && python '$__tool_directory__/t_coffee_to_cigar.py' t_coffee_out.fasta_aln > '$cigar'
         #end if
 ]]>
     </command>
--- a/t_coffee_to_cigar.pl	Thu Dec 15 11:04:25 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-#!/usr/bin/perl
-#
-use strict;
-use warnings;
-
-# A simple Perl script to convert FASTA sequence alignments into 2-column output where first column is FASTA id and second is CIGAR line
-# TCoffee_to_cigar.pl <file>
-
-sub convert_and_print {
-    my ($header, $sequence) = @_;
-    # Converts each match into M and each gap into D
-    $sequence =~ s/[^-]/M/g;
-    $sequence =~ s/-/D/g;
-
-    # Split the sequence in substrings composed by the same letter
-    $sequence =~ s/DM/D,M/g;
-    $sequence =~ s/MD/M,D/g;
-    my @cigar_array = split(',', $sequence);
-
-    # Condense each substring, e.g. DDDD in 4D, and concatenate them again
-    my $cigar = '';
-    foreach my $str (@cigar_array) {
-        if (length($str) > 1) {
-            $cigar .= length($str);
-        }
-        $cigar .= substr($str, 0, 1);
-    }
-    print "$header\t$cigar\n";
-}
-
-my $file1 = $ARGV[0];
-open my $fh1, '<', $file1;
-
-my $header = '', my $sequence = '';
-while (my $line = <$fh1>) {
-    chomp $line;
-    if (substr($line, 0, 1) eq '>') {
-        if ($header) {
-            convert_and_print($header, $sequence);
-        }
-        $header = substr($line, 1);
-        $sequence = '';
-    } else {
-        $sequence .= $line;
-    }
-}
-close $fh1;
-convert_and_print($header, $sequence);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/t_coffee_to_cigar.py	Mon Dec 19 17:47:31 2016 -0500
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+""" A script to convert FASTA sequence alignments into 2-column output: FASTA id and CIGAR line """
+from __future__ import print_function
+
+import re
+import sys
+
+
+FASTA_MATCH_RE = re.compile(r'[^-]')
+
+
+def convert_and_print(header, sequence):
+    # Converts each match into M and each gap into D
+    tmp_seq = FASTA_MATCH_RE.sub('M', sequence)
+    tmp_seq = tmp_seq.replace('-', 'D')
+    # Split the sequence into substrings composed of the same letter
+    tmp_seq = tmp_seq.replace('DM', 'D,M')
+    tmp_seq = tmp_seq.replace('MD', 'M,D')
+    cigar_list = tmp_seq.split(',')
+    # Condense each substring, e.g. DDDD into 4D, and concatenate them again
+    cigar = ''
+    for s in cigar_list:
+        if len(s) > 1:
+            cigar += str(len(s))
+        cigar += s[0]
+    print("%s\t%s" % (header, cigar))
+
+
+def main():
+    with open(sys.argv[1]) as fh:
+        header = None
+        sequence = None
+        for line in fh:
+            line = line.strip()
+            if line and line[0] == '>':
+                if header:
+                    convert_and_print(header, sequence)
+                header = line[1:]
+                sequence = ''
+            else:
+                sequence += line
+    if header:
+        convert_and_print(header, sequence)
+
+
+if __name__ == "__main__":
+    main()