diff fasta_add_barcode.py @ 0:04699558a38a draft default tip

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit e857f7126443e115f11954085423f8999bc870aa-dirty
author bebatut
date Fri, 15 Apr 2016 06:04:56 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta_add_barcode.py	Fri Apr 15 06:04:56 2016 -0400
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Prepend a per-sequence barcode to every record of a FASTA file.
+
+Barcodes are looked up by sequence identifier in a two-column,
+tab-separated mapping file (identifier, barcode).
+"""
+
+import sys
+import os
+import argparse
+import copy
+import operator
+
+
+def write_seq_fasta_format(seq, output_file):
+    """Write seq to output_file, wrapped at 60 characters per line.
+
+    seq is a string and output_file any object with a write() method.
+    Nothing is written for an empty seq.
+    """
+    # range (not xrange) keeps this runnable on both Python 2 and 3.
+    for start in range(0, len(seq), 60):
+        output_file.write(seq[start:start + 60] + '\n')
+
+
+def _read_mapping(mapping_path):
+    """Return a dict of sequence identifier -> barcode read from mapping_path.
+
+    Raises ValueError if a line does not hold exactly two tab-separated
+    columns.
+    """
+    mapping = {}
+    with open(mapping_path, 'r') as input_mapping_file:
+        for line in input_mapping_file:
+            # Strip only the line ending: line[:-1] would eat a real
+            # character when the last line has no trailing newline.
+            split_line = line.rstrip('\r\n').split('\t')
+
+            if len(split_line) != 2:
+                string = 'Incorrect number of column in mapping file.'
+                string += '\nTwo tabular separated columns are expected'
+                raise ValueError(string)
+
+            mapping[split_line[0]] = split_line[1]
+    return mapping
+
+
+def _write_barcoded_record(seq_id, seq, mapping, output_file):
+    """Write one record with its barcode prepended to the sequence.
+
+    Records without any sequence are skipped, as is the state before the
+    first header. Raises ValueError if seq_id is missing from mapping.
+    """
+    if seq == '':
+        return
+
+    if seq_id not in mapping:
+        string = 'A sequence identifier (' + seq_id + ') is'
+        string += ' not found in mapping file'
+        raise ValueError(string)
+
+    output_file.write('>' + seq_id + '\n')
+    write_seq_fasta_format(mapping[seq_id] + seq, output_file)
+
+
+def fasta_add_barcode(args):
+    """Copy a FASTA file, prepending each sequence's barcode.
+
+    args must provide input_mapping_file, input_sequence_file and
+    output_sequence_file (paths). Output headers keep only the identifier
+    (the first whitespace-delimited word after '>'), and sequences are
+    rewrapped at 60 characters per line.
+
+    Raises ValueError on a malformed mapping line or when a sequence
+    identifier has no barcode in the mapping file.
+    """
+    mapping = _read_mapping(args.input_mapping_file)
+
+    seq_id = ''
+    seq_parts = []
+    with open(args.input_sequence_file, 'r') as input_sequence_file:
+        with open(args.output_sequence_file, 'w') as output_sequence_file:
+            for line in input_sequence_file:
+                if line.startswith('>'):
+                    _write_barcoded_record(seq_id, ''.join(seq_parts),
+                                           mapping, output_sequence_file)
+                    # A bare '>' header yields an empty identifier.
+                    header_words = line[1:].split()
+                    seq_id = header_words[0] if header_words else ''
+                    seq_parts = []
+                else:
+                    seq_parts.append(line.rstrip('\r\n'))
+
+            # The last record is not followed by a header, so it has to be
+            # written here; otherwise it is silently dropped.
+            _write_barcoded_record(seq_id, ''.join(seq_parts),
+                                   mapping, output_sequence_file)
+
+
+########
+# Main #
+########
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--input_sequence_file', required=True)
+    parser.add_argument('--input_mapping_file', required=True)
+    parser.add_argument('--output_sequence_file', required=True)
+    args = parser.parse_args()
+
+    fasta_add_barcode(args)
\ No newline at end of file