Mercurial > repos > bebatut > fasta_add_barcode
diff fasta_add_barcode.py @ 0:04699558a38a draft default tip
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit e857f7126443e115f11954085423f8999bc870aa-dirty
author | bebatut |
---|---|
date | Fri, 15 Apr 2016 06:04:56 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Prepend a per-sequence barcode to every record of a FASTA file.

The barcode of each sequence is looked up, by sequence identifier, in a
two-column tab-separated mapping file (identifier, barcode).
"""

import sys
import os
import argparse
import copy
import operator


def write_seq_fasta_format(seq, output_file):
    """Write ``seq`` to ``output_file`` wrapped at 60 characters per line.

    Args:
        seq: The sequence string to write.
        output_file: A writable text file object.
    """
    # ``range`` (rather than ``xrange``) works on both Python 2 and Python 3.
    for start in range(0, len(seq), 60):
        output_file.write(seq[start:start + 60] + '\n')


def _read_mapping(mapping_path):
    """Return a dict mapping sequence identifier -> barcode.

    Raises:
        ValueError: If a line does not hold exactly two tab-separated columns.
    """
    mapping = {}
    with open(mapping_path, 'r') as input_mapping_file:
        for line in input_mapping_file:
            # Strip only the line terminator: blindly dropping the last
            # character would truncate the barcode of a final line that has
            # no trailing newline.
            split_line = line.rstrip('\r\n').split('\t')

            if len(split_line) != 2:
                string = 'Incorrect number of column in mapping file.'
                string += '\nTwo tabular separated columns are expected'
                raise ValueError(string)

            mapping[split_line[0]] = split_line[1]
    return mapping


def _write_barcoded_record(seq_id, seq, mapping, output_file):
    """Write one record with its barcode prepended to the sequence.

    Records whose sequence is empty are skipped without being written.

    Raises:
        ValueError: If ``seq_id`` has no entry in ``mapping``.
    """
    if not seq:
        return

    if seq_id not in mapping:
        string = 'A sequence identifier (' + seq_id + ') is'
        string += ' not found in mapping file'
        raise ValueError(string)

    output_file.write('>' + seq_id + '\n')
    write_seq_fasta_format(mapping[seq_id] + seq, output_file)


def fasta_add_barcode(args):
    """Copy a FASTA file, prefixing each sequence with its mapped barcode.

    The identifier of a record is the first whitespace-separated token of its
    header line; only that token is written to the output header.

    Args:
        args: An object with the attributes ``input_mapping_file``,
            ``input_sequence_file`` and ``output_sequence_file`` (file paths).

    Raises:
        ValueError: If the mapping file is malformed, or if a sequence
            identifier is missing from the mapping file.
    """
    mapping = _read_mapping(args.input_mapping_file)

    seq_id = ''
    seq_chunks = []
    with open(args.input_sequence_file, 'r') as input_sequence_file:
        with open(args.output_sequence_file, 'w') as output_sequence_file:
            for line in input_sequence_file:
                if line.startswith('>'):
                    # A new header closes the previous record.
                    _write_barcoded_record(
                        seq_id, ''.join(seq_chunks), mapping,
                        output_sequence_file)
                    header_fields = line[1:].split()
                    # A bare '>' header yields an empty identifier, which is
                    # reported as "not found" instead of an IndexError.
                    seq_id = header_fields[0] if header_fields else ''
                    seq_chunks = []
                else:
                    seq_chunks.append(line.rstrip('\r\n'))

            # The last record has no following header to trigger its output,
            # so it must be flushed explicitly.
            _write_barcoded_record(
                seq_id, ''.join(seq_chunks), mapping, output_sequence_file)


########
# Main #
########
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_sequence_file', required=True)
    parser.add_argument('--input_mapping_file', required=True)
    parser.add_argument('--output_sequence_file', required=True)
    args = parser.parse_args()

    fasta_add_barcode(args)