#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Provenance (from the HG changeset header this script was exported with):
#   User bebatut, Date 1460714696 14400,
#   Node ID 04699558a38acffedf862050ebf2b9fd88fe077d
#   planemo upload for repository
#   https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode
#   commit e857f7126443e115f11954085423f8999bc870aa-dirty
"""Prepend a barcode to every sequence of a FASTA file.

The barcode of each sequence is looked up, by sequence identifier, in a
two-column tab-separated mapping file (identifier, barcode).
"""

import sys
import os
import argparse
import copy
import operator


def write_seq_fasta_format(seq, output_file, width=60):
    """Write ``seq`` to ``output_file`` wrapped at ``width`` characters.

    Args:
        seq: The sequence string to write.
        output_file: A writable text file-like object.
        width: Maximum number of characters per output line (default 60).

    Raises:
        ValueError: If ``width`` is smaller than 1.
    """
    if width < 1:
        raise ValueError('width must be a positive integer')
    for start in range(0, len(seq), width):
        output_file.write(seq[start:start + width] + '\n')


def _read_mapping(mapping_path):
    """Return a dict mapping sequence identifier to barcode."""
    mapping = {}
    with open(mapping_path, 'r') as input_mapping_file:
        for line in input_mapping_file:
            # Strip only the line terminator: slicing off the last character
            # would eat real data when the final line has no newline.
            line = line.rstrip('\r\n')
            if not line:
                # Tolerate blank lines (typically a trailing one).
                continue

            split_line = line.split('\t')
            if len(split_line) != 2:
                string = 'Incorrect number of column in mapping file.'
                string += '\nTwo tabular separated columns are expected'
                raise ValueError(string)

            mapping[split_line[0]] = split_line[1]
    return mapping


def _write_barcoded_record(seq_id, seq, mapping, output_file):
    """Write one record with its barcode prepended to the sequence."""
    if seq_id not in mapping:
        string = 'A sequence identifier (' + seq_id + ') is'
        string += ' not found in mapping file'
        raise ValueError(string)

    output_file.write('>' + seq_id + '\n')
    write_seq_fasta_format(mapping[seq_id] + seq, output_file)


def fasta_add_barcode(args):
    """Add the mapped barcode in front of each sequence of a FASTA file.

    Only the first whitespace-delimited token of each header line is used as
    the sequence identifier, and only that token is written to the output
    header. Records that have no sequence characters are not written.

    Args:
        args: Object with the attributes ``input_sequence_file`` (FASTA
            path), ``input_mapping_file`` (tab-separated identifier/barcode
            path) and ``output_sequence_file`` (path of the FASTA to write).

    Raises:
        ValueError: If a mapping line does not have exactly two columns, or
            if a sequence identifier is absent from the mapping file.
    """
    mapping = _read_mapping(args.input_mapping_file)

    seq_id = ''
    seq_parts = []
    with open(args.input_sequence_file, 'r') as input_sequence_file:
        with open(args.output_sequence_file, 'w') as output_sequence_file:
            for line in input_sequence_file:
                line = line.rstrip('\r\n')
                if line.startswith('>'):
                    if seq_parts:
                        _write_barcoded_record(
                            seq_id, ''.join(seq_parts), mapping,
                            output_sequence_file)
                    header_fields = line[1:].split()
                    seq_id = header_fields[0] if header_fields else ''
                    seq_parts = []
                elif line:
                    seq_parts.append(line)

            # The last record has no following header to trigger its write,
            # so it must be flushed explicitly once the input is exhausted.
            if seq_parts:
                _write_barcoded_record(
                    seq_id, ''.join(seq_parts), mapping,
                    output_sequence_file)


########
# Main #
########
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_sequence_file', required=True)
    parser.add_argument('--input_mapping_file', required=True)
    parser.add_argument('--output_sequence_file', required=True)
    args = parser.parse_args()

    fasta_add_barcode(args)
+GATGGGAACGGGTGTGTCCCCCAAGCGCATCGCCACCAGCAGTAGTTATCCTAGTTTTTG +AAACTGTTAAAGTCTACGGCTCATCGTCCGTTACCTTAATAACTAAACAGTACGTAAAAC +CC +>SRR029699.5714 EXRA6YY02A735O +ATCCCGAACACGATGGTAAAGACCTAAGCGGCCGATGGTACTGCACTGGAGACGGTGTGG +GAGAGTAGGTGGCCGCCAAATTAAAAAGAAAATAAACCGAGAGGTTCTGCGATAAAGCTG +GTCTTCACCAGTGATCAGAGTTTAAAAGAAAGCTTTTAGTCTCTGATGACTGATGAAAAA +GTCAGTCAGATGAAGAACTGAATAAGGACTTCATCTTATGTACCTTGAAAACTGCATATA +GTAAAAATCAATAGATTTAGATAAATA +>SRR029699.14351 EXRA6YY04CD0L5 +CGGTGGCGATGCGCTTTTGGGAAACACCCGTCCACATCCCGAACACGATGGTTAAGACTG +AAGCGGCCGATAATACTATACTGGTAACGGTATGGGAAGGTAGGTGGCTGCCGGATTTAT +AAAGAACAGCATAGCATATATGCTTTCAAAATAGAACAGGCTCGAAAGAGCTAACCTACA +TCAGCAGGGAAGTGCTGTTTAGATAGCTGGTTTTACCAGTGATTAGAGTATTTTAAGATA +TTAAGGTTTCTAATGACTGAATAAAACAG +>SRR029699.30831 EYTWXA302GEDQR +GCTTATGGGACACACCCGTTCCCATCCCGAACACGACGGTTAAGACGTAAGCGGCCGATG +GTACTATGCTGGAGACGGCATGGGAGAGCAGGTGGGTGCCGGACTGAGACACGCAACAGG +GGATAGGCAAGGCACACAGGGGATAGG +>SRR029699.33623 EYTWXA302HT3FX +GACTACGAGGTTGATAGGCACGATGTGTAAGTGGAGCGATCCATTCAGCAAGTGTGTACT +AATAGATCGAGGGCTTGACCACAATTCGCTTGAATTCTCAAGTCAATGACAAAATGTTAG +CAGTGATTATTCAGTTTTGAAGGCACGTCCTTCAAGAAATACTGGACAAAGTAAAACAGA +AATGTTATACTGAACCAGTCATATTGGTCGGTGACGATGACGGTGAGGTTCCACCTGTTC +CCATTCCGAACACAG +>SRR029699.38075 EYTWXA302GR8DJ +ACTTTATCAAGATACCAAGTGGAGAATACGAGATTCGAACTCGTGACCTCCTGCTTGCAA +GGCAGGCGCTCTCCCAACTGAGCTAATCCCCCAAGGGAATCCGGCAGCCACCTGCTCTCC +CATGCCATCTCCAGCATAGTACCATCGGCCGCTCAGGTCTTAACCATCGTGTTCGGGATG +GGAACGGGTGTGTCCCCTGAGCGCATCGCCACCGGAAATATCTTATCAAGTTTTTGCTTG +ATAACTGAATAAAC \ No newline at end of file diff -r 000000000000 -r 04699558a38a test-data/mapping_file.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mapping_file.txt Fri Apr 15 06:04:56 2016 -0400 @@ -0,0 +1,7 @@ +SRR029699.4565 AAAAAA +SRR029699.5673 CCCCCC +SRR029699.5714 TTTTTT +SRR029699.14351 GGGGGG +SRR029699.30831 AAAAAA +SRR029699.33623 CCCCCC +SRR029699.38075 TTTTTT diff -r 000000000000 -r 04699558a38a test-data/output_test.fasta --- /dev/null Thu Jan 
01 00:00:00 1970 +0000 +++ b/test-data/output_test.fasta Fri Apr 15 06:04:56 2016 -0400 @@ -0,0 +1,33 @@ +>SRR029699.4565 +AAAAAAACGGCTCGGTGCCGTCCACCGGTCAGCGGCGCCCTGGCCTCCCACGGGCTGACC +CCGCAGTACTCTCGGCGCGATGGGGCTTAGCTTCCGGGTTCGGAACGGGACCGGGCGTGC +CCC +>SRR029699.5673 +CCCCCCGACCAGAACAACGCCGAAGATCAAAACGGTGAAAACAACGGCGAACAGGGAGGA +AACGAATAATGAAGAAGCTTGTTCTTTTAGCCCTCGGGGCCTTATTGTTAGCGGGCGCCA +CCTGCTCTCCCACACCGTCTCCAGTGCAGTACCATCGGCCGCTTGGGTCTTAACCATCGT +GTTCGGGATGGGAACGGGTGTGTCCCCCAAGCGCATCGCCACCAGCAGTAGTTATCCTAG +TTTTTGAAACTGTTAAAGTCTACGGCTCATCGTCCGTTACCTTAATAACTAAACAGTACG +TAAAACCC +>SRR029699.5714 +TTTTTTATCCCGAACACGATGGTAAAGACCTAAGCGGCCGATGGTACTGCACTGGAGACG +GTGTGGGAGAGTAGGTGGCCGCCAAATTAAAAAGAAAATAAACCGAGAGGTTCTGCGATA +AAGCTGGTCTTCACCAGTGATCAGAGTTTAAAAGAAAGCTTTTAGTCTCTGATGACTGAT +GAAAAAGTCAGTCAGATGAAGAACTGAATAAGGACTTCATCTTATGTACCTTGAAAACTG +CATATAGTAAAAATCAATAGATTTAGATAAATA +>SRR029699.14351 +GGGGGGCGGTGGCGATGCGCTTTTGGGAAACACCCGTCCACATCCCGAACACGATGGTTA +AGACTGAAGCGGCCGATAATACTATACTGGTAACGGTATGGGAAGGTAGGTGGCTGCCGG +ATTTATAAAGAACAGCATAGCATATATGCTTTCAAAATAGAACAGGCTCGAAAGAGCTAA +CCTACATCAGCAGGGAAGTGCTGTTTAGATAGCTGGTTTTACCAGTGATTAGAGTATTTT +AAGATATTAAGGTTTCTAATGACTGAATAAAACAG +>SRR029699.30831 +AAAAAAGCTTATGGGACACACCCGTTCCCATCCCGAACACGACGGTTAAGACGTAAGCGG +CCGATGGTACTATGCTGGAGACGGCATGGGAGAGCAGGTGGGTGCCGGACTGAGACACGC +AACAGGGGATAGGCAAGGCACACAGGGGATAGG +>SRR029699.33623 +CCCCCCGACTACGAGGTTGATAGGCACGATGTGTAAGTGGAGCGATCCATTCAGCAAGTG +TGTACTAATAGATCGAGGGCTTGACCACAATTCGCTTGAATTCTCAAGTCAATGACAAAA +TGTTAGCAGTGATTATTCAGTTTTGAAGGCACGTCCTTCAAGAAATACTGGACAAAGTAA +AACAGAAATGTTATACTGAACCAGTCATATTGGTCGGTGACGATGACGGTGAGGTTCCAC +CTGTTCCCATTCCGAACACAG