annotate trimal_repo/scripts/generateRandomAlignmentsUsingAsSeedRealAlignments.py @ 0:b15a3147e604 draft

"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author padge
date Fri, 25 Mar 2022 17:10:43 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b15a3147e604 "planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff changeset
1 #!/usr/bin/python
b15a3147e604 "planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff changeset
2 import os
b15a3147e604 "planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff changeset
3 import Bio
b15a3147e604 "planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff changeset
4 import sys
b15a3147e604 "planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff changeset
5 import random
b15a3147e604 "planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff changeset
6 import argparse
b15a3147e604 "planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff changeset
7 import numpy as np
b15a3147e604 "planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff changeset
8 from Bio import SeqIO
b15a3147e604 "planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff changeset
9
b15a3147e604 "planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff changeset
def splitSequence(seq, length = 80):
    ''' Split a given sequence contained in one line into lines of size "length"

        seq:    sequence (string) to be wrapped.
        length: maximum number of characters per output line. It must be a
                positive integer.

        Returns the chunks of "seq" joined by newline characters; an empty
        sequence produces an empty string.

        Raises ValueError when "length" is lower than 1, since a negative
        step would otherwise produce no chunks and silently lose the sequence.
    '''
    if length < 1:
        raise ValueError("length must be a positive integer, got %r" % (length,))
    return "\n".join(seq[i:i + length] for i in range(0, len(seq), length))
b15a3147e604 "planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff changeset
14
b15a3147e604 "planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff changeset
def _parseArguments():
    ''' Define the command-line options of the script and return the parsed
        arguments.
    '''
    parser = argparse.ArgumentParser()

    parser.add_argument("-i", "--in", dest = "inFile", required = True, type = \
        str, help = "Input Codon alignment")

    parser.add_argument("-o", "--out", dest = "outFile", default = None, type = \
        str, help = "Set output file")

    parser.add_argument("-s", "--numb_sequences", dest = "numb_sequences", \
        default = 2, type = int, help = "Set how many sequences the output "
        + "alignment should contain")

    parser.add_argument("-r", "--numb_residues", dest = "numb_residues", \
        default = 100, type = int, help = "Set how many residues the output "
        + "alignment should contain")

    parser.add_argument("-f", "--input_format", dest = "inFormat", type = str, \
        default = "fasta", help = "Set input alignment format")

    parser.add_argument("-g", "--gap_symbol", dest = "gapSymbol", default = '-', \
        type = str, help = "Define the gap symbol used in the input/output "
        + "alignments")

    parser.add_argument("-m", "--max_attempts", dest = "attempts", default = 10, \
        type = int, help = "Define a maximum numnber of attempts when generating "
        + "a random alignment before giving it up")

    return parser.parse_args()


def _readAlignment(inFile, inFormat):
    ''' Read the input alignment and return a tuple (alignment, algLen) where
        "alignment" maps each record id to its sequence (the first record wins
        when an id is repeated) and "algLen" is the length of the first
        sequence read, or -1 when the file contains no records.
    '''
    algLen = -1
    alignment = {}
    for record in SeqIO.parse(inFile, inFormat):
        seq = str(record.seq)
        alignment.setdefault(record.id, seq)
        if algLen == -1:
            algLen = len(seq)
        ## Inconsistent lengths are only reported: processing goes on using
        ## the length of the first sequence as the number of columns.
        if len(seq) != algLen:
            print("Detected Inconsistencies at Sequence's length", file = sys.stderr)
    return alignment, algLen


def _isOnlyGaps(residues, gapSymbol):
    ''' Return True when every item in "residues" is the gap symbol. An empty
        collection is also reported as composed only by gaps.
    '''
    return all(residue == gapSymbol for residue in residues)


def _sampleAlignment(alignment, algLen, numb_sequences, numb_residues, \
    gapSymbol, attempts):
    ''' Randomly pick sequences and columns from "alignment" until getting
        "numb_sequences" sequences and "numb_residues" columns with neither
        sequences nor columns composed only by gaps.

        Sequences and columns are drawn with replacement, so the same one may
        be picked more than once. Those found to be composed only by gaps for
        the current selection are discarded for the rest of the process.

        Returns a tuple (selected_seqs, generated): the list of picked
        sequence ids and a dictionary mapping each distinct picked id to its
        sampled residues (one unwrapped string). Returns None when the
        alignment could not be generated within "attempts" rounds or when no
        sequences/columns remain to choose from.
    '''
    sequences = list(alignment)
    columns = list(range(algLen))

    selected_seqs, selected_cols = [], []
    discarded_seqs, discarded_cols = set(), set()

    for _ in range(attempts):
        ## Draw only from what has not been discarded yet. Giving up when a
        ## pool gets empty prevents looping forever looking for a candidate.
        seq_pool = [s for s in sequences if s not in discarded_seqs]
        col_pool = [c for c in columns if c not in discarded_cols]
        if not seq_pool or not col_pool:
            return None

        while len(selected_seqs) < numb_sequences:
            selected_seqs.append(random.choice(seq_pool))

        while len(selected_cols) < numb_residues:
            selected_cols.append(random.choice(col_pool))

        generated = {}
        for seq in selected_seqs:
            if seq in generated:
                continue
            ## We check generated sequences are not composed only by gaps
            residues = [alignment[seq][pos] for pos in selected_cols]
            if _isOnlyGaps(residues, gapSymbol):
                discarded_seqs.add(seq)
                continue
            ## Keep the residues unwrapped: column indexes must match the
            ## positions in "selected_cols" for the check below.
            generated[seq] = "".join(residues)

        ## We have to check there are not columns composed only by gaps
        for index, column in enumerate(selected_cols):
            if _isOnlyGaps([generated[seq][index] for seq in generated], gapSymbol):
                discarded_cols.add(column)

        ## We check which sequences/residues remain after controlling by those
        ## composed only by gaps
        selected_seqs = [s for s in selected_seqs if s not in discarded_seqs]
        selected_cols = [c for c in selected_cols if c not in discarded_cols]

        if len(selected_seqs) == numb_sequences and \
            len(selected_cols) == numb_residues:
            return selected_seqs, generated

    return None


def _main():
    ''' Generate a random alignment using a real one as seed and write it, in
        FASTA format, to the output file or to the standard output.
    '''
    args = _parseArguments()

    ## Check input parameters
    if not os.path.isfile(args.inFile):
        sys.exit(("ERROR: Check input alignment file '%s'") % (args.inFile))

    if args.numb_sequences < 2:
        sys.exit(("ERROR: Check input sequences '%s'") % (str(args.numb_sequences)))

    if args.numb_residues < 2:
        sys.exit(("ERROR: Check input residues '%s'") % (str(args.numb_residues)))

    if args.attempts < 1:
        sys.exit(("ERROR: Check max. number of attempts '%s'") % (str(args.attempts)))

    ## Read input alignment and get some basic information from it e.g.
    ## sequences names, residues number, etc.
    alignment, algLen = _readAlignment(args.inFile, args.inFormat)

    ## There is nothing to sample from when the input has no sequences or
    ## when the sequences have no residues.
    if not alignment or algLen < 1:
        sys.exit(("ERROR: Check input alignment file '%s'") % (args.inFile))

    result = _sampleAlignment(alignment, algLen, args.numb_sequences, \
        args.numb_residues, args.gapSymbol, args.attempts)

    if result is None:
        sys.exit(("ERROR: Impossible to generate random alignment after '%s' "
            + "attempts. Check configuration") % (args.attempts))

    selected_seqs, generated = result

    ## Produce the output alignment. Output sequences are named using a
    ## zero-padded counter, as wide as the largest counter, to have
    ## homogeneous ids.
    padding = len(str(args.numb_sequences))
    records = [">seq_%s\n%s" % (str(n).zfill(padding), \
        splitSequence(generated[seq])) for n, seq in enumerate(selected_seqs, 1)]

    ## Only a file opened here is closed; the standard output is left open.
    if args.outFile:
        with open(args.outFile, "w") as ofile:
            print("\n".join(records), file = ofile)
    else:
        print("\n".join(records), file = sys.stdout)


if __name__ == "__main__":
    _main()