Mercurial > repos > padge > trimal
annotate trimal_repo/scripts/generateRandomAlignmentsUsingAsSeedRealAlignments.py @ 0:b15a3147e604 draft
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author | padge |
---|---|
date | Fri, 25 Mar 2022 17:10:43 +0000 |
parents | |
children |
rev | line source |
---|---|
0
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
1 #!/usr/bin/python |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
2 import os |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
3 import Bio |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
4 import sys |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
5 import random |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
6 import argparse |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
7 import numpy as np |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
8 from Bio import SeqIO |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
9 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
10 def splitSequence(seq, length = 80): |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
11 ''' Split a given sequence contained in one line into lines of size "length" |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
12 ''' |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
13 return "\n".join([seq[i:i + length] for i in range(0, len(seq), length)]) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
14 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
15 if __name__ == "__main__": |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
16 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
17 parser = argparse.ArgumentParser() |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
18 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
19 parser.add_argument("-i", "--in", dest = "inFile", required = True, type = \ |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
20 str, help = "Input Codon alignment") |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
21 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
22 parser.add_argument("-o", "--out", dest = "outFile", default = None, type = \ |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
23 str, help = "Set output file") |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
24 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
25 parser.add_argument("-s", "--numb_sequences", dest = "numb_sequences", \ |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
26 default = 2, type = int, help = "Set how many sequences the output " |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
27 + "alignment should contain") |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
28 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
29 parser.add_argument("-r", "--numb_residues", dest = "numb_residues", \ |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
30 default = 100, type = int, help = "Set how many residues the output " |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
31 + "alignment should contain") |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
32 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
33 parser.add_argument("-f", "--input_format", dest = "inFormat", type = str, \ |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
34 default = "fasta", help = "Set input alignment format") |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
35 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
36 parser.add_argument("-g", "--gap_symbol", dest = "gapSymbol", default = '-', \ |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
37 type = str, help = "Define the gap symbol used in the input/output " |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
38 + "alignments") |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
39 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
40 parser.add_argument("-m", "--max_attempts", dest = "attempts", default = 10, \ |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
41 type = int, help = "Define a maximum numnber of attempts when generating " |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
42 + "a random alignment before giving it up") |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
43 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
44 args = parser.parse_args() |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
45 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
46 ## Check input parameters |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
47 if not os.path.isfile(args.inFile): |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
48 sys.exit(("ERROR: Check input alignment file '%s'") % (args.inFile)) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
49 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
50 if args.numb_sequences < 2: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
51 sys.exit(("ERROR: Check input sequences '%s'") % (str(args.numb_sequences))) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
52 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
53 if args.numb_residues < 2: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
54 sys.exit(("ERROR: Check input residues '%s'") % (str(args.numb_residues))) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
55 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
56 if args.attempts < 1: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
57 sys.exit(("ERROR: Check max. number of attempts '%s'") % (str(args.attempts))) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
58 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
59 ## Read input alignment and get some basic information from it e.g. |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
60 ## sequences names, residues number, etc. |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
61 algLen = -1 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
62 alignment = {} |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
63 for record in SeqIO.parse(args.inFile, args.inFormat): |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
64 seq = str(record.seq) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
65 alignment.setdefault(record.id, seq) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
66 if algLen == -1: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
67 algLen = len(seq) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
68 if len(seq) != algLen: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
69 print("Detected Inconsistencies at Sequence's length", file = sys.stderr) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
70 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
71 sequences = list(alignment.keys()) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
72 columns = list(range(algLen)) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
73 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
74 ## Select randomly sequences and columns from the input alignment to populate |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
75 ## the output alignment, making sure no sequence or column is |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
76 ## composed only by gaps. |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
77 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
78 ## This is an iterative process |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
79 selected_seqs = [] |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
80 discarded_seqs = set() |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
81 selected_cols = [] |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
82 discarded_cols = set() |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
83 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
84 ## Set a counter to control how many attempts are done for generating the |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
85 ## random alignment |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
86 max_attempts = 0 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
87 while True: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
88 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
89 while len(selected_seqs) < args.numb_sequences: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
90 selected = random.choice(sequences) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
91 if not selected in discarded_seqs: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
92 selected_seqs.append(selected) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
93 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
94 while len(selected_cols) < args.numb_residues: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
95 selected = random.choice(columns) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
96 if not selected in discarded_cols: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
97 selected_cols.append(selected) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
98 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
99 generated = {} |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
100 for seq in selected_seqs: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
101 if seq in generated: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
102 continue |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
103 ## We check generated sequences are not composed only by gaps |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
104 sequence = [alignment[seq][pos] for pos in selected_cols] |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
105 if set(sequence) - set([args.gapSymbol]) == set([]): |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
106 discarded_seqs.add(seq) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
107 continue |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
108 generated.setdefault(seq, splitSequence("".join(sequence))) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
109 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
110 ## We have to check there are not columns composed only by gaps |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
111 for column in range(len(selected_cols)): |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
112 individual_column = [generated[seq][column] for seq in generated] |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
113 if set(individual_column) - set([args.gapSymbol]) == set([]): |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
114 discarded_cols.add(selected_cols[column]) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
115 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
116 ## We check which sequences/residues remain after controlling by those |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
117 ## composed only by gaps |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
118 selected_seqs = [s for s in selected_seqs if not s in discarded_seqs] |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
119 selected_cols = [c for c in selected_cols if not c in discarded_cols] |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
120 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
121 if len(selected_seqs) == args.numb_sequences and \ |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
122 len(selected_cols) == args.numb_residues: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
123 break |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
124 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
125 max_attempts += 1 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
126 if max_attempts == args.attempts: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
127 sys.exit(("ERROR: Impossible to generate random alignment after '%s' " |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
128 + "attempts. Check configuration") % (args.attempts)) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
129 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
130 ## Produce the output alignment. |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
131 n = 1 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
132 ofile = open(args.outFile, "w") if args.outFile else sys.stdout |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
133 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
134 ## How to properly name output sequences including a padding to have |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
135 ## homogeneous ids |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
136 padding = int(np.ceil(np.log10(args.numb_sequences))) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
137 if args.numb_sequences % 10 == 0: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
138 padding += 1 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
139 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
140 for seq in selected_seqs: |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
141 print(">seq_%s\n%s" % (str(n).zfill(padding), generated[seq]), file = ofile) |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
142 n += 1 |
b15a3147e604
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
padge
parents:
diff
changeset
|
143 ofile.close() |