Mercurial > repos > iuc > fasta_stats
annotate fasta-stats.py @ 4:0dbb995c7d35 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
author | iuc |
---|---|
date | Thu, 18 Nov 2021 20:56:57 +0000 |
parents | |
children |
rev | line source |
---|---|
4
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
2 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
3 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
4 # python version of fasta-stats with some extra features |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
5 # written by anmol.kiran@gmail.com |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
6 # git: @codemeleon |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
7 # date: 10/11/2021 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
8 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
9 import argparse |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
10 import re |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
11 from os import path |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
12 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
13 import numpy as np |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
14 from Bio import SeqIO |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
15 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
16 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
def calculate_NG50(estimated_genome, total_length, sequence_lengths):
    """Return the NG50 for a set of sequence lengths.

    The lengths are walked in the order given while a running total is
    kept. The first length that pushes the running total strictly above
    half of ``estimated_genome`` is returned.

    Args:
        estimated_genome: Estimated genome size; half of it is the
            threshold the running total has to exceed.
        total_length: Accepted for the callers' benefit but not used in
            the calculation.
        sequence_lengths: Iterable of sequence lengths. The callers in
            this file pass them sorted from longest to shortest.

    Returns:
        The length at which the threshold is first exceeded, or 0 when
        the running total never exceeds it (including empty input).
    """
    half_genome = estimated_genome / 2.0
    running_total = 0
    for length in sequence_lengths:
        running_total += length
        if running_total > half_genome:
            return length
    # Threshold never crossed: same 0 default as before.
    return 0
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
27 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
28 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
29 def run(fasta, stats_output, gaps_output, genome_size): |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
30 """Generates scaffold statistics.""" |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
31 if not fasta: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
32 exit("Input file not given.") |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
33 if not path.isfile(fasta): |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
34 exit(f"{fasta} path does not exist.") |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
35 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
36 seq_len = {} |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
37 bases_global = {"A": 0, "N": 0, "T": 0, "C": 0, "G": 0} |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
38 bases_seq = {} |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
39 seq_id_Ngaprange = {} |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
40 nstart = 0 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
41 contigs_len = [] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
42 gap_count = 0 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
43 for seq_record in SeqIO.parse(fasta, "fasta"): |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
44 seq = str(seq_record.seq).upper() |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
45 # print(len(seq)) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
46 seq_len[seq_record.id] = len(seq) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
47 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
48 # NOTE: Nucleotide count |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
49 bases_seq[seq_record.id] = { |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
50 "A": seq.count("A"), |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
51 "N": seq.count("N"), |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
52 "T": seq.count("T"), |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
53 "C": seq.count("C"), |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
54 "G": seq.count("G"), |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
55 } |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
56 bases_global["A"] += bases_seq[seq_record.id]["A"] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
57 bases_global["N"] += bases_seq[seq_record.id]["N"] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
58 bases_global["T"] += bases_seq[seq_record.id]["T"] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
59 bases_global["C"] += bases_seq[seq_record.id]["C"] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
60 bases_global["G"] += bases_seq[seq_record.id]["G"] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
61 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
62 # NOTE: Gap count and their range |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
63 range_gen = re.finditer("N+", seq) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
64 n_range = [match.span() for match in range_gen] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
65 for n_rng in n_range: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
66 if n_rng[0] == 0 or n_rng[1] == seq_len[seq_record.id]: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
67 continue |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
68 else: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
69 gap_count += 1 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
70 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
71 # NOTE: Contigs, their lengths from scaffold and their N gap range |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
72 seq_id_Ngaprange[seq_record.id] = n_range |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
73 n_range_len = len(n_range) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
74 if n_range_len > 0: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
75 n_range = ( |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
76 [(0, 0)] + n_range + [(seq_len[seq_record.id], seq_len[seq_record.id])] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
77 ) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
78 for idx in range(n_range_len + 1): |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
79 nstart = n_range[idx][1] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
80 nend = n_range[idx + 1][0] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
81 con_len = nend - nstart |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
82 if con_len: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
83 contigs_len.append(con_len) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
84 else: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
85 contigs_len.append(len(seq)) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
86 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
87 # NOTE: Scaffold statistics |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
88 SEQ_LEN_LIST = sorted(seq_len.values(), reverse=True) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
89 scaffold_lens = np.array(SEQ_LEN_LIST) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
90 scaffold_lens_sum = np.cumsum(scaffold_lens) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
91 N50_len = scaffold_lens_sum[-1] * 0.5 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
92 N50_idx = np.where(scaffold_lens_sum > N50_len)[0][0] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
93 N90_len = scaffold_lens_sum[-1] * 0.9 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
94 N90_idx = np.where(scaffold_lens_sum > N90_len)[0][0] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
95 NG50 = calculate_NG50(genome_size, scaffold_lens_sum[-1], scaffold_lens) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
96 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
97 # NOTE: Contig statistics |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
98 seq_len_list = sorted(contigs_len, reverse=True) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
99 contigs_len = np.array(seq_len_list) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
100 contigs_len_sum = np.cumsum(contigs_len) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
101 n50_len = contigs_len_sum[-1] * 0.5 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
102 n50_idx = np.where(contigs_len_sum > n50_len)[0][0] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
103 n90_len = contigs_len_sum[-1] * 0.9 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
104 n90_idx = np.where(contigs_len_sum > n90_len)[0][0] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
105 ng50 = calculate_NG50(genome_size, contigs_len_sum[-1], contigs_len) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
106 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
107 with open(stats_output, "w") as soutput: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
108 soutput.write("{}\t{}\n".format("Scaffold L50", N50_idx + 1)) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
109 soutput.write("{}\t{}\n".format("Scaffold N50", SEQ_LEN_LIST[N50_idx])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
110 soutput.write("{}\t{}\n".format("Scaffold L90", N90_idx + 1)) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
111 soutput.write("{}\t{}\n".format("Scaffold N90", SEQ_LEN_LIST[N90_idx])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
112 if genome_size != 0: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
113 soutput.write("{}\t{}\n".format("Scaffold NG50", NG50)) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
114 soutput.write("{}\t{}\n".format("Scaffold len_max", SEQ_LEN_LIST[0])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
115 soutput.write("{}\t{}\n".format("Scaffold len_min", SEQ_LEN_LIST[-1])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
116 soutput.write( |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
117 "{}\t{}\n".format("Scaffold len_mean", int(np.mean(SEQ_LEN_LIST))) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
118 ) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
119 soutput.write( |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
120 "{}\t{}\n".format("Scaffold len_median", int(np.median(SEQ_LEN_LIST))) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
121 ) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
122 soutput.write("{}\t{}\n".format("Scaffold len_std", int(np.std(SEQ_LEN_LIST)))) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
123 soutput.write("{}\t{}\n".format("Scaffold num_A", bases_global["A"])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
124 soutput.write("{}\t{}\n".format("Scaffold num_T", bases_global["T"])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
125 soutput.write("{}\t{}\n".format("Scaffold num_C", bases_global["C"])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
126 soutput.write("{}\t{}\n".format("Scaffold num_G", bases_global["G"])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
127 soutput.write("{}\t{}\n".format("Scaffold num_N", bases_global["N"])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
128 soutput.write("{}\t{}\n".format("Scaffold num_bp", scaffold_lens_sum[-1])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
129 soutput.write( |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
130 "{}\t{}\n".format( |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
131 "Scaffold num_bp_not_N", scaffold_lens_sum[-1] - bases_global["N"] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
132 ) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
133 ) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
134 soutput.write("{}\t{}\n".format("Scaffold num_seq", len(SEQ_LEN_LIST))) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
135 soutput.write( |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
136 "{}\t{:.2f}\n".format( |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
137 "Scaffold GC content overall", |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
138 ( |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
139 (bases_global["G"] + bases_global["C"]) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
140 * 100.0 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
141 / scaffold_lens_sum[-1] |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
142 ), |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
143 ) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
144 ) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
145 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
146 soutput.write("{}\t{}\n".format("Contig L50", n50_idx + 1)) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
147 soutput.write("{}\t{}\n".format("Contig N50", seq_len_list[n50_idx])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
148 soutput.write("{}\t{}\n".format("Contig L90", n90_idx + 1)) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
149 soutput.write("{}\t{}\n".format("Contig N90", seq_len_list[n90_idx])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
150 if genome_size != 0: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
151 soutput.write("{}\t{}\n".format("Contig NG50", ng50)) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
152 soutput.write("{}\t{}\n".format("Contig len_max", seq_len_list[0])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
153 soutput.write("{}\t{}\n".format("Contig len_min", seq_len_list[-1])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
154 soutput.write("{}\t{}\n".format("Contig len_mean", int(np.mean(seq_len_list)))) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
155 soutput.write( |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
156 "{}\t{}\n".format("Contig len_median", int(np.median(seq_len_list))) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
157 ) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
158 soutput.write("{}\t{}\n".format("Contig len_std", int(np.std(seq_len_list)))) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
159 soutput.write("{}\t{}\n".format("Contig num_bp", contigs_len_sum[-1])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
160 soutput.write("{}\t{}\n".format("Contig num_seq", len(contigs_len_sum))) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
161 soutput.write("{}\t{}\n".format("Number of gaps", gap_count)) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
162 if gaps_output is not None: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
163 # NOTE: generate gaps statistics file |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
164 with open(gaps_output, "w") as goutput: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
165 for key in seq_id_Ngaprange: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
166 for rng in seq_id_Ngaprange[key]: |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
167 goutput.write("{}\t{}\t{}\n".format(key, rng[0], rng[1])) |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
168 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
169 |
0dbb995c7d35
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 50f5cce5a8c11001e2c59600a2b99a4243b6d06f"
iuc
parents:
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: declare the command-line options, parse them and
    # forward the parsed values to run().
    cli = argparse.ArgumentParser()

    # Input FASTA path (mandatory).
    cli.add_argument("-f", "--fasta", help="FASTA file", required=True)

    # Optional integer; it is 0 when the flag is omitted.
    cli.add_argument(
        "-z", "--genome_size",
        help="If provided, the NG50 statistic will be computed",
        type=int, default=0, required=False,
    )

    # Destination of the general statistics report (mandatory).
    cli.add_argument(
        "-s", "--stats_output",
        help="File to store the general statistics",
        required=True,
    )

    # Optional destination of the gaps report; None when the flag is omitted.
    cli.add_argument(
        "-r", "--gaps_output",
        help="File to store the gaps statistics",
        default=None, required=False,
    )

    opts = cli.parse_args()

    # Arguments are passed positionally, in this order: FASTA path,
    # statistics output, gaps output, genome size.
    run(opts.fasta, opts.stats_output, opts.gaps_output, opts.genome_size)