13
|
"""Summary statistics about contig scaffolding with RaGOO.

Given the samtools index (.fai) of the input contigs and a file listing every
``*_groupings.txt`` produced by RaGOO, report how many contigs and base pairs
were localized (placed in a grouping file) versus left unlocalized.
"""
import argparse


def read_contig_lengths(index_path):
    """Return a ``{contig name: length}`` dict read from a samtools .fai file.

    Only the first two tab-separated columns (name, length) are used.
    Blank lines are skipped.

    Raises:
        ValueError: if a non-blank line has fewer than two columns or the
            length column is not an integer.
    """
    lengths = {}
    with open(index_path) as index_file:
        for line_number, line in enumerate(index_file, start=1):
            if not line.strip():
                continue
            fields = line.split('\t')
            if len(fields) < 2:
                raise ValueError(
                    '%s line %d: expected at least two tab-separated columns'
                    % (index_path, line_number)
                )
            lengths[fields[0]] = int(fields[1])
    return lengths


def read_grouping_paths(fofn_path):
    """Return the grouping file paths listed one per line in *fofn_path*.

    Trailing whitespace is removed from each line; blank lines are skipped so
    that a trailing newline in the list does not become an empty path.
    """
    with open(fofn_path) as fofn:
        stripped = (line.rstrip() for line in fofn)
        return [path for path in stripped if path]


def read_localized_contigs(grouping_paths):
    """Yield the first-column contig name of every line in every grouping file.

    Blank lines are skipped.
    """
    for grouping_path in grouping_paths:
        with open(grouping_path) as grouping_file:
            for line in grouping_file:
                header = line.split('\t')[0].rstrip()
                if header:
                    yield header


def summarize_localization(contig_lengths, localized_contigs):
    """Count localized and unlocalized contigs and base pairs.

    Args:
        contig_lengths: mapping of every contig name to its length.
        localized_contigs: iterable of contig names found in grouping files.

    Returns:
        A tuple ``(num_ctg_localized, num_bp_localized,
        num_ctg_unlocalized, num_bp_unlocalized)``.

    Raises:
        ValueError: if a localized contig is missing from *contig_lengths*
            or appears more than once across the grouping files.
    """
    # A set gives O(1) duplicate detection; the original list scan was O(n)
    # per contig, i.e. quadratic over a whole assembly.
    seen = set()
    num_bp_localized = 0
    for header in localized_contigs:
        if header not in contig_lengths:
            raise ValueError(
                'contig %r is in a grouping file but not in the fasta index'
                % header
            )
        if header in seen:
            raise ValueError(
                'contig %r appears more than once in the grouping files'
                % header
            )
        seen.add(header)
        num_bp_localized += contig_lengths[header]

    num_ctg_localized = len(seen)
    num_ctg_unlocalized = len(contig_lengths) - num_ctg_localized
    num_bp_unlocalized = sum(contig_lengths.values()) - num_bp_localized
    return (
        num_ctg_localized,
        num_bp_localized,
        num_ctg_unlocalized,
        num_bp_unlocalized,
    )


def percent_of_total(part, remainder):
    """Return ``part`` as a percentage of ``part + remainder`` (0.0 if empty)."""
    total = part + remainder
    if not total:
        # An empty index has nothing to localize; avoid ZeroDivisionError.
        return 0.0
    return (part / total) * 100


def main(argv=None):
    """Parse the command line and print the RaGOO localization summary.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description='Summary stats about contig scaffolding with RaGOO.'
    )
    parser.add_argument(
        "index",
        metavar="<contigs.fasta.fai>",
        type=str,
        # Each fragment ends with a space so the concatenated help reads
        # correctly.
        help="Samtools fasta index file for input contigs. "
             "If chimera breaking mode was used, this must be "
             "the index file of the chimera broken contigs, "
             "which can be found in ragoo_output/chimera_break. "
             "The correct file to use is the file with the "
             ".intra.chimera.broken.fa suffix.",
    )
    parser.add_argument(
        "groupings",
        metavar="<groupings.fofn>",
        type=str,
        help="file of file names for all *_groupings.txt produced by RaGOO. "
             "Single column with full path to each grouping file.",
    )
    args = parser.parse_args(argv)

    contig_lengths = read_contig_lengths(args.index)
    grouping_files = read_grouping_paths(args.groupings)
    (
        num_ctg_localized,
        num_bp_localized,
        num_ctg_unlocalized,
        num_bp_unlocalized,
    ) = summarize_localization(
        contig_lengths, read_localized_contigs(grouping_files)
    )

    print('%r contigs were localized by RaGOO' % (num_ctg_localized,))
    print('%r bp were localized by RaGOO' % (num_bp_localized,))
    print('%r contigs were unlocalized by RaGOO' % (num_ctg_unlocalized,))
    print('%r bp were unlocalized by RaGOO' % (num_bp_unlocalized,))

    print('%r %% of contigs were localized by RaGOO'
          % (percent_of_total(num_ctg_localized, num_ctg_unlocalized),))
    print('%r %% of bp were localized by RaGOO'
          % (percent_of_total(num_bp_localized, num_bp_unlocalized),))


if __name__ == "__main__":
    main()
|
|
52
|
|
53
|
|
54
|
|
55
|