comparison blast_annotations_processor.py @ 3:ca2f07b71581 draft default tip

planemo upload for repository https://github.com/Onnodg/Naturalis_NLOOR/tree/main/NLOOR_scripts/process_annotations_tool commit 600e5a50a13a3a16a1970d6d4d31cb4f7bd549bf-dirty
author onnodg
date Thu, 12 Feb 2026 13:52:07 +0000
parents 9ca209477dfd
children
comparison
equal deleted inserted replaced
2:9ca209477dfd 3:ca2f07b71581
49 parser.add_argument('--header-anno', help='Output path for header annotations (tabular/xlsx)') 49 parser.add_argument('--header-anno', help='Output path for header annotations (tabular/xlsx)')
50 parser.add_argument('--log', help='Output path for log file (txt)', required=True) 50 parser.add_argument('--log', help='Output path for log file (txt)', required=True)
51 parser.add_argument('--filtered-fasta', required=True, 51 parser.add_argument('--filtered-fasta', required=True,
52 help='Filtered fasta file (fasta format) for downstream analysis') 52 help='Filtered fasta file (fasta format) for downstream analysis')
53 53
54 parser.add_argument('--uncertain-threshold', type=float, default=0.9, required=True, 54 parser.add_argument('--uncertain-threshold', type=float, default=1, required=True,
55 help='Threshold for resolving taxonomic conflicts (default: 0.9)') 55 help='Threshold for resolving taxonomic conflicts')
56 parser.add_argument('--eval-threshold', default='1e-10', type=float, required=True, 56 parser.add_argument('--eval-threshold', type=float, required=True,
57 help='E-value threshold for filtering results (default: 1e-10)') 57 help='E-value threshold for filtering results')
58 parser.add_argument('--use-counts', action='store_true', default=False, required=False, 58 parser.add_argument('--use-counts', action='store_true', default=False, required=False,
59 help='Use read counts in circular data') 59 help='Use read counts in circular data')
60 parser.add_argument('--ignore-rank', default='unknown', required=False, 60 parser.add_argument('--ignore-rank', default='', required=False,
61 help='Ignore rank when containing this text') 61 help='Ignore rank when containing this text')
62 parser.add_argument('--ignore-taxonomy', default='environmental', required=False, 62 parser.add_argument('--ignore-taxonomy', default='', required=False,
63 help="Don't use taxonomy containing this taxonomy") 63 help="Don't use taxonomy containing this taxonomy")
64 parser.add_argument('--bitscore-perc-cutoff', type=float, default=8, required=True, 64 parser.add_argument('--bitscore-perc-cutoff', type=float, default=0, required=True,
65 help='Bitscore percentage cutoff for considered hits') 65 help='Bitscore percentage cutoff for considered hits')
66 parser.add_argument('--min-bitscore', type=int, default=0, required=True, 66 parser.add_argument('--min-bitscore', type=int, default=0, required=True,
67 help='Minimum bitscore threshold for hits') 67 help='Minimum bitscore threshold for hits')
68 parser.add_argument('-iot', '--ignore-obiclean-type', type=str, default='singleton', required=False, 68 parser.add_argument('-iot', '--ignore-obiclean-type', type=str, default='singleton', required=False,
69 help='Ignore sequences with this obiclean type') 69 help='Ignore sequences with this obiclean type')
480 for line in f: 480 for line in f:
481 if line.startswith('>'): 481 if line.startswith('>'):
482 total_headers += 1 482 total_headers += 1
483 header_line = line.rstrip("\n") 483 header_line = line.rstrip("\n")
484 header = header_line.split()[0].strip('>') 484 header = header_line.split()[0].strip('>')
485 if "count=" in header_line: 485 if "obiclean_count={'XXX': " in header_line:
486 count = int(header_line.split("obiclean_count={'XXX': ")[1].split("}")[0])
487 elif "count=" in header_line:
486 count = int(header_line.split("count=")[1].split(";")[0]) 488 count = int(header_line.split("count=")[1].split(";")[0])
487 else: 489 else:
488 count = 0 490 count = 0
489 491
490 passes_header_filter = check_header_string(header_line, ignore_illuminapairend_type, 492 passes_header_filter = check_header_string(header_line, ignore_illuminapairend_type,
711 unanno_set = set(unanno_headers_ordered) 713 unanno_set = set(unanno_headers_ordered)
712 714
713 if not os.path.exists(anno_file_path): 715 if not os.path.exists(anno_file_path):
714 log_message(log_messages, f"Error: Input file {anno_file_path} not found") 716 log_message(log_messages, f"Error: Input file {anno_file_path} not found")
715 with open(args.log, 'w') as f: 717 with open(args.log, 'w') as f:
716 print('gaat nog niet goed hoor')
717 f.write("\n".join(log_messages)) 718 f.write("\n".join(log_messages))
718 return 719 return
719 720
720 log_message(log_messages, f"Reading BLAST annotations") 721 log_message(log_messages, f"Reading BLAST annotations")
721 blast_groups = parse_blast_output(anno_file_path, args.eval_threshold, args.ignore_taxonomy, args.ignore_rank, 722 blast_groups = parse_blast_output(anno_file_path, args.eval_threshold, args.ignore_taxonomy, args.ignore_rank,