Mercurial > repos > iuc > ivar_variants
changeset 20:5e5dfc9e71e2 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 97f230215d53e71748c78cd21633d92143710b94
author | iuc |
---|---|
date | Wed, 06 Aug 2025 08:23:57 +0000 |
parents | 1ef68546d37f |
children | |
files | macros.xml prepare_amplicon_info.py sanitize_bed.py test-data/zika/db/zika_primers.bed test-data/zika/db/zika_primers_consensus.bed write_amplicon_info_file.py |
diffstat | 6 files changed, 7 insertions(+), 144 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Thu Mar 13 09:04:01 2025 +0000 +++ b/macros.xml Wed Aug 06 08:23:57 2025 +0000 @@ -1,17 +1,18 @@ <?xml version="1.0"?> <macros> <token name="@TOOL_VERSION@">1.4.4</token> - <token name="@PROFILE@">21.01</token> + <token name="@PROFILE@">23.0</token> <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">ivar</requirement> - <requirement type="package" version="3.11.9">python</requirement> - <requirement type="package" version="1.21">samtools</requirement> + <requirement type="package" version="3.12">python</requirement> + <requirement type="package" version="1.22">samtools</requirement> + <requirement type="package" version="4.9">sed</requirement> <yield/> </requirements> </xml> <xml name="version_command"> - <version_command>ivar version | grep version</version_command> + <version_command>ivar version | sed -n '1p'</version_command> </xml> <xml name="xrefs"> <xrefs>
--- a/prepare_amplicon_info.py Thu Mar 13 09:04:01 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -#!/usr/bin/env python - -# extends ivar trim's amplicon info parsing abilities -# to include calculation of amplicon regions from -# sets of nested (more than two) primers - -import sys - - -# parse primers and their start positions from BED file -primer_starts = {} -with open(sys.argv[1]) as i: - for line in i: - line = line.strip() - if not line: - continue - f = line.split('\t') - try: - if f[5] == '+': - primer_starts[f[3]] = int(f[1]) - elif f[5] == '-': - primer_starts[f[3]] = int(f[2]) - 1 - else: - raise ValueError() - except (IndexError, ValueError): - sys.exit( - 'Primer BED file needs to be TAB-separated with the ' - 'following columns: ' - 'chrom, chromStart, chromEnd, name, score, strand, ' - 'where "chromStart", "chromEnd" need to be integer values ' - 'and "strand" needs to be either "+" or "-".' - ) - -# parse amplicon info and record outer primer names -with open(sys.argv[2]) as i: - ret_lines = [] - for line in i: - line = line.strip() - if not line: - continue - first = last = None - for pname in line.split('\t'): - try: - primer_start = primer_starts[pname] - except KeyError: - sys.exit( - 'Amplicon info with primer name not found in ' - f'primer BED file: "{pname}"' - ) - if first is None or primer_start < primer_starts[first]: - first = pname - if last is None or primer_start > primer_starts[last]: - last = pname - if first == last: - sys.exit( - line - + 'is not a proper amplicon info line.' - ) - ret_lines.append(f'{first}\t{last}\n') - -# write amended amplicon info -with open(sys.argv[3], 'w') as o: - o.writelines(ret_lines)
--- a/sanitize_bed.py Thu Mar 13 09:04:01 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - - -with open(sys.argv[1]) as i: - bed_data = i.readlines() - -sanitized_data = [] -try: - for record in bed_data: - if record.strip(): - fields = record.split('\t') - sanitized_data.append( - '\t'.join(fields[:4] + ['60'] + fields[5:]) - ) -except IndexError: - pass # leave column number issue to getmasked -else: - with open(sys.argv[1], 'w') as o: - o.writelines(sanitized_data)
--- a/test-data/zika/db/zika_primers.bed Thu Mar 13 09:04:01 2025 +0000 +++ b/test-data/zika/db/zika_primers.bed Wed Aug 06 08:23:57 2025 +0000 @@ -59,7 +59,7 @@ PRV 9184 9206 400_30_out_R 60 - PRV 9052 9074 400_31_out_L 60 + PRV 9473 9495 400_31_out_R 60 - -PRV 9336 9358 400_32_out_L 30 + +PRV 9336 9358 400_32_out_L 60 + PRV 9755 9777 400_32_out_R 60 - PRV 9637 9659 400_33_out_L* 60 + PRV 10104 10126 400_33_out_R* 60 -
--- a/test-data/zika/db/zika_primers_consensus.bed Thu Mar 13 09:04:01 2025 +0000 +++ b/test-data/zika/db/zika_primers_consensus.bed Wed Aug 06 08:23:57 2025 +0000 @@ -58,7 +58,7 @@ Consensus_Z52.consensus_threshold_0_quality_20 8621 8643 400_29_out_R 60 - Consensus_Z52.consensus_threshold_0_quality_20 8800 8822 400_31_out_L 60 + Consensus_Z52.consensus_threshold_0_quality_20 8932 8954 400_30_out_R 60 - -Consensus_Z52.consensus_threshold_0_quality_20 9084 9106 400_32_out_L 30 + +Consensus_Z52.consensus_threshold_0_quality_20 9084 9106 400_32_out_L 60 + Consensus_Z52.consensus_threshold_0_quality_20 9221 9243 400_31_out_R 60 - Consensus_Z52.consensus_threshold_0_quality_20 9385 9407 400_33_out_L* 60 + Consensus_Z52.consensus_threshold_0_quality_20 9503 9525 400_32_out_R 60 -
--- a/write_amplicon_info_file.py Thu Mar 13 09:04:01 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,54 +0,0 @@ -#!/usr/bin/env python - -import argparse -import re - - -AMPLICON_PAT = re.compile(r'.*_(?P<num>\d+).*_(?P<name>L(?:EFT)?|R(?:IGHT)?)') - - -def write_amplicon_info_file(bed_file, amplicon_info_file): - amplicon_sets = {} - for line in bed_file: - line = line.strip() - if not line: - continue - fields = line.split('\t') - start = int(fields[1]) - name = fields[3] - re_match = AMPLICON_PAT.match(name) - if re_match is None: - raise ValueError( - '{} does not match expected amplicon name format'.format(name) - ) - amplicon_id = int(re_match.group('num')) - amplicon_set = amplicon_sets.get(amplicon_id, []) - amplicon_set.append((name, start)) - amplicon_sets[amplicon_id] = amplicon_set - - # write amplicons sorted by number with primers sorted by start position - for id in sorted(amplicon_sets): - amplicon_info = '\t'.join( - [name for name, start in sorted( - amplicon_sets[id], key=lambda x: x[1] - )] - ) + '\n' - amplicon_info_file.write(amplicon_info) - amplicon_info_file.close() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='Write an amplicon info file for iVar ' - 'from a BED file describing primer positions' - ) - parser.add_argument( - 'bed_file', type=argparse.FileType(), help='Primer BED file' - ) - parser.add_argument( - 'amplicon_info_file', type=argparse.FileType('w'), - help='Output file: amplicon info file in TSV format' - ) - args = parser.parse_args() - - write_amplicon_info_file(args.bed_file, args.amplicon_info_file)