Mercurial > repos > iuc > ivar_removereads
changeset 20:dec59e6af429 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 97f230215d53e71748c78cd21633d92143710b94
author | iuc |
---|---|
date | Wed, 06 Aug 2025 08:21:59 +0000 |
parents | 55a7867809f4 |
children | |
files | ivar_removereads.xml macros.xml prepare_amplicon_info.py sanitize_bed.py test-data/zika/db/zika_primers.bed test-data/zika/db/zika_primers_consensus.bed write_amplicon_info_file.py |
diffstat | 7 files changed, 19 insertions(+), 154 deletions(-) [+] |
line wrap: on
line diff
--- a/ivar_removereads.xml Thu Mar 13 09:04:37 2025 +0000 +++ b/ivar_removereads.xml Wed Aug 06 08:21:59 2025 +0000 @@ -1,21 +1,23 @@ -<tool id="ivar_removereads" name="ivar removereads" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@"> +<tool id="ivar_removereads" name="ivar removereads" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@"> <description>Remove reads from trimmed BAM file</description> <macros> <import>macros.xml</import> </macros> <expand macro="xrefs"/> - <expand macro="requirements"/> + <expand macro="requirements"> + <requirement type="package" version="0.1.0">viramp-hub</requirement> + </expand> <expand macro="version_command"/> <command detect_errors="exit_code"><![CDATA[ - cp '$input_bed' binding_sites.bed && - python '$__tool_directory__/sanitize_bed.py' binding_sites.bed && - #if $amplicons.computed == "yes" - python '$__tool_directory__/write_amplicon_info_file.py' binding_sites.bed amplicon_info.tsv && - #else - ln -s '$amplicon_info' amplicon_info.tsv && + scheme-convert --to bed --bed-type ivar -o ivar.bed '$input_bed' && + #if $amplicons.computed == "yes": + scheme-convert --to amplicon-info -o amplicon_info.tsv ivar.bed && + #else: + ## just check the amplicon info file against the primer scheme + scheme-convert -a '$amplicon_info' --to amplicon-info -o amplicon_info.tsv ivar.bed && #end if ivar getmasked - -i '$variants_tsv' -b binding_sites.bed -f amplicon_info.tsv -p masked_primers && + -i '$variants_tsv' -b ivar.bed -f amplicon_info.tsv -p masked_primers && python '$__tool_directory__/completemask.py' masked_primers.txt amplicon_info.tsv && ln -s '$input_bam' sorted.bam && @@ -23,7 +25,7 @@ ivar removereads -i sorted.bam - -b binding_sites.bed + -b ivar.bed -p removed_reads.bam -t masked_primers.txt ]]></command>
--- a/macros.xml Thu Mar 13 09:04:37 2025 +0000 +++ b/macros.xml Wed Aug 06 08:21:59 2025 +0000 @@ -1,17 +1,18 @@ <?xml version="1.0"?> <macros> <token name="@TOOL_VERSION@">1.4.4</token> - <token name="@PROFILE@">21.01</token> + <token name="@PROFILE@">23.0</token> <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">ivar</requirement> - <requirement type="package" version="3.11.9">python</requirement> - <requirement type="package" version="1.21">samtools</requirement> + <requirement type="package" version="3.12">python</requirement> + <requirement type="package" version="1.22">samtools</requirement> + <requirement type="package" version="4.9">sed</requirement> <yield/> </requirements> </xml> <xml name="version_command"> - <version_command>ivar version | grep version</version_command> + <version_command>ivar version | sed -n '1p'</version_command> </xml> <xml name="xrefs"> <xrefs>
--- a/prepare_amplicon_info.py Thu Mar 13 09:04:37 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -#!/usr/bin/env python - -# extends ivar trim's amplicon info parsing abilities -# to include calculation of amplicon regions from -# sets of nested (more than two) primers - -import sys - - -# parse primers and their start positions from BED file -primer_starts = {} -with open(sys.argv[1]) as i: - for line in i: - line = line.strip() - if not line: - continue - f = line.split('\t') - try: - if f[5] == '+': - primer_starts[f[3]] = int(f[1]) - elif f[5] == '-': - primer_starts[f[3]] = int(f[2]) - 1 - else: - raise ValueError() - except (IndexError, ValueError): - sys.exit( - 'Primer BED file needs to be TAB-separated with the ' - 'following columns: ' - 'chrom, chromStart, chromEnd, name, score, strand, ' - 'where "chromStart", "chromEnd" need to be integer values ' - 'and "strand" needs to be either "+" or "-".' - ) - -# parse amplicon info and record outer primer names -with open(sys.argv[2]) as i: - ret_lines = [] - for line in i: - line = line.strip() - if not line: - continue - first = last = None - for pname in line.split('\t'): - try: - primer_start = primer_starts[pname] - except KeyError: - sys.exit( - 'Amplicon info with primer name not found in ' - f'primer BED file: "{pname}"' - ) - if first is None or primer_start < primer_starts[first]: - first = pname - if last is None or primer_start > primer_starts[last]: - last = pname - if first == last: - sys.exit( - line - + 'is not a proper amplicon info line.' - ) - ret_lines.append(f'{first}\t{last}\n') - -# write amended amplicon info -with open(sys.argv[3], 'w') as o: - o.writelines(ret_lines)
--- a/sanitize_bed.py Thu Mar 13 09:04:37 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - - -with open(sys.argv[1]) as i: - bed_data = i.readlines() - -sanitized_data = [] -try: - for record in bed_data: - if record.strip(): - fields = record.split('\t') - sanitized_data.append( - '\t'.join(fields[:4] + ['60'] + fields[5:]) - ) -except IndexError: - pass # leave column number issue to getmasked -else: - with open(sys.argv[1], 'w') as o: - o.writelines(sanitized_data)
--- a/test-data/zika/db/zika_primers.bed Thu Mar 13 09:04:37 2025 +0000 +++ b/test-data/zika/db/zika_primers.bed Wed Aug 06 08:21:59 2025 +0000 @@ -59,7 +59,7 @@ PRV 9184 9206 400_30_out_R 60 - PRV 9052 9074 400_31_out_L 60 + PRV 9473 9495 400_31_out_R 60 - -PRV 9336 9358 400_32_out_L 30 + +PRV 9336 9358 400_32_out_L 60 + PRV 9755 9777 400_32_out_R 60 - PRV 9637 9659 400_33_out_L* 60 + PRV 10104 10126 400_33_out_R* 60 -
--- a/test-data/zika/db/zika_primers_consensus.bed Thu Mar 13 09:04:37 2025 +0000 +++ b/test-data/zika/db/zika_primers_consensus.bed Wed Aug 06 08:21:59 2025 +0000 @@ -58,7 +58,7 @@ Consensus_Z52.consensus_threshold_0_quality_20 8621 8643 400_29_out_R 60 - Consensus_Z52.consensus_threshold_0_quality_20 8800 8822 400_31_out_L 60 + Consensus_Z52.consensus_threshold_0_quality_20 8932 8954 400_30_out_R 60 - -Consensus_Z52.consensus_threshold_0_quality_20 9084 9106 400_32_out_L 30 + +Consensus_Z52.consensus_threshold_0_quality_20 9084 9106 400_32_out_L 60 + Consensus_Z52.consensus_threshold_0_quality_20 9221 9243 400_31_out_R 60 - Consensus_Z52.consensus_threshold_0_quality_20 9385 9407 400_33_out_L* 60 + Consensus_Z52.consensus_threshold_0_quality_20 9503 9525 400_32_out_R 60 -
--- a/write_amplicon_info_file.py Thu Mar 13 09:04:37 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,54 +0,0 @@ -#!/usr/bin/env python - -import argparse -import re - - -AMPLICON_PAT = re.compile(r'.*_(?P<num>\d+).*_(?P<name>L(?:EFT)?|R(?:IGHT)?)') - - -def write_amplicon_info_file(bed_file, amplicon_info_file): - amplicon_sets = {} - for line in bed_file: - line = line.strip() - if not line: - continue - fields = line.split('\t') - start = int(fields[1]) - name = fields[3] - re_match = AMPLICON_PAT.match(name) - if re_match is None: - raise ValueError( - '{} does not match expected amplicon name format'.format(name) - ) - amplicon_id = int(re_match.group('num')) - amplicon_set = amplicon_sets.get(amplicon_id, []) - amplicon_set.append((name, start)) - amplicon_sets[amplicon_id] = amplicon_set - - # write amplicons sorted by number with primers sorted by start position - for id in sorted(amplicon_sets): - amplicon_info = '\t'.join( - [name for name, start in sorted( - amplicon_sets[id], key=lambda x: x[1] - )] - ) + '\n' - amplicon_info_file.write(amplicon_info) - amplicon_info_file.close() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='Write an amplicon info file for iVar ' - 'from a BED file describing primer positions' - ) - parser.add_argument( - 'bed_file', type=argparse.FileType(), help='Primer BED file' - ) - parser.add_argument( - 'amplicon_info_file', type=argparse.FileType('w'), - help='Output file: amplicon info file in TSV format' - ) - args = parser.parse_args() - - write_amplicon_info_file(args.bed_file, args.amplicon_info_file)