Mercurial > repos > iuc > ivar_trim
changeset 22:6606a8c97889 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 97f230215d53e71748c78cd21633d92143710b94
author | iuc |
---|---|
date | Wed, 06 Aug 2025 08:21:20 +0000 |
parents | 7bd020da0ce5 |
children | |
files | ivar_trim.xml macros.xml prepare_amplicon_info.py sanitize_bed.py test-data/zika/db/zika_primers.bed test-data/zika/db/zika_primers_consensus.bed write_amplicon_info_file.py |
diffstat | 7 files changed, 23 insertions(+), 159 deletions(-) [+] |
line wrap: on
line diff
--- a/ivar_trim.xml Thu Mar 13 09:02:49 2025 +0000 +++ b/ivar_trim.xml Wed Aug 06 08:21:20 2025 +0000 @@ -1,32 +1,33 @@ -<tool id="ivar_trim" name="ivar trim" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@"> +<tool id="ivar_trim" name="ivar trim" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@"> <description>Trim reads in aligned BAM</description> <macros> <import>macros.xml</import> </macros> <expand macro="xrefs"/> - <expand macro="requirements"/> + <expand macro="requirements"> + <requirement type="package" version="0.1.0">viramp-hub</requirement> + </expand> <expand macro="version_command"/> <command detect_errors="exit_code"><![CDATA[ + ## Prepare primer scheme and, if necessary, also amplicon info file #if $primer.source == 'history' - cp '$primer.input_bed' bed.bed && + ln -s '$primer.input_bed' bed.bed && #else - cp '$primer.cached_bed.fields.path' bed.bed && + ln -s '$primer.cached_bed.fields.path' bed.bed && #end if - python '$__tool_directory__/sanitize_bed.py' bed.bed && - #if $amplicons.filter_by == 'yes' or $amplicons.filter_by == 'yes_compute' - #if $amplicons.filter_by == 'yes_compute': - python '$__tool_directory__/write_amplicon_info_file.py' bed.bed amplicon_info_raw.tsv && - #else - ln -s '$amplicons.amplicon_info' amplicon_info_raw.tsv && - #end if - python '$__tool_directory__/prepare_amplicon_info.py' bed.bed amplicon_info_raw.tsv amplicon_info.tsv && + scheme-convert --to bed --bed-type ivar -o ivar.bed bed.bed && + #if $amplicons.filter_by == 'yes_compute': + scheme-convert --to amplicon-info -r outer -o amplicon_info.tsv ivar.bed && + #elif $amplicons.filter_by == 'yes': + ## just check the amplicon info file against the primer scheme and reduce it to its outer primers + scheme-convert -a '$amplicons.amplicon_info' --to amplicon-info -r outer -o amplicon_info.tsv ivar.bed && #end if ln -s '$input_bam' sorted.bam && ln -s '${input_bam.metadata.bam_index}' sorted.bam.bai && ivar trim -i sorted.bam - -b bed.bed + -b ivar.bed #if $amplicons.filter_by == 'yes' or $amplicons.filter_by == 'yes_compute' -f amplicon_info.tsv #end if @@ -113,7 +114,7 @@ <test> <!-- Test with primer bed file that needs to be sanitized --> <param name="input_bam" value="covid19/PC00101P_sub.sorted.bam" /> - <param name="input_bed" value="covid19/ARTIC-V1-bad.bed" /> + <param name="input_bed" ftype="bed" value="covid19/ARTIC-V1-bad.bed" /> <param name="inc_primers" value="true" /> <conditional name="trimmed_length"> <param name="filter" value="custom" /> @@ -180,7 +181,7 @@ <param name="min_len" value="30" /> </conditional> <assert_command> - <has_text text="write_amplicon_info_file" /> + <has_text text="scheme-convert --to amplicon-info -r outer" /> </assert_command> <output name="output_bam" file="sars-cov-2/sars_cov2_trimmed.bam" compare="sim_size" delta="100000"/> </test>
--- a/macros.xml Thu Mar 13 09:02:49 2025 +0000 +++ b/macros.xml Wed Aug 06 08:21:20 2025 +0000 @@ -1,17 +1,18 @@ <?xml version="1.0"?> <macros> <token name="@TOOL_VERSION@">1.4.4</token> - <token name="@PROFILE@">21.01</token> + <token name="@PROFILE@">23.0</token> <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">ivar</requirement> - <requirement type="package" version="3.11.9">python</requirement> - <requirement type="package" version="1.21">samtools</requirement> + <requirement type="package" version="3.12">python</requirement> + <requirement type="package" version="1.22">samtools</requirement> + <requirement type="package" version="4.9">sed</requirement> <yield/> </requirements> </xml> <xml name="version_command"> - <version_command>ivar version | grep version</version_command> + <version_command>ivar version | sed -n '1p'</version_command> </xml> <xml name="xrefs"> <xrefs>
--- a/prepare_amplicon_info.py Thu Mar 13 09:02:49 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -#!/usr/bin/env python - -# extends ivar trim's amplicon info parsing abilities -# to include calculation of amplicon regions from -# sets of nested (more than two) primers - -import sys - - -# parse primers and their start positions from BED file -primer_starts = {} -with open(sys.argv[1]) as i: - for line in i: - line = line.strip() - if not line: - continue - f = line.split('\t') - try: - if f[5] == '+': - primer_starts[f[3]] = int(f[1]) - elif f[5] == '-': - primer_starts[f[3]] = int(f[2]) - 1 - else: - raise ValueError() - except (IndexError, ValueError): - sys.exit( - 'Primer BED file needs to be TAB-separated with the ' - 'following columns: ' - 'chrom, chromStart, chromEnd, name, score, strand, ' - 'where "chromStart", "chromEnd" need to be integer values ' - 'and "strand" needs to be either "+" or "-".' - ) - -# parse amplicon info and record outer primer names -with open(sys.argv[2]) as i: - ret_lines = [] - for line in i: - line = line.strip() - if not line: - continue - first = last = None - for pname in line.split('\t'): - try: - primer_start = primer_starts[pname] - except KeyError: - sys.exit( - 'Amplicon info with primer name not found in ' - f'primer BED file: "{pname}"' - ) - if first is None or primer_start < primer_starts[first]: - first = pname - if last is None or primer_start > primer_starts[last]: - last = pname - if first == last: - sys.exit( - line - + 'is not a proper amplicon info line.' - ) - ret_lines.append(f'{first}\t{last}\n') - -# write amended amplicon info -with open(sys.argv[3], 'w') as o: - o.writelines(ret_lines)
--- a/sanitize_bed.py Thu Mar 13 09:02:49 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -import sys - - -with open(sys.argv[1]) as i: - bed_data = i.readlines() - -sanitized_data = [] -try: - for record in bed_data: - if record.strip(): - fields = record.split('\t') - sanitized_data.append( - '\t'.join(fields[:4] + ['60'] + fields[5:]) - ) -except IndexError: - pass # leave column number issue to getmasked -else: - with open(sys.argv[1], 'w') as o: - o.writelines(sanitized_data)
--- a/test-data/zika/db/zika_primers.bed Thu Mar 13 09:02:49 2025 +0000 +++ b/test-data/zika/db/zika_primers.bed Wed Aug 06 08:21:20 2025 +0000 @@ -59,7 +59,7 @@ PRV 9184 9206 400_30_out_R 60 - PRV 9052 9074 400_31_out_L 60 + PRV 9473 9495 400_31_out_R 60 - -PRV 9336 9358 400_32_out_L 30 + +PRV 9336 9358 400_32_out_L 60 + PRV 9755 9777 400_32_out_R 60 - PRV 9637 9659 400_33_out_L* 60 + PRV 10104 10126 400_33_out_R* 60 -
--- a/test-data/zika/db/zika_primers_consensus.bed Thu Mar 13 09:02:49 2025 +0000 +++ b/test-data/zika/db/zika_primers_consensus.bed Wed Aug 06 08:21:20 2025 +0000 @@ -58,7 +58,7 @@ Consensus_Z52.consensus_threshold_0_quality_20 8621 8643 400_29_out_R 60 - Consensus_Z52.consensus_threshold_0_quality_20 8800 8822 400_31_out_L 60 + Consensus_Z52.consensus_threshold_0_quality_20 8932 8954 400_30_out_R 60 - -Consensus_Z52.consensus_threshold_0_quality_20 9084 9106 400_32_out_L 30 + +Consensus_Z52.consensus_threshold_0_quality_20 9084 9106 400_32_out_L 60 + Consensus_Z52.consensus_threshold_0_quality_20 9221 9243 400_31_out_R 60 - Consensus_Z52.consensus_threshold_0_quality_20 9385 9407 400_33_out_L* 60 + Consensus_Z52.consensus_threshold_0_quality_20 9503 9525 400_32_out_R 60 -
--- a/write_amplicon_info_file.py Thu Mar 13 09:02:49 2025 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,54 +0,0 @@ -#!/usr/bin/env python - -import argparse -import re - - -AMPLICON_PAT = re.compile(r'.*_(?P<num>\d+).*_(?P<name>L(?:EFT)?|R(?:IGHT)?)') - - -def write_amplicon_info_file(bed_file, amplicon_info_file): - amplicon_sets = {} - for line in bed_file: - line = line.strip() - if not line: - continue - fields = line.split('\t') - start = int(fields[1]) - name = fields[3] - re_match = AMPLICON_PAT.match(name) - if re_match is None: - raise ValueError( - '{} does not match expected amplicon name format'.format(name) - ) - amplicon_id = int(re_match.group('num')) - amplicon_set = amplicon_sets.get(amplicon_id, []) - amplicon_set.append((name, start)) - amplicon_sets[amplicon_id] = amplicon_set - - # write amplicons sorted by number with primers sorted by start position - for id in sorted(amplicon_sets): - amplicon_info = '\t'.join( - [name for name, start in sorted( - amplicon_sets[id], key=lambda x: x[1] - )] - ) + '\n' - amplicon_info_file.write(amplicon_info) - amplicon_info_file.close() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='Write an amplicon info file for iVar ' - 'from a BED file describing primer positions' - ) - parser.add_argument( - 'bed_file', type=argparse.FileType(), help='Primer BED file' - ) - parser.add_argument( - 'amplicon_info_file', type=argparse.FileType('w'), - help='Output file: amplicon info file in TSV format' - ) - args = parser.parse_args() - - write_amplicon_info_file(args.bed_file, args.amplicon_info_file)