Mercurial > repos > iuc > ivar_trim

--- a/ivar_trim.xml	Thu Mar 13 09:02:49 2025 +0000
+++ b/ivar_trim.xml	Wed Aug 06 08:21:20 2025 +0000
@@ -1,32 +1,33 @@
-<tool id="ivar_trim" name="ivar trim" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+<tool id="ivar_trim" name="ivar trim" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@">
     <description>Trim reads in aligned BAM</description>
     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="xrefs"/>
-    <expand macro="requirements"/>
+    <expand macro="requirements">
+        <requirement type="package" version="0.1.0">viramp-hub</requirement>
+    </expand>
     <expand macro="version_command"/>
     <command detect_errors="exit_code"><![CDATA[
+        ## Prepare primer scheme and, if necessary, also amplicon info file
         #if $primer.source == 'history'
-            cp '$primer.input_bed' bed.bed &&
+            ln -s '$primer.input_bed' bed.bed &&
         #else
-            cp '$primer.cached_bed.fields.path' bed.bed &&
+            ln -s '$primer.cached_bed.fields.path' bed.bed &&
         #end if
-        python '$__tool_directory__/sanitize_bed.py' bed.bed &&
-        #if $amplicons.filter_by == 'yes' or $amplicons.filter_by == 'yes_compute'
-            #if $amplicons.filter_by == 'yes_compute':
-                python '$__tool_directory__/write_amplicon_info_file.py' bed.bed amplicon_info_raw.tsv &&
-            #else
-                ln -s '$amplicons.amplicon_info' amplicon_info_raw.tsv &&
-            #end if
-            python '$__tool_directory__/prepare_amplicon_info.py' bed.bed amplicon_info_raw.tsv amplicon_info.tsv &&
+        scheme-convert --to bed --bed-type ivar -o ivar.bed bed.bed &&
+        #if $amplicons.filter_by == 'yes_compute':
+            scheme-convert --to amplicon-info -r outer -o amplicon_info.tsv ivar.bed &&
+        #elif $amplicons.filter_by == 'yes':
+            ## just check the amplicon info file against the primer scheme and reduce it to its outer primers
+            scheme-convert -a '$amplicons.amplicon_info' --to amplicon-info -r outer -o amplicon_info.tsv ivar.bed &&
         #end if
         ln -s '$input_bam' sorted.bam &&
         ln -s '${input_bam.metadata.bam_index}' sorted.bam.bai &&

         ivar trim
         -i sorted.bam
-        -b bed.bed
+        -b ivar.bed
         #if $amplicons.filter_by == 'yes' or $amplicons.filter_by == 'yes_compute'
             -f amplicon_info.tsv
         #end if
@@ -113,7 +114,7 @@
         <test>
             <!-- Test with primer bed file that needs to be sanitized -->
             <param name="input_bam" value="covid19/PC00101P_sub.sorted.bam" />
-            <param name="input_bed" value="covid19/ARTIC-V1-bad.bed" />
+            <param name="input_bed" ftype="bed" value="covid19/ARTIC-V1-bad.bed" />
             <param name="inc_primers" value="true" />
             <conditional name="trimmed_length">
                 <param name="filter" value="custom" />
@@ -180,7 +181,7 @@
                 <param name="min_len" value="30" />
             </conditional>
             <assert_command>
-                <has_text text="write_amplicon_info_file" />
+                <has_text text="scheme-convert --to amplicon-info -r outer" />
             </assert_command>
             <output name="output_bam" file="sars-cov-2/sars_cov2_trimmed.bam" compare="sim_size" delta="100000"/>
         </test>
--- a/macros.xml	Thu Mar 13 09:02:49 2025 +0000
+++ b/macros.xml	Wed Aug 06 08:21:20 2025 +0000
@@ -1,17 +1,18 @@
 <?xml version="1.0"?>
 <macros>
   <token name="@TOOL_VERSION@">1.4.4</token>
-  <token name="@PROFILE@">21.01</token>
+  <token name="@PROFILE@">23.0</token>
   <xml name="requirements">
   <requirements>
       <requirement type="package" version="@TOOL_VERSION@">ivar</requirement>
-      <requirement type="package" version="3.11.9">python</requirement>
-      <requirement type="package" version="1.21">samtools</requirement>
+      <requirement type="package" version="3.12">python</requirement>
+      <requirement type="package" version="1.22">samtools</requirement>
+      <requirement type="package" version="4.9">sed</requirement>
       <yield/>
   </requirements>
   </xml>
   <xml name="version_command">
-    <version_command>ivar version | grep version</version_command>
+    <version_command>ivar version | sed -n '1p'</version_command>
   </xml>
   <xml name="xrefs">
     <xrefs>
--- a/prepare_amplicon_info.py	Thu Mar 13 09:02:49 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-#!/usr/bin/env python
-
-# extends ivar trim's amplicon info parsing abilities
-# to include calculation of amplicon regions from
-# sets of nested (more than two) primers
-
-import sys
-
-
-# parse primers and their start positions from BED file
-primer_starts = {}
-with open(sys.argv[1]) as i:
-    for line in i:
-        line = line.strip()
-        if not line:
-            continue
-        f = line.split('\t')
-        try:
-            if f[5] == '+':
-                primer_starts[f[3]] = int(f[1])
-            elif f[5] == '-':
-                primer_starts[f[3]] = int(f[2]) - 1
-            else:
-                raise ValueError()
-        except (IndexError, ValueError):
-            sys.exit(
-                'Primer BED file needs to be TAB-separated with the '
-                'following columns: '
-                'chrom, chromStart, chromEnd, name, score, strand, '
-                'where "chromStart", "chromEnd" need to be integer values '
-                'and "strand" needs to be either "+" or "-".'
-            )
-
-# parse amplicon info and record outer primer names
-with open(sys.argv[2]) as i:
-    ret_lines = []
-    for line in i:
-        line = line.strip()
-        if not line:
-            continue
-        first = last = None
-        for pname in line.split('\t'):
-            try:
-                primer_start = primer_starts[pname]
-            except KeyError:
-                sys.exit(
-                    'Amplicon info with primer name not found in '
-                    f'primer BED file: "{pname}"'
-                )
-            if first is None or primer_start < primer_starts[first]:
-                first = pname
-            if last is None or primer_start > primer_starts[last]:
-                last = pname
-        if first == last:
-            sys.exit(
-                line
-                + 'is not a proper amplicon info line.'
-            )
-        ret_lines.append(f'{first}\t{last}\n')
-
-# write amended amplicon info
-with open(sys.argv[3], 'w') as o:
-    o.writelines(ret_lines)
--- a/sanitize_bed.py	Thu Mar 13 09:02:49 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-
-with open(sys.argv[1]) as i:
-    bed_data = i.readlines()
-
-sanitized_data = []
-try:
-    for record in bed_data:
-        if record.strip():
-            fields = record.split('\t')
-            sanitized_data.append(
-                '\t'.join(fields[:4] + ['60'] + fields[5:])
-            )
-except IndexError:
-    pass  # leave column number issue to getmasked
-else:
-    with open(sys.argv[1], 'w') as o:
-        o.writelines(sanitized_data)
--- a/test-data/zika/db/zika_primers.bed	Thu Mar 13 09:02:49 2025 +0000
+++ b/test-data/zika/db/zika_primers.bed	Wed Aug 06 08:21:20 2025 +0000
@@ -59,7 +59,7 @@
 PRV	9184	9206	400_30_out_R	60	-
 PRV	9052	9074	400_31_out_L	60	+
 PRV	9473	9495	400_31_out_R	60	-
-PRV	9336	9358	400_32_out_L	30	+
+PRV	9336	9358	400_32_out_L	60	+
 PRV	9755	9777	400_32_out_R	60	-
 PRV	9637	9659	400_33_out_L*	60	+
 PRV	10104	10126	400_33_out_R*	60	-
--- a/test-data/zika/db/zika_primers_consensus.bed	Thu Mar 13 09:02:49 2025 +0000
+++ b/test-data/zika/db/zika_primers_consensus.bed	Wed Aug 06 08:21:20 2025 +0000
@@ -58,7 +58,7 @@
 Consensus_Z52.consensus_threshold_0_quality_20	8621	8643	400_29_out_R	60	-
 Consensus_Z52.consensus_threshold_0_quality_20	8800	8822	400_31_out_L	60	+
 Consensus_Z52.consensus_threshold_0_quality_20	8932	8954	400_30_out_R	60	-
-Consensus_Z52.consensus_threshold_0_quality_20	9084	9106	400_32_out_L	30	+
+Consensus_Z52.consensus_threshold_0_quality_20	9084	9106	400_32_out_L	60	+
 Consensus_Z52.consensus_threshold_0_quality_20	9221	9243	400_31_out_R	60	-
 Consensus_Z52.consensus_threshold_0_quality_20	9385	9407	400_33_out_L*	60	+
 Consensus_Z52.consensus_threshold_0_quality_20	9503	9525	400_32_out_R	60	-
--- a/write_amplicon_info_file.py	Thu Mar 13 09:02:49 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-#!/usr/bin/env python
-
-import argparse
-import re
-
-
-AMPLICON_PAT = re.compile(r'.*_(?P<num>\d+).*_(?P<name>L(?:EFT)?|R(?:IGHT)?)')
-
-
-def write_amplicon_info_file(bed_file, amplicon_info_file):
-    amplicon_sets = {}
-    for line in bed_file:
-        line = line.strip()
-        if not line:
-            continue
-        fields = line.split('\t')
-        start = int(fields[1])
-        name = fields[3]
-        re_match = AMPLICON_PAT.match(name)
-        if re_match is None:
-            raise ValueError(
-                '{} does not match expected amplicon name format'.format(name)
-            )
-        amplicon_id = int(re_match.group('num'))
-        amplicon_set = amplicon_sets.get(amplicon_id, [])
-        amplicon_set.append((name, start))
-        amplicon_sets[amplicon_id] = amplicon_set
-
-    # write amplicons sorted by number with primers sorted by start position
-    for id in sorted(amplicon_sets):
-        amplicon_info = '\t'.join(
-            [name for name, start in sorted(
-                amplicon_sets[id], key=lambda x: x[1]
-            )]
-        ) + '\n'
-        amplicon_info_file.write(amplicon_info)
-    amplicon_info_file.close()
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(
-        description='Write an amplicon info file for iVar '
-                    'from a BED file describing primer positions'
-    )
-    parser.add_argument(
-        'bed_file', type=argparse.FileType(), help='Primer BED file'
-    )
-    parser.add_argument(
-        'amplicon_info_file', type=argparse.FileType('w'),
-        help='Output file: amplicon info file in TSV format'
-    )
-    args = parser.parse_args()
-
-    write_amplicon_info_file(args.bed_file, args.amplicon_info_file)