changeset 5:49236b03e4fd draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit a5ff06c631a2a5a0d5d44edd6cb58a599d50918b"
author iuc
date Wed, 19 May 2021 16:49:13 +0000
parents f95f403841ad
children 147465efa99c
files macros.xml prepare_amplicon_info.py
diffstat 2 files changed, 58 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Mon Jun 22 07:29:41 2020 -0400
+++ b/macros.xml	Wed May 19 16:49:13 2021 +0000
@@ -1,9 +1,8 @@
 <macros>
-  <token name="@VERSION@">1.2.2</token>
+  <token name="@VERSION@">1.3.1</token>
   <xml name="requirements">
     <requirements>
       <requirement type="package" version="@VERSION@">ivar</requirement>
-      <requirement type="package" version="1.9">samtools</requirement>
       <yield />
     </requirements>
   </xml>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_amplicon_info.py	Wed May 19 16:49:13 2021 +0000
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+
+# extends ivar trim's amplicon info parsing abilities:
+# each amplicon info line may list more than two (nested) primers
+# and gets reduced to the names of its two outermost primers
+
+import sys
+
+
+# parse primer names and strand-aware start positions from BED file
+primer_starts = {}
+with open(sys.argv[1]) as i:
+    for line in i:
+        f = line.strip().split('\t')
+        try:
+            if f[5] == '+':
+                primer_starts[f[3]] = int(f[1])
+            elif f[5] == '-':
+                primer_starts[f[3]] = int(f[2]) - 1
+            else:
+                raise ValueError()
+        except (IndexError, ValueError):
+            sys.exit(
+                'Primer BED file needs to be TAB-separated with the '
+                'following columns: '
+                'chrom, chromStart, chromEnd, name, score, strand, '
+                'where "chromStart", "chromEnd" need to be integer values '
+                'and "strand" needs to be either "+" or "-".'
+            )
+
+# parse amplicon info and record outer primer names
+with open(sys.argv[2]) as i:
+    ret_lines = []
+    for line in i:
+        first = last = None
+        for pname in line.strip().split('\t'):
+            try:
+                primer_start = primer_starts[pname]
+            except KeyError:
+                sys.exit(
+                    'Amplicon info with primer name not found in '
+                    f'primer BED file: "{pname}"'
+                )
+            if first is None or primer_start < primer_starts[first]:
+                first = pname
+            if last is None or primer_start > primer_starts[last]:
+                last = pname
+        if first == last:
+            sys.exit(
+                line
+                + 'is not a proper amplicon info line.'
+            )
+        ret_lines.append(f'{first}\t{last}\n')
+
+# write amended amplicon info
+with open(sys.argv[3], 'w') as o:
+    o.writelines(ret_lines)