diff prepare_amplicon_info.py @ 5:5e668dc9f379 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit a5ff06c631a2a5a0d5d44edd6cb58a599d50918b"
author iuc
date Wed, 19 May 2021 16:50:43 +0000
parents
children e319b5b65879
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_amplicon_info.py	Wed May 19 16:50:43 2021 +0000
@@ -0,0 +1,57 @@
#!/usr/bin/env python

# extends ivar trim's amplicon info parsing abilities
# to include calculation of amplicon regions from
# sets of nested (more than two) primers
#
# usage: prepare_amplicon_info.py PRIMER_BED AMPLICON_INFO OUTPUT

import sys


BED_FORMAT_ERROR = (
    'Primer BED file needs to be TAB-separated with the '
    'following columns: '
    'chrom, chromStart, chromEnd, name, score, strand, '
    'where "chromStart", "chromEnd" need to be integer values '
    'and "strand" needs to be either "+" or "-".'
)


def parse_primer_starts(bed_lines):
    """Map primer names to their start positions.

    bed_lines is any iterable of text lines (e.g. an open file) in
    6-column, TAB-separated BED layout. For a "+" strand primer the
    recorded position is chromStart; for a "-" strand primer it is
    chromEnd - 1. Blank lines are skipped. If a primer name occurs more
    than once, the last occurrence wins.

    Terminates the program via sys.exit() with an explanatory message
    if a line has too few columns, a strand other than "+"/"-", or a
    non-integer coordinate in the column that is read for its strand.
    """
    primer_starts = {}
    for line in bed_lines:
        line = line.strip()
        if not line:
            # tolerate empty lines (typically a trailing one) instead of
            # reporting them as a malformed BED record
            continue
        fields = line.split('\t')
        try:
            strand = fields[5]
            if strand == '+':
                primer_starts[fields[3]] = int(fields[1])
            elif strand == '-':
                primer_starts[fields[3]] = int(fields[2]) - 1
            else:
                raise ValueError()
        except (IndexError, ValueError):
            sys.exit(BED_FORMAT_ERROR)
    return primer_starts


def find_outer_primers(amplicon_lines, primer_starts):
    """Reduce each amplicon info line to its two outermost primers.

    amplicon_lines is any iterable of text lines, each holding the
    TAB-separated names of all primers belonging to one amplicon.
    primer_starts is the name -> position mapping produced by
    parse_primer_starts().

    Returns a list of "first<TAB>last<NEWLINE>" strings, where first is
    the primer with the smallest and last the primer with the largest
    recorded position on that line. Blank lines are skipped.

    Terminates the program via sys.exit() if a primer name is unknown
    or if a line does not yield two distinct outer primers.
    """
    ret_lines = []
    for line in amplicon_lines:
        line = line.strip()
        if not line:
            continue
        pnames = line.split('\t')
        for pname in pnames:
            if pname not in primer_starts:
                sys.exit(
                    'Amplicon info with primer name not found in '
                    f'primer BED file: "{pname}"'
                )
        # min()/max() return the first of several equal candidates, so
        # ties resolve to the name listed first on the line
        first = min(pnames, key=primer_starts.get)
        last = max(pnames, key=primer_starts.get)
        if first == last:
            # a single primer, or primers that all share one position
            sys.exit(f'"{line}" is not a proper amplicon info line.')
        ret_lines.append(f'{first}\t{last}\n')
    return ret_lines


def main(argv):
    """Run the conversion for the command line given in argv."""
    if len(argv) < 4:
        sys.exit(
            'Usage: prepare_amplicon_info.py '
            'PRIMER_BED AMPLICON_INFO OUTPUT'
        )

    # parse primers and their start positions from BED file
    with open(argv[1]) as bed:
        primer_starts = parse_primer_starts(bed)

    # parse amplicon info and record outer primer names
    with open(argv[2]) as info:
        ret_lines = find_outer_primers(info, primer_starts)

    # write amended amplicon info; the output file is only created
    # after both inputs were parsed successfully
    with open(argv[3], 'w') as out:
        out.writelines(ret_lines)


if __name__ == '__main__':
    main(sys.argv)