# HG changeset patch # User iuc # Date 1621443087 0 # Node ID cf65217ad61c0c3af3005dad5b275a35d13cb69f # Parent db536ad45f28b77dddba7d9fa277dd320a676334 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit a5ff06c631a2a5a0d5d44edd6cb58a599d50918b" diff -r db536ad45f28 -r cf65217ad61c ivar_trim.xml --- a/ivar_trim.xml Mon Jun 22 07:30:46 2020 -0400 +++ b/ivar_trim.xml Wed May 19 16:51:27 2021 +0000 @@ -1,4 +1,4 @@ - + Trim reads in aligned BAM macros.xml @@ -8,23 +8,29 @@ @@ -47,10 +53,25 @@ + + + + + + + + + + + + - @@ -93,21 +114,42 @@ - + `_. - ]]> +iVar uses primer positions supplied in a BED file to soft clip primer +sequences from an aligned and sorted BAM file. Following this, the reads are +trimmed further based on a quality threshold. + +**Primer and Amplicon info** + +The tool requires information about primers and their binding sites in 6-column +BED format. The information from this file is used to decide whether any mapped +read in the BAM input ends with a primer sequence and should, thus, be +soft-clipped. + +Optionally, the tool can also discard reads that do not fully map to within any +amplicon. Such reads are likely to be wet-lab or mapping artefacts and removing +them can increase variant calling precision. To calculate the extent of +expected amplicons the tool requires an additional amplicon info dataset that +lists the names of primers that together form any given amplicon. Primer names +(exactly matching those in the primer info dataset) need to be TAB-separated +with one line per amplicon. +If the primer scheme has more than two primers contributing to a given amplicon +(in schemes using alternate primers), you can (in this Galaxy tool only) +specify all of them on one line and the tool will calculate the maximum extent +of the amplicon. + +**Quality trimming details** + +To do the quality trimming, iVar uses a sliding window approach. 
#!/usr/bin/env python

# extends ivar trim's amplicon info parsing abilities
# to include calculation of amplicon regions from
# sets of nested (more than two) primers

import sys


BED_FORMAT_ERROR = (
    'Primer BED file needs to be TAB-separated with the '
    'following columns: '
    'chrom, chromStart, chromEnd, name, score, strand, '
    'where "chromStart", "chromEnd" need to be integer values '
    'and "strand" needs to be either "+" or "-".'
)


def parse_primer_starts(lines):
    """Map primer names to their start positions from 6-column BED lines.

    For a "+" strand primer the start is ``chromStart``; for a "-" strand
    primer it is ``chromEnd - 1``, i.e. the outermost base of the primer on
    either strand.  Blank lines are ignored.

    Args:
        lines: iterable of TAB-separated BED records (e.g. an open file).

    Returns:
        dict mapping primer name (column 4) to its integer start position.

    Raises:
        SystemExit: if a non-blank line has fewer than six columns, a
            non-integer coordinate in the column that is read, or a strand
            other than "+" or "-".
    """
    primer_starts = {}
    for line in lines:
        record = line.strip()
        if not record:
            # tolerate empty lines, e.g., a trailing one at the end of file
            continue
        fields = record.split('\t')
        try:
            strand = fields[5]
            if strand == '+':
                primer_starts[fields[3]] = int(fields[1])
            elif strand == '-':
                primer_starts[fields[3]] = int(fields[2]) - 1
            else:
                raise ValueError()
        except (IndexError, ValueError):
            sys.exit(BED_FORMAT_ERROR)
    return primer_starts


def compute_amplicon_lines(lines, primer_starts):
    """Reduce each amplicon info line to its two outermost primers.

    Every non-blank input line lists the TAB-separated names of all primers
    contributing to one amplicon.  The primer with the smallest and the one
    with the largest start position are kept, so that the pair spans the
    maximum extent of the amplicon.

    Args:
        lines: iterable of amplicon info lines (e.g. an open file).
        primer_starts: dict of primer name -> start position as returned by
            ``parse_primer_starts``.

    Returns:
        list of ``'<first>\\t<last>\\n'`` strings, one per amplicon.

    Raises:
        SystemExit: if a primer name is unknown, or if a line does not
            yield two distinct outer primers (single primer, or all primers
            sharing the same start position).
    """
    ret_lines = []
    for line in lines:
        record = line.strip()
        if not record:
            # tolerate empty lines, e.g., a trailing one at the end of file
            continue
        pnames = record.split('\t')
        for pname in pnames:
            if pname not in primer_starts:
                sys.exit(
                    'Amplicon info with primer name not found in '
                    f'primer BED file: "{pname}"'
                )
        # min()/max() return the first of several equal candidates, so ties
        # resolve to the primer listed first on the line.
        first = min(pnames, key=primer_starts.__getitem__)
        last = max(pnames, key=primer_starts.__getitem__)
        if first == last:
            # Normalize the line terminator so the message reads the same
            # whether or not the offending line ended with a newline.
            sys.exit(
                line.rstrip('\n')
                + '\nis not a proper amplicon info line.'
            )
        ret_lines.append(f'{first}\t{last}\n')
    return ret_lines


def main(argv):
    """Run the conversion.

    Args:
        argv: command line as in ``sys.argv``: script name, primer BED
            path, amplicon info path, output path.
    """
    # parse primers and their start positions from BED file
    with open(argv[1]) as bed_in:
        primer_starts = parse_primer_starts(bed_in)

    # parse amplicon info and record outer primer names
    with open(argv[2]) as info_in:
        ret_lines = compute_amplicon_lines(info_in, primer_starts)

    # write amended amplicon info
    with open(argv[3], 'w') as out:
        out.writelines(ret_lines)


if __name__ == '__main__':
    main(sys.argv)