# HG changeset patch # User iuc # Date 1629491651 0 # Node ID 8d36959b000dc4dc2d154fada259880a7ceff5c7 # Parent 28a6f1908fcce0b500a83d1876683e7c0f704b26 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit f09d0bee3e957564beccb1bdb3610de02f639ec7" diff -r 28a6f1908fcc -r 8d36959b000d ivar_removereads.xml --- a/ivar_removereads.xml Thu Aug 05 12:44:59 2021 +0000 +++ b/ivar_removereads.xml Fri Aug 20 20:34:11 2021 +0000 @@ -1,4 +1,4 @@ - + Remove reads from trimmed BAM file macros.xml @@ -38,7 +38,7 @@ - + @@ -86,6 +86,23 @@ From this input it will remove reads that come from amplicons that have been generated with one or more primers that may have been affected in their binding by variants listed in the variants input file. +To do its job, the tool needs to know which primers work together to form an +amplicon. The tool can try to deduce this info from the names of the primers +found in the primer info dataset. This will require a primer naming scheme +following the regex pattern:: + + .*_(?P<num>\d+).*_(?P<name>L(?:EFT)?|R(?:IGHT)?) + +*i.e.*, the following schemes will work (and get parsed as): + +- ``nCoV-2019_1_LEFT`` (forward primer of amplicon 1) +- ``400_2_out_R`` (reverse primer of amplicon 2) +- ``QIAseq_163-2_LEFT`` (forward primer of amplicon 163) + +Alternatively, you can specify the amplicon information explicitly through a +dataset that lists the names of primers that together form any given amplicon. +In it, primer names (exactly matching those in the primer info dataset) need to +be TAB-separated with one line per amplicon. ..
class:: Warning mark diff -r 28a6f1908fcc -r 8d36959b000d write_amplicon_info_file.py --- a/write_amplicon_info_file.py Thu Aug 05 12:44:59 2021 +0000 +++ b/write_amplicon_info_file.py Fri Aug 20 20:34:11 2021 +0000 @@ -3,48 +3,49 @@ import argparse import re -AMPLICON_NAME_RE = r'.*_(?P<num>\d+)_[^0-9]*(?P<name>L(?:EFT)?|R(?:IGHT)?)' - -def primer_info_to_position(name): - position = 0 - re_match = re.match(AMPLICON_NAME_RE, name) - if re_match is None: - raise ValueError("{} does not match expected amplicon name format".format(name)) - side = re_match.group('name') - num = re_match.group('num') - if side == 'RIGHT' or side == 'R': - position += 1000 - if num is not None: - position += int(num) - return position +AMPLICON_PAT = re.compile(r'.*_(?P<num>\d+).*_(?P<name>L(?:EFT)?|R(?:IGHT)?)') def write_amplicon_info_file(bed_file, amplicon_info_file): amplicon_sets = {} - amplicon_ids = set() for line in bed_file: fields = line.strip().split('\t') + start = int(fields[1]) name = fields[3] - re_match = re.match(AMPLICON_NAME_RE, name) + re_match = AMPLICON_PAT.match(name) if re_match is None: - raise ValueError("{} does not match expected amplicon name format".format(name)) + raise ValueError( + '{} does not match expected amplicon name format'.format(name) + ) amplicon_id = int(re_match.group('num')) amplicon_set = amplicon_sets.get(amplicon_id, []) - amplicon_set.append(name) + amplicon_set.append((name, start)) amplicon_sets[amplicon_id] = amplicon_set - amplicon_ids.add(amplicon_id) - for id in sorted(list(amplicon_ids)): - amplicon_info = '\t'.join([name for name in sorted(amplicon_sets[id], key=primer_info_to_position)]) + '\n' + # write amplicons sorted by number with primers sorted by start position + for id in sorted(amplicon_sets): + amplicon_info = '\t'.join( + [name for name, start in sorted( + amplicon_sets[id], key=lambda x: x[1] + )] + ) + '\n' amplicon_info_file.write(amplicon_info) amplicon_info_file.close() if __name__ == '__main__': - parser =
argparse.ArgumentParser(description='Write an amplicon info file for iVar from a BED file describing primer positions') - parser.add_argument('bed_file', type=argparse.FileType(), help='Primer BED file') - parser.add_argument('amplicon_info_file', type=argparse.FileType('w'), help='Output file: amplicon info file in TSV format') + parser = argparse.ArgumentParser( + description='Write an amplicon info file for iVar ' + 'from a BED file describing primer positions' + ) + parser.add_argument( + 'bed_file', type=argparse.FileType(), help='Primer BED file' + ) + parser.add_argument( + 'amplicon_info_file', type=argparse.FileType('w'), + help='Output file: amplicon info file in TSV format' + ) args = parser.parse_args() write_amplicon_info_file(args.bed_file, args.amplicon_info_file)