# HG changeset patch # User cpt # Date 1685933674 0 # Node ID 5d9bc33ec5d3eb7c3c7b04c67312aefeb9410bdb # Parent 21d00cf831372bcc3912ba231605308b9da2c70d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c diff -r 21d00cf83137 -r 5d9bc33ec5d3 cpt-macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt-macros.xml Mon Jun 05 02:54:34 2023 +0000 @@ -0,0 +1,115 @@ + + + + python + biopython + requests + cpt_gffparser + + + + + + + + 10.1371/journal.pcbi.1008214 + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {C. Ross}, + title = {CPT Galaxy Tools}, + year = {2020-}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + diff -r 21d00cf83137 -r 5d9bc33ec5d3 cpt_xmfa_split/cpt-macros.xml --- a/cpt_xmfa_split/cpt-macros.xml Tue Jul 05 05:19:47 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ - - - - - python - biopython - requests - - - - - - - - 10.1371/journal.pcbi.1008214 - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {C. Ross}, - title = {CPT Galaxy Tools}, - year = {2020-}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - @unpublished{galaxyTools, - author = {A. Criscione}, - title = {CPT Galaxy Tools}, - year = {2019-2021}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {A. Criscione}, - title = {CPT Galaxy Tools}, - year = {2019-2021}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {C. Maughmer}, - title = {CPT Galaxy Tools}, - year = {2017-2020}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - @unpublished{galaxyTools, - author = {C. Maughmer}, - title = {CPT Galaxy Tools}, - year = {2017-2020}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - diff -r 21d00cf83137 -r 5d9bc33ec5d3 cpt_xmfa_split/lcb_split.py --- a/cpt_xmfa_split/lcb_split.py Tue Jul 05 05:19:47 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,94 +0,0 @@ -#!/usr/bin/env python -import argparse -import copy -import logging -import xmfa -from itertools import groupby - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger(__name__) - - -def split_lcb(lcb, window_size=10, threshold=0.7): - # Transpose sequence - lines = [] - max_align_num = len(lcb[0]["seq"]) - for i in range(max_align_num): - lines.append([]) - for j in range(len(lcb)): - c = lcb[j]["seq"][i] - if c != "-": - lines[i].append(j) - - count_groups = [] - for i in range(0, len(lines), window_size): - current_lines = lines[i : i + window_size] - flat_list = [a for b in current_lines for a in b] - counts = [] - for i in range(len(lcb)): - value = float(flat_list.count(i)) / window_size - if value >= threshold: - counts.append(i) - count_groups.append(counts) - - # groups = [(next(j), len(list(j)) + 1) for i, j in ] - # [([4], 2), ([2, 3, 4, 5, 6], 2), ([0, 1, 2, 3, 4, 5, 6], 14), ([0, 3], 1)] - # This says for 2 window sizes, we emit a new LCB with just [0:10] and - # [10:20] for lcb #4, then one with all but 0/1 for 2, then all for 14. - new_lcbs = [] - position = 0 - for i, j in groupby(count_groups): - tmp = list(j) - count = len(tmp) - members = tmp[0] - local_members = [] - for member in members: - tmp_member = copy.deepcopy(lcb[member]) - tmp_member["seq"] = tmp_member["seq"][ - window_size * position : window_size * (position + count) - ] - tmp_member["start"] = tmp_member["start"] + (3 * window_size * position) - tmp_member["end"] = tmp_member["start"] + (3 * window_size * count) - local_members.append(tmp_member) - if len(local_members) > 0: - new_lcbs.append(local_members) - - position += count - return new_lcbs - - -def split_lcbs(lcbs, window_size=10, threshold=100): - new_lcbs = [] - for lcb in lcbs: - new_lcbs.extend(split_lcb(lcb, window_size=window_size, threshold=threshold)) - return new_lcbs - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Split XMFA alignments", prog="xmfa2smallerXmfa" - ) - parser.add_argument("xmfa_file", type=argparse.FileType("r"), help="XMFA File") - - parser.add_argument( - "--window_size", type=int, help="Window size for analysis", default=10 - ) - parser.add_argument( - "--threshold", - type=float, - help="All genomes must meet N percent similarity", - default=0.7, - ) - - args = parser.parse_args() - - # Write - xmfa.to_xmfa( - # Split - split_lcbs( - # Parse - xmfa.parse_xmfa(args.xmfa_file), - window_size=args.window_size, - threshold=args.threshold, - ) - ) diff -r 21d00cf83137 -r 5d9bc33ec5d3 cpt_xmfa_split/lcb_split.xml --- a/cpt_xmfa_split/lcb_split.xml Tue Jul 05 05:19:47 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ - - - - - macros.xml - cpt-macros.xml - - - $output -]]> - - - - - - - - - - - - diff -r 21d00cf83137 -r 5d9bc33ec5d3 cpt_xmfa_split/macros.xml --- a/cpt_xmfa_split/macros.xml Tue Jul 05 05:19:47 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ - - - - - progressivemauve - python - biopython - cpt_gffparser - - - - 2.4.0 - - 10.1371/journal.pone.0011147 - - - 10.1093/bioinformatics/btm039 - - - - "$xmfa" - - - - - - - "$sequences" - - - - - - - - - - - - - - - - - "$gff3_data" - - - genomeref.fa - - - ln -s $genome_fasta genomeref.fa; - - - genomeref.fa - - - - - diff -r 21d00cf83137 -r 5d9bc33ec5d3 lcb_split.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lcb_split.py Mon Jun 05 02:54:34 2023 +0000 @@ -0,0 +1,94 @@ +#!/usr/bin/env python +import argparse +import copy +import logging +import xmfa +from itertools import groupby + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + + +def split_lcb(lcb, window_size=10, threshold=0.7): + # Transpose sequence + lines = [] + max_align_num = len(lcb[0]["seq"]) + for i in range(max_align_num): + lines.append([]) + for j in range(len(lcb)): + c = lcb[j]["seq"][i] + if c != "-": + lines[i].append(j) + + count_groups = [] + for i in range(0, len(lines), window_size): + current_lines = lines[i : i + window_size] + flat_list = [a for b in current_lines for a in b] + counts = [] + for i in range(len(lcb)): + value = float(flat_list.count(i)) / window_size + if value >= threshold: + counts.append(i) + count_groups.append(counts) + + # groups = [(next(j), len(list(j)) + 1) for i, j in ] + # [([4], 2), ([2, 3, 4, 5, 6], 2), ([0, 1, 2, 3, 4, 5, 6], 14), ([0, 3], 1)] + # This says for 2 window sizes, we emit a new LCB with just [0:10] and + # [10:20] for lcb #4, then one with all but 0/1 for 2, then all for 14. + new_lcbs = [] + position = 0 + for i, j in groupby(count_groups): + tmp = list(j) + count = len(tmp) + members = tmp[0] + local_members = [] + for member in members: + tmp_member = copy.deepcopy(lcb[member]) + tmp_member["seq"] = tmp_member["seq"][ + window_size * position : window_size * (position + count) + ] + tmp_member["start"] = tmp_member["start"] + (3 * window_size * position) + tmp_member["end"] = tmp_member["start"] + (3 * window_size * count) + local_members.append(tmp_member) + if len(local_members) > 0: + new_lcbs.append(local_members) + + position += count + return new_lcbs + + +def split_lcbs(lcbs, window_size=10, threshold=100): + new_lcbs = [] + for lcb in lcbs: + new_lcbs.extend(split_lcb(lcb, window_size=window_size, threshold=threshold)) + return new_lcbs + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Split XMFA alignments", prog="xmfa2smallerXmfa" + ) + parser.add_argument("xmfa_file", type=argparse.FileType("r"), help="XMFA File") + + parser.add_argument( + "--window_size", type=int, help="Window size for analysis", default=10 + ) + parser.add_argument( + "--threshold", + type=float, + help="All genomes must meet N percent similarity", + default=0.7, + ) + + args = parser.parse_args() + + # Write + xmfa.to_xmfa( + # Split + split_lcbs( + # Parse + xmfa.parse_xmfa(args.xmfa_file), + window_size=args.window_size, + threshold=args.threshold, + ) + ) diff -r 21d00cf83137 -r 5d9bc33ec5d3 lcb_split.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lcb_split.xml Mon Jun 05 02:54:34 2023 +0000 @@ -0,0 +1,35 @@ + + + + macros.xml + cpt-macros.xml + + + '$output' +]]> + + + + + + + + + + + + diff -r 21d00cf83137 -r 5d9bc33ec5d3 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Jun 05 02:54:34 2023 +0000 @@ -0,0 +1,74 @@ + + + + progressivemauve + + bcbiogff + + + + 2.4.0 + + 10.1371/journal.pone.0011147 + + + 10.1093/bioinformatics/btm039 + + + '$xmfa' + + + + + + '$sequences' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + '$gff3_data' + + + #if str($reference_genome.reference_genome_source) == 'cached': + '${reference_genome.fasta_indexes.fields.path}' + #else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa + #end if + + + #if $reference_genome.reference_genome_source == 'history': + ln -s '$reference_genome.genome_fasta' genomeref.fa; + #end if + + + #if str($reference_genome.reference_genome_source) == 'cached': + '${reference_genome.fasta_indexes.fields.path}' + #else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa + #end if + +