Mercurial > repos > iuc > amas_concat
changeset 0:5acf23babd9a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/amas commit 158ec0e635067d354c425baf14b95cb616fd93c4
line wrap: on
line diff
<!-- amas_concat.xml: Galaxy wrapper for the "concat" subcommand of AMAS -->
<tool id="amas_concat" name="AMAS concat" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>concatenate multiple alignments</description>

    <macros>
        <import>macros.xml</import>
    </macros>

    <xrefs>
        <xref type="bio.tools">amas</xref>
    </xrefs>

    <expand macro="requirements" />
    <expand macro="version_command" />

    <command detect_errors="exit_code"><![CDATA[
        ## 're' is needed by the macro tokens that sanitise the input file names
        #import re
        set -eu;

        ## Derive the Cheetah variable in_format from the datatype of the first input
        @SNIFF_INPUT_FORMAT@

        ## Set the shell variable IN_FORMAT ('-int' suffix is added for interleaved inputs)
        @CHECK_INTERLEAVED@

        @SYMLINK_INPUTS@

        python -m amas.AMAS
            concat
            --concat-part partitions.txt
            --concat-out concatenated.out
            --part-format '$part_format'
            --out-format '$out_format'
            --in-files
            @INPUT_FILENAMES@
            --in-format "\${IN_FORMAT}"
            --data-type '$data_type'
            --cores "\${GALAXY_SLOTS:-1}"
            $check_align
    ]]></command>

    <inputs>
        <param name="input_files" type="data" format="fasta,phylip,nex" label="Sequences to concatenate" multiple="true"
               help="Provide pre-aligned FASTA/PHYLIP/NEXUS files (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." />
        <expand macro="output_format" label="Select output format for concatenated alignment" />
        <param name="part_format" type="select" label="Format of the partitions file"
               help="A file defining how the concatenated alignment is split into separate gene/locus regions. Each line specifies a partition name and its position range (e.g., 'gene1 = 1-500' or 'DNA, gene1 = 1-500' for RAxML format).">
            <option value="unspecified" selected="true">unspecified</option>
            <option value="nexus">nexus</option>
            <option value="raxml">raxml</option>
        </param>
        <expand macro="data_type" />
        <expand macro="check_align" />
    </inputs>

    <outputs>
        <!-- Datatype of the concatenated alignment follows the selected output format -->
        <data name="output" from_work_dir="concatenated.out" format="txt" label="${tool.name} on ${on_string}: Concatenated alignment">
            <change_format>
                <when input="out_format" value="fasta" format="fasta" />
                <when input="out_format" value="phylip" format="phylip" />
                <when input="out_format" value="phylip-int" format="phylip" />
                <when input="out_format" value="nexus" format="nex" />
                <when input="out_format" value="nexus-int" format="nex" />
            </change_format>
        </data>
        <data name="partitions_out" from_work_dir="partitions.txt" format="txt" label="${tool.name} on ${on_string}: Partition file">
            <change_format>
                <!-- Unspecified and RAxML partition formats have no equivalent datatype, so they are output as txt by default -->
                <when input="part_format" value="nexus" format="nex" />
            </change_format>
        </data>
    </outputs>

    <tests>
        <!-- FASTA inputs, sequential PHYLIP output, NEXUS partitions -->
        <test expect_num_outputs="2">
            <param name="input_files" value="inputs/concat_1.fasta,inputs/concat_2.fasta" />
            <param name="out_format" value="phylip" />
            <param name="part_format" value="nexus" />
            <param name="data_type" value="dna" />
            <param name="check_align" value="false" />
            <output name="output" file="outputs/expected_concat.phylip" ftype="phylip" />
            <output name="partitions_out" file="outputs/expected_partitions.nex" ftype="nex" />
        </test>
        <!-- FASTA inputs, FASTA output, RAxML partitions -->
        <test expect_num_outputs="2">
            <param name="input_files" value="inputs/concat_1.fasta,inputs/concat_2.fasta" />
            <param name="out_format" value="fasta" />
            <param name="part_format" value="raxml" />
            <param name="data_type" value="dna" />
            <param name="check_align" value="false" />
            <output name="output" file="outputs/expected_concat_fasta.fas" ftype="fasta" />
            <output name="partitions_out" file="outputs/expected_partitions_raxml.txt" ftype="txt" />
        </test>
        <!-- Interleave tests: amas starts interleaving when sequences have lines > 500 characters -->
        <test expect_num_outputs="2">
            <param name="input_files" value="inputs/concat_int_1.phylip,inputs/concat_int_2.phylip" />
            <param name="out_format" value="nexus-int" />
            <param name="part_format" value="unspecified" />
            <param name="data_type" value="dna" />
            <param name="check_align" value="false" />
            <output name="output" file="outputs/expected_concat_int.nex" ftype="nex" />
            <output name="partitions_out" file="outputs/expected_partitions_int.txt" ftype="txt" />
        </test>
        <!-- Nexus interleave tests for: 'INTERLEAVE', 'Interleave=yes;', no interleave flag -->
        <test expect_num_outputs="2">
            <param name="input_files" value="inputs/concat_int_1.nex,inputs/concat_int_2.nex,inputs/concat_int_3.nex" />
            <param name="out_format" value="nexus-int" />
            <param name="part_format" value="unspecified" />
            <param name="data_type" value="dna" />
            <param name="check_align" value="false" />
            <output name="output" file="outputs/expected_concat_int_multi.nex" ftype="nex" />
            <output name="partitions_out" file="outputs/expected_partitions_int_multi.txt" ftype="txt" />
        </test>
    </tests>

    <help><![CDATA[
        **What it does**

        AMAS Concat combines multiple sequence alignments into a single concatenated alignment, commonly used in phylogenomic analyses.

        **Inputs**

        - **Multiple alignment files**: Select 2 or more pre-aligned sequence files (FASTA, PHYLIP, or NEXUS format)
        - **Input format**: Not a setting; it is taken from the datatype of the first selected dataset, and the interleaved/sequential layout of PHYLIP and NEXUS files is detected from the file content. All inputs are read with the same format, and a mix of interleaved and sequential files is rejected
        - **Partition format**: Specify how you want the partition file to be formatted (Unspecified, RAxML, NEXUS)
        - **Data type**: Choose DNA for nucleotide sequences or Protein for amino acid sequences
        - **Output format**: Select the desired format for the concatenated alignment
        - **Check if input sequences are aligned**: Optionally run AMAS with ``--check-align``

        **Outputs**

        1. **Concatenated alignment**: A single file containing all input alignments joined end-to-end
        2. **Partitions file**: Defines the boundaries of each original alignment within the concatenated file

        @PARTITIONS_HELP@

        **Use cases**

        - **Multi-locus phylogenomics**: Combine hundreds of genes for species tree inference
        - **Partitioned phylogenetic analysis**: Apply different evolutionary models to different genes using tools like RAxML or IQ-TREE
        - **Supermatrix construction**: Create dataset for concatenation-based phylogenetic methods
        - **Increased phylogenetic signal**: Leverage information from multiple loci to resolve difficult nodes
        - **Comparative analyses**: Prepare datasets for testing hypotheses across multiple genomic regions

        @AMAS_SHARED_HELP@
    ]]></help>

    <expand macro="citations" />
</tool>
# check_interleaved.py
"""
Helper script to check if AMAS input files are interleaved.

For the given files (all read as the same ``--format``) the script prints the
plain format name when every file is sequential, or ``<format>-int`` when
every file is interleaved. A mix of both layouts is an error.
"""
import argparse
import re
import sys

# INTERLEAVE keyword of a lower-cased NEXUS FORMAT command. Bare "interleave"
# and "interleave=yes" count; an explicit "interleave=no" does not.
_INTERLEAVE_RE = re.compile(r'\binterleave\b(?!\s*=\s*no\b)')

# "ntax=<n>" of a lower-cased NEXUS DIMENSIONS command; spaces around "=" are allowed.
_NTAX_RE = re.compile(r'\bntax\s*=\s*(\d+)')


def check_phylip_interleaved(filepath):
    """Check if PHYLIP file is interleaved.

    The first line must be the ``ntax nchar`` header. The file is considered
    interleaved when it holds more non-blank lines after the header than there
    are taxa. Blank lines are not counted, so a sequential file that contains
    empty lines is not mistaken for an interleaved one.

    Args:
        filepath: Path of the PHYLIP file.

    Returns:
        True when the file looks interleaved, False otherwise.

    Raises:
        ValueError: If the header line is missing or does not start with an
            integer taxon count.
    """
    with open(filepath, encoding='utf-8') as f:
        # First line is header: ntax nchar
        header = next(f, '').split()
        try:
            ntax = int(header[0])
        except (IndexError, ValueError):
            raise ValueError(
                f"{filepath}: missing or malformed PHYLIP header "
                "(expected 'ntax nchar')"
            ) from None

        non_blank = 0
        for line in f:
            if not line.strip():
                continue
            non_blank += 1
            if non_blank > ntax:
                return True

    return False


def check_nexus_interleaved(filepath):
    """Check if NEXUS file is interleaved.

    Only the first DATA/CHARACTERS block is inspected. The file is reported as
    interleaved when its FORMAT command carries the INTERLEAVE keyword (unless
    written as ``interleave=no``), or when the MATRIX holds a number of
    non-blank lines different from NTAX. Without a known NTAX only the keyword
    can be used.

    Args:
        filepath: Path of the NEXUS file.

    Returns:
        True when the file looks interleaved, False otherwise.
    """
    in_data_block = False
    in_matrix = False
    ntax = None
    seq_lines = 0

    with open(filepath, encoding='utf-8') as f:
        for line in f:
            # NEXUS keywords are case-insensitive, so compare in lower case.
            content = line.strip().lower()

            if not content:
                continue

            if in_matrix:
                if content == 'end;':
                    return seq_lines != ntax if ntax else False

                # A lone ';' only terminates the matrix; it is not a sequence line.
                if content != ';':
                    seq_lines += 1
                    if ntax and seq_lines > ntax:
                        return True
                continue

            if not in_data_block:
                # Skip everything until the DATA or CHARACTERS block starts.
                if content.startswith('begin'):
                    words = content.split()
                    if len(words) > 1 and (
                            words[1].startswith('data')
                            or words[1].startswith('characters')):
                        in_data_block = True
                continue

            if content.startswith('dimensions') and ntax is None:
                match = _NTAX_RE.search(content)
                if match:
                    ntax = int(match.group(1))

            elif content.startswith('format'):
                if _INTERLEAVE_RE.search(content):
                    return True

            elif content.startswith('matrix'):
                in_matrix = True

    return False


def check_fasta_interleaved(filepath):
    """FASTA files are not interleaved.

    Args:
        filepath: Path of the FASTA file (not read).

    Returns:
        Always False.
    """
    return False


def main():
    """Parse the command line, check every input file and print the format.

    Returns:
        0 on success.

    Raises:
        Exception: If the inputs are a mix of interleaved and sequential files.
    """
    parser = argparse.ArgumentParser(
        description='Check if AMAS input files are interleaved'
    )
    parser.add_argument('input_files', nargs='+', help='Input sequence files')
    parser.add_argument('--format', required=True,
                        choices=['fasta', 'phylip', 'nexus'],
                        help='Input format')

    args = parser.parse_args()

    interleaved_status = []
    for filepath in args.input_files:
        if args.format == 'phylip':
            is_interleaved = check_phylip_interleaved(filepath)
        elif args.format == 'nexus':
            is_interleaved = check_nexus_interleaved(filepath)
        else:
            is_interleaved = check_fasta_interleaved(filepath)

        interleaved_status.append(is_interleaved)

    # Collapse to the distinct layouts seen; more than one means a mix.
    interleaved_status = list(set(interleaved_status))
    if len(interleaved_status) > 1:
        raise Exception("Error: Input files are a mix of interleaved/sequential formats")

    if interleaved_status[0]:
        print(f"{args.format}-int")
    else:
        print(args.format)

    return 0


if __name__ == '__main__':
    sys.exit(main())
<!-- macros.xml: tokens and XML macros imported by the AMAS tool wrapper(s) -->
<macros>
    <!-- Version tokens referenced by the tool element and the requirements macro -->
    <token name="@TOOL_VERSION@">1.0</token>
    <token name="@VERSION_SUFFIX@">0</token>
    <token name="@PROFILE@">25.0</token>

    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">amas</requirement>
        </requirements>
    </xml>

    <xml name="version_command">
        <version_command>python -c "import amas; print(amas.__version__)"</version_command>
    </xml>

    <!-- Cheetah: set in_format from the datatype of the first input; the 'nex' extension is mapped to 'nexus' -->
    <token name="@SNIFF_INPUT_FORMAT@"><![CDATA[
        #set $in_format = $input_files[0].ext
        #if $in_format == 'nex'
            #set $in_format = 'nexus'
        #end if
    ]]></token>

    <!-- Shell: store the output of check_interleaved.py in IN_FORMAT; ends with '&&' so a command has to follow -->
    <token name="@CHECK_INTERLEAVED@"><![CDATA[
        ## Check if inputs are interleaved
        IN_FORMAT=\$(python '$__tool_directory__/check_interleaved.py'
            #for $f in $input_files
                '${f}'
            #end for
            --format '${in_format}') &&
    ]]></token>

    <!-- Shell: link every input into the working directory under a sanitised element identifier; needs 're' imported by the caller -->
    <token name="@SYMLINK_INPUTS@"><![CDATA[
        ## Create symlinks with original filename for consistent tests
        #for $f in $input_files
            #set $safename_input = re.sub('[^\w\-_\.]', '_', $f.element_identifier)
            ln -s '${f}' '${safename_input}';
        #end for
    ]]></token>

    <!-- Shell: the sanitised link names, quoted, in input order; uses the same substitution as the symlink token -->
    <token name="@INPUT_FILENAMES@"><![CDATA[
        #for $f in $input_files
            #set $safename_input = re.sub('[^\w\-_\.]', '_', $f.element_identifier)
            '${safename_input}'
        #end for
    ]]></token>

    <!-- Select for the output alignment format; the label can be overridden by the caller -->
    <xml name="output_format" token_name="out_format" token_label="Format of the output file">
        <param name="out_format" type="select" label="@LABEL@">
            <option value="fasta">fasta</option>
            <option value="phylip">phylip (sequential)</option>
            <option value="phylip-int">phylip (interleaved)</option>
            <option value="nexus">nexus (sequential)</option>
            <option value="nexus-int">nexus (interleaved)</option>
        </param>
    </xml>

    <xml name="data_type">
        <param name="data_type" type="select" label="Data type">
            <option value="aa">Protein alignments</option>
            <option value="dna">Nucleotide alignments</option>
        </param>
    </xml>

    <xml name="check_align">
        <param argument="--check-align" type="boolean" label="Check if input sequences are aligned" checked="false" truevalue="--check-align" falsevalue="" />
    </xml>

    <!-- Galaxy doesn't currently detect whether PHYLIP or NEXUS format is interleaved/sequential; if implemented update here and assoc in subcommands -->
    <!-- The '<' and '>' of the named group are escaped because a literal '<' is not allowed in an XML attribute value -->
    <xml name="collection_outputs" token_name="alignments" token_label="alignment files">
        <collection name="@NAME@_fasta" type="list" label="${tool.name} on ${on_string}: fasta">
            <discover_datasets pattern="(?P&lt;name&gt;.+-out\..+)" format="fasta" />
            <filter>out_format == "fasta"</filter>
        </collection>

        <collection name="@NAME@_phylip" type="list" label="${tool.name} on ${on_string}: phylip">
            <discover_datasets pattern="(?P&lt;name&gt;.+-out\..+)" format="phylip" />
            <filter>out_format == "phylip" or out_format == "phylip-int"</filter>
        </collection>

        <collection name="@NAME@_nexus" type="list" label="${tool.name} on ${on_string}: nexus">
            <discover_datasets pattern="(?P&lt;name&gt;.+-out\..+)" format="nex" />
            <filter>out_format == "nexus" or out_format == "nexus-int"</filter>
        </collection>
    </xml>

    <!-- Help fragment (reStructuredText) describing the partitions file and its three formats -->
    <token name="@PARTITIONS_HELP@"><![CDATA[
        **What is a partitions file?**

        The partitions file maps each gene/locus to its position in the concatenated alignment. This is essential for downstream phylogenetic analyses (e.g., RAxML, IQ-TREE) that can apply different evolutionary models to different partitions.

        **Example:**

        If you concatenate three genes::

            gene1.fasta (500 bp)
            gene2.fasta (700 bp)
            gene3.fasta (400 bp)

        The partitions file (unspecified format) will contain::

            gene1 = 1-500
            gene2 = 501-1200
            gene3 = 1201-1600

        **Partition formats:**

        - **Unspecified**

          ::

              gene1 = 1-500
              gene2 = 501-1200

        - **RAxML**

          ::

              DNA, gene1 = 1-500
              DNA, gene2 = 501-1200

        - **NEXUS**

          ::

              #NEXUS

              Begin sets;
                  charset gene1 = 1-500;
                  charset gene2 = 501-1200;
              End;
    ]]></token>

    <!-- Help fragment (reStructuredText) on sequential vs interleaved PHYLIP, plus general AMAS information -->
    <token name="@AMAS_SHARED_HELP@"><![CDATA[
        **Sequential vs Interleaved Phylip Format**

        - **Sequential**: Each complete sequence is written in order, one after another. Easier for programmatic parsing.

          ::

              4 60
              Seq1 ATGCATGCATATGCATGCATATGCATGCAT...
              Seq2 ATGCATGCATATGCATGCATATGCATGCAT...
              Seq3 ATGCATGCATATGCATGCATATGCATGCAT...
              Seq4 ATGCATGCATATGCATGCATATGCATGCAT...

        - **Interleaved**: Sequences are written in aligned blocks, making it easier to visually compare positions across sequences.

          ::

              4 60
              Seq1 ATGCATGCATATGCATGCAT
              Seq2 ATGCATGCATATGCATGCAT
              Seq3 ATGCATGCATATGCATGCAT
              Seq4 ATGCATGCATATGCATGCAT

              Seq1 ATGCATGCAT...
              Seq2 ATGCATGCAT...
              Seq3 ATGCATGCAT...
              Seq4 ATGCATGCAT...

        **About AMAS**

        AMAS (Alignment manipulation and summary statistics) is designed for modern phylogenomics workflows involving hundreds of taxa and thousands of loci.

        Source code and manual: https://github.com/marekborowiec/AMAS
    ]]></token>

    <xml name="citations">
        <citations>
            <citation type="doi">10.7717/peerj.1660</citation>
        </citations>
    </xml>
</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_1.fasta Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,6 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU10 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_2.fasta Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,6 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU10 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_int_1.nex Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,22 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=3 NCHAR=300; + FORMAT INTERLEAVE DATATYPE=DNA GAP = - MISSING = ?; + MATRIX +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_int_1.phylip Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,12 @@ +3 300 +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + +ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + +ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_int_2.nex Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,19 @@ +#NEXUS + +Begin data; + Dimensions Nchar=300 Ntax=3; + Format Datatype=DNA Gap = - Missing = ? Interleave=yes; + matrix +Taxon_A AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +Taxon_B GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +Taxon_C TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG + +Taxon_A GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +Taxon_B CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +Taxon_C AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + +Taxon_A AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +Taxon_B GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +Taxon_C TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG +; +End; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_int_2.phylip Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,12 @@ +3 300 +Taxon_A AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +Taxon_B GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +Taxon_C TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG + +GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + +AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_int_3.nex Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,16 @@ +#NEXUS + +begin data; + dimensions nchar=200 ntax=3; + format datatype=DNA gap = - missing = ?; + matrix +Taxon_A AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +Taxon_B GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +Taxon_C TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG + +Taxon_A GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +Taxon_B CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +Taxon_C AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + +; +end; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_result.phylip Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,4 @@ +3 200 +OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU10 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/fasta1.fas Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,20 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU3 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU4 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU5 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU6 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU7 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU8 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU9 +ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU10 +ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/partitions_concat.nex Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,6 @@ +#NEXUS + +Begin sets; + charset p1_concat_1 = 1-100; + charset p2_concat_2 = 101-200; +End; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/partitions_concat_unspecified.txt Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,2 @@ +p1_concat_1 = 1-100 +p2_concat_2 = 101-200
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/remove_input.nex Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,20 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=10 NCHAR=100; + FORMAT DATATYPE=DNA GAP = - MISSING = ?; + MATRIX + OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU10 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU3 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU4 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU5 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU6 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU7 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU8 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU9 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_concat.phylip Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,4 @@ +3 200 +OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU10 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_concat_fasta.fas Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,12 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTC +CAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU10 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTC +CAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_concat_int.nex Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,18 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=3 NCHAR=600; + FORMAT INTERLEAVE DATATYPE=DNA GAP = - MISSING = ?; + MATRIX +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + +AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA 
+TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG + + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_concat_int_multi.nex Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,18 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=3 NCHAR=800; + FORMAT INTERLEAVE DATATYPE=DNA GAP = - MISSING = ?; + MATRIX +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + 
+AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_partitions.nex Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,6 @@ +#NEXUS + +Begin sets; + charset p1_concat_1 = 1-100; + charset p2_concat_2 = 101-200; +End; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_partitions_int.txt Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,2 @@ +p1_concat_int_1 = 1-300 +p2_concat_int_2 = 301-600
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_partitions_int_multi.txt Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,3 @@ +p1_concat_int_1 = 1-300 +p2_concat_int_2 = 301-600 +p3_concat_int_3 = 601-800
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_partitions_raxml.txt Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,2 @@ +DNA, p1_concat_1 = 1-100 +DNA, p2_concat_2 = 101-200
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_remove_filtered.int-nex Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,19 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=8 NCHAR=100; + FORMAT INTERLEAVE DATATYPE=DNA GAP = - MISSING = ?; + MATRIX +OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU3 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU4 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU5 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU6 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU7 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU8 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_replicate1.nex Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,20 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=10 NCHAR=100; + FORMAT DATATYPE=DNA GAP = - MISSING = ?; + MATRIX + OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU10 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU3 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU4 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU5 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU6 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU7 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU8 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU9 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_replicate2.nex Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,20 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=10 NCHAR=100; + FORMAT DATATYPE=DNA GAP = - MISSING = ?; + MATRIX + OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU10 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU3 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU4 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU5 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU6 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU7 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU8 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU9 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_split_partition1.fas Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,9 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT +>OTU10 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAA +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_split_partition2.fas Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,9 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT +>OTU10 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_summary.txt Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,2 @@ +Alignment_name No_of_taxa Alignment_length Total_matrix_cells Undetermined_characters Missing_percent No_variable_sites Proportion_variable_sites Parsimony_informative_sites Proportion_parsimony_informative AT_content GC_content A C G T K M R Y S W B V H D X N O - ? +fasta1.fas 10 100 1000 1 0.1 2 0.02 1 0.01 0.543 0.457 262 297 160 280 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_taxa_summary.txt Tue Dec 02 09:27:37 2025 +0000 @@ -0,0 +1,11 @@ +Alignment_name Taxon_name Sequence_length Undetermined_characters Missing_percent AT_content GC_content A C G T K M R Y S W B V H D X N O - ? +fasta1.fas OTU1 100 1 1.0 0.545 0.455 26 29 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +fasta1.fas OTU10 100 0 0.0 0.55 0.45 27 29 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU2 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU3 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU4 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU5 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU6 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU7 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU8 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU9 100 0 0.0 0.55 0.45 27 29 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ No newline at end of file
