Mercurial > repos > iuc > amas_split
changeset 0:e2e756484892 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/amas commit 158ec0e635067d354c425baf14b95cb616fd93c4
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/amas_split.xml Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,112 @@ +<tool id="amas_split" name="AMAS split" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>split multiple alignments</description> + + <macros> + <import>macros.xml</import> + </macros> + + <xrefs> + <xref type="bio.tools">amas</xref> + </xrefs> + + <expand macro="requirements" /> + <expand macro="version_command" /> + + <command detect_errors="exit_code"><![CDATA[ + #import re + set -eu; + + ## Let galaxy sniff input format + #set $in_format = $input_file.ext + #if $in_format == 'nex' + #set $in_format = 'nexus' + #end if + + ## Check if inputs are interleaved + IN_FORMAT=\$(python '$__tool_directory__/check_interleaved.py' + '${input_file}' + --format '${in_format}') && + + ## Create symlinks with original filename for consistent tests because + ## input filenames are used as str vars + #set $safename_input = re.sub('[^\w\-_\.]', '_', $input_file.element_identifier) + ln -s '${input_file}' '${safename_input}'; + + python -m amas.AMAS + split + --split-by $split_by + $remove_empty + --out-format $out_format + --in-files $safename_input + --in-format "\${IN_FORMAT}" + --data-type $data_type + --cores "\${GALAXY_SLOTS:-1}" + $check_align + ]]></command> + + <inputs> + <param name="input_file" type="data" format="fasta,phylip,nex" label="Sequence to split" multiple="false" help="Provide pre-aligned FASTA/PHYLIP/NEXUS file (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." /> + <expand macro="output_format" label="Select output format for split alignments" /> + <!-- If amas updates split to handle NEXUS format include nex format here --> + <param name="split_by" type="data" format="txt" label="Partitions file for splitting. 
Note: needs to be a partitions file in the Unspecified format (See help section for more information)" + help="A file defining how to split the concatenated alignment into separate gene/locus regions. Each line specifies a partition name and its position range (e.g., 'gene1 = 1-500' for unspecified format). See the help section for more information about partitions." /> + <param argument="--remove-empty" type="boolean" label="Remove taxa that are entirely missing within a partition" checked="false" truevalue="--remove-empty" falsevalue="" /> + <expand macro="data_type" /> + <expand macro="check_align" /> + </inputs> + + <outputs> + <expand macro="collection_outputs" name="split_alignments" /> + </outputs> + + <tests> + <test expect_num_outputs="1"> + <param name="input_file" value="inputs/concat_result.phylip" /> + <param name="split_by" value="inputs/partitions_concat_unspecified.txt" /> + <param name="remove_empty" value="true" /> + <param name="out_format" value="fasta" /> + <param name="data_type" value="dna" /> + <param name="check_align" value="false" /> + <output_collection name="split_alignments_fasta" type="list"> + <element name="concat_result_p1_concat_1-out.fas" file="outputs/expected_split_partition1.fas" ftype="fasta" /> + <element name="concat_result_p2_concat_2-out.fas" file="outputs/expected_split_partition2.fas" ftype="fasta" /> + </output_collection> + </test> + </tests> + + <help><![CDATA[ + **What it does** + + AMAS Split divides a concatenated alignment back into separate gene/locus files using a partitions file. This is the reverse operation of AMAS Concat. 
+ + **Inputs** + + - **Concatenated alignment**: A single alignment file containing multiple genes/loci joined end-to-end + - **Partitions file**: Defines the boundaries of each gene/locus (.txt file containing Unspecified formatting) + - **Input format**: Detected automatically from the datatype of the input dataset (FASTA, PHYLIP or NEXUS); make sure the dataset has the correct datatype assigned + - **Data type**: Choose DNA for nucleotide sequences or Protein for amino acid sequences + - **Output format**: Select the desired format for the split alignment files + - **Remove empty sequences**: Optionally exclude taxa with only gaps/missing data in a partition + + **Outputs** + + A collection of alignment files, one per partition/gene defined in your partitions file. + + @PARTITIONS_HELP@ + + **IMPORTANT**: Partitions files in RAxML or NEXUS format (a .txt file with RAxML or NEXUS formatting, or a .nex file) will not work, even when they were produced by AMAS Concat. Only the Unspecified format is accepted. + + **Tip:** An example partitions file for your data can be generated using the AMAS Concat tool. + + **Use cases** + + - Extract individual gene alignments from a concatenated dataset + - Analyze genes separately after joint phylogenetic analysis + - Apply gene-specific filtering or trimming + - Recover original locus alignments from published concatenated datasets + + @AMAS_SHARED_HELP@ + ]]></help> + + <expand macro="citations" /> +</tool> \ No newline at end of file
# b/check_interleaved.py (added in changeset 0:e2e756484892)
"""
Helper script to check if AMAS input files are interleaved.

Prints the AMAS ``--in-format`` value for the given files on stdout:
the plain format name (``fasta``, ``phylip``, ``nexus``) for sequential
input, or the name with an ``-int`` suffix for interleaved input.
"""
import argparse
import re
import sys

# ``ntax=3`` as well as ``ntax = 3`` (content is lower-cased before matching).
_NTAX_RE = re.compile(r'\bntax\s*=\s*(\d+)')
# Bare ``interleave`` or ``interleave=<value>``; group 1 is the value, if any.
_INTERLEAVE_RE = re.compile(r'\binterleave\b(?:\s*=\s*(\w+))?')


def check_phylip_interleaved(filepath):
    """Check if PHYLIP file is interleaved.

    The first line must be the ``<ntax> <nchar>`` header. The file is
    reported as interleaved when any non-blank line appears after the
    first ``ntax`` lines that follow the header.

    Args:
        filepath: Path of the PHYLIP file to inspect.

    Returns:
        True if the file looks interleaved, False otherwise.

    Raises:
        ValueError: If the file is empty or its first line does not start
            with an integer taxon count.
    """
    with open(filepath, encoding='utf-8') as f:
        # First line is header: ntax nchar. ``next(f, '')`` avoids leaking
        # StopIteration on an empty file.
        header = next(f, '').split()
        try:
            ntax = int(header[0])
        except (IndexError, ValueError):
            raise ValueError(
                f"{filepath}: first line is not a PHYLIP header "
                "('<ntax> <nchar>')"
            ) from None

        for idx, line in enumerate(f, 1):
            if idx > ntax and line.strip():
                return True

    return False


def check_nexus_interleaved(filepath):
    """Check if NEXUS file is interleaved.

    Detection uses two signals from the DATA/CHARACTERS block:

    * the FORMAT line contains ``interleave`` or ``interleave=yes``
      (``interleave=no`` is treated as sequential);
    * the MATRIX holds a number of non-blank lines different from ``ntax``.

    ``ntax`` is taken from the first DIMENSIONS line that declares it, in
    any block, so files that declare it only in a TAXA block are handled.

    Args:
        filepath: Path of the NEXUS file to inspect.

    Returns:
        True if the file looks interleaved, False otherwise (including
        when no matrix is found).
    """
    in_data_block = False
    in_matrix = False
    ntax = None
    seq_lines = 0

    with open(filepath, encoding='utf-8') as f:
        for line in f:
            content = line.strip().lower()

            if not content:
                continue

            if in_matrix:
                if content == 'end;':
                    # Without a known ntax the line count proves nothing.
                    return seq_lines != ntax if ntax else False

                # A lone ';' only terminates the matrix; it is not data.
                if content != ';':
                    seq_lines += 1
                    if ntax and seq_lines > ntax:
                        return True
                continue

            if content.startswith('dimensions'):
                if ntax is None:
                    match = _NTAX_RE.search(content)
                    if match:
                        ntax = int(match.group(1))
                continue

            if not in_data_block:
                if content.startswith('begin'):
                    words = content.split()
                    if len(words) > 1 and words[1].startswith(
                            ('data', 'characters')):
                        in_data_block = True
                continue

            if content.startswith('format'):
                match = _INTERLEAVE_RE.search(content)
                # Bare keyword or explicit "=yes" means interleaved;
                # any other value (e.g. "=no") falls through to the
                # matrix line count.
                if match and match.group(1) in (None, 'yes'):
                    return True

            elif content.startswith('matrix'):
                in_matrix = True

    return False


def check_fasta_interleaved(filepath):
    """FASTA files are not interleaved."""
    return False


def main(argv=None):
    """Print the AMAS input format for the given files.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        0 on success.

    Raises:
        ValueError: If the input files are a mix of interleaved and
            sequential layouts.
    """
    parser = argparse.ArgumentParser(
        description='Check if AMAS input files are interleaved'
    )
    parser.add_argument('input_files', nargs='+', help='Input sequence files')
    parser.add_argument('--format', required=True,
                        choices=['fasta', 'phylip', 'nexus'],
                        help='Input format')

    args = parser.parse_args(argv)

    checkers = {
        'fasta': check_fasta_interleaved,
        'phylip': check_phylip_interleaved,
        'nexus': check_nexus_interleaved,
    }
    checker = checkers[args.format]

    # AMAS takes a single --in-format, so all files must share one layout.
    statuses = {checker(filepath) for filepath in args.input_files}
    if len(statuses) > 1:
        raise ValueError("Error: Input files are a mix of interleaved/sequential formats")

    if statuses.pop():
        print(f"{args.format}-int")
    else:
        print(args.format)

    return 0


if __name__ == '__main__':
    sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,173 @@ +<macros> + <token name="@TOOL_VERSION@">1.0</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">25.0</token> + + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">amas</requirement> + </requirements> + </xml> + + <xml name="version_command"> + <version_command>python -c "import amas; print(amas.__version__)"</version_command> + </xml> + + <token name="@SNIFF_INPUT_FORMAT@"><![CDATA[ + #set $in_format = $input_files[0].ext + #if $in_format == 'nex' + #set $in_format = 'nexus' + #end if + ]]></token> + + <token name="@CHECK_INTERLEAVED@"><![CDATA[ + ## Check if inputs are interleaved + IN_FORMAT=\$(python '$__tool_directory__/check_interleaved.py' + #for $f in $input_files + '${f}' + #end for + --format '${in_format}') && + ]]></token> + + <token name="@SYMLINK_INPUTS@"><![CDATA[ + ## Create symlinks with original filename for consistent tests + #for $f in $input_files + #set $safename_input = re.sub('[^\w\-_\.]', '_', $f.element_identifier) + ln -s '${f}' '${safename_input}'; + #end for + ]]></token> + + <token name="@INPUT_FILENAMES@"><![CDATA[ + #for $f in $input_files + #set $safename_input = re.sub('[^\w\-_\.]', '_', $f.element_identifier) + '${safename_input}' + #end for + ]]></token> + + <xml name="output_format" token_name="out_format" token_label="Format of the output file"> + <param name="out_format" type="select" label="@LABEL@"> + <option value="fasta">fasta</option> + <option value="phylip">phylip (sequential)</option> + <option value="phylip-int">phylip (interleaved)</option> + <option value="nexus">nexus (sequential)</option> + <option value="nexus-int">nexus (interleaved)</option> + </param> + </xml> + + <xml name="data_type"> + <param name="data_type" type="select" label="Data type"> + <option value="aa">Protein alignments</option> + <option value="dna">Nucleotide 
alignments</option> + </param> + </xml> + + <xml name="check_align"> + <param argument="--check-align" type="boolean" label="Check if input sequences are aligned" checked="false" truevalue="--check-align" falsevalue="" /> + </xml> + + <!-- Galaxy doesn't currently detect whether PHYLIP or NEXUS format is interleaved/sequential; if implemented update here and assoc in subcommands --> + <xml name="collection_outputs" token_name="alignments" token_label="alignment files"> + <collection name="@NAME@_fasta" type="list" label="${tool.name} on ${on_string}: fasta"> + <discover_datasets pattern="(?P<name>.+-out\..+)" format="fasta" /> + <filter>out_format == "fasta"</filter> + </collection> + + <collection name="@NAME@_phylip" type="list" label="${tool.name} on ${on_string}: phylip"> + <discover_datasets pattern="(?P<name>.+-out\..+)" format="phylip" /> + <filter>out_format == "phylip" or out_format == "phylip-int"</filter> + </collection> + + <collection name="@NAME@_nexus" type="list" label="${tool.name} on ${on_string}: nexus"> + <discover_datasets pattern="(?P<name>.+-out\..+)" format="nex" /> + <filter>out_format == "nexus" or out_format == "nexus-int"</filter> + </collection> + </xml> + + <token name="@PARTITIONS_HELP@"><![CDATA[ + **What is a partitions file?** + + The partitions file maps each gene/locus to its position in the concatenated alignment. This is essential for downstream phylogenetic analyses (e.g., RAxML, IQ-TREE) that can apply different evolutionary models to different partitions. 
+ + **Example:** + + If you concatenate three genes:: + + gene1.fasta (500 bp) + gene2.fasta (700 bp) + gene3.fasta (400 bp) + + The partitions file (unspecified format) will contain:: + + gene1 = 1-500 + gene2 = 501-1200 + gene3 = 1201-1600 + + **Partition formats:** + + - **Unspecified** + + :: + + gene1 = 1-500 + gene2 = 501-1200 + + - **RAxML** + + :: + + DNA, gene1 = 1-500 + DNA, gene2 = 501-1200 + + - **NEXUS** + + :: + + #NEXUS + + Begin sets; + charset gene1 = 1-500; + charset gene2 = 501-1200; + End; + ]]></token> + + <token name="@AMAS_SHARED_HELP@"><![CDATA[ + **Sequential vs Interleaved Phylip Format** + + - **Sequential**: Each complete sequence is written in order, one after another. Easier for programmatic parsing. + + :: + + 4 60 + Seq1 ATGCATGCATATGCATGCATATGCATGCAT... + Seq2 ATGCATGCATATGCATGCATATGCATGCAT... + Seq3 ATGCATGCATATGCATGCATATGCATGCAT... + Seq4 ATGCATGCATATGCATGCATATGCATGCAT... + + - **Interleaved**: Sequences are written in aligned blocks, making it easier to visually compare positions across sequences. + + :: + + 4 60 + Seq1 ATGCATGCATATGCATGCAT + Seq2 ATGCATGCATATGCATGCAT + Seq3 ATGCATGCATATGCATGCAT + Seq4 ATGCATGCATATGCATGCAT + + Seq1 ATGCATGCAT... + Seq2 ATGCATGCAT... + Seq3 ATGCATGCAT... + Seq4 ATGCATGCAT... + + **About AMAS** + + AMAS (Alignment manipulation and summary statistics) is designed for modern phylogenomics workflows involving hundreds of taxa and thousands of loci. + + Source code and manual: https://github.com/marekborowiec/AMAS + ]]></token> + + <xml name="citations"> + <citations> + <citation type="doi">10.7717/peerj.1660</citation> + </citations> + </xml> +</macros> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_1.fasta Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,6 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU10 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_2.fasta Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,6 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU10 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_int_1.nex Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,22 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=3 NCHAR=300; + FORMAT INTERLEAVE DATATYPE=DNA GAP = - MISSING = ?; + MATRIX +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_int_1.phylip Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,12 @@ +3 300 +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + +ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + +ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_int_2.nex Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,19 @@ +#NEXUS + +Begin data; + Dimensions Nchar=300 Ntax=3; + Format Datatype=DNA Gap = - Missing = ? Interleave=yes; + matrix +Taxon_A AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +Taxon_B GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +Taxon_C TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG + +Taxon_A GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +Taxon_B CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +Taxon_C AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + +Taxon_A AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +Taxon_B GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +Taxon_C TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG +; +End; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_int_2.phylip Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,12 @@ +3 300 +Taxon_A AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +Taxon_B GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +Taxon_C TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG + +GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + +AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_int_3.nex Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,16 @@ +#NEXUS + +begin data; + dimensions nchar=200 ntax=3; + format datatype=DNA gap = - missing = ?; + matrix +Taxon_A AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +Taxon_B GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +Taxon_C TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG + +Taxon_A GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +Taxon_B CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +Taxon_C AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + +; +end; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/concat_result.phylip Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,4 @@ +3 200 +OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU10 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/fasta1.fas Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,20 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU3 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU4 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU5 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU6 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU7 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU8 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU9 +ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU10 +ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/partitions_concat.nex Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,6 @@ +#NEXUS + +Begin sets; + charset p1_concat_1 = 1-100; + charset p2_concat_2 = 101-200; +End; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/partitions_concat_unspecified.txt Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,2 @@ +p1_concat_1 = 1-100 +p2_concat_2 = 101-200
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/inputs/remove_input.nex Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,20 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=10 NCHAR=100; + FORMAT DATATYPE=DNA GAP = - MISSING = ?; + MATRIX + OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU10 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU3 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU4 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU5 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU6 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU7 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU8 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU9 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_concat.phylip Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,4 @@ +3 200 +OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU10 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_concat_fasta.fas Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,12 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTC +CAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU10 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTC +CAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_concat_int.nex Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,18 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=3 NCHAR=600; + FORMAT INTERLEAVE DATATYPE=DNA GAP = - MISSING = ?; + MATRIX +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + +AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA 
+TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG + + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_concat_int_multi.nex Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,18 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=3 NCHAR=800; + FORMAT INTERLEAVE DATATYPE=DNA GAP = - MISSING = ?; + MATRIX +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + 
+AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_partitions.nex Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,6 @@ +#NEXUS + +Begin sets; + charset p1_concat_1 = 1-100; + charset p2_concat_2 = 101-200; +End; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_partitions_int.txt Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,2 @@ +p1_concat_int_1 = 1-300 +p2_concat_int_2 = 301-600
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_partitions_int_multi.txt Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,3 @@ +p1_concat_int_1 = 1-300 +p2_concat_int_2 = 301-600 +p3_concat_int_3 = 601-800
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_partitions_raxml.txt Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,2 @@ +DNA, p1_concat_1 = 1-100 +DNA, p2_concat_2 = 101-200
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_remove_filtered.int-nex Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,19 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=8 NCHAR=100; + FORMAT INTERLEAVE DATATYPE=DNA GAP = - MISSING = ?; + MATRIX +OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU3 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU4 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU5 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU6 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU7 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU8 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_replicate1.nex Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,20 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=10 NCHAR=100; + FORMAT DATATYPE=DNA GAP = - MISSING = ?; + MATRIX + OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU10 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU3 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU4 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU5 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU6 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU7 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU8 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU9 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_replicate2.nex Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,20 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=10 NCHAR=100; + FORMAT DATATYPE=DNA GAP = - MISSING = ?; + MATRIX + OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU10 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU3 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU4 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU5 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU6 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU7 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU8 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU9 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + +; + +END; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_split_partition1.fas Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,9 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT +>OTU10 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAA +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_split_partition2.fas Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,9 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT +>OTU10 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_summary.txt Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,2 @@ +Alignment_name No_of_taxa Alignment_length Total_matrix_cells Undetermined_characters Missing_percent No_variable_sites Proportion_variable_sites Parsimony_informative_sites Proportion_parsimony_informative AT_content GC_content A C G T K M R Y S W B V H D X N O - ? +fasta1.fas 10 100 1000 1 0.1 2 0.02 1 0.01 0.543 0.457 262 297 160 280 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outputs/expected_taxa_summary.txt Tue Dec 02 09:28:02 2025 +0000 @@ -0,0 +1,11 @@ +Alignment_name Taxon_name Sequence_length Undetermined_characters Missing_percent AT_content GC_content A C G T K M R Y S W B V H D X N O - ? +fasta1.fas OTU1 100 1 1.0 0.545 0.455 26 29 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +fasta1.fas OTU10 100 0 0.0 0.55 0.45 27 29 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU2 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU3 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU4 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU5 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU6 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU7 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU8 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU9 100 0 0.0 0.55 0.45 27 29 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ No newline at end of file
