changeset 0:44f6ec903688 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/clair3 commit 9968295e33bad5323ac9cbff8f64c2db35189a5f
author iuc
date Wed, 15 Jun 2022 09:45:05 +0000
parents
children 63e02ef6e153
files clair3.xml test-data/all_fasta.loc test-data/clair3_models.loc test-data/full_alignment_1.vcf test-data/merge_output_1.vcf test-data/phased_bam_1.bam test-data/pileup_1.vcf test-data/test1.bam test-data/test1.bed test-data/test1.fasta test-data/test1.vcf.gz tool-data/all_fasta.loc.sample tool-data/model.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 14 files changed, 464 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/clair3.xml	Wed Jun 15 09:45:05 2022 +0000
@@ -0,0 +1,311 @@
+<tool id="clair3" name="Clair3" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09">
+    <macros>
+        <token name="@TOOL_VERSION@">0.1.11</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">clair3</requirement>
+    </requirements>
+    <version_command><![CDATA[run_clair3.sh --version | cut -f2 -d ' ']]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        #if $ref_source.source == "history"
+            ln -s '${ref_source.ref_fasta}' reference.fasta &&
+        #elif $ref_source.source == "builtin"
+            ln -s '${ref_source.ref_fasta_builtin.fields.path}' reference.fasta &&
+        #end if
+        samtools faidx reference.fasta &&
+
+        #if $model_source.source == "datatable"
+            #set model_path = $model_source.model.fields.path 
+        #end if
+
+        ln -s '${$bam_input}' input_reads.bam &&
+        ln -s '${$bam_input.metadata.bam_index}' input_reads.bai &&
+
+        #if $bed_or_vcf.bed_or_vcf_selector == 'bed'
+            ln -s '$bed_or_vcf.bed_fn' input.bed &&
+        #elif $bed_or_vcf.bed_or_vcf_selector == 'vcf'
+            ln -s '$bed_or_vcf.vcf_fn' input.vcf &&
+        #end if
+
+        run_clair3.sh
+            --bam_fn=input_reads.bam
+            --ref_fn=reference.fasta
+            #if $model_source.source == "datatable"
+                --platform='${model_source.model.fields.platform}'
+                --model_path=${model_source.model.fields.path}
+            #else
+                #if $model_source.select_built_in == "hifi" or $model_source.select_built_in == "ilmn":
+                    --platform='${$model_source.select_built_in}'
+                #else
+                    --platform='ont'
+                #end if
+                --model_path=\$(dirname \$(which run_clair3.sh))/models/$model_source.select_built_in
+            #end if
+            --output='.'
+            --threads=\${GALAXY_SLOTS:-1}
+            #if $bed_or_vcf.bed_or_vcf_selector == 'bed'
+                  --bed_fn=input.bed
+            #elif $bed_or_vcf.bed_or_vcf_selector == 'vcf'
+                  --vcf_fn=input.vcf
+            #else
+                $bed_or_vcf.include_all_ctgs
+            #end if
+            #if $output_options.selection_mode == 'advanced'
+                $output_options.gvcf
+            #end if
+            #if $adv.qual
+            --qual=$adv.qual
+            #end if
+            #if $adv.snp_min_af
+                --snp_min_af=$adv.snp_min_af
+            #end if
+            #if $adv.indel_min_af
+                --indel_min_af=$adv.indel_min_af
+            #end if
+            $adv.enable_phasing
+            $adv.no_phasing_for_fa
+            $adv.print_ref_calls
+            $adv.ploidity_model
+            #if $adv.chunk_size
+                --chunk_size=$adv.chunk_size
+            #end if
+
+    ]]></command>
+    <inputs>
+        <conditional name="ref_source">
+            <param type="select" label="Reference genome source" name="source">
+                <option value="history" selected="true">History</option>
+                <option value="builtin">Built-in</option>
+            </param>
+            <when value="history">
+                <param type="data" format="fasta" name="ref_fasta" label="Reference genome" />
+            </when>
+            <when value="builtin">
+                <param type="select" name="ref_fasta_builtin" label="Reference genome">
+                    <options from_data_table="all_fasta" />
+                </param>
+            </when>
+        </conditional>
+        <conditional name="model_source">
+            <param type="select" label="Select if you want to use built-in models or your own via datatable." name="source">
+                <option value="built-in">Built-in</option>
+                <option value="datatable">Datatable</option>
+            </param>
+            <when value="built-in">
+                <param type="select" name="select_built_in" label="Select built-in model.">
+                    <option value="r941_prom_sup_g5014">r941_prom_sup_g5014</option>
+                    <option value="r941_prom_hac_g360+g422">r941_prom_hac_g360+g422</option>
+                    <option value="hifi">hifi</option>
+                    <option value="ilmn">ilmn</option>
+                </param>
+            </when>
+            <when value="datatable">
+                <param argument="--model_path" type="select" name="model" label="Select model" >
+                    <options from_data_table="clair3_models">
+                        <validator type="no_options" message="no modles available" />
+                    </options>
+                </param>
+            </when>
+        </conditional>
+        <param type="data" name="bam_input" format="bam" label="BAM file input." />
+             <conditional name="bed_or_vcf">
+                <param type="select" name="bed_or_vcf_selector" label="Restict variant calling to:">
+                    <option value="unrestricted" selected="true">Whole reference (unrestricted)</option>
+                    <option value="bed" selected="true">Regions defined in BED dataset</option>
+                    <option value="vcf">Sites defined in VCF dataset</option>
+                </param>
+                <when value="bed">
+                    <param type="data" name="bed_fn" format="bed" label="Call variants only in the provided bed regions."/>
+                </when>
+                <when value="vcf">
+                    <param type="data" name="vcf_fn" format="vcf" label="Candidate sites VCF file input, variants will only be called at the sites in the VCF file if provided."/>
+                </when>
+                <when value="unrestricted">
+                    <param type="boolean" name="include_all_ctgs" truevalue="--include_all_ctgs" falsevalue="" checked="true" label="Call variants on all contigs, otherwise call in chr{1..22,X,Y} and {1..22,X,Y}, default: disable." />
+                </when>
+            </conditional>
+        <conditional name="output_options">
+                <param label="Use advanced output options" name="selection_mode" type="select">
+                    <option selected="true" value="defaults">Show merged output file only</option>
+                    <option value="advanced">Show intermediate output files</option>
+                </param>
+                <when value="defaults"/>
+                <when value="advanced">
+                    <param name="full_alignment_check" type="boolean" label="Show full alignment file"/>
+                    <param name="pileup_check" type="boolean" label="Show pileup file"/>
+                    <param name="phased_bam_check" type="boolean" label="Show itermediate phased BAM file"/>
+                    <param type="boolean" name="gvcf" truevalue="--gvcf" falsevalue="" label="Enable GVCF output, default: disable" />
+                </when>
+        </conditional>
+        <section name="adv" title="Advanced Parameters" expanded="false">
+            <param type="integer" name="qual" value="0" min="0" label="If set, variants with >qual will be marked PASS, or LowQual otherwise." />
+            <param type="float" name="snp_min_af" value="0.08" min="0" max="1" label="Minimum SNP AF required for a candidate variant. Lowering the value might increase a bit of sensitivity in trade of speed and accuracy, default: ont:0.08,hifi:0.08,ilmn:0.08." />
+            <param type="float" name="indel_min_af" value="0.15" min="0" max="1" label="Minimum INDEL AF required for a candidate variant. Lowering the value might increase a bit of sensitivity in trade of speed and accuracy, default: ont:0.15,hifi:0.08,ilmn:0.08." />
+            <param type="boolean" name="enable_phasing" truevalue="--enable_phasing" falsevalue="" label="Output phased variants using whatshap, default: disable" />
+            <param type="boolean" name="no_phasing_for_fa" truevalue="--no_phasing_for_fa" falsevalue="" label="EXPERIMENTAL: Call variants without whatshap phasing in full alignment calling, default: disable." />
+            <param type="boolean" name="print_ref_calls" truevalue="--print_ref_calls" falsevalue="" label="Show reference calls (0/0) in vcf file, default: disable." />
+            <param type="select" name="ploidity_model" label="Call with the following ploidy model" help="EXPERIMENTAL: Enable haploid calling mode. Only 1/1 is considered as a variant, default: disable. EXPERIMENTAL: Enable haploid calling mode. 0/1 and 1/1 are considered as a variant, default: disable.">
+                <option value="" selected="true">diploid</option>
+                <option value="--haploid_sensitive">haploid (sensitive)</option>
+                <option value="--haploid_precise">haploid (precise)</option>
+            </param>
+            <param type="integer" name="chunk_size" value="5000000" min="1" label="The size of each chuck for parallel processing, default: 5Mbp." optional="true" />
+        </section>
+    </inputs>
+    <outputs>
+      <data name="merge_output" format="vcf_bgzip" from_work_dir="./merge_output.vcf.gz" />
+      <data name="pileup" format="vcf_bgzip" from_work_dir="./pileup.vcf.gz">
+            <filter>output_options['selection_mode'] == 'advanced' and output_options['pileup_check']</filter>
+        </data>
+        <data name="full_alignment" format="vcf_bgzip" from_work_dir="./full_alignment.vcf.gz">
+            <filter>output_options['selection_mode'] == 'advanced' and output_options['full_alignment_check']</filter>
+        </data>
+        <data name="phased_bam" format="bam" from_work_dir="./phased_bam.bam">
+            <filter>output_options['selection_mode'] == 'advanced' and output_options['phased_bam_check']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <conditional name="model_source">
+                <param name="source" value="built-in"/>  
+                <param name="select_built_in" value="r941_prom_hac_g360+g422"/>
+            </conditional>
+            <param name="bam_input" value="test1.bam" />
+            <conditional name="ref_source">
+                <param name="source" value="history" />
+                <param name="ref_fasta" value="test1.fasta" />
+            </conditional>
+            <conditional name="bed_or_vcf">
+                <param name="bed_or_vcf_selector" value="vcf"/>
+                <param name="vcf_fn" value="test1.vcf.gz" />
+            </conditional>
+            <conditional name="output_options">
+                <param name="selection_mode" value="advanced"/>
+                <param name="gvcf" value="true" />
+            </conditional>
+            <section name="adv">
+                <param name="no_phasing_for_fa" value="true"/>
+                <param name="print_ref_calls" value="true"/>
+                <param name="ploidity_model" value=""/>
+            </section>
+            <assert_stdout>
+              <has_text text="[WARNING] No contig intersection found, output header only in" />
+            </assert_stdout>
+        </test>
+        <test expect_num_outputs="4">
+            <conditional name="model_source">
+                <param name="source" value="builtin" />
+                <param name="select_built_in" value="r941_prom_hac_g360+g422" />
+            </conditional>
+            <param name="bam_input" value="test1.bam" />
+            <conditional name="bed_or_vcf">
+                <param name="bed_or_vcf_selector" value="unrestricted"/>
+                <param name="include_all_ctgs" value="true" />
+            </conditional>
+            <conditional name="ref_source">
+                <param name="source" value="builtin" />
+                <param name="ref_fasta_builtin" value="test1" />
+            </conditional>
+            <conditional name="output_options">
+                <param name="selection_mode" value="advanced"/>
+                <param name="pileup_check" value="true"/>
+                <param name="full_alignment_check" value="true"/>
+                <param name="phased_bam_check" value="true"/>
+            </conditional>
+            <section name="adv">
+                <param name="print_ref_calls" value="true"/>
+            </section>
+            <output name="merge_output" decompress="true" file="merge_output_1.vcf" ftype="vcf_bgzip"/>
+            <output name="pileup" decompress="true" file="pileup_1.vcf" ftype="vcf_bgzip"/>
+            <output name="full_alignment" decompress="true" file="full_alignment_1.vcf" ftype="vcf_bgzip"/>
+            <output name="phased_bam" file="phased_bam_1.bam" ftype="bam"/>
+        </test>
+        <test expect_num_outputs="4">
+            <conditional name="model_source">
+                <param name="source" value="datatable" />
+                <param name="model" value="test_model" />
+            </conditional>
+            <param name="bam_input" value="test1.bam" />
+            <conditional name="bed_or_vcf">
+                <param name="bed_or_vcf_selector" value="bed"/>
+                <param name="bed_fn" value="test1.bed" />
+            </conditional>
+            <conditional name="ref_source">
+                <param name="source" value="builtin" />
+                <param name="ref_fasta_builtin" value="test1" />
+            </conditional>
+            <conditional name="output_options">
+                <param name="selection_mode" value="advanced"/>
+                <param name="pileup_check" value="true"/>
+                <param name="full_alignment_check" value="true"/>
+                <param name="phased_bam_check" value="true"/>
+            </conditional>
+            <section name="adv">
+                <param name="snp_min_af" value="0.5"/>
+                <param name="indel_min_af" value="0.12"/>
+                <param name="no_phasing_for_fa" value="true" />
+                <param name="print_ref_calls" value="true"/>
+            </section>
+            <output name="merge_output" ftype="vcf_bgzip">
+                <assert_contents>
+                    <has_size value="450" delta="50" />
+                </assert_contents>
+            </output>
+            <output name="pileup" ftype="vcf_bgzip">
+                <assert_contents>
+                    <has_size value="0" />
+                </assert_contents>
+            </output>
+            <output name="full_alignment" ftype="vcf_bgzip">
+                <assert_contents>
+                    <has_size value="0" />
+                </assert_contents>
+            </output>
+            <output name="phased_bam" ftype="bam">
+                <assert_contents>
+                    <has_size value="0" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+      Clair3 is a germline small variant caller for long-reads. Clair3 makes the best of two major method categories:
+      pileup calling handles most variant candidates with speed, and full-alignment tackles complicated candidates to maximize precision and recall.
+      Clair3 runs fast and has superior performance, especially at lower coverage. Clair3 is simple and modular for easy deployment and integration.
+      
+      https://github.com/HKU-BAL/Clair3
+
+      LICENSE:
+
+        Copyright 2021 The University of Hong Kong, Department of Computer Science
+
+        Redistribution and use in source and binary forms, with or without modification,
+        are permitted provided that the following conditions are met:
+
+        1. Redistributions of source code must retain the above copyright notice, this
+           list of conditions and the following disclaimer.
+
+        2. Redistributions in binary form must reproduce the above copyright notice,
+           this list of conditions and the following disclaimer in the documentation
+           and/or other materials provided with the distribution.
+
+        3. Neither the name of the copyright holder nor the names of its contributors
+           may be used to endorse or promote products derived from this software without
+           specific prior written permission.
+
+        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+        ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+        WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+        DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+        ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+        (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+        LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+        ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+        SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1101/2021.12.29.474431</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc	Wed Jun 15 09:45:05 2022 +0000
@@ -0,0 +1,10 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+test1	"Test Genome"	${__HERE__}/test1.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/clair3_models.loc	Wed Jun 15 09:45:05 2022 +0000
@@ -0,0 +1,10 @@
+#Enter the model names in the second column, the platform name in the second and the path in the third.
+#This file lists the locations and dbkeys of all the model files
+#under the "models" directory (a directory that contains a directory
+#for each build). This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>  <display_name>   <platform> <file_path>
+#
+#So, all_model.loc could look something like this:
+test_model	"the_model_name"	ont	$(dirname $(which run_clair3.sh))/models/r941_prom_hac_g360+g422
Binary file test-data/full_alignment_1.vcf has changed
Binary file test-data/merge_output_1.vcf has changed
Binary file test-data/pileup_1.vcf has changed
Binary file test-data/test1.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1.bed	Wed Jun 15 09:45:05 2022 +0000
@@ -0,0 +1,3 @@
+chr1	14756	15038	JUNC00000001	294	-	14756	15038	255,0,0	2	73,69	0,213
+chr1	14969	15836	JUNC00000002	144	-	14969	15836	255,0,0	2	69,41	0,826
+chr1	15905	16677	JUNC00000003	12	-	15905	16677	255,0,0	2	42,71	0,701
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1.fasta	Wed Jun 15 09:45:05 2022 +0000
@@ -0,0 +1,85 @@
+>demo20
+TGCCTGCTTTGTGCCAGGTTCTGGGTTGGGAGGTGCTGGGGACAGGGAGATGAGTCAGAC
+CTgggaagatttcgtagaggaggtgacagtaagctggaacctgtgtaatgagcaggagtt
+gcccagtggagaaggggaaggtgttccaggcggaagaaacagcatatgcaaaggccccaa
+ggtaggaagggccctagtgtgtgcagaggacagggcatggggaggggaactaaggctgag
+gccaaggagaggaaatgactcacaccgtgagagaggagttgagaccagggaggCTGCTTG
+CTGTATGATGCAACTGAGAGGGTAGAACAAGGCTGGCACAGAGAAGGTGGGGAAGGAAAA
+GGAGAGACGAAGCTGAGATTTCAGCAGGGCCAAGTCAGCCGTGAGTGCCAGGCTGCGGAG
+CCCAGATTCTCTGGGCTGagaaagagcactctgtccagagtgtggaggggggcctggagg
+ggatgagactcaaagctgggaggccagagaggaggctgctagagttttctgggagagagt
+tactggggcctgaacTCCAGTGAGGCActtcccatttcacagaccaggaaactgaggccc
+aagagtgaggcaactggcccaagggcacacagccaggtaaggcagaacCTTCCTTCTTTG
+GAGCTCCCTTGGGTGGGAAGCTGTGGGCTCCCCTTCATAGCCCACCCTTTTGGCTGTATC
+TCCCTGCTGCCCTGGGCATATGCTCCCTTATTCTGTCCTCCCTTGAAGCTGACTGCTGGC
+CTTAAAGGGCCCCTGTTTCTTCCCTCTGGACTACATGAGATCGGGATAGTATTAATGACT
+AAAACCTACCAGGGGTTTCTAGGCCTGGCCTGGAAAAAGTGACTGTTGACAAACAAAGTG
+CAGAGATTTAAAATCTCCTCTGTCTCAATTAGTGGAATCCAGTTAGAGGTTTGAACTATG
+ATTCTACCAGAATCCAATCTCTCTGGGTAGCCAGGTACCCAGGATGGGGCTAAAATTCCA
+GATGGATAGGTTGTCAACACCAGTGAGGAACCAGGAGGCTACCACAGGGTGGGACTTCCT
+GGTTTGGCTTTACATCTGAACTTCAGGGATCCCAGATCCTGGATCTGGGGCACTTGTCCA
+GAGAAGGCTATTGCTCTCATGTCACAAATGAGATGACTAAGACCCCCAAATCAATTCCAG
+TTCACTCACAAGCATTTCCTGGGCAGTGGAGACCCCTGCCCCACCTGTTGGCACCCCCTC
+AGCTCCCCACAGGGAATTGGAGTCCAGCCAAGCATGAGGAGGCTGTTGGCCTCAAGGTGA
+GCAGGGATGGGCTGAACCTCACCCAGTAAGGCAAGGACAGAGCCAGGGTTGGCCTGAGAT
+TTCCAGCCACCCTTTCCAAGGCTCTGCTCACTGTTATTTTCCTTAGTCTacaacaatatc
+aataacaataacaacaataatatcaacaCAAAAAGTGAAATACTCACCATGTATTGTAGT
+GTTTCCAAGGTGTCATGTAATGCCAGGGGTAGTTTGGGGCCAGGAAAAATATTTTTGGGA
+GGCATAAGAATAGGATGGACTGATATTGATATGCAACAGTTTGATCTGGTCCTCCTCTGA
+ATATCTGGGCTGGTAATTTGTACCAGTTTCCCTCGCTTTtgtgcataggcactgtgctga
+acccttttgtatgcatgaactcatccgattctctgtgcaagaactctatgagattattat
+tcccgttttacaagtaagaaaaattgaggctctgagaagttaaataaatgacttgtatga
+agttccagtgctaattaataagtgaaggagccagggcttgaactccggcccatctgactg
+caaagccagtgcccttcctcctacacATCTTCCTTTGGATTTCCACCACTGAGCATATGT
+AAGGTTGGGCAAACAGCCTGCATGAACAATCGCTGCTTTTATATCATGCACAAGTTTGGT
+CTTTTCTGCCTGTGCCCATGTCCTTGTAACCTTCTGAACCAAACTCCCCAGTGCCTGGGA
+ACATCAGAAGACTTGACTCTCTTCTCCTTTCACTAGCCTCCACCTGACTGGGACAAAGCC
+ATGCAGAGAGCTAGTGCTCCCTTCCTGCTAGACTTCAAGGATGCCTGGTTCCTGTGCCCC
+ATCTCCATCCAGCCCTCTCTTCTACACCTGGTGACTGAGCCTCTCCTTCAGTTTCTCCAT
+CCAGAAGGGGGTGAAAGCAACTGCCTAGTGTCCTTCCCTGGTGATAGTGGAGCACGGGGG
+ACAGGGTGTTTGGGCAAAAGGTGCCCAAGGTGAGGTGCCCAACACAACCTCCTACTCAGA
+CGATTGAGCAGACATTCAGCCTCATCTGGGGACTGGGTTACCAGTGGGTTAGTGGGAGGC
+ATTGGGCCCAGGCCCTGTGCCTTGGGCTGAGCTACAAGAAACCCACACATGGGATGAATT
+CAGGCAGCTCAAGGCCAGGTCTGTGCATACGCCAGTAATAGGTTCAGGTTAATCCACATG
+TCGCGATTTGGAAGGTGTCTACTTTTCCTACCTGTAGCTTCCTTAGGCCTCAAACCCCTA
+CTCAGCTGGGTCTGCCAGACTGAGATGGAGCCAGGGTGGAATCTTCTGCCCTCAAATCCC
+TGTCAGCCCTGGTGGTGCCGGGAGCGCCATCACTATTGGGTCTTAAAGGCTTTCCAGCCT
+TCCACTATGGATCCAGGAGCAGCAGTAGCCCCTTTGGTCTTTCTCTCTCATCAGGACATC
+TCCACTCATGGTTCCAGTCAGAGCTTCTTGAAAGTAGTCCCACTCTGTTCAAAAGCCTCC
+CATGCCCCCTGCTAGCCTCAGGCTAAGAGCCCTTCTCCTTCGCACAGCCTTTGGACCTGT
+CTATTTTTATGGTCTGGAAACTTCAGGAACACTGATAGCTGAGCATCTGGCACATATTAT
+GCACTCAAAAACCATGTATTTCTTTCTCCTTCCCTTTGGGACCCGTAAACCAGGGactgg
+acatttttgcaagagacaggagctgtgactgtgcattcactgctgtatccccagcaccca
+gcactgggcctgccacacagtaagtgcttagtaaatgtttgttgactgagtgaTTGCAGC
+TGGGGCCAAGAATGCCTTGGACACCCCAAGTAGGCCGTGTTAGAAGGAGTCAGTGAGAGC
+CTGGGAGCCCAGCCCAGAATTGTTTTCTTGACCCAGAAGCCAGGGCCAGGGATGCCTCTT
+CACTTCTGTTTGGCCCTCTTGGGCTTAGGGGCAGGGGCATTAAGATGAgagaggtccttg
+gggtgcattgagtctaacctcccagttcctcccattctacagccaggaaaactgaggccc
+agggaggggtaggacaagcccaagAAAGTGGGGCTGGAGAATGAATCCTGGAGACCAAAC
+TTGTCAGTCTGGATTGCTGTTGCCCTCATCCTCGCCTCCAAAACCCatgggtaaactgag
+gccaagagaggggcaggggcatgcccaaggtcacccatggaatcaggggacagggcctgg
+attgggattgttgttgacgccattattactgtttattgttgtttctatttcacAGATGGT
+CGGGGAGGGTGGGGCCCGCAATGGCTCCCAGGCGCCCAGAGACCCTGGAGGGTGAGCAGG
+GTCTCCCCTCCCCTCTCCTGCCCGTCTTTAGCCACACTGGGGCGCACACCGCTCACTCAC
+CCGGGGCCGAGGCGTTAGCCCTTTCTTGCACCAGGTGCCGCAACAACACCAGCAGCTGGC
+GCAGGCTGTGCTGCTGGTCCTGCAGGAGGCTGGAGTTGTGCCTGACACCGCGCAGGCCGC
+GCTCGATGTTGGTGAGGGCGGCGCTCTGGCGGCTCAGCGTGTTCAGCAGCTTCGCCTTCT
+TGCTGAGGATGCTGGCCAGCTCCTCCTGCTGCTTGGTCTCCAGGGCCTGCAACCGCTTCT
+CGAGCGCGCTGCGGGGTAGGGGGCGCACAGAGGTGAGCCTGGCATCCTCGCGAAGCACGC
+ACCCCCGCGCGCCTCCCCGGCCCTGGAGTCCCTGCAGCCCGACGATGAGACTCAAGTGTG
+GTGGAACGTCCTGTGCCCACTGTAGGCACAGATTGAGGAGGGGAGAAAAGAGATACCCGG
+CCCTGGAGTAATATAGATTGAGGTTTAGTGGAAGAAAGAGGTGGTGTGGGAGGGACACCA
+GCAACTGGGTAGCTATTATCAAATCCCAACTGTGCTTGCTTTTTGACCCAGCAGTCTACT
+CTCAGGAGTTATCCTCTAGGGAGCATGGTCAAGAATGGCCCTGGGGACTTGCTTGTAATA
+GAAAAACAAACTAACCAACCAACCAAAAAAGATATAGCCTAGATGCCCAAAAGCCAGGAC
+TGGCTGAATCCGTTGCGTTTTGGCGTCCCTGGAAATGTTCCGCAGTCATGAAGGAGGAGG
+CATTTGCACAGAATTGGAAAGATGCCCAGGACTTGGGGCACATCAAGCCTAACCCCATGT
+GTGGCAAGAGAAGAAAGAAAGTATTAATGTAAATAAAGAGAAATGGGGTGAACATATAGG
+AGAAGGCTGGAAAGACCGCAGTGGTGCCTGTGTTTGGGAAGAATATGAAAGAAATTCCCT
+CAAGTGCTGTGACTTCTGCAGAGCAGGTTTGAGTGGATGAAGATGGAGAGGAGGAAGATG
+GGGGCAGGATGGAGGGCCCAACTTTCACTTTTATTTTGTACAGGTTCCTGTTGTCCGATG
+ATATTATAATAATCAAGAGACATTTTTTGTAATGGATTTAGAAGCAAAGAGGAGTTTTTC
+AAAAGAAAGCCTTAGACTCAGCTCTTTCTTTTTGGACATTTTATCCTCCAGATTTACTca
+catgtgtgtgaaatgagatatggaaatgttactcatcgtatcactggttggattagtaaa
+aggctggaagcaacctcaatatccattaactggggactggaggaataaaagcagggacca
+catatggtggagcattataa
Binary file test-data/test1.vcf.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Wed Jun 15 09:45:05 2022 +0000
@@ -0,0 +1,10 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#test1	Test-Genome	./test-data/test1.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/model.loc.sample	Wed Jun 15 09:45:05 2022 +0000
@@ -0,0 +1,10 @@
+#Enter the model names in the second column, the platform name in the second and the path in the third.
+#This file lists the locations and dbkeys of all the model files
+#under the "models" directory (a directory that contains a directory
+#for each build). This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>  <display_name>   <platform> <file_path>
+#
+#So, all_model.loc could look something like this:
+#test_model	"the_model_name"	r941_prom_hac_g360+g422	$(dirname $(which run_clair3.sh))/models/r941_prom_hac_g360+g422
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Jun 15 09:45:05 2022 +0000
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of indexes in the Bowtie mapper format -->
+    <table name="model" comment_char="#">
+        <columns>value, name, platform, path</columns>
+        <file path="tool-data/model.loc" />
+    </table>
+    <table name="all_fasta" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Wed Jun 15 09:45:05 2022 +0000
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of model folders -->
+    <table name="clair3_models" comment_char="#">
+        <columns>value, name, platform, path</columns>
+        <file path="${__HERE__}/test-data/clair3_models.loc" />
+    </table>
+    <!-- Locations of reference genome files in fasta format -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+</tables>