Repository 'irissv'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/irissv

Changeset 0:30fc9f2bcbe4 (2021-01-19)
Next changeset 1:1d4f065fa0ef (2021-05-17)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/irissv/ commit 80a64f32dbd465d72a10e69a749def733dea8ffc"
added:
irissv.xml
macros.xml
test-data/all_fasta.loc
test-data/genome.fa
test-data/iris.bam
test-data/sniffles.vcf
test-data/test_out.vcf
tool-data/all_fasta.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 30fc9f2bcbe4 irissv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/irissv.xml Tue Jan 19 20:30:21 2021 +0000
[
@@ -0,0 +1,118 @@
+<?xml version="1.0"?>
+<tool id="irissv" name="Iris" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+    <description>Refine insertion sequences</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+
+    <command detect_errors="exit_code"><![CDATA[
+    @REF_FASTA@
+    ## iris reads the genome through the reference.fa symlink created just above.
+    iris
+        genome_in=reference.fa
+        vcf_in='${in_vcf}'
+        reads_in='${in_bam}'
+        vcf_out='${out_vcf}'
+        threads=\${GALAXY_SLOTS:-4}
+        padding_before='${padding_before}'
+        padding_after='${padding_after}'
+        genome_buffer='${genome_buffer}'
+        min_ins_length='${min_ins_length}'
+        max_ins_dist='${max_ins_dist}'
+        max_out_length='${max_out_length}'
+        max_len_change='${max_len_change}'
+        ## Optional switches are left unquoted: an unset one renders as nothing rather than as an empty '' argument.
+        $aligner
+        $rerunracon
+        $also_deletions
+        $keep_long_variants
+    ]]></command>
+    <inputs>
+        <!--
+            Required arguments
+        -->
+        <expand macro="reference"/>
+        <param name="in_vcf" type="data" format="vcf" label="The VCF file with variant calls/supporting reads"/>
+        <param name="in_bam" type="data" format="bam" label="The BAM file containing the reads"/>
+        <!--log_out param doesn't seem to produce output for some reason, commenting out.-->
+        <!-- <param name="output_log" type="boolean" label="Output log file?" checked="true"/> -->
+        <!--
+            Optional arguments (each is passed to iris as a key=value pair)
+        -->
+        <param argument="padding_before" type="integer" value="1" min="1" label="The number of bases to output before the variant in REF/ALT fields" />
+        <param argument="padding_after" type="integer" value="0" min="0" label="The number of bases to output after the variant in REF/ALT fields" />
+        <param argument="genome_buffer" type="integer" value="100000" min="1" label="The genome region on each side of the SV to align assembled reads to" />
+        <param argument="min_ins_length" type="integer" value="30" min="1" label="The min length allowed for a refined insertion sequence" />
+        <param argument="max_ins_dist" type="integer" value="100" min="0" label="The max distance a refined insertion call can be from its old position" />
+        <param argument="max_out_length" type="integer" value="100000" min="0" label="The max length of variant which will be output" />
+        <param argument="max_len_change" type="float" value="0.25" min="0.0" label="The max proportion by which a variant's length can change" />
+        <!-- A single-choice select may pre-select only one option; the empty value (no extra switch) is the default. -->
+        <param name="aligner" type="select" label="Minimap2 mapping mode">
+            <option value="" selected="true">Use minimap2 for mapping</option>
+            <option value="--pacbio">Use minimap2 for mapping in PacBio mode</option>
+            <option value="--hifi">Use minimap2 in hifi mode</option>
+        </param>
+        <param argument="--rerunracon" type="boolean" truevalue="--rerunracon" falsevalue="" checked="false" label="Use racon twice for consensus finding" />
+        <!--
+            Flags (each truevalue is the literal switch named in its argument attribute)
+        -->
+        <param argument="--also_deletions" type="boolean" truevalue="--also_deletions" falsevalue="" checked="false" label="also try to refine deletion positions/lengths"/>
+        <param argument="--keep_long_variants" type="boolean" truevalue="--keep_long_variants" falsevalue="" checked="false" label="output original VCF line for very long variants instead of ignoring them"/>
+    </inputs>
+    <outputs>
+        <!-- Refined VCF: the only dataset currently declared. -->
+        <data format="vcf" name="out_vcf" label="${tool.name} on ${on_string}: Result"/>
+        <!-- Log dataset kept disabled: the log_out option did not appear to produce a file. -->
+        <!-- <data name="out_log" format="txt" label="${tool.name} on ${on_string}: Log">
+            <filter>output_log</filter>
+        </data> -->
+    </outputs>
+    <tests>
+        <!-- #1 default settings, reference FASTA taken from the history -->
+        <test expect_num_outputs="1">
+            <param name="in_vcf" value="sniffles.vcf"/>
+            <param name="in_bam" value="iris.bam"/>
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="history"/>
+                <param name="ref_file" value="genome.fa" ftype="fasta"/>
+            </conditional>
+            <output name="out_vcf" file="test_out.vcf"/>
+        </test>
+        <!-- #2 default settings, built-in (cached) reference entry "iris" -->
+        <test expect_num_outputs="1">
+            <param name="in_vcf" value="sniffles.vcf"/>
+            <param name="in_bam" value="iris.bam"/>
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="cached"/>
+                <param name="ref_file" value="iris"/>
+            </conditional>
+            <output name="out_vcf" file="test_out.vcf"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+
+**Input**
+
+- FASTA file containing the reference genome
+
+- VCF file with variant calls & supporting reads determined by Sniffles
+
+- BAM file containing the reads
+
+**Output**
+
+- VCF file with refined insertion sequences
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 30fc9f2bcbe4 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Jan 19 20:30:21 2021 +0000
[
@@ -0,0 +1,68 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">1.0.4</token>
+    <token name="@PROFILE@">18.01</token>
+    <xml name="requirements">
+        <!-- Packages resolved for the job: Iris itself (pinned to the tool version token) and samtools. -->
+        <requirements>
+            <requirement version="@TOOL_VERSION@" type="package">irissv</requirement>
+            <requirement version="1.9" type="package">samtools</requirement>
+        </requirements>
+    </xml>
+    <xml name="version_command">
+        <!-- Runs iris with no arguments. NOTE(review): presumably the printed banner carries the version string; confirm. -->
+        <version_command>iris</version_command>
+    </xml>
+    <xml name="citations">
+        <!-- Single BibTeX entry that cites the upstream Iris GitHub repository. -->
+        <citations>
+            <citation type="bibtex">@online{iris,
+              author = {Melanie Kirsche},
+              title = {iris},
+              year = 2021,
+              url = {https://github.com/mkirsche/Iris},
+              urldate = {2021-01-13}
+            }</citation>
+        </citations>
+    </xml>
+
+    <!--
+        command
+    -->
+
+    <token name="@REF_FASTA@"><![CDATA[
+        ## Resolve the reference location first, then emit one symlink command for either source.
+        #if $reference_source.reference_source_selector == 'history':
+            #set $ref_path = $reference_source.ref_file
+        #else:
+            #set $ref_path = $reference_source.ref_file.fields.path
+        #end if
+        ln -f -s '$ref_path' reference.fa &&
+    ]]></token>
+
+    <xml name="reference">
+        <!-- Lets the user pick the reference genome from the all_fasta data table or from a history dataset. -->
+        <conditional name="reference_source">
+            <param type="select" name="reference_source_selector" label="Choose the source for the reference genome">
+                <option value="cached">Use a built-in genome</option>
+                <option value="history">Use a genome from history</option>
+            </param>
+            <!-- Built-in genome: entries come from the all_fasta table, sorted on column 2. -->
+            <when value="cached">
+                <param type="select" name="ref_file" label="Using reference genome" help="Select genome from the list">
+                    <options from_data_table="all_fasta">
+                        <filter column="2" type="sort_by"/>
+                        <validator message="No reference genomes are available" type="no_options"/>
+                    </options>
+                    <validator message="A built-in reference genome is not available for the build associated with the selected input file" type="no_options"/>
+                </param>
+            </when>
+            <!-- History genome: any FASTA or FASTQ dataset from the current history. -->
+            <when value="history">
+                <param type="data" name="ref_file" format="fasta,fastq" label="Use the following dataset as the reference sequence" help="You can upload a FASTA or FASTQ sequence to the history and use it as reference"/>
+            </when>
+        </conditional>
+    </xml>
+    <!--
+        Help
+    -->
+
+    <token name="@WID@"><![CDATA[
+*irissv*, or Implement for Refining Insertion Sequences, is a tool that corrects the sequences of structural variant calls (currently only insertions). It uses FalconSense to obtain consensus sequences of the reads surrounding each variant and aligns these sequences back to the reference at the insertion site, resulting in an insertion call that takes into account the aggregate information of all supporting reads.
+    ]]></token>
+    <token name="@REFERENCES@"><![CDATA[
+More information is available in the `GitHub repository <https://github.com/mkirsche/Iris>`_.
+    ]]></token>
+</macros>
b
diff -r 000000000000 -r 30fc9f2bcbe4 test-data/all_fasta.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc Tue Jan 19 20:30:21 2021 +0000
b
@@ -0,0 +1,19 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
+iris iris iris ${__HERE__}/genome.fa
\ No newline at end of file
b
diff -r 000000000000 -r 30fc9f2bcbe4 test-data/genome.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome.fa Tue Jan 19 20:30:21 2021 +0000
b
b'@@ -0,0 +1,1583 @@\n+>chr1\n+AATATGCCCATATTACAGATAAGAAAATTAAAAATTAAGGCTTACAGAGTTTATTACATGCTTATAGATG\n+GACCTAGGGTAATGTTGGATACTTAGAGTGGGATCACATTAAAGCCTCTACAGCAGTGGTCCCCAACCTT\n+TTTGGCACCAGGGACCGGTTTTGTGGAAGATAATTTTTCCACAGACGGGGGTGGAAGTGTATGGTTTCGG\n+GATGAAACCGTTCCATCTCAGATCATCAGACATTAGTTAGATTCTCATAAGGAGCGGGCAACCTAGATCC\n+CTCACATGTGCAGTTCACAATAGGATTCATGCTGGTATGAGAATCTGATGCCGCCACTGATCTGACAGAT\n+GCTCGCCTGTTGCTCACCTCCTCCTGTGTGGCCTGGTTCCTAATAGGCCATGGACAGTACCGGTCTCTGG\n+CCCAGGGGTGGGGCCCCCTGCTCTACAGCATTCTACTTCATCGGTAAAATCTTTGAAAATGTCTTCTAGA\n+AATATTTTTAAGGTCACTTTACCTTTTGATCATTGAATATTGGGTCAATTTTGTTGTTGTTGTTGTTGTT\n+GTTGAGACAGGGTCTCGTTATGTTGCCCAGGCTGGATGGAGTGTGGTGGTGCAGTAACGGCTCACTGTAG\n+CTTCAACTTCCTGGGTGCAAATGATCTTTCTGCCTCAGCTTCCTGAATAACTGGGACTACAGGCGCACGC\n+TACCATACTTGGCTAATTTTTTGTATTTTTAGTAGAGATAAGACCTCATTATGTTGCCCAGGCTGGTCTC\n+AAACTCCTGGGCTCAAGTGATCCTCCTACCTCAGCCTCCCAAAGTGCTGAGATTACAAGTGTGAGCTACT\n+GTGCTCAGCTGACATAATCTTTTGATGTATGAATGGACCCATCTCTCTTGCTGCCCCTGCAACACCAGAT\n+ACTGGGACTCATCGATTTCTGATGATGAAAGATGTCGTTCTCATGGGAGTCTTTATGTTATACAAATGGA\n+GAGCAAAGTTCCCAAACATGCTGCCGTGGTTGATGTGTTCTGTCATCTGGTAATAACTGGAGTTATTACA\n+GTCATCCAAGTGAGAGAAGTGGTGGTTGAACCAGGGTGGTCACGATGGTGGTGGGGAGAAGCCATATTTG\n+GAAAGTAGAACTGATAGGATACATCAGATGTGGAGGATGAAAGAAAAAAGCCAAAGATGGCTCCCAGATC\n+ATCTGTGGAATGTTGCTTGTAGGGTCCAGGTCTGGAGTATGCTGTGTGTCCAGAGCAACCTAGATTGAAG\n+AAAAGACCTCAGGCACAGAAGCTGTAAAAGAGGCCTTGAAAAGCAGCTTGAAAAGGTGGATTATAGGAAG\n+TAAAACAATATGAAGTCTATAATACTTTTAAAAGAGTTATTTTCTGACACTAGTCCTCAAAAATATTCCT\n+GGAGTCCATTCTTTGCAGGAAAAGTGGCGACTTGTCCTAGACAAAGCCTACGTGCTGATTCCATGTGACC\n+CTGGGGGTGGGTACTAAATTCGTAACAGCTTGTGGGTGAAAATATGGTTTATATGTTGAATGTGTTCCTT\n+AATTACTGTAATTCTGTTTTTATGTGCATGGTTTCTGTGAAACAAATTAGAAGTCCAACACACTATTTTA\n+GCTCAGTTGACCTTGATACAAGAACTGCTGTGTTAATTATGCCCAGTCTTTCAGTAGTACAATCCTCTTT\n+TTTTTTTCTGTGAATGTTAGTAAAATCTACTGGTATTTTATGTATATATTGCACAGCTATACACTGTTTT\n+TCTTTTTATGCTCTTGAAAATGGAATTTAATCAGAGTTGTACATGACACATGGAGGGCATTCATTTGAGT\n+CCTTGGTATAGGGCAGTATAGAGGATCCATAAGACATGGTCCAAGCACTTTGAGGATATATGATGTTGGG\
n+GGAGCATCAGAGATTTGTTGACACATTTAAGTAAGGACTTGAATGTCGTACTTTGACTGTTGGCAAGAAA\n+TAGGCTCTTGGCTGGGTCCAGTGGCCCACACTTGTAATCCTAGCACTTTGGGAGGCCAAAACAGGAGAAT\n+TGCTTCAGCCCAGGAGTTCGAGACCAGCTGGAGCAACCTAGGGAGACCCTGTCTCTACAAAAAAATAAAC\n+AAATTAGCCGTGCATGGTGGCACGTGCCTGTGGTCCCAGTCACTCAGGAGGCTGAGGTGGGAGGATCACT\n+TGAGCCCAGGAGGCTGAGGATGCGTGAGCTGTAATCTCACCACTGCACTCCAGCCTAGGGCGACAGAGCA\n+AGACCCTGTCTCTAAAAGAGAAAAGAAACAGAATTCTTACATTGTGCTTTAGTTTATCAGTTAGTGAACT\n+TTTCTTTTTTTGTTGTTGCTTTTTATTTTTTGTGGAGATGGGGTCTCACTATGTTGCCCAGGCTAGTCTC\n+GAACGCGTAGGCTCAGATGATCCTCCCACCTTGGCCTCCTAAAGTGTTGAGATTACAGGCATGAGCCACT\n+GAACCTGGGCCTGGTGTGTTTTTCTTACCTTGGTTTAAAGATCAAATGGAATATTATCATACCTACTGAA\n+TATTGAGAGTTTAAAACAGGGTAAAATACGTATTTAAGAAAAATTAACGTATGTGTAAGTGTTTCACTCC\n+TTTAATTCTGTAAACTAGGTATTACCTCAATTTTAGAGACGACACAGACGTGCAGTCATACCCAAGGTCG\n+TAAAAAACACCTAAATACAGTATTTCTTAACTTTTTTTTTTTTTTTGAAACGGAGCCTCGCTCTGTTGCC\n+CAGGGTGGGGTGCAGTGGTGCAATCTCAGGTCATTGCAACCTCTGCCTTCTGGTTTCAAGCAGATCTCTT\n+GCCTCAGCCTCCCAAGTAGCTGGGGTTACAGGTGCCCACCACCACGCCCAGCTAATTTTTGTAAATACAG\n+TATTTTTAAATTGTTCTTTCATATGTTTAGAATATTAAGCCAGGCATGGTAGCCCATGACTTTGGGAGGG\n+CAAGGAAAGAGGATTGCTTGAGCCAAGAGTCTCAGTCCAGCCTAGGCAACATGGCAAAGCCCCATCTCTG\n+CAAAAAAAATTAAAATTTGGCTTGGCATGGTAGTGCACACCTATAGGCCCAATTACTTGGGAGGCAGAGG\n+TGAGAGGTTCACTTGAGCCCAGGAGTTGCAGGCTGCAGTAAGCCATGATTGAGCCACTGTACTCCAGCCT\n+GGGTGACAGACCGTGACGCTGTCTGGGGAAAACAAATGGATTAGTTGTTGATAGAAGCAGTTATTCTAAA\n+AGCAGCTTGGGGGCGGGTGCAGTGGCTCACACGTGTAATCCTAGCAATTTGGGATGCCGAGGCGGGTGGA\n+TCACCTGAGGTAAGGATCCTGTTCAAGGCCAGCCTGGCCAACATGGTGAAACCCTGTCTCTAGTAAAATA\n+CAAAAATCAGCCAGGCATGATGGCGGGTGCCTGTAATCCTAGCTACTGGGGAGGCTGAGACAGGAGAATC\n+GCTTGAACCTAGGAGACAGCGGTTGCAGTGAGCCGAGATCGTGCCACTGCACTCCAGCCTGGGCAGCTGA\n+GCTAGACTGTCTCAAAAAAATAAATAAATAAAATAAAAGCAGCTTGGTTTTGAAGATATAAGTTAGGAAA\n+ATCTATTAGTTTCAGAAGCTTTAAAAGTCTGGGAGAAAGGAATATTGAAGAAGAGGAAAAGGATTTCATG\n+TGAATTGAGGAAGTTAACTCATTGGTGTAGTTTTTCCTTTCTTTTTTTTTTTTTTTACTAAGGAAAAGAG\n+AACAAAGCGATAAAATCTGAGTAGTCTTTCAGTCATTCTTGGTAGAATTGGTAGCTAAAAGGCTGAGTGA\n+AATATGGCTTAAAAGCTTTATTGGCTG
GGCGCGGTGGCTCACCCCTGTAATCCCAGCACTTTGGGTGGCT\n+TAGGTGGGTGGA'..b'AAATTATGAGGAAAAAGCTGCGGGGAAGGGGCAGATGATTATCAGTTTACATGTCTTT\n+GGGCATTTTAGGTAGAGTGAAATCTAGGCCTTGCAAATCATATCTCTTTAGAATCATTCAAAATTTGCTT\n+AGGAAGAGCAGAATTGTTGATTGTTTATTCAATGTGTATGCTAAGTAACATGTTTTATAGAAGAGTTATA\n+ATTTTTTTCCTATTTTCCTATTGTATATTTGTTTTTTAATACTCTGGGATCAAGTATACAGTGAATATGT\n+ATAGTACATTTATCCCTGTTTTATAAAAAAATGGGAAACTAAATTTGTTGTAAATGATTAAGGAACTTTA\n+TGATTATAGATCTTTTGCTCAAATCTTTATGGCAATAGGTTTTACTATGTTTCTATATTGTGAGGAGGGG\n+AGCGAGTTGCCTGTGGGTTTTGGTTTTATGGTTTTAAAATATAAGCAGTGTATAAAATATTTTTATTGTG\n+TGCCATTTTTCTTTTTTTTCTTTTCTTTTTTTTTTTTTTTTGAGATGGAGTTTCGCTCTTGTTGCCCAGG\n+CTGGAGTGCAATGGCCCGATCTCGGCTCACCGCAACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCT\n+CAGCCTACCAAGTAGCTAGGCTTACAGGCATGCACTACTACGCCCGGCTAATTTTTGTATTTTTAGTAGA\n+GACAGGGTTTCCCCATCTTGAGGCTGGTCTCGAACTTCTGACCTCAGGTGATCCGCCCGCCTCAGCCTCC\n+CAAAGTGCTGGGATTACAGGCCTGAGCCACCGCGCCCGGCCCTCCTTTTTTTTTTTTTTTGAGACGGAGT\n+CTTGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGTGATCTCTGCTCACTGCAAGCTCCGCCTCCCGGGTT\n+CACGCCATTCTCCTGCCTCGGCCTCCCGAGTAGCTGGGACTACAGGCGCCCACCACCACGCCTGGCTAAT\n+TTTTTGTATTTTTAGTAGAGACGGGGTTTCACTGTTTTAGCCAGGATGGTCTCAATCTCCTGACTTCGTG\n+ATCCGCCCGCCTCGGCCTCCCAAAGGACTGGGATTACAGGCATGAGCCACCGCGCCCGGCCGCCATTTTT\n+CTTAATTGTTGCTCTAGGAGGCTTTTAGAGCGGAAGATTGTGCTGAAGATGTCACTGGGATTTTTAACAA\n+GCCCTAAACGTGCTTGGTTACTAAAGTTAATATAAAGTTACAGTGTTGCCACACAGATAACATCAATTAT\n+AGTTGCAGGCAAAATCTTTAGTAGTTTGCCAACAACAGTATAATTACTAGACACTGAGATATTAGATAAA\n+TTTATTCTGGGGAATTAAAAGGCGTTTAAATACTAATCTTGTTTTGACTGCCTTATTTTCTAGGGCTCAG\n+TAATTAAAGTGTTATTTCTGCATCTTTATGCTAAAGTAGCAAAGTTGTGAATAAGGCAGGGAGAAAAGGG\n+GGTAAAATTAGGGAATTTAGTAGACTGTCTTAAGTCTTAACTTGAAACTTCTTCAAGAAAGAGAAGATGG\n+TCAGTTGTGGTTCATCAGTAAACTAAGACCTTTGAAACATTTTGTGATAGTCATTTTGATAGCATTTACG\n+TTTATCTTGGATAAAAGTCTTCAAGTCAGCCTTTATGGAAATTATAGTGTCCTGCCCTTCTCCAGCTCTC\n+CTAAGGTTAGTGTTGGCTACACCATTAGGAATAAAAAACCTGAAGACAAAAGGGAAAAGATACAAAAGTT\n+GTATGGAGAGGATAAAGGAAGGATCATTTTTCCTTTGGACTTGGAAAAAAATACATTTAGGTCTTAGATG\n+TTAAAGCTGCCTTTTAGATTTATGAAGGAAAATTTAGGAAAACAGCCTTGA
AACCTCATGGGATTTGGAT\n+GGTAACCAAGAGGCAAGGCCTGGAGGTGAGAGTGAGCAGAGCAAGTTATTGGGGAAATCAGGAGATAGCC\n+TGAACTCATTCTTCTAGAGAGTCCTGTAAGTGGTTATGATGATAATCATTGTAATAAAGGTAGGGAATAT\n+AACGTGACTGACTACTTCAAACAGTTCATTTTCTTGGCTCTTAGTACCCCATACTCTCCTAATTGCCTTC\n+CTTCCTCCCAGGTTGGTTATTCTTTACCTCCTTCGGTTAAAGTCCACCAGGCTTTTGTCCTCAGATCTCT\n+CTCTCCAGAAAAGAGAGACGCCCTAGATGATTTCTTCTAGTTCTGGGGGCTTAAATGTCATTGAGATGGT\n+GATAACTCCCAGATTCACTTCTGTAGCCTGGACCTCTCTCTAGAAGTATAGACTTGGCTACCCAGCTGCC\n+AACTGAACTCTACTTGAAGGCCACAGAAGCATCTCAAACTCAACATGTCCTAAACATAACTATTGTTTTT\n+TCCCTTCCTGACCACCCAGAGTGGCTCCTCCTTTATTTTTATCAATTCTTTTAGTTGCCTAGACCAAAAA\n+TATTGGAGTCATTCTTACCTTCTCCGTTTTTCTCATAGGCCACATATATCAATCAGTAAGTCTTGCCATC\n+TTTGCCTTCACAATATATCCTGAATCAGTTACTTTTTGCCATTTCTACTCTAGTCAAGATAAGTACTGCA\n+ATAGCCTCCTGTCTCTCTGCTTTTGCCCTTGTTTCCATGCAGTAGCCAGTCATATTTTAGAAATATACAT\n+CAGGGCTGGGTGTGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGTGGGCGGATCACGAG\n+GTCAAGAGTTGGAGACCAGCCTGGCCAACGTGGTGAAACCCCGTCTCTACTAAAAATACAGAAATTAGCC\n+GGGCATGGTGGTGCACGCCCATAGTCCCAGCTACTCAGGAAGCCGAGGCAGGAGGATCGCTTGAACCTGG\n+GAGGCGAAGTTTGCAGTGAGCTGAGATCGTGCCACTGCACTCTAGCCTGCTGGGCGACAGAGTGAGACTC\n+TGTCTCAAAGAAAAAAAAGAAATATAAATCAGATTATGTCACCCCTTTGCAATCAGATCATGTCACCCCT\n+CTGCTTAAAATTCTTCACTGGTTTCCTGTCACATTTATTTCATTTTAATGTAATTTTAAATTTTTTTTGT\n+AGTCAGAGTCAGAGCCTCTGTCGCCCAGGCTGGAGTGCAGTGGCATGATCTAGTCACACTTGCAGCCTCT\n+GCCTCCCAGGTTCAAGCTGTTCTCGTGTCTCAGCCTCCCAAATAGCTGGGACTACAGGTGCGTGCTACCA\n+CACCCAGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTTGCCATGTTGGCCAGGCTGGTATCGAACTC\n+CTGAGCTCAAGCAATATGTCCACCTAGGCCTCCCAAAGTGCTAGGATTACAGGTGTGAACCACTGCGACC\n+AGTCCTTATCACTTTTAGAATAAAACTAAAAATCCTCACTTTGGCCACACAGCCTCACAGAATTTGGCCT\n+CTGGCTAGCTATCTGATTTCACATTTGATCATTGTTTGCCCACCTGGCTATACTGCTGGTCTTGCTGTTC\n+CTTAAATTTCATTCAGATTCCTGCTCAGATATCTCCTCAGAGGTGGCAGCTTGTTTGCTTTTATCTAAAA\n+TAGCCACTTCTACCACTTTCACTGTGATCTTTTATCTCCTAACCCGGCTTTATTTTTCTTTCCAACATTA\n+TGCTATTTGTATTAGTTAATTGTCTCATGCTGTCATTAGAATGTAAGCTCCATGTAGGCAGGGACTTGTT\n+TTCACTAGGTATCTATAGGTCATGGTAGACATGCAAGTATTTGTTAAATGAAAGAATATTTTGTACATTT\n+ACTTTAT
GCTAGGCCTTGAGCCAAAATGTGAGAATTAAATGACAGTTCTGTATAGTTAGGCACTATTATT\n+CAGCAGAACTCAAGCGCAGAGCATCAGACTCAGAGCATCTAGCGACTCAT\n'
b
diff -r 000000000000 -r 30fc9f2bcbe4 test-data/iris.bam
b
Binary file test-data/iris.bam has changed
b
diff -r 000000000000 -r 30fc9f2bcbe4 test-data/sniffles.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sniffles.vcf Tue Jan 19 20:30:21 2021 +0000
b
@@ -0,0 +1,37 @@
+##fileformat=VCFv4.1
+##source=Sniffles
+##fileDate=20190906
+##contig=<ID=chr1,length=110720>
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
+##ALT=<ID=TRA,Description="Translocation">
+##ALT=<ID=INS,Description="Insertion">
+##FILTER=<ID=UNRESOLVED,Description="An insertion that is longer than the read and thus we cannot predict the full size.">
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
+##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends">
+##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
+##INFO=<ID=REF_strand,Number=2,Type=Integer,Description="Length of the SV">
+##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=RNAMES,Number=.,Type=String,Description="Names of reads supporting SVs (comma separated)">
+##INFO=<ID=SEQ,Number=1,Type=String,Description="Extracted sequence from the best representative read.">
+##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description="STD of the start breakpoints across the reads.">
+##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description="STD of the stop breakpoints across the reads.">
+##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description="Kurtosis value of the start breakpoints across the reads.">
+##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description="Kurtosis value of the stop breakpoints across the reads.">
+##INFO=<ID=SUPTYPE,Number=A,Type=String,Description="Type by which the variant is supported.(SR,ALN,NR)">
+##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency.">
+##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads">
+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT aln_sorted.bam
+chr1 6930 0 N GGGGGGCGGCCCCCCCCGGGGGGGGCCCCCCCCCGGGGGGGGGCCCCCCCGCGGGGGGGGGCCCCCCCCGGGGG . PASS PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=6930;STD_quant_start=0.000000;STD_quant_stop=1.346291;Kurtosis_quant_start=8.663097;Kurtosis_quant_stop=3.560172;SVTYPE=INS;RNAMES=CHR1_1354_+,CHR1_1732_-,CHR1_2052_-,CHR1_2093_-,CHR1_2095_+,CHR1_2109_-,CHR1_2620_+,CHR1_2797_-,CHR1_3081_-,CHR1_3198_+,CHR1_3222_-,CHR1_3277_+,CHR1_3405_+,CHR1_3951_+,CHR1_3993_+,CHR1_4076_+,CHR1_4421_+,CHR1_4453_+,CHR1_4485_-,CHR1_4606_+,CHR1_4654_-,CHR1_4663_+,CHR1_4698_+,CHR1_4981_-,CHR1_5004_+,CHR1_5087_-,CHR1_5139_+,CHR1_5805_-,CHR1_5942_-,CHR1_5979_-,CHR1_601_-,CHR1_686_-;SUPTYPE=AL,SR;SVLEN=72;STRANDS=+-;RE=32;REF_strand=32;AF=1 GT:DR:DV 1/1:0:32
+chr1 13790 1 ACCATGTTGGCCAAGATGTTCTCGATCTCCTGACCTTGTGATCTGCCTGCCTCGTCCTCCCAAAGTGCTG N . PASS PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=13860;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=5.511580;Kurtosis_quant_stop=7.842635;SVTYPE=DEL;RNAMES=CHR1_10314_-,CHR1_10536_-,CHR1_10726_+,CHR1_10836_-,CHR1_10943_-,CHR1_11036_+,CHR1_11496_+,CHR1_11511_+,CHR1_11624_+,CHR1_11653_-,CHR1_11724_+,CHR1_11735_-,CHR1_11742_+,CHR1_11912_+,CHR1_11924_-,CHR1_12188_+,CHR1_12517_+,CHR1_12570_-,CHR1_12685_+,CHR1_13137_+,CHR1_13143_+,CHR1_13164_+,CHR1_13172_+,CHR1_13264_+,CHR1_13410_+,CHR1_13416_+,CHR1_13449_-,CHR1_320_+,CHR1_3277_+,CHR1_4421_+,CHR1_4654_-,CHR1_5139_+,CHR1_5979_-,CHR1_7604_-,CHR1_7697_+,CHR1_7747_-,CHR1_7838_-,CHR1_8144_+,CHR1_8491_-,CHR1_8598_+,CHR1_9029_+,CHR1_9687_-,CHR1_9723_+,CHR1_9750_+,CHR1_9968_-;SUPTYPE=AL;SVLEN=-70;STRANDS=+-;RE=45;REF_strand=45;AF=1 GT:DR:DV 1/1:0:45
b
diff -r 000000000000 -r 30fc9f2bcbe4 test-data/test_out.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out.vcf Tue Jan 19 20:30:21 2021 +0000
b
@@ -0,0 +1,39 @@
+##fileformat=VCFv4.1
+##source=Sniffles
+##fileDate=20190906
+##contig=<ID=chr1,length=110720>
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
+##ALT=<ID=TRA,Description="Translocation">
+##ALT=<ID=INS,Description="Insertion">
+##FILTER=<ID=UNRESOLVED,Description="An insertion that is longer than the read and thus we cannot predict the full size.">
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
+##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends">
+##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
+##INFO=<ID=REF_strand,Number=2,Type=Integer,Description="Length of the SV">
+##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=RNAMES,Number=.,Type=String,Description="Names of reads supporting SVs (comma separated)">
+##INFO=<ID=SEQ,Number=1,Type=String,Description="Extracted sequence from the best representative read.">
+##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description="STD of the start breakpoints across the reads.">
+##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description="STD of the stop breakpoints across the reads.">
+##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description="Kurtosis value of the start breakpoints across the reads.">
+##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description="Kurtosis value of the stop breakpoints across the reads.">
+##INFO=<ID=SUPTYPE,Number=A,Type=String,Description="Type by which the variant is supported.(SR,ALN,NR)">
+##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency.">
+##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV.">
+##INFO=<ID=IRIS_PROCESSED,Number=1,Type=String,Description="Whether or not a variant has been considered by Iris for refinement">
+##INFO=<ID=IRIS_REFINED,Number=1,Type=String,Description="Whether or not a variant has been refined by Iris">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads">
+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT aln_sorted.bam
+chr1 6932 0 A ATGGGGGGAGGCCCCCCCCGGGGGGGGGCCCCCCCCAGGGGGGGGGCCCCCCCCGGGGGGGGCCCCCCCCCGGGGGG . PASS PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=6932;STD_quant_start=0.000000;STD_quant_stop=1.346291;Kurtosis_quant_start=8.663097;Kurtosis_quant_stop=3.560172;SVTYPE=INS;RNAMES=CHR1_1354_+,CHR1_1732_-,CHR1_2052_-,CHR1_2093_-,CHR1_2095_+,CHR1_2109_-,CHR1_2620_+,CHR1_2797_-,CHR1_3081_-,CHR1_3198_+,CHR1_3222_-,CHR1_3277_+,CHR1_3405_+,CHR1_3951_+,CHR1_3993_+,CHR1_4076_+,CHR1_4421_+,CHR1_4453_+,CHR1_4485_-,CHR1_4606_+,CHR1_4654_-,CHR1_4663_+,CHR1_4698_+,CHR1_4981_-,CHR1_5004_+,CHR1_5087_-,CHR1_5139_+,CHR1_5805_-,CHR1_5942_-,CHR1_5979_-,CHR1_601_-,CHR1_686_-;SUPTYPE=AL,SR;SVLEN=76;STRANDS=+-;RE=32;REF_strand=32;AF=1;IRIS_PROCESSED=1;IRIS_REFINED=1 GT:DR:DV 1/1:0:32
+chr1 13790 1 CACCATGTTGGCCAAGATGTTCTCGATCTCCTGACCTTGTGATCTGCCTGCCTCGTCCTCCCAAAGTGCTG C . PASS PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=13860;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=5.511580;Kurtosis_quant_stop=7.842635;SVTYPE=DEL;RNAMES=CHR1_10314_-,CHR1_10536_-,CHR1_10726_+,CHR1_10836_-,CHR1_10943_-,CHR1_11036_+,CHR1_11496_+,CHR1_11511_+,CHR1_11624_+,CHR1_11653_-,CHR1_11724_+,CHR1_11735_-,CHR1_11742_+,CHR1_11912_+,CHR1_11924_-,CHR1_12188_+,CHR1_12517_+,CHR1_12570_-,CHR1_12685_+,CHR1_13137_+,CHR1_13143_+,CHR1_13164_+,CHR1_13172_+,CHR1_13264_+,CHR1_13410_+,CHR1_13416_+,CHR1_13449_-,CHR1_320_+,CHR1_3277_+,CHR1_4421_+,CHR1_4654_-,CHR1_5139_+,CHR1_5979_-,CHR1_7604_-,CHR1_7697_+,CHR1_7747_-,CHR1_7838_-,CHR1_8144_+,CHR1_8491_-,CHR1_8598_+,CHR1_9029_+,CHR1_9687_-,CHR1_9723_+,CHR1_9750_+,CHR1_9968_-;SUPTYPE=AL;SVLEN=-70;STRANDS=+-;RE=45;REF_strand=45;AF=1;IRIS_PROCESSED=1;IRIS_REFINED=0 GT:DR:DV 1/1:0:45
b
diff -r 000000000000 -r 30fc9f2bcbe4 tool-data/all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample Tue Jan 19 20:30:21 2021 +0000
b
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
\ No newline at end of file
b
diff -r 000000000000 -r 30fc9f2bcbe4 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Jan 19 20:30:21 2021 +0000
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- all_fasta: one row per reference FASTA file, read from tool-data/all_fasta.loc -->
+    <table comment_char="#" name="all_fasta">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc"/>
+    </table>
+</tables>
\ No newline at end of file
b
diff -r 000000000000 -r 30fc9f2bcbe4 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Tue Jan 19 20:30:21 2021 +0000
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- all_fasta for tool tests: rows are read from the all_fasta.loc shipped in test-data -->
+    <table comment_char="#" name="all_fasta">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc"/>
+    </table>
+</tables>
\ No newline at end of file