Repository 'jasminesv'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/jasminesv

Changeset 0:630e2929a131 (2021-01-20)
Next changeset 1:2b62154e39c8 (2021-04-29)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/jasminesv/ commit eb5baa10589b31c422ec8b8980617a3f375608ad"
added:
jasminesv.xml
macros.xml
test-data/a.vcf
test-data/all_fasta.loc
test-data/b.vcf
test-data/c.vcf
test-data/chr_norm_file.txt
test-data/d.vcf
test-data/genome.fa
test-data/out1.vcf
tool_data/all_fasta.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 630e2929a131 jasminesv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jasminesv.xml Wed Jan 20 19:49:40 2021 +0000
[
b'@@ -0,0 +1,197 @@\n+<?xml version="1.0"?>\n+<tool id="jasminesv" name="JasmineSV" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">\n+    <description>Merge structural variants across samples</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+\n+    <expand macro="requirements"/>\n+    <expand macro="version_command"/>\n+\n+    <command detect_errors="exit_code"><![CDATA[\n+    #if $dup_to_ins.dup_to_ins:\n+        @REF_FASTA@\n+    #end if\n+\n+    jasmine\n+    ## Optional params\n+    \'max_dist=${max_dist}\'\n+    #if float($max_dist_linear) != 0.0:\n+        \'max_dist_linear=${max_dist_linear}\'\n+    #end if\n+    \'min_dist=${min_dist}\'\n+    \'kd_tree_norm=${kd_tree_norm}\'\n+    \'min_seq_id=${min_seq_id}\'\n+    \'k_jaccard=${k_jaccard}\'\n+    \'max_dup_length=${max_dup_length}\'\n+    \'min_support=${min_support}\'\n+    threads=\\${GALAXY_SLOTS:-4}\n+    \'spec_reads=${spec_reads}\'\n+    \'spec_len=${spec_len}\'\n+    #if $dup_to_ins.dup_to_ins:\n+        \'genome_file=reference.fa\'\n+    #end if\n+\n+    ## Flags:\n+    \'${ignore_strand}\'\n+    \'${ignore_type}\'\n+    #if $dup_to_ins.dup_to_ins:\n+        \'${dup_to_ins}\'\n+    #end if\n+    \'${mark_specific}\'\n+    \'${pre_normalize}\'\n+    \'${use_edit_dist}\'\n+    \'${preprocess_only}\'\n+    \'${postprocess_only}\'\n+    \'${keep_var_ids}\'\n+    \'${use_end}\'\n+    \'${output_genotypes}\'\n+    \'${ignore_merged_inputs}\'\n+    \'${centroid_merging}\'\n+    \'${clique_merging}\'\n+    \'${allow_intrasample}\'\n+    \'${normalize_type}\'\n+    \'${leave_breakpoints}\'\n+    \'${require_first_sample}\'\n+\n+    \'${normalize.normalize_chrs}\'\n+    #if $normalize.normalize_chrs and $normalize.chr_norm_file:\n+        \'chr_norm_file=${normalize.chr_norm_file}\'\n+    #end if\n+\n+    ## Required args\n+    file_list=\'${vcffilelist}\'\n+    out_file=\'${out_vcf}\'\n+    ]]></command>\n+    <configfiles>\n+        <configfile 
name="vcffilelist">#\n+#for $vcf_file in $vcf_list:\n+${vcf_file}\n+#end for\n+        </configfile>\n+    </configfiles>\n+    <inputs>\n+        <!--TODO in future versions (?)-\n+        add IrisSV support for post-processing. For now just make it accessible as a separate tool and allow users to run independently\n+        -->\n+        <!--\n+            Input files\n+        -->\n+        <param name="vcf_list" type="data" multiple="true" format="vcf" label="VCF file(s) to merge" help=""/>\n+        <!--\n+            Params\n+        -->\n+        <param argument="max_dist" type="integer" value="1000" min="0" label="The maximum distance variants can be apart when being merged" help="Setting both max_dist_linear and max_dist sets thresholds to minimum of max_dist and max_dist_linear * sv_length"/>\n+        <param argument="min_dist" type="integer" value="-1" min="-1" label="The minimum distance threshold a variant can have when using max_dist_linear" />\n+        <param argument="max_dist_linear" type="float" value="0." min="0.0" label="Make max_dist this proportion of the length of each variant" help="Setting both max_dist_linear and max_dist sets thresholds to minimum of max_dist and max_dist_linear * sv_length"/>\n+        <param argument="kd_tree_norm" type="integer" value="2" min="1" label="The power to use in kd-tree distances (1 is Manhattan, 2 is Euclidean, etc.)" />\n+        <param argument="min_seq_id" type="float" value="0." min="0." 
label="The minimum sequence identity for two insertions to be merged" />\n+        <param argument="k_jaccard" type="integer" value="9" min="1" label="The kmer size to use when computing Jaccard similarity of insertions" />\n+        <param argument="max_dup_length" type="integer" value="10000" min="0" label="The maximum length of duplication that can be converted to an insertion" />\n+        <param argument="min_support" type="integer" value="1" min="1" label="The minimum number of callsets a variant must be in to be output" />\n+        <param argument="spec_reads" type="integer" value="10" min="1" label="The minimum number of reads a variant needs to be in the specific callset" '..b'rging" type="boolean" checked="false" truevalue="--centroid_merging" falsevalue="" label="Require every group to have a centroid which is within the distance threshold of each variant" />\n+        <param argument="--clique_merging" type="boolean" checked="false" truevalue="--clique_merging" falsevalue="" label="Require every group to have each pair within in it be mergeable" />\n+        <param argument="--allow_intrasample" type="boolean" checked="false" truevalue="--allow_intrasample" falsevalue="" label="Allow variants in the same sample to be merged" />\n+        <param argument="--normalize_type" type="boolean" checked="false" truevalue="--normalize_type" falsevalue="" label="Convert all variants to INS/DEL/DUP/INV/TRA" />\n+        <param argument="--leave_breakpoints" type="boolean" checked="false" truevalue="--leave_breakpoints" falsevalue="" label="Leave breakpoints as they are even if they are inconsistent" />\n+        <param argument="--require_first_sample" type="boolean" checked="false" truevalue="--require_first_sample" falsevalue="" label="Only output merged variants which include a variant from the first sample" />\n+        <conditional name="normalize">\n+            <param argument="--normalize_chrs" type="select" checked="false" label="Whether to normalize 
chromosome names" help="(to NCBI standards - without \'chr\' - by default)">\n+                <option value="--normalize_chrs">Normalize chromosome names</option>\n+                <option value="" selected="true">Don\'t normalize chromosome names</option>\n+            </param>\n+            <when value="--normalize_chrs">\n+                <param name="chr_norm_file" type="data" format="txt,tsv" value="" label="A file containing chromosome name mappings" optional="true"/>\n+            </when>\n+            <when value=""/>\n+        </conditional>\n+    </inputs>\n+    <outputs>\n+        <!-- standard -->\n+        <data name="out_vcf" format="vcf" label="${tool.name} on ${on_string}: Result"/>\n+    </outputs>\n+    <tests>\n+        <!-- #1 default -->\n+        <test expect_num_outputs="1">\n+            <param name="vcf_list" value="a.vcf,b.vcf" ftype="vcf"/>\n+            <output name="out_vcf" file="out1.vcf"/>\n+        </test>\n+        <test expect_num_outputs="1">\n+            <param name="vcf_list" value="c.vcf,d.vcf" ftype="vcf"/>\n+            <conditional name="normalize">\n+                <param name="normalize_chrs" value="--normalize_chrs"/>\n+                <param name="chr_norm_file" value="chr_norm_file.txt"/>\n+            </conditional>\n+            <output name="out_vcf" file="out1.vcf"/>\n+        </test>\n+        <test expect_num_outputs="1">\n+            <param name="vcf_list" value="a.vcf,b.vcf" ftype="vcf"/>\n+            <conditional name="dup_to_ins">\n+                <param name="dup_to_ins" value="--dup_to_ins"/>\n+                <conditional name="reference_source">\n+                    <param name="reference_source_selector" value="history"/>\n+                    <param name="ref_file" ftype="fasta" value="genome.fa"/>\n+                </conditional>\n+            </conditional>\n+            <output name="out_vcf" file="out1.vcf"/>\n+        </test>\n+        <test expect_num_outputs="1">\n+            <param 
name="vcf_list" value="a.vcf,b.vcf" ftype="vcf"/>\n+            <conditional name="dup_to_ins">\n+                <param name="dup_to_ins" value="--dup_to_ins"/>\n+                <conditional name="reference_source">\n+                    <param name="reference_source_selector" value="cached"/>\n+                    <param name="ref_file" value="jasmine"/>\n+                </conditional>\n+            </conditional>\n+            <output name="out_vcf" file="out1.vcf"/>\n+        </test>\n+\n+    </tests>\n+    <help><![CDATA[\n+.. class:: infomark\n+\n+**What it does**\n+\n+@WID@\n+\n+**Input**\n+\n+- Multiple VCF files to be merged\n+\n+**Output**\n+\n+- Merged Variants (VCF)\n+\n+**References**\n+\n+@REFERENCES@\n+    ]]></help>\n+    <expand macro="citations"/>\n+</tool>\n'
b
diff -r 000000000000 -r 630e2929a131 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Jan 20 19:49:40 2021 +0000
[
@@ -0,0 +1,66 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">1.0.11</token>
+    <token name="@VERSION_SUFFIX@">0</token>    
+    <token name="@PROFILE@">20.01</token>
+    <!-- Declares the jasminesv package dependency, pinned to the TOOL_VERSION token. --> <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">jasminesv</requirement>
+        </requirements>
+    </xml>
+    <!-- Version probe: runs bare "jasmine". NOTE(review): confirm this prints only a version string and not the full usage text. --> <xml name="version_command">
+        <version_command>jasmine</version_command>
+    </xml>
+    <!-- BibTeX "online" citation for Jasmine, pointing at the upstream GitHub repository. --> <xml name="citations">
+        <citations>
+            <citation type="bibtex">@online{jasmine,
+              author = {Melanie Kirsche},
+              title = {jasmine},
+              year = 2021,
+              url = {https://github.com/mkirsche/Jasmine},
+              urldate = {2021-01-13}
+            }</citation>
+        </citations>
+    </xml>
+    <!--
+        Command: symlinks the chosen reference (history dataset, or the path field of a built-in entry) to reference.fa; the trailing "&&" chains the next command
+    -->
+    <token name="@REF_FASTA@"><![CDATA[
+        #if $dup_to_ins.reference_source.reference_source_selector == 'history':
+            ln -f -s '$dup_to_ins.reference_source.ref_file' reference.fa &&
+        #else:
+            ln -f -s '$dup_to_ins.reference_source.ref_file.fields.path' reference.fa &&
+        #end if
+    ]]></token>
+
+    <!-- Reference genome chooser: a built-in entry from the all_fasta data table, or a FASTA/FASTQ dataset from the history. --> <xml name="reference">
+        <conditional name="reference_source">
+            <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
+                <option value="cached">Use a built-in genome</option>
+                <option value="history">Use a genome from history</option>
+            </param>
+            <when value="cached">
+                <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2"/>
+                        <validator type="no_options" message="No reference genomes are available"/>
+                    </options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="ref_file" type="data" format="fasta,fastq" label="Use the following dataset as the reference sequence" help="You can upload a FASTA or FASTQ sequence to the history and use it as reference"/>
+            </when>
+        </conditional>
+    </xml>
+    <!--
+        Help
+    -->
+
+    <token name="@WID@"><![CDATA[
+*Jasmine*, or Jointly Accurate Sv Merging with Intersample Network Edges, is a tool used to merge structural variants (SVs) across samples. Each sample has a number of SV calls, consisting of position information (chromosome, start, end, length), type and strand information, and a number of other values. Jasmine represents the set of all SVs across samples as a network, and uses a modified minimum spanning forest algorithm to determine the best way of merging the variants such that each merged variant represents a set of analogous variants occurring in different samples.
+]]></token>
+    <token name="@REFERENCES@"><![CDATA[
+More information is available in the `GitHub repository <https://github.com/mkirsche/Jasmine>`_.
+    ]]></token>
+</macros>
b
diff -r 000000000000 -r 630e2929a131 test-data/a.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/a.vcf Wed Jan 20 19:49:40 2021 +0000
b
@@ -0,0 +1,15 @@
+##fileformat=VCFv4.1
+##contig=<ID=1,length=400>
+##contig=<ID=2,length=400>
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
+##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=IS_SPECIFIC,Number=1,Type=String,Description="Whether or not a variant has enough read support and length to be specific">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+1 100 1 CACGTACGTACGTACGTACGTACGTACTGACGTACGT C . PASS PRECISE;CHR2=1;END=136;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-36;STRANDS=+-;RE=12;IS_SPECIFIC=1 GT 1/1
+1 200 2 C CACGTACGTACGTACGTACGTACGTACTGACGTACGT . PASS PRECISE;CHR2=1;END=200;SVTYPE=INS;SUPTYPE=AL;SVLEN=36;STRANDS=+-;RE=10;IS_SPECIFIC=1 GT 1/1
+1 300 3 N ]2:1000000]N . PASS PRECISE;CHR2=2;END=3000000;SVTYPE=BND;SUPTYPE=AL;SVLEN=1;STRANDS=-+;RE=15;IS_SPECIFIC=1 GT 1/1
b
diff -r 000000000000 -r 630e2929a131 test-data/all_fasta.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc Wed Jan 20 19:49:40 2021 +0000
b
@@ -0,0 +1,19 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
+jasmine jasmine jasmine ${__HERE__}/genome.fa
\ No newline at end of file
b
diff -r 000000000000 -r 630e2929a131 test-data/b.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/b.vcf Wed Jan 20 19:49:40 2021 +0000
b
@@ -0,0 +1,15 @@
+##fileformat=VCFv4.1
+##contig=<ID=1,length=400>
+##contig=<ID=2,length=400>
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
+##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=IS_SPECIFIC,Number=1,Type=String,Description="Whether or not a variant has enough read support and length to be specific">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+1 100 1 CACGTACGTACGTACGTACGTACGTACTGACGT C . PASS PRECISE;CHR2=1;END=132;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-32;STRANDS=+-;RE=12;IS_SPECIFIC=1 GT 1/1
+1 205 2 C CACGTACGTACGTACGTACGTACGTACTGACGTACGTACGT . PASS PRECISE;CHR2=1;END=205;SVTYPE=INS;SUPTYPE=AL;SVLEN=40;STRANDS=+-;RE=10;IS_SPECIFIC=1 GT 1/1
+1 300 3 N ]2:1000000]N . PASS PRECISE;CHR2=2;END=3000000;SVTYPE=BND;SUPTYPE=AL;SVLEN=1;STRANDS=-+;RE=15;IS_SPECIFIC=1 GT 1/1
b
diff -r 000000000000 -r 630e2929a131 test-data/c.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/c.vcf Wed Jan 20 19:49:40 2021 +0000
b
@@ -0,0 +1,15 @@
+##fileformat=VCFv4.1
+##contig=<ID=1,length=400>
+##contig=<ID=2,length=400>
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
+##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=IS_SPECIFIC,Number=1,Type=String,Description="Whether or not a variant has enough read support and length to be specific">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+chr1 100 1 CACGTACGTACGTACGTACGTACGTACTGACGTACGT C . PASS PRECISE;CHR2=1;END=136;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-36;STRANDS=+-;RE=12;IS_SPECIFIC=1 GT 1/1
+chr1 200 2 C CACGTACGTACGTACGTACGTACGTACTGACGTACGT . PASS PRECISE;CHR2=1;END=200;SVTYPE=INS;SUPTYPE=AL;SVLEN=36;STRANDS=+-;RE=10;IS_SPECIFIC=1 GT 1/1
+chr1 300 3 N ]2:1000000]N . PASS PRECISE;CHR2=2;END=3000000;SVTYPE=BND;SUPTYPE=AL;SVLEN=1;STRANDS=-+;RE=15;IS_SPECIFIC=1 GT 1/1
b
diff -r 000000000000 -r 630e2929a131 test-data/chr_norm_file.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chr_norm_file.txt Wed Jan 20 19:49:40 2021 +0000
b
@@ -0,0 +1,2 @@
+chr1 1
+chr2 2
b
diff -r 000000000000 -r 630e2929a131 test-data/d.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/d.vcf Wed Jan 20 19:49:40 2021 +0000
b
@@ -0,0 +1,15 @@
+##fileformat=VCFv4.1
+##contig=<ID=1,length=400>
+##contig=<ID=2,length=400>
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
+##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=IS_SPECIFIC,Number=1,Type=String,Description="Whether or not a variant has enough read support and length to be specific">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+chr1 100 1 CACGTACGTACGTACGTACGTACGTACTGACGT C . PASS PRECISE;CHR2=1;END=132;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-32;STRANDS=+-;RE=12;IS_SPECIFIC=1 GT 1/1
+chr1 205 2 C CACGTACGTACGTACGTACGTACGTACTGACGTACGTACGT . PASS PRECISE;CHR2=1;END=205;SVTYPE=INS;SUPTYPE=AL;SVLEN=40;STRANDS=+-;RE=10;IS_SPECIFIC=1 GT 1/1
+chr1 300 3 N ]2:1000000]N . PASS PRECISE;CHR2=2;END=3000000;SVTYPE=BND;SUPTYPE=AL;SVLEN=1;STRANDS=-+;RE=15;IS_SPECIFIC=1 GT 1/1
b
diff -r 000000000000 -r 630e2929a131 test-data/genome.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome.fa Wed Jan 20 19:49:40 2021 +0000
b
@@ -0,0 +1,20 @@
+>1
+TAACCCTAACACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC
+TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCAA
+CACGTACGTACGTACGTACGTACGTACTGACGTACGT
+AACCCTAACCCAA
+CCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAAC
+CCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCT
+CACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTAAACCC
+TAACCCTAACCCTAACCCTAACCCTAACCCCAACCCCAACCCCAACCCCA
+ACCCCAACCCCAACCCTAACCCCTAACCCTAACCCTAACCCTACCCTAAC
+>2
+TAACCCTAACACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC
+TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCAA
+CACGTACGTACGTACGTACGTACGTACTGACGTACGT
+AACCCTAACCCAA
+CCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAAC
+CCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCT
+CACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTAAACCC
+TAACCCTAACCCTAACCCTAACCCTAACCCCAACCCCAACCCCAACCCCA
+ACCCCAACCCCAACCCTAACCCCTAACCCTAACCCTAACCCTACCCTAAC
\ No newline at end of file
b
diff -r 000000000000 -r 630e2929a131 test-data/out1.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out1.vcf Wed Jan 20 19:49:40 2021 +0000
b
@@ -0,0 +1,27 @@
+##fileformat=VCFv4.1
+##contig=<ID=1,length=400>
+##contig=<ID=2,length=400>
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
+##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=IS_SPECIFIC,Number=1,Type=String,Description="Whether or not a variant has enough read support and length to be specific">
+##INFO=<ID=SUPP_VEC,Number=1,Type=String,Description="Vector of supporting samples">
+##INFO=<ID=SUPP_VEC_EXT,Number=1,Type=String,Description="Vector of supporting samples, potentially extended across multiple merges">
+##INFO=<ID=SUPP,Number=1,Type=String,Description="Number of samples supporting the variant">
+##INFO=<ID=SUPP_EXT,Number=1,Type=String,Description="Number of samples supporting the variant, potentially extended across multiple merges">
+##INFO=<ID=IDLIST,Number=.,Type=String,Description="Variant IDs of variants merged to make this call (at most 1 per sample)">
+##INFO=<ID=IDLIST_EXT,Number=.,Type=String,Description="Variant IDs of variants merged, potentially extended across multiple merges">
+##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="">
+##INFO=<ID=STARTVARIANCE,Number=1,Type=String,Description="Variance of start position for variants merged into this one">
+##INFO=<ID=ENDVARIANCE,Number=1,Type=String,Description="Variance of end position for variants merged into this one">
+##INFO=<ID=AVG_START,Number=1,Type=String,Description="Average start position for variants merged into this one">
+##INFO=<ID=AVG_END,Number=1,Type=String,Description="Average end position for variants merged into this one">
+##INFO=<ID=AVG_LEN,Number=1,Type=String,Description="Average length for variants merged into this one">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+1 100 0_1 CACGTACGTACGTACGTACGTACGTACTGACGTACGT C . PASS PRECISE;CHR2=1;END=136;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-36;STRANDS=+-;RE=12;IS_SPECIFIC=1;STARTVARIANCE=0.000000;ENDVARIANCE=4.000000;AVG_LEN=-34.000000;AVG_START=100.000000;AVG_END=134.000000;SUPP_VEC_EXT=11;IDLIST_EXT=1,1;SUPP_EXT=2;SUPP_VEC=11;SUPP=2;SVMETHOD=JASMINE;IDLIST=1,1 GT 1/1
+1 200 0_2 C CACGTACGTACGTACGTACGTACGTACTGACGTACGT . PASS PRECISE;CHR2=1;END=200;SVTYPE=INS;SUPTYPE=AL;SVLEN=36;STRANDS=+-;RE=10;IS_SPECIFIC=1;STARTVARIANCE=6.250000;ENDVARIANCE=6.250000;AVG_LEN=38.000000;AVG_START=202.500000;AVG_END=202.500000;SUPP_VEC_EXT=11;IDLIST_EXT=2,2;SUPP_EXT=2;SUPP_VEC=11;SUPP=2;SVMETHOD=JASMINE;IDLIST=2,2 GT 1/1
+1 300 0_3 N ]2:1000000]N . PASS PRECISE;CHR2=2;END=3000000;SVTYPE=BND;SUPTYPE=AL;SVLEN=1;STRANDS=-+;RE=15;IS_SPECIFIC=1;STARTVARIANCE=0.000000;ENDVARIANCE=0.000000;AVG_LEN=1.000000;AVG_START=300.000000;AVG_END=3000000.000000;SUPP_VEC_EXT=11;IDLIST_EXT=3,3;SUPP_EXT=2;SUPP_VEC=11;SUPP=2;SVMETHOD=JASMINE;IDLIST=3,3 GT 1/1
b
diff -r 000000000000 -r 630e2929a131 tool_data/all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data/all_fasta.loc.sample Wed Jan 20 19:49:40 2021 +0000
b
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
\ No newline at end of file
b
diff -r 000000000000 -r 630e2929a131 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Jan 20 19:49:40 2021 +0000
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- all_fasta table (value, dbkey, name, path), read from tool-data/all_fasta.loc. NOTE(review): this changeset ships the sample under tool_data/ (underscore); confirm the directory name. -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>
\ No newline at end of file
b
diff -r 000000000000 -r 630e2929a131 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed Jan 20 19:49:40 2021 +0000
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Test variant of the all_fasta table (value, dbkey, name, path): reads test-data/all_fasta.loc, located via the ${__HERE__} placeholder -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+</tables>
\ No newline at end of file