Repository 'basil'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/basil

Changeset 0:e6ef29001647 (2019-05-30)
Next changeset 1:77fc7640abc7 (2024-08-11)
Commit message:
planemo upload commit b89c8017aeef91f940543a1cc7dadb4a85290865
added:
basil.xml
test-data/basil.vcf
test-data/ref.fa
test-data/simulated.bam
b
diff -r 000000000000 -r e6ef29001647 basil.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/basil.xml Thu May 30 21:14:58 2019 -0400
[
@@ -0,0 +1,66 @@
+<tool id="basil" name="basil" version="1.2.0">
+    <description>Breakpoint detection, including large insertions</description>
+    <requirements>
+        <requirement type="package" version="1.2.0">anise_basil</requirement>
+    </requirements>
+    <version_command>basil --version 2>&amp;1 | grep 'basil version' | cut -f 3 -d ' '</version_command>
+    <!-- Datasets are symlinked to fixed names in the working directory. 'out.vcf' links to the
+         output dataset path, so basil writes its result through that link.
+         NOTE(review): presumably needed because basil checks file extensions; confirm. -->
+    <command detect_errors="aggressive"><![CDATA[
+        ln -s '$ref' 'ref.fa' &&
+        ln -s '$bam' 'in.bam' &&
+        ln -s '$vcf' 'out.vcf' &&
+        basil
+        --input-reference 'ref.fa'
+        --input-mapping 'in.bam'
+        --out-vcf 'out.vcf'
+        --oea-min-support-each-side '$min_oea_each_side'
+    ]]></command>
+    <inputs>
+        <param name="ref" argument="--input-reference" type="data" format="fasta" label="Reference Sequence File" help="FASTA file with the reference."/>
+        <param name="bam" argument="--input-mapping" type="data" format="sam,bam" label="Alignment File" help="SAM/BAM file to use as the input."/>
+        <!-- min="1" enforces the lower bound stated in the help text ("In range [1..inf]"). -->
+        <param name="min_oea_each_side" argument="--oea-min-support-each-side" type="integer" value="2" min="1" label="Minimum supporting reads, each side" help="Smallest number of OEA (one-end-anchor) reads on each side to support an insertion.  In range [1..inf].  This is the minimum number of supporting reads (without mapped partners) on each side of an insertion breakpoint required to not be filtered." />
+    </inputs>
+    <outputs>
+        <data name="vcf" format="vcf" />
+    </outputs>
+    <!-- Runs basil on the bundled reference and simulated alignments; the output is expected to match test-data/basil.vcf. -->
+    <tests>
+        <test>
+            <param name="ref" value="ref.fa"/>
+            <param name="bam" value="simulated.bam"/>
+            <param name="min_oea_each_side" value="2"/>
+            <output name="vcf" file="basil.vcf"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+        BASIL is a method to detect breakpoints for structural variants (including insertion breakpoints) from aligned paired HTS reads in BAM format.  Use BASIL to analyze BAM files for tentative insertion sites.
+
+        Note that BASIL will in general detect all kinds of breakpoints, e.g. for inversions on real-world data.
+
+        BASIL VCF fields
+
+        A typical line in the BASIL VCF output might look as follows.
+
+        1 5001 site_0 T <INS>   . PASS IMPRECISE;SVTYPE=INS GSCORE:CLEFT:CRIGHT:OEALEFT:OEARIGHT    46.4256:10:12:35:32
+
+        The first eight columns are as usual in VCF files (ref name, 1-based position, site ID, reference base, abbreviation for long insertion, no assigned quality, passing all filters, imprecise insertion SV).
+
+        The ninth column contains the names of the score values given in the tenth column:
+
+        GSCORE Geometric mean of the sum of "1 + $score" for all of the following scores.
+        CLEFT Number of clipping signatures supporting the site from the left side.
+        CRIGHT Number of clipping signatures supporting the site from the right side.
+        OEALEFT Number of OEA alignments supporting the site from the left.
+        OEARIGHT Number of OEA alignments supporting the site from the right.
+
+        Generally, one should filter for a minimum support of OEA records on each side, e.g. a value of 10 makes sense for a 30x coverage and showed good results on simulated data.
+
+        For a ranking, GSCORE is a suitable measure but we did not develop any statistical model for BASIL matches and it is a mean of pseudocounts only. It carries no statistically precise meaning.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv051</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r e6ef29001647 test-data/basil.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/basil.vcf Thu May 30 21:14:58 2019 -0400
b
@@ -0,0 +1,15 @@
+##fileformat=VCFv4.1
+##source=BASIL
+##reference=ref.fa
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=OEA_ONLY,Number=0,Type=Flag,Description="Breakpoint support by OEA signature only">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##FORMAT=<ID=GSCORE,Number=1,Type=String,Description="Sum of Geometric score means (see BASIL documentation)">
+##FORMAT=<ID=CLEFT,Number=1,Type=String,Description="Clipped alignments supporting call from left side.">
+##FORMAT=<ID=CRIGHT,Number=1,Type=String,Description="Clipped alignments supporting call from right side.">
+##FORMAT=<ID=OEALEFT,Number=1,Type=String,Description="One-end anchored alignments supporting call from left side.">
+##FORMAT=<ID=OEARIGHT,Number=1,Type=String,Description="One-end anchored alignments supporting call from right side.">
+##contig=<ID=1,length=10000>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT individual
+1 5001 site_0 T <INS> . PASS IMPRECISE;SVTYPE=INS GSCORE:CLEFT:CRIGHT:OEALEFT:OEARIGHT 46.4256:10:12:35:32
b
diff -r 000000000000 -r e6ef29001647 test-data/ref.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ref.fa Thu May 30 21:14:58 2019 -0400
b
b'@@ -0,0 +1,144 @@\n+>1\n+CTGTAACTAAACACCGCTGTCCGGACTAAGGTCCGCATATTGTTCAGGTTTTATACAATGAGCGTCGAGC\n+CAGTTCAATGTGGCCCTAAACCTACTCAAGAAGTATCCCGAGGGATGCGAAGCTGCCGATGAGAGTTAGT\n+TGACATTCCGAGTCCGTGGATGGGTCCTCCTTCTTAGCGAATTACCGTCCATTTGCATGACGGATGCCTT\n+GCACTTGGTAACAGTAGCAATGTATAGGAAACAAAGAATCGAGTTCAACAGGTCGCATTCAGTGCCTTGC\n+GCACAAATATATAGGAGGAAACTCGCAACAGAATGCTATGACCAGTTTATAGAACTGTGCCGTAGACGGA\n+GTGCCAATCAGAAATAACCATAATGGGTTGCCAGGGTGTATGACAGGTTCTTGATAGTCCCAGAGCTTTC\n+CGTAACAAAGGAACTTTCACAACGATTGCCTCGTTATATTCCGCTGCACTGCATGACATTCATTGGAATG\n+GAAACAGCTGATTTCCTAGATAGCGCCGGGTATATTGTTACCGCCCCACACGCAGGCATCGGACCACATA\n+TTTTGGCACCTGGTTAGAGAGCTAGACTCGATCGAATAACTTTAGGGAAGCTCCATGTCATGTCCAACAC\n+CCGAAGAGTACAGGACATGTCAAAGGGAGGATTAAAGTTGACCCTATCGATGTATGTCAAGGGTGCTGAG\n+TTGCCCCAGGCTTGCGTATCCTACTGGCTTGATTGGGTACCTAGAGGTACCGTTGCGCATCGGCTCACAA\n+CGTTCAATGCAGTCCCAAGATACGGTGTGATGTCAGGACCTAAGCGACCCATGATCGCTGCTTCCTTGGA\n+AGAGAGAGCGACATTTCCGTCTCGCGCATTCTTTAGTAAGGGAGGAACTTTTTGTGAAAGATGGTGTTGG\n+GCTTCGAGACATTTATCGTGCCTTCTGGCTTACTCAGATTACGCTGAAGCCTTTGGGCCTGCCCATGACT\n+GTTGCAATCATTTGTCTGAAACACCAATGACTTAACGAATGCCCACAAGACTGAAGGTTATCGATCTTGG\n+AGAAAACTCCTGATTCTATAGGGACCATCTCAGGCCCGACAATCCTTAACCGGATCCTAACAGGGTTCAC\n+ATGTTCATGGGTCGTAGTAACCTGTGGCGTGACCGTGGCATAAAGACGTCCGTCCAATTCCTGAGAAAAG\n+ACACCGAAATGGTGAAAGAGTGGGGACCTCCGTAGACAACTTACATCGCCACGCAAGCTTATGTGTGCGT\n+CAGCTACTTCATTGACCACAAATTCGGCCGAAAGATGGCGCTATTGATTTATGGATTAAGGGGCACGCAG\n+TTGTACTTATTCGGCGTAGGTTCCAATCTAGTGGGATTCCTATGGGCCGAAGTCTGAGAAACCTTAGACA\n+AACGGTCACCCATGCGCCGCACCTCCGGTGCCTCCAGTAACATAGTTTGTCCTCAGGTTTGAAATGCTGG\n+GTTTGGTAAGTGAACCTTAATATCCGCTTGCGATTGTCCATGCGGACCGGATTTCAAGAAAGGAGATAGA\n+TTGATCCAGGAACCATCTATCGTACATCGGCCTCGCCCTGCACGCAGACCTGCAGTCATCTTATGATGCG\n+TCCCGTGCAAACCCTCCGTTATCTAGGAGCGGCAGCAGCGCACGAAGAGCGGGAAGATGATTATCTCCCG\n+AGCTCTTGTGCCGAAGGGCATTGTGGAACATTTGGAACGGGGCTCGGATCTCGCTCACTAAAATCTAAGG\n+ATCGAGGAGACAAAGTCATTCGGCAAGACAAACTCACGTCCGTGGTTGCAACTGATTAGACTTTATGGAT\n+GAACGCTATGCTGCTACCGAACCGACGACCCGTGGTCACCAGCTATCCGCTCAAGACATTATATCATTAA\n+TC
CGAGACCCCTGCAGACACACGTCTCGGCTATTGTTGAGAGCCCTTCCTGCTCCATACCCCCGGCACGT\n+ATAATAAACCGAGTACTATGTCCCGGCGGATTCCGTCTTACCTATCTACGAGATCCAGGTGCCGTAGCAT\n+AAAGCTATGCGGGACGTTCTACTCGTTCTAACAGAACCTCAGCCATTGTAACCCACTTTTGTTGCATCCT\n+ATTGGCAGTGGGGCGACCGCTGAGATTGGGGTAGCTTCCTAGTAACTGTGCAGCGAGGTACGTCGCAGAG\n+GGATACACTTGTTACGGCGGGGACTTTGGCAGTGACCCTAGGTTGATGTCCTAAGACGATGTAAATTGAG\n+CTATTCGACGTAGTCGGTCCCTCTAAGTGAACTTTCTATCACCGCGGCCCGAGGGGGAATTATGTGATAG\n+CGCGTGGCTCCAACATCTATTCGGGGCAAAACCGACACGTGTCGTACGTTGCGTACGTTACCTTTTCTGG\n+TTACTTATTCGACCAAGTTATTGCTTGTATCACTTTCGTTGTAAACCTATGCCCTTTTAACACAGCTACG\n+CCCATGAGTGACGTCATGACATTAGACAAAACACAGTATTGCCATTCCCACCTTCTCACATGCCCAGCAA\n+AATGGGGAACGTGTCGACTTCCTGCTGTCACAACAGAACGGAGCGTATGTGAAGAAACTGTATCCCTCGG\n+TGCCGTTCAAGACCGGTACACTTTGACGGCCAACCGCAAGTGGCGCGGGGGCTGTTAGCTAATCAGGCAT\n+ATGCATCGGCCAGCGGCGGGGCGAGACGGGTGGGTGCGGAAATAGAATCCGATGAGGTTCGTTAATCGTC\n+TTGCTTTGTGTTACATACAATAAATCGTCCCCCCCATGGTAACACGAGTTTCCATTCACATTTGGCTCTT\n+GCCCGGCAGGTGTGCGATAAGTCTTATGACTTTTTTAGACATCCACCACTTGCACGGCGTACTCTTTATT\n+CTCCATAATGCCTGTGTGATTCGCAGCTACCAATAAAGGTGCTTACGCAAGGTATTACCCAAATATCAAC\n+CCGGCCCCGCCCGCCGGACCCCAATATGTTATTCTGCCCTGTAGCGACTCCGTGGTCGTCAAGAGTCTAT\n+CACATACTCTATGGTAACTAAGATAGTGTCCGTGTCCTCTGGATAACGAAGCGACCTAACATCAAGACTC\n+AGGTAAATGGTCTTCTACTATCTTGATGTCCTGTGAGCGAGATGGATACCATTCGCCTGATCAAACGCAG\n+TAGCGTATAGGGCTAGCGGGAATTTCTTGACGGCCGCAGAGTTTCTTGTGATCGGATGACTGCTTGATAC\n+TTGGCGACGAAGCCCTGTGGTATGGGGGCCTGACTTCCGCGGACATGGTCGTAATGCCTAGGATAGTTGC\n+TACGGTGGGAACCTATTTTTATGCCCGCCAGACTCGTGTGGGACTCTATCAGTTAAGCGCGACTTCTCGC\n+AGAGAAAGATTAAGATTATGGCTTAATTCTTCTGTACATTCCTTGGTGAGGCGGCTCTCTAGAGTTGCAT\n+TTGGGTGGCATCACTCTTCCAGGTAGTTCACCTCTTGTTGCCTGCCCCAATGCGGGGCGAACTTTCGCTG\n+CTGACCGCCGTTACACAGAACCTGAAGGGACAACACACGAGACCGGAATGTAGTCTCTCTTGTCCAGTTC\n+TGGGGGCATATAGATTTATGATGTTATTCGTGCCCTTATGTTACCCATCCGTGTCGCCGTGAGCGACATC\n+CGGCTAATGTATGGGCAATGGGAATCAGGTATGACTGCGCGAGCTATCGACGCGTAGAAGGCTCACTCTG\n+CAAAAATCACCAGCACTAACGCGTAACACATTGGCTGTCAGCGCACTCGTTGATCTTTGTGTCGGGTGAT\n+TCTAAAATAGTTAGTCCGTGACCTTTGGCGG
CGGCGAGAGATATGGTCCTAACCGCCGTATTTGGACAGA\n+AATTAGTCTATGCGAT'..b'TCCCTTCGGTGACTTCGCGACTGTCATCATAAGCGCCCTTTTCATGCG\n+GTATACTCTGCCGTCGAACCCGATCATATAGCCAAATCGGACAATAACACAGTGGACATCGTCCATTACA\n+GATTAGTCGTACTGTGGGGGACTCCAATTATCGTAGATGACATCTTTCGGTTCTATTACGCCGCGACACC\n+AGGAATTTGGATTCTGATAACCCGGTGCTCACGTCGTGTCGCGAACAAATCTACGAGGAGCCAGGAGCGC\n+TGAAGCCTATCAATTGGGATTACCTTCTTGAACGCTATCCGACCTGCCCTCTGTACAGTCCCGTAGACAT\n+TCATGGATCCTGGATGGGAGCCACCACTGCAAGACAGCACATTCCAGAGCTTTCCCAAGCGGGATCTTGG\n+GTACGTTCTTTCGTCTCCTTGTTTGCGTCCGCAATAGTAGTCATACCGTAAGATTTTCTTCCCGCGGAGA\n+TGGCGGCTCTCCCAGCTAGAACATTATTATAATGGCAAGGGTGGCACACCAAAGGTGCGCACCCAAGGTT\n+CACATGTCCCTTTCACGCTACCCATGAGCGGTAGTTTATCGAAGTTTAGGTGTTTAGTGCTTGAATGAGT\n+CAATACGTTGTCAGAATTCACAATAAAGATCTTCTGTGCGGACTATACACTAGCCGGAGGAACCTAAATT\n+TTTCACGGGGTAAATCCCGAGGTTCGGACATATACCTGGCTTACCCAGCGCGGGCTGACTAGCCCACTTG\n+CGCGAGCGCTGACTCAGGTAAGACAAGAACGAAGTGCAGTATCTCCGGGCTTACTCGAGCGTACACCAGG\n+ACCCCGTAAATTTGCTTACATACAGAAGGTATGCCTTGCATGCCCTTGCTCAGCTAGACGACGATGAACG\n+TAGAGTAAGTGGACTTAGGGCCAACACTCGACGTTTTCGTCTAGACCAAAGGTTTAAAAGCTTTCCGAGT\n+GTCGACTCAAAACATGGGCTAATCTGATTCGTCAGTATCCAGGCCACACTTCAGCGGGTGGAGCACCTAC\n+GATAAACTAAGAAGGTAGTTAGCAAGTGCGTTAGCTTCTGGAAGTTAATATCGGCCTTAGATCGGAGACG\n+CGACTGCCCATAGTCTATCACTTCCCAAACAGAACACTTATAAGTTAAGCGTAATATGTACGAATTTCAA\n+CGTGGGATTGCCAGAGTAGAATGTGTGTAGATTCGCAACAGATAAAAGCTTTCGAAAGCAAAGCGACAGC\n+GCGAACGTGGGTCGGAATCCACATCCGTCCAGGGTTGTAATCACCTTGCGAATATCCCAGAATCGGTGCC\n+ACCGAGGCGACTCGACCACGGATCCGATTCTTCTCATACGGATCAGCAAATTGTAATAGCGGTTTCTTAC\n+AGAGAGGTGGGCACCCTAGCCAGGCTAGGGCGCATAGCGAACCCCTGCCACCTTAGAAAAGTGATGCCTG\n+GAGTTCGACAACGCGCGGCAGAAAGGATTGAGCGTATTTTCGTCTGGGTATTCACCCTATCGCCTATGTA\n+CCGAGAAGTGAGGGAGAGGCGGCGTTCGTCTGTACGCGGCTGGATGTACTGCGAGGTCGTTTGGCCGTTT\n+GCGAGACCACCCATTCGCCGAATGGGTAAAACAAGAACTGCCGTAGGTATATTATAAATCTCGGGATAGA\n+CGGGGGTAAGTTTATGTCTCCGGTTCTTTGTAGTCGTAACTCAGGGGCCCTCGACCCTAGCGAGGCGTTT\n+GTAGTGGGACACCAGGTATCTCCCGGAATAATTCTAGCATCATGAGAGGTGGAGTCGGGATTTGATCAAC\n+CTACATCATTTTTCAGGCAGATACATGGGACCGATCACACCTCACCTCCGTGTTGTCAAAC
TTAAGGACA\n+ACTTAAGGTTGGCTACGCGGCCGCGGATTGCGCTTGCGCGGGAGGGGCGGGGGATATCAGCCAAACGGAG\n+AGTGCTCCGTTGATGAGATTAAGCAGATGTATAAATCATTTGAATGTATGCAATTTGTCTAGATCACCCC\n+TACACATCCTTCTTGAGTAAGTCACAAATACGCCGCAGGTTTGGCGGTTTTTGTCCCCGGCACCTCACTA\n+GATTTTCATCCAACATACGTAGTGACGTTCCGGTGGCTCCGGAAATCGATCCACACTGCCGAGACTGACT\n+TGAGGTCGCGCAGTAGCCCCAACAACTGGGGAGATGGCGGACCCGAGATCAAGGCGCTTTCGCGCCACCT\n+GGGCTTGAGCGTACTAACCCCGGAGACGTGTGTACGGCCCCAGAACATGTATCGTGTGATGAATACCAGT\n+GTATTCCTTCAAAGGTCTAGAGACATACGGCCTGACTCGTTCAAACTAGAGAAGGATTTACCCGCCACAC\n+CTGGGGTCATGGTTGACAACAACAGGAGCCCATGTGTTTATCACTGAATTCTCGGGAATTCCTCGCGAGT\n+CGGAGCACTATCTCGTTCAAACCCGCTAGCCATGCCGTAGAGCGCGCGTGCCAAGGTGCGCGTAGGAGTG\n+CGCTGGCCCCGCATCCGCTAAATATATATCACTCAGCCCTCGCCGCAGCAGAATATCTAATCCCCGCTCG\n+ACTAGCCGAAACCGACACCGTTCAGGGCTGACGACCTGCCTTTTGCACAAAATAACTGAGCCCAGCGATC\n+GTATATTGGAGTCGGACTCAGACGATTCTACGGGTGCTTAAGGCTGACAGGGCGGCAACCATAGGCACGA\n+CTTCGTTGGCGCCGCCCAATAGCTACGTCGCGATGATTAAATCAGCCCTATTCGCCAGGGAGAGAGACAA\n+AATTGGCAATATTTCTACAGGTCTAGCTAACCGACCTAAAGGAGAACCAGGTCGTTGTAGGTCTGTGTAC\n+CACTGGCAGTCCGTCATCGCTGAGAAGGATGTCACGATAGGCTCCGCCTAGTAACTTTTCCCTGCTAGCT\n+ACACCGTTGCGGATATATAGTTTGAGCTCGGGGTCAATGGATTAGAACGTACAACATTGTGTCTGCTTAC\n+GATTGCATAGGTAATAGTCAGCCAGTATATGATCGTACAACACATCTGAAAACTAGTCAAGATACGTGCG\n+TTTGTCCATCTGTATTTACTCAATTCCTTAAACAGCTCGTCCAACGCGGGTCAAAACTGTGGCTTGGTAC\n+AAGTCGTACTTAAACCTCAACCACTACTTGTCAACTCGTTTACGTTTTCTACGTGGAAGTCCGAAAGTGT\n+CATGCGCCCTGCTTCGTATGCAAGCTCACTGATGATCACACCGATGCTTTACGTGCTCTATTGATTGTAA\n+AGTCGGCAATGCGTGTGCCTTAGCTGGTGAGATAATCGTTATAACCACGCTCCATAAGGAGGTTGGTTCC\n+TCTTCCCGATCTATCCCTGGCAGTTGTCGACACTAGCGCTAGTGGCTTCGCAGACTGATGGCAGTGTCTA\n+CCGCGGATTACAGTTAGCTAGCACGCCCCGTTTTTAAGGCTAGAAATGTTGCGATATTTCCGTGAAGCCG\n+AAAGTGAGGTAGGAAACCCCGGTCCTGTAGATCATTAATACTACGTAAGCGATTGTCAACGGAATGGGAA\n+GGCAGTTTATGAACGGCTGGACTGCTTAACCGTTTTCTTGGCACCTATGCCTAGTATGCACAATACGAGA\n+TGGGTTGCGGAGGGACGGAGAATGAGCCTAAGCGAGTTACCTCAGACCACCGGTTACGCTGACCCCTCGG\n+TTAAAGGCGATTCTCGATGCTTTCAGAACTACGGGTCTGTAATCAAAATCTCTATACGTTAGACGTCAGG\n+CGCCGTTCATTAGGAGT
CCGCCATAGGATAAACAAGTCGAGTGTTGTGGTCGCTACTAGTTGCATATTTG\n+AACAATTTGCCGCGGCGTTATGTCCTTCAGTATATTGGCAAATGTGGCTACTGCCATACC\n'
b
diff -r 000000000000 -r e6ef29001647 test-data/simulated.bam
b
Binary file test-data/simulated.bam has changed