# HG changeset patch # User iuc # Date 1559265298 14400 # Node ID e6ef29001647809082b4ec32b8606a3a29163171 planemo upload commit b89c8017aeef91f940543a1cc7dadb4a85290865 diff -r 000000000000 -r e6ef29001647 basil.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/basil.xml Thu May 30 21:14:58 2019 -0400 @@ -0,0 +1,61 @@ + + Breakpoint detection, including large insertions + + anise_basil + + basil --version 2>&1 | grep 'basil version' | cut -f 3 -d ' ' + + + + + + + + + + + + + + + + + + . PASS IMPRECISE;SVTYPE=INS GSCORE:CLEFT:CRIGHT:OEALEFT:OEARIGHT 46.4256:10:12:35:32 + + The first seven columns are as usual in VCF files (ref name, 1-based position, reference base, abbreviation for long insertion, no assigned quality, passing all filters, imprecise insertion SV). + + The eighth column contains the names of the score values given in the ninth column: + + GSCORE Geometric mean of the sum of "1 + $score" for all of the following scores. + CLEFT Number of clipping signatures supporting the site from the left side. + CRIGHT Number of clipping signatures supporting the site from the right side. + OEALEFT Number of OEA alignments supporting the site from the left. + OEARIGHT Number of OEA alignments supporting the site from the right. + + Generally, one should filter for a minimum support of OEA records on each side, e.g. a value of 10 makes sense for a 30x coverage and showed good results on simulated data. + + For a ranking, GSCORE is a suitable measure but we did not develop any statistical model for BASIL matches and it is a mean of pseudocounts only. It carries no statistically precise meaning. 
+ ]]> + + 10.1093/bioinformatics/btv051 + + diff -r 000000000000 -r e6ef29001647 test-data/basil.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/basil.vcf Thu May 30 21:14:58 2019 -0400 @@ -0,0 +1,15 @@ +##fileformat=VCFv4.1 +##source=BASIL +##reference=ref.fa +##INFO= +##INFO= +##INFO= +##ALT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT individual +1 5001 site_0 T . PASS IMPRECISE;SVTYPE=INS GSCORE:CLEFT:CRIGHT:OEALEFT:OEARIGHT 46.4256:10:12:35:32 diff -r 000000000000 -r e6ef29001647 test-data/ref.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ref.fa Thu May 30 21:14:58 2019 -0400 @@ -0,0 +1,144 @@ +>1 +CTGTAACTAAACACCGCTGTCCGGACTAAGGTCCGCATATTGTTCAGGTTTTATACAATGAGCGTCGAGC +CAGTTCAATGTGGCCCTAAACCTACTCAAGAAGTATCCCGAGGGATGCGAAGCTGCCGATGAGAGTTAGT +TGACATTCCGAGTCCGTGGATGGGTCCTCCTTCTTAGCGAATTACCGTCCATTTGCATGACGGATGCCTT +GCACTTGGTAACAGTAGCAATGTATAGGAAACAAAGAATCGAGTTCAACAGGTCGCATTCAGTGCCTTGC +GCACAAATATATAGGAGGAAACTCGCAACAGAATGCTATGACCAGTTTATAGAACTGTGCCGTAGACGGA +GTGCCAATCAGAAATAACCATAATGGGTTGCCAGGGTGTATGACAGGTTCTTGATAGTCCCAGAGCTTTC +CGTAACAAAGGAACTTTCACAACGATTGCCTCGTTATATTCCGCTGCACTGCATGACATTCATTGGAATG +GAAACAGCTGATTTCCTAGATAGCGCCGGGTATATTGTTACCGCCCCACACGCAGGCATCGGACCACATA +TTTTGGCACCTGGTTAGAGAGCTAGACTCGATCGAATAACTTTAGGGAAGCTCCATGTCATGTCCAACAC +CCGAAGAGTACAGGACATGTCAAAGGGAGGATTAAAGTTGACCCTATCGATGTATGTCAAGGGTGCTGAG +TTGCCCCAGGCTTGCGTATCCTACTGGCTTGATTGGGTACCTAGAGGTACCGTTGCGCATCGGCTCACAA +CGTTCAATGCAGTCCCAAGATACGGTGTGATGTCAGGACCTAAGCGACCCATGATCGCTGCTTCCTTGGA +AGAGAGAGCGACATTTCCGTCTCGCGCATTCTTTAGTAAGGGAGGAACTTTTTGTGAAAGATGGTGTTGG +GCTTCGAGACATTTATCGTGCCTTCTGGCTTACTCAGATTACGCTGAAGCCTTTGGGCCTGCCCATGACT +GTTGCAATCATTTGTCTGAAACACCAATGACTTAACGAATGCCCACAAGACTGAAGGTTATCGATCTTGG +AGAAAACTCCTGATTCTATAGGGACCATCTCAGGCCCGACAATCCTTAACCGGATCCTAACAGGGTTCAC +ATGTTCATGGGTCGTAGTAACCTGTGGCGTGACCGTGGCATAAAGACGTCCGTCCAATTCCTGAGAAAAG 
+ACACCGAAATGGTGAAAGAGTGGGGACCTCCGTAGACAACTTACATCGCCACGCAAGCTTATGTGTGCGT +CAGCTACTTCATTGACCACAAATTCGGCCGAAAGATGGCGCTATTGATTTATGGATTAAGGGGCACGCAG +TTGTACTTATTCGGCGTAGGTTCCAATCTAGTGGGATTCCTATGGGCCGAAGTCTGAGAAACCTTAGACA +AACGGTCACCCATGCGCCGCACCTCCGGTGCCTCCAGTAACATAGTTTGTCCTCAGGTTTGAAATGCTGG +GTTTGGTAAGTGAACCTTAATATCCGCTTGCGATTGTCCATGCGGACCGGATTTCAAGAAAGGAGATAGA +TTGATCCAGGAACCATCTATCGTACATCGGCCTCGCCCTGCACGCAGACCTGCAGTCATCTTATGATGCG +TCCCGTGCAAACCCTCCGTTATCTAGGAGCGGCAGCAGCGCACGAAGAGCGGGAAGATGATTATCTCCCG +AGCTCTTGTGCCGAAGGGCATTGTGGAACATTTGGAACGGGGCTCGGATCTCGCTCACTAAAATCTAAGG +ATCGAGGAGACAAAGTCATTCGGCAAGACAAACTCACGTCCGTGGTTGCAACTGATTAGACTTTATGGAT +GAACGCTATGCTGCTACCGAACCGACGACCCGTGGTCACCAGCTATCCGCTCAAGACATTATATCATTAA +TCCGAGACCCCTGCAGACACACGTCTCGGCTATTGTTGAGAGCCCTTCCTGCTCCATACCCCCGGCACGT +ATAATAAACCGAGTACTATGTCCCGGCGGATTCCGTCTTACCTATCTACGAGATCCAGGTGCCGTAGCAT +AAAGCTATGCGGGACGTTCTACTCGTTCTAACAGAACCTCAGCCATTGTAACCCACTTTTGTTGCATCCT +ATTGGCAGTGGGGCGACCGCTGAGATTGGGGTAGCTTCCTAGTAACTGTGCAGCGAGGTACGTCGCAGAG +GGATACACTTGTTACGGCGGGGACTTTGGCAGTGACCCTAGGTTGATGTCCTAAGACGATGTAAATTGAG +CTATTCGACGTAGTCGGTCCCTCTAAGTGAACTTTCTATCACCGCGGCCCGAGGGGGAATTATGTGATAG +CGCGTGGCTCCAACATCTATTCGGGGCAAAACCGACACGTGTCGTACGTTGCGTACGTTACCTTTTCTGG +TTACTTATTCGACCAAGTTATTGCTTGTATCACTTTCGTTGTAAACCTATGCCCTTTTAACACAGCTACG +CCCATGAGTGACGTCATGACATTAGACAAAACACAGTATTGCCATTCCCACCTTCTCACATGCCCAGCAA +AATGGGGAACGTGTCGACTTCCTGCTGTCACAACAGAACGGAGCGTATGTGAAGAAACTGTATCCCTCGG +TGCCGTTCAAGACCGGTACACTTTGACGGCCAACCGCAAGTGGCGCGGGGGCTGTTAGCTAATCAGGCAT +ATGCATCGGCCAGCGGCGGGGCGAGACGGGTGGGTGCGGAAATAGAATCCGATGAGGTTCGTTAATCGTC +TTGCTTTGTGTTACATACAATAAATCGTCCCCCCCATGGTAACACGAGTTTCCATTCACATTTGGCTCTT +GCCCGGCAGGTGTGCGATAAGTCTTATGACTTTTTTAGACATCCACCACTTGCACGGCGTACTCTTTATT +CTCCATAATGCCTGTGTGATTCGCAGCTACCAATAAAGGTGCTTACGCAAGGTATTACCCAAATATCAAC +CCGGCCCCGCCCGCCGGACCCCAATATGTTATTCTGCCCTGTAGCGACTCCGTGGTCGTCAAGAGTCTAT +CACATACTCTATGGTAACTAAGATAGTGTCCGTGTCCTCTGGATAACGAAGCGACCTAACATCAAGACTC 
+AGGTAAATGGTCTTCTACTATCTTGATGTCCTGTGAGCGAGATGGATACCATTCGCCTGATCAAACGCAG +TAGCGTATAGGGCTAGCGGGAATTTCTTGACGGCCGCAGAGTTTCTTGTGATCGGATGACTGCTTGATAC +TTGGCGACGAAGCCCTGTGGTATGGGGGCCTGACTTCCGCGGACATGGTCGTAATGCCTAGGATAGTTGC +TACGGTGGGAACCTATTTTTATGCCCGCCAGACTCGTGTGGGACTCTATCAGTTAAGCGCGACTTCTCGC +AGAGAAAGATTAAGATTATGGCTTAATTCTTCTGTACATTCCTTGGTGAGGCGGCTCTCTAGAGTTGCAT +TTGGGTGGCATCACTCTTCCAGGTAGTTCACCTCTTGTTGCCTGCCCCAATGCGGGGCGAACTTTCGCTG +CTGACCGCCGTTACACAGAACCTGAAGGGACAACACACGAGACCGGAATGTAGTCTCTCTTGTCCAGTTC +TGGGGGCATATAGATTTATGATGTTATTCGTGCCCTTATGTTACCCATCCGTGTCGCCGTGAGCGACATC +CGGCTAATGTATGGGCAATGGGAATCAGGTATGACTGCGCGAGCTATCGACGCGTAGAAGGCTCACTCTG +CAAAAATCACCAGCACTAACGCGTAACACATTGGCTGTCAGCGCACTCGTTGATCTTTGTGTCGGGTGAT +TCTAAAATAGTTAGTCCGTGACCTTTGGCGGCGGCGAGAGATATGGTCCTAACCGCCGTATTTGGACAGA +AATTAGTCTATGCGATCTTCCCTCCAGATGGTAACGCTGCCAAGGGGCTGACTCTGGCATCCCAGGCGCG +TATGTCTAGGTGGCGGCTGTAGATATGAAACGATATTAGTAGGTTTTTCACATGAGCAACTACTGGGTTA +CGACTCAGTCGGTGGCCGCGGATTATGTCATTTGCGAAATAGCAGATCACTTGCGTTTCCGAGAGCCCAC +CATCGGTCGTAGGACAAATACCCAGTTAACAGCTATCGTTTCGAATATGGATGGGTAAATAGAATGACAT +ATGTAACGAGCCCCCCCCAGTAAACCCGCAGCGGAACGCTGCACTTGAGGCGAACGTGACCGTCGTCCCT +CGGGGTTTGAACTATTCTCGTGAAGTCTTACTGTAACGTTCTAGGCTACTCGTATCGCGTTATAATCCGA +GTACCACCCACGGGCAGAGTCCACAACGAAGAGCGACGGGAGATGTCAGGATAGCCGCAGATCCGTTCAA +GCCAGTACGGATAACGAGGCGATACCTTGTCAGGGGCCCGATCGACCGCTGAAGGGCAGAGTCGGAACTT +ACTAACCATGGGGTCCTCTTCGGGAACACTTTACGTGTGCAAATCCAAGCTGGTGCGACCCCGTTCAACG +GCCCAGAGAACCGCTCTACCTGCGCACACGATCAACTAGCTCGTCTTGGGCTGTGGCGAGCCTGTCGTAT +TCGCTATATAGTTAGTGGTATATCCTACTGGGGTTAAGTGATGGGAGTCTCCGCTAGAGGCGTGGGCGGC +GTTACTACAACTTCCTGCGACCAGGACCCTCGGGAGGTATTTGACTATGGTGCTTACATGGTGTTCCTTA +GTTAGATAGGATGACTTCAGCTAACTGGGCTTCGCCTAGGGTCTCGGGAGAAATCTAGGGACCCCAATCT +ATTAGACGAACACGTCCAGGGCATGGTCAGGTATACACCTTCCGACTAGACGTGTTCGAAGATTCGGGAA +AATTACCTGAAGAGCCCCCGTAAGCCGTAGTAGAAGAGGACACTTCATTTAAACAATACCGAAAAAGTGT +CTTGGCAGACCGTATCTTCACAGGGCCGAAGCACTTTTGGCAGGCTTATAAACGCCCAGAATGAAGCACT 
+CGCCATAGGTGGAAACCTTTAAGCGACGCGTTGGGTCCGCGCAGCGCCAACGATTTCAACCGGGAGACGT +TCGTTCATGATGAGAAGACGGCATATGATCTGTGACATACGCTAGGTGAATCATAGCTGCTTAACAAACT +CTGGCGTGTCTGAGGCATTCAGACCCCGCGACAACCTTATGTCTACAAAACGTTGCCCCCTGTACGAAAA +GTCCCTAATTGAGACTGCAACCCCCTAGGCTTTCCAGTCAACAGGCGTAGCGGTAATAGTGGGTTCGATG +CAGCTCGGCGGGCTCGGGATCATACCATCGTGTATAAACGTCAGCGTTACGACCATTTAAGTGCGCGGAA +CGGTCGGGTTAGTGCATTGCTTGCGACTAACTATATCTAGTGATACGCTGCAACTAGCCACAGTCCTAGT +TACTTTCGGAGATCCTCGTGGGTCTGAGAGGAGGGCTACCCGCACAGCGAATCCTTCGACTAGTAATTCA +TACTAGAACCTGACCGTAGTAAACGTTCTGTGGTTCACCCGCTTGGACTATGAAAGCCGGTGGTATCGAG +TTTTGGCTACCCAGTCGAATAGCCCACTCGGAACCAGTTGTAAGATAATAATTGCTATGTGTATCGTGAC +ACGTTGAGGCGCATACTTAACTGACAAGGTCCAATCATGGTCTTATACACTGTTGCAAATGACACCACAT +CCGAGCTAAATCTAGGCATCTCAATCAGGCGGTTCGAGGGATCATGTAAATGTTTAACCTTTGCGGGCAC +AAGCCCCCGTGTACTCCGGATGGCCAGGGACAGCTTCCACGAAAACTTCTTACACTCCATGTTGAGATGA +GTGGCCAACTAAATGCTTCGCTGATGTGGACAGCATCATATTAGCAATCAAGGTGCATACTTCCGGTCTT +CAAAGGCCAGTACTGGTATAGACGATAACCGTGGACATCTGCTAGACCGCCAGATTGTGGCATCTTTTGA +AACTCACCCGGAAGATAACGCGCATCTCCCTCAGGTCGTTTTAGCGACGCTAATATTATCGTGGCATCGT +GAAATCTGGGGTAAGGAAGGAGTCCGATGGCCTTACTCTCCGTTTTCACTCTTGCTTTCATCTTAGGAGC +CTATCTTTATATGAGGCCCGGCTCCCTTCGGTGACTTCGCGACTGTCATCATAAGCGCCCTTTTCATGCG +GTATACTCTGCCGTCGAACCCGATCATATAGCCAAATCGGACAATAACACAGTGGACATCGTCCATTACA +GATTAGTCGTACTGTGGGGGACTCCAATTATCGTAGATGACATCTTTCGGTTCTATTACGCCGCGACACC +AGGAATTTGGATTCTGATAACCCGGTGCTCACGTCGTGTCGCGAACAAATCTACGAGGAGCCAGGAGCGC +TGAAGCCTATCAATTGGGATTACCTTCTTGAACGCTATCCGACCTGCCCTCTGTACAGTCCCGTAGACAT +TCATGGATCCTGGATGGGAGCCACCACTGCAAGACAGCACATTCCAGAGCTTTCCCAAGCGGGATCTTGG +GTACGTTCTTTCGTCTCCTTGTTTGCGTCCGCAATAGTAGTCATACCGTAAGATTTTCTTCCCGCGGAGA +TGGCGGCTCTCCCAGCTAGAACATTATTATAATGGCAAGGGTGGCACACCAAAGGTGCGCACCCAAGGTT +CACATGTCCCTTTCACGCTACCCATGAGCGGTAGTTTATCGAAGTTTAGGTGTTTAGTGCTTGAATGAGT +CAATACGTTGTCAGAATTCACAATAAAGATCTTCTGTGCGGACTATACACTAGCCGGAGGAACCTAAATT +TTTCACGGGGTAAATCCCGAGGTTCGGACATATACCTGGCTTACCCAGCGCGGGCTGACTAGCCCACTTG 
+CGCGAGCGCTGACTCAGGTAAGACAAGAACGAAGTGCAGTATCTCCGGGCTTACTCGAGCGTACACCAGG +ACCCCGTAAATTTGCTTACATACAGAAGGTATGCCTTGCATGCCCTTGCTCAGCTAGACGACGATGAACG +TAGAGTAAGTGGACTTAGGGCCAACACTCGACGTTTTCGTCTAGACCAAAGGTTTAAAAGCTTTCCGAGT +GTCGACTCAAAACATGGGCTAATCTGATTCGTCAGTATCCAGGCCACACTTCAGCGGGTGGAGCACCTAC +GATAAACTAAGAAGGTAGTTAGCAAGTGCGTTAGCTTCTGGAAGTTAATATCGGCCTTAGATCGGAGACG +CGACTGCCCATAGTCTATCACTTCCCAAACAGAACACTTATAAGTTAAGCGTAATATGTACGAATTTCAA +CGTGGGATTGCCAGAGTAGAATGTGTGTAGATTCGCAACAGATAAAAGCTTTCGAAAGCAAAGCGACAGC +GCGAACGTGGGTCGGAATCCACATCCGTCCAGGGTTGTAATCACCTTGCGAATATCCCAGAATCGGTGCC +ACCGAGGCGACTCGACCACGGATCCGATTCTTCTCATACGGATCAGCAAATTGTAATAGCGGTTTCTTAC +AGAGAGGTGGGCACCCTAGCCAGGCTAGGGCGCATAGCGAACCCCTGCCACCTTAGAAAAGTGATGCCTG +GAGTTCGACAACGCGCGGCAGAAAGGATTGAGCGTATTTTCGTCTGGGTATTCACCCTATCGCCTATGTA +CCGAGAAGTGAGGGAGAGGCGGCGTTCGTCTGTACGCGGCTGGATGTACTGCGAGGTCGTTTGGCCGTTT +GCGAGACCACCCATTCGCCGAATGGGTAAAACAAGAACTGCCGTAGGTATATTATAAATCTCGGGATAGA +CGGGGGTAAGTTTATGTCTCCGGTTCTTTGTAGTCGTAACTCAGGGGCCCTCGACCCTAGCGAGGCGTTT +GTAGTGGGACACCAGGTATCTCCCGGAATAATTCTAGCATCATGAGAGGTGGAGTCGGGATTTGATCAAC +CTACATCATTTTTCAGGCAGATACATGGGACCGATCACACCTCACCTCCGTGTTGTCAAACTTAAGGACA +ACTTAAGGTTGGCTACGCGGCCGCGGATTGCGCTTGCGCGGGAGGGGCGGGGGATATCAGCCAAACGGAG +AGTGCTCCGTTGATGAGATTAAGCAGATGTATAAATCATTTGAATGTATGCAATTTGTCTAGATCACCCC +TACACATCCTTCTTGAGTAAGTCACAAATACGCCGCAGGTTTGGCGGTTTTTGTCCCCGGCACCTCACTA +GATTTTCATCCAACATACGTAGTGACGTTCCGGTGGCTCCGGAAATCGATCCACACTGCCGAGACTGACT +TGAGGTCGCGCAGTAGCCCCAACAACTGGGGAGATGGCGGACCCGAGATCAAGGCGCTTTCGCGCCACCT +GGGCTTGAGCGTACTAACCCCGGAGACGTGTGTACGGCCCCAGAACATGTATCGTGTGATGAATACCAGT +GTATTCCTTCAAAGGTCTAGAGACATACGGCCTGACTCGTTCAAACTAGAGAAGGATTTACCCGCCACAC +CTGGGGTCATGGTTGACAACAACAGGAGCCCATGTGTTTATCACTGAATTCTCGGGAATTCCTCGCGAGT +CGGAGCACTATCTCGTTCAAACCCGCTAGCCATGCCGTAGAGCGCGCGTGCCAAGGTGCGCGTAGGAGTG +CGCTGGCCCCGCATCCGCTAAATATATATCACTCAGCCCTCGCCGCAGCAGAATATCTAATCCCCGCTCG +ACTAGCCGAAACCGACACCGTTCAGGGCTGACGACCTGCCTTTTGCACAAAATAACTGAGCCCAGCGATC 
+GTATATTGGAGTCGGACTCAGACGATTCTACGGGTGCTTAAGGCTGACAGGGCGGCAACCATAGGCACGA +CTTCGTTGGCGCCGCCCAATAGCTACGTCGCGATGATTAAATCAGCCCTATTCGCCAGGGAGAGAGACAA +AATTGGCAATATTTCTACAGGTCTAGCTAACCGACCTAAAGGAGAACCAGGTCGTTGTAGGTCTGTGTAC +CACTGGCAGTCCGTCATCGCTGAGAAGGATGTCACGATAGGCTCCGCCTAGTAACTTTTCCCTGCTAGCT +ACACCGTTGCGGATATATAGTTTGAGCTCGGGGTCAATGGATTAGAACGTACAACATTGTGTCTGCTTAC +GATTGCATAGGTAATAGTCAGCCAGTATATGATCGTACAACACATCTGAAAACTAGTCAAGATACGTGCG +TTTGTCCATCTGTATTTACTCAATTCCTTAAACAGCTCGTCCAACGCGGGTCAAAACTGTGGCTTGGTAC +AAGTCGTACTTAAACCTCAACCACTACTTGTCAACTCGTTTACGTTTTCTACGTGGAAGTCCGAAAGTGT +CATGCGCCCTGCTTCGTATGCAAGCTCACTGATGATCACACCGATGCTTTACGTGCTCTATTGATTGTAA +AGTCGGCAATGCGTGTGCCTTAGCTGGTGAGATAATCGTTATAACCACGCTCCATAAGGAGGTTGGTTCC +TCTTCCCGATCTATCCCTGGCAGTTGTCGACACTAGCGCTAGTGGCTTCGCAGACTGATGGCAGTGTCTA +CCGCGGATTACAGTTAGCTAGCACGCCCCGTTTTTAAGGCTAGAAATGTTGCGATATTTCCGTGAAGCCG +AAAGTGAGGTAGGAAACCCCGGTCCTGTAGATCATTAATACTACGTAAGCGATTGTCAACGGAATGGGAA +GGCAGTTTATGAACGGCTGGACTGCTTAACCGTTTTCTTGGCACCTATGCCTAGTATGCACAATACGAGA +TGGGTTGCGGAGGGACGGAGAATGAGCCTAAGCGAGTTACCTCAGACCACCGGTTACGCTGACCCCTCGG +TTAAAGGCGATTCTCGATGCTTTCAGAACTACGGGTCTGTAATCAAAATCTCTATACGTTAGACGTCAGG +CGCCGTTCATTAGGAGTCCGCCATAGGATAAACAAGTCGAGTGTTGTGGTCGCTACTAGTTGCATATTTG +AACAATTTGCCGCGGCGTTATGTCCTTCAGTATATTGGCAAATGTGGCTACTGCCATACC diff -r 000000000000 -r e6ef29001647 test-data/simulated.bam Binary file test-data/simulated.bam has changed