Next changeset 1:77fc7640abc7 (2024-08-11) |
Commit message:
planemo upload commit b89c8017aeef91f940543a1cc7dadb4a85290865 |
added:
basil.xml test-data/basil.vcf test-data/ref.fa test-data/simulated.bam |
b |
diff -r 000000000000 -r e6ef29001647 basil.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/basil.xml Thu May 30 21:14:58 2019 -0400 |
[ |
@@ -0,0 +1,61 @@ +<tool id="basil" name="basil" version="1.2.0"> + <description>Breakpoint detection, including large insertions</description> + <requirements> + <requirement type="package" version="1.2.0">anise_basil</requirement> + </requirements> + <version_command>basil --version 2>&1 | grep 'basil version' | cut -f 3 -d ' '</version_command> + <command detect_errors="aggressive"><![CDATA[ + ln -s '$ref' 'ref.fa' && + ln -s '$bam' 'in.bam' && + ln -s '$vcf' 'out.vcf' && + basil + --input-reference 'ref.fa' + --input-mapping 'in.bam' + --out-vcf 'out.vcf' + --oea-min-support-each-side '$min_oea_each_side' + ]]></command> + <inputs> + <param name="ref" argument="--input-reference" type="data" format="Fasta" label="Reference Sequence File" help="FASTA file with the reference."/> + <param name="bam" argument="--input-mapping" type="data" format="sam,bam" label="Alignment File" help="SAM/BAM file to use as the input."/> + <param name="min_oea_each_side" argument="--oea-min-support-each-side" type="integer" value="2" label="Minimum supporting reads, each side" help="Smallest number of OEA (one-end-anchor) reads on each side to support an insertion. In range [1..inf]. This is the minimum number of supporting reads (without mapped partners) on each side of an insertion breakpoint required to not be filtered." /> + </inputs> + <outputs> + <data name="vcf" format="vcf" /> + </outputs> + <tests> + <test> + <param name="ref" value="ref.fa"/> + <param name="bam" value="simulated.bam"/> + <param name="min_oea_each_side" value="2"/> + <output name="vcf" file="basil.vcf"/> + </test> + </tests> + <help><![CDATA[ + BASIL is a method to detect breakpoints for structural variants (including insertion breakpoints) from aligned paired HTS reads in BAM format. Use BASIL to analyze BAM files for tentative insertion sites. + + Note that BASIL will in general detect all kinds of breakpoints, e.g. for inversions on real-world data. 
+ + BASIL VCF fields + + A typical line in BASIL's VCF output might look as follows. + + 1 5001 site_0 T <INS> . PASS IMPRECISE;SVTYPE=INS GSCORE:CLEFT:CRIGHT:OEALEFT:OEARIGHT 46.4256:10:12:35:32 + + The first eight columns are as usual in VCF files (ref name, 1-based position, site ID, reference base, abbreviation for long insertion, no assigned quality, passing all filters, imprecise insertion SV). + + The ninth column (FORMAT) contains the names of the score values given in the tenth column: + + GSCORE Geometric mean of the sum of "1 + $score" for all of the following scores. + CLEFT Number of clipping signatures supporting the site from the left side. + CRIGHT Number of clipping signatures supporting the site from the right side. + OEALEFT Number of OEA alignments supporting the site from the left. + OEARIGHT Number of OEA alignments supporting the site from the right. + + Generally, one should filter for a minimum support of OEA records on each side, e.g. a value of 10 makes sense for a 30x coverage and showed good results on simulated data. + + For a ranking, GSCORE is a suitable measure but we did not develop any statistical model for BASIL matches and it is a mean of pseudocounts only. It carries no statistically precise meaning. + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btv051</citation> + </citations> +</tool> |
b |
diff -r 000000000000 -r e6ef29001647 test-data/basil.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/basil.vcf Thu May 30 21:14:58 2019 -0400 |
b |
@@ -0,0 +1,15 @@ +##fileformat=VCFv4.1 +##source=BASIL +##reference=ref.fa +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=OEA_ONLY,Number=0,Type=Flag,Description="Breakpoint support by OEA signature only"> +##ALT=<ID=INS,Description="Insertion of novel sequence"> +##FORMAT=<ID=GSCORE,Number=1,Type=String,Description="Sum of Geometric score means (see BASIL documentation)"> +##FORMAT=<ID=CLEFT,Number=1,Type=String,Description="Clipped alignments supporting call from left side."> +##FORMAT=<ID=CRIGHT,Number=1,Type=String,Description="Clipped alignments supporting call from right side."> +##FORMAT=<ID=OEALEFT,Number=1,Type=String,Description="One-end anchored alignments supporting call from left side."> +##FORMAT=<ID=OEARIGHT,Number=1,Type=String,Description="One-end anchored alignments supporting call from right side."> +##contig=<ID=1,length=10000> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT individual +1 5001 site_0 T <INS> . PASS IMPRECISE;SVTYPE=INS GSCORE:CLEFT:CRIGHT:OEALEFT:OEARIGHT 46.4256:10:12:35:32 |
b |
diff -r 000000000000 -r e6ef29001647 test-data/ref.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ref.fa Thu May 30 21:14:58 2019 -0400 |
b |
b'@@ -0,0 +1,144 @@\n+>1\n+CTGTAACTAAACACCGCTGTCCGGACTAAGGTCCGCATATTGTTCAGGTTTTATACAATGAGCGTCGAGC\n+CAGTTCAATGTGGCCCTAAACCTACTCAAGAAGTATCCCGAGGGATGCGAAGCTGCCGATGAGAGTTAGT\n+TGACATTCCGAGTCCGTGGATGGGTCCTCCTTCTTAGCGAATTACCGTCCATTTGCATGACGGATGCCTT\n+GCACTTGGTAACAGTAGCAATGTATAGGAAACAAAGAATCGAGTTCAACAGGTCGCATTCAGTGCCTTGC\n+GCACAAATATATAGGAGGAAACTCGCAACAGAATGCTATGACCAGTTTATAGAACTGTGCCGTAGACGGA\n+GTGCCAATCAGAAATAACCATAATGGGTTGCCAGGGTGTATGACAGGTTCTTGATAGTCCCAGAGCTTTC\n+CGTAACAAAGGAACTTTCACAACGATTGCCTCGTTATATTCCGCTGCACTGCATGACATTCATTGGAATG\n+GAAACAGCTGATTTCCTAGATAGCGCCGGGTATATTGTTACCGCCCCACACGCAGGCATCGGACCACATA\n+TTTTGGCACCTGGTTAGAGAGCTAGACTCGATCGAATAACTTTAGGGAAGCTCCATGTCATGTCCAACAC\n+CCGAAGAGTACAGGACATGTCAAAGGGAGGATTAAAGTTGACCCTATCGATGTATGTCAAGGGTGCTGAG\n+TTGCCCCAGGCTTGCGTATCCTACTGGCTTGATTGGGTACCTAGAGGTACCGTTGCGCATCGGCTCACAA\n+CGTTCAATGCAGTCCCAAGATACGGTGTGATGTCAGGACCTAAGCGACCCATGATCGCTGCTTCCTTGGA\n+AGAGAGAGCGACATTTCCGTCTCGCGCATTCTTTAGTAAGGGAGGAACTTTTTGTGAAAGATGGTGTTGG\n+GCTTCGAGACATTTATCGTGCCTTCTGGCTTACTCAGATTACGCTGAAGCCTTTGGGCCTGCCCATGACT\n+GTTGCAATCATTTGTCTGAAACACCAATGACTTAACGAATGCCCACAAGACTGAAGGTTATCGATCTTGG\n+AGAAAACTCCTGATTCTATAGGGACCATCTCAGGCCCGACAATCCTTAACCGGATCCTAACAGGGTTCAC\n+ATGTTCATGGGTCGTAGTAACCTGTGGCGTGACCGTGGCATAAAGACGTCCGTCCAATTCCTGAGAAAAG\n+ACACCGAAATGGTGAAAGAGTGGGGACCTCCGTAGACAACTTACATCGCCACGCAAGCTTATGTGTGCGT\n+CAGCTACTTCATTGACCACAAATTCGGCCGAAAGATGGCGCTATTGATTTATGGATTAAGGGGCACGCAG\n+TTGTACTTATTCGGCGTAGGTTCCAATCTAGTGGGATTCCTATGGGCCGAAGTCTGAGAAACCTTAGACA\n+AACGGTCACCCATGCGCCGCACCTCCGGTGCCTCCAGTAACATAGTTTGTCCTCAGGTTTGAAATGCTGG\n+GTTTGGTAAGTGAACCTTAATATCCGCTTGCGATTGTCCATGCGGACCGGATTTCAAGAAAGGAGATAGA\n+TTGATCCAGGAACCATCTATCGTACATCGGCCTCGCCCTGCACGCAGACCTGCAGTCATCTTATGATGCG\n+TCCCGTGCAAACCCTCCGTTATCTAGGAGCGGCAGCAGCGCACGAAGAGCGGGAAGATGATTATCTCCCG\n+AGCTCTTGTGCCGAAGGGCATTGTGGAACATTTGGAACGGGGCTCGGATCTCGCTCACTAAAATCTAAGG\n+ATCGAGGAGACAAAGTCATTCGGCAAGACAAACTCACGTCCGTGGTTGCAACTGATTAGACTTTATGGAT\n+GAACGCTATGCTGCTACCGAACCGACGACCCGTGGTCACCAGCTATCCGCTCAAGACATTATATCATTAA\n+TC
CGAGACCCCTGCAGACACACGTCTCGGCTATTGTTGAGAGCCCTTCCTGCTCCATACCCCCGGCACGT\n+ATAATAAACCGAGTACTATGTCCCGGCGGATTCCGTCTTACCTATCTACGAGATCCAGGTGCCGTAGCAT\n+AAAGCTATGCGGGACGTTCTACTCGTTCTAACAGAACCTCAGCCATTGTAACCCACTTTTGTTGCATCCT\n+ATTGGCAGTGGGGCGACCGCTGAGATTGGGGTAGCTTCCTAGTAACTGTGCAGCGAGGTACGTCGCAGAG\n+GGATACACTTGTTACGGCGGGGACTTTGGCAGTGACCCTAGGTTGATGTCCTAAGACGATGTAAATTGAG\n+CTATTCGACGTAGTCGGTCCCTCTAAGTGAACTTTCTATCACCGCGGCCCGAGGGGGAATTATGTGATAG\n+CGCGTGGCTCCAACATCTATTCGGGGCAAAACCGACACGTGTCGTACGTTGCGTACGTTACCTTTTCTGG\n+TTACTTATTCGACCAAGTTATTGCTTGTATCACTTTCGTTGTAAACCTATGCCCTTTTAACACAGCTACG\n+CCCATGAGTGACGTCATGACATTAGACAAAACACAGTATTGCCATTCCCACCTTCTCACATGCCCAGCAA\n+AATGGGGAACGTGTCGACTTCCTGCTGTCACAACAGAACGGAGCGTATGTGAAGAAACTGTATCCCTCGG\n+TGCCGTTCAAGACCGGTACACTTTGACGGCCAACCGCAAGTGGCGCGGGGGCTGTTAGCTAATCAGGCAT\n+ATGCATCGGCCAGCGGCGGGGCGAGACGGGTGGGTGCGGAAATAGAATCCGATGAGGTTCGTTAATCGTC\n+TTGCTTTGTGTTACATACAATAAATCGTCCCCCCCATGGTAACACGAGTTTCCATTCACATTTGGCTCTT\n+GCCCGGCAGGTGTGCGATAAGTCTTATGACTTTTTTAGACATCCACCACTTGCACGGCGTACTCTTTATT\n+CTCCATAATGCCTGTGTGATTCGCAGCTACCAATAAAGGTGCTTACGCAAGGTATTACCCAAATATCAAC\n+CCGGCCCCGCCCGCCGGACCCCAATATGTTATTCTGCCCTGTAGCGACTCCGTGGTCGTCAAGAGTCTAT\n+CACATACTCTATGGTAACTAAGATAGTGTCCGTGTCCTCTGGATAACGAAGCGACCTAACATCAAGACTC\n+AGGTAAATGGTCTTCTACTATCTTGATGTCCTGTGAGCGAGATGGATACCATTCGCCTGATCAAACGCAG\n+TAGCGTATAGGGCTAGCGGGAATTTCTTGACGGCCGCAGAGTTTCTTGTGATCGGATGACTGCTTGATAC\n+TTGGCGACGAAGCCCTGTGGTATGGGGGCCTGACTTCCGCGGACATGGTCGTAATGCCTAGGATAGTTGC\n+TACGGTGGGAACCTATTTTTATGCCCGCCAGACTCGTGTGGGACTCTATCAGTTAAGCGCGACTTCTCGC\n+AGAGAAAGATTAAGATTATGGCTTAATTCTTCTGTACATTCCTTGGTGAGGCGGCTCTCTAGAGTTGCAT\n+TTGGGTGGCATCACTCTTCCAGGTAGTTCACCTCTTGTTGCCTGCCCCAATGCGGGGCGAACTTTCGCTG\n+CTGACCGCCGTTACACAGAACCTGAAGGGACAACACACGAGACCGGAATGTAGTCTCTCTTGTCCAGTTC\n+TGGGGGCATATAGATTTATGATGTTATTCGTGCCCTTATGTTACCCATCCGTGTCGCCGTGAGCGACATC\n+CGGCTAATGTATGGGCAATGGGAATCAGGTATGACTGCGCGAGCTATCGACGCGTAGAAGGCTCACTCTG\n+CAAAAATCACCAGCACTAACGCGTAACACATTGGCTGTCAGCGCACTCGTTGATCTTTGTGTCGGGTGAT\n+TCTAAAATAGTTAGTCCGTGACCTTTGGCGG
CGGCGAGAGATATGGTCCTAACCGCCGTATTTGGACAGA\n+AATTAGTCTATGCGAT'..b'TCCCTTCGGTGACTTCGCGACTGTCATCATAAGCGCCCTTTTCATGCG\n+GTATACTCTGCCGTCGAACCCGATCATATAGCCAAATCGGACAATAACACAGTGGACATCGTCCATTACA\n+GATTAGTCGTACTGTGGGGGACTCCAATTATCGTAGATGACATCTTTCGGTTCTATTACGCCGCGACACC\n+AGGAATTTGGATTCTGATAACCCGGTGCTCACGTCGTGTCGCGAACAAATCTACGAGGAGCCAGGAGCGC\n+TGAAGCCTATCAATTGGGATTACCTTCTTGAACGCTATCCGACCTGCCCTCTGTACAGTCCCGTAGACAT\n+TCATGGATCCTGGATGGGAGCCACCACTGCAAGACAGCACATTCCAGAGCTTTCCCAAGCGGGATCTTGG\n+GTACGTTCTTTCGTCTCCTTGTTTGCGTCCGCAATAGTAGTCATACCGTAAGATTTTCTTCCCGCGGAGA\n+TGGCGGCTCTCCCAGCTAGAACATTATTATAATGGCAAGGGTGGCACACCAAAGGTGCGCACCCAAGGTT\n+CACATGTCCCTTTCACGCTACCCATGAGCGGTAGTTTATCGAAGTTTAGGTGTTTAGTGCTTGAATGAGT\n+CAATACGTTGTCAGAATTCACAATAAAGATCTTCTGTGCGGACTATACACTAGCCGGAGGAACCTAAATT\n+TTTCACGGGGTAAATCCCGAGGTTCGGACATATACCTGGCTTACCCAGCGCGGGCTGACTAGCCCACTTG\n+CGCGAGCGCTGACTCAGGTAAGACAAGAACGAAGTGCAGTATCTCCGGGCTTACTCGAGCGTACACCAGG\n+ACCCCGTAAATTTGCTTACATACAGAAGGTATGCCTTGCATGCCCTTGCTCAGCTAGACGACGATGAACG\n+TAGAGTAAGTGGACTTAGGGCCAACACTCGACGTTTTCGTCTAGACCAAAGGTTTAAAAGCTTTCCGAGT\n+GTCGACTCAAAACATGGGCTAATCTGATTCGTCAGTATCCAGGCCACACTTCAGCGGGTGGAGCACCTAC\n+GATAAACTAAGAAGGTAGTTAGCAAGTGCGTTAGCTTCTGGAAGTTAATATCGGCCTTAGATCGGAGACG\n+CGACTGCCCATAGTCTATCACTTCCCAAACAGAACACTTATAAGTTAAGCGTAATATGTACGAATTTCAA\n+CGTGGGATTGCCAGAGTAGAATGTGTGTAGATTCGCAACAGATAAAAGCTTTCGAAAGCAAAGCGACAGC\n+GCGAACGTGGGTCGGAATCCACATCCGTCCAGGGTTGTAATCACCTTGCGAATATCCCAGAATCGGTGCC\n+ACCGAGGCGACTCGACCACGGATCCGATTCTTCTCATACGGATCAGCAAATTGTAATAGCGGTTTCTTAC\n+AGAGAGGTGGGCACCCTAGCCAGGCTAGGGCGCATAGCGAACCCCTGCCACCTTAGAAAAGTGATGCCTG\n+GAGTTCGACAACGCGCGGCAGAAAGGATTGAGCGTATTTTCGTCTGGGTATTCACCCTATCGCCTATGTA\n+CCGAGAAGTGAGGGAGAGGCGGCGTTCGTCTGTACGCGGCTGGATGTACTGCGAGGTCGTTTGGCCGTTT\n+GCGAGACCACCCATTCGCCGAATGGGTAAAACAAGAACTGCCGTAGGTATATTATAAATCTCGGGATAGA\n+CGGGGGTAAGTTTATGTCTCCGGTTCTTTGTAGTCGTAACTCAGGGGCCCTCGACCCTAGCGAGGCGTTT\n+GTAGTGGGACACCAGGTATCTCCCGGAATAATTCTAGCATCATGAGAGGTGGAGTCGGGATTTGATCAAC\n+CTACATCATTTTTCAGGCAGATACATGGGACCGATCACACCTCACCTCCGTGTTGTCAAAC
TTAAGGACA\n+ACTTAAGGTTGGCTACGCGGCCGCGGATTGCGCTTGCGCGGGAGGGGCGGGGGATATCAGCCAAACGGAG\n+AGTGCTCCGTTGATGAGATTAAGCAGATGTATAAATCATTTGAATGTATGCAATTTGTCTAGATCACCCC\n+TACACATCCTTCTTGAGTAAGTCACAAATACGCCGCAGGTTTGGCGGTTTTTGTCCCCGGCACCTCACTA\n+GATTTTCATCCAACATACGTAGTGACGTTCCGGTGGCTCCGGAAATCGATCCACACTGCCGAGACTGACT\n+TGAGGTCGCGCAGTAGCCCCAACAACTGGGGAGATGGCGGACCCGAGATCAAGGCGCTTTCGCGCCACCT\n+GGGCTTGAGCGTACTAACCCCGGAGACGTGTGTACGGCCCCAGAACATGTATCGTGTGATGAATACCAGT\n+GTATTCCTTCAAAGGTCTAGAGACATACGGCCTGACTCGTTCAAACTAGAGAAGGATTTACCCGCCACAC\n+CTGGGGTCATGGTTGACAACAACAGGAGCCCATGTGTTTATCACTGAATTCTCGGGAATTCCTCGCGAGT\n+CGGAGCACTATCTCGTTCAAACCCGCTAGCCATGCCGTAGAGCGCGCGTGCCAAGGTGCGCGTAGGAGTG\n+CGCTGGCCCCGCATCCGCTAAATATATATCACTCAGCCCTCGCCGCAGCAGAATATCTAATCCCCGCTCG\n+ACTAGCCGAAACCGACACCGTTCAGGGCTGACGACCTGCCTTTTGCACAAAATAACTGAGCCCAGCGATC\n+GTATATTGGAGTCGGACTCAGACGATTCTACGGGTGCTTAAGGCTGACAGGGCGGCAACCATAGGCACGA\n+CTTCGTTGGCGCCGCCCAATAGCTACGTCGCGATGATTAAATCAGCCCTATTCGCCAGGGAGAGAGACAA\n+AATTGGCAATATTTCTACAGGTCTAGCTAACCGACCTAAAGGAGAACCAGGTCGTTGTAGGTCTGTGTAC\n+CACTGGCAGTCCGTCATCGCTGAGAAGGATGTCACGATAGGCTCCGCCTAGTAACTTTTCCCTGCTAGCT\n+ACACCGTTGCGGATATATAGTTTGAGCTCGGGGTCAATGGATTAGAACGTACAACATTGTGTCTGCTTAC\n+GATTGCATAGGTAATAGTCAGCCAGTATATGATCGTACAACACATCTGAAAACTAGTCAAGATACGTGCG\n+TTTGTCCATCTGTATTTACTCAATTCCTTAAACAGCTCGTCCAACGCGGGTCAAAACTGTGGCTTGGTAC\n+AAGTCGTACTTAAACCTCAACCACTACTTGTCAACTCGTTTACGTTTTCTACGTGGAAGTCCGAAAGTGT\n+CATGCGCCCTGCTTCGTATGCAAGCTCACTGATGATCACACCGATGCTTTACGTGCTCTATTGATTGTAA\n+AGTCGGCAATGCGTGTGCCTTAGCTGGTGAGATAATCGTTATAACCACGCTCCATAAGGAGGTTGGTTCC\n+TCTTCCCGATCTATCCCTGGCAGTTGTCGACACTAGCGCTAGTGGCTTCGCAGACTGATGGCAGTGTCTA\n+CCGCGGATTACAGTTAGCTAGCACGCCCCGTTTTTAAGGCTAGAAATGTTGCGATATTTCCGTGAAGCCG\n+AAAGTGAGGTAGGAAACCCCGGTCCTGTAGATCATTAATACTACGTAAGCGATTGTCAACGGAATGGGAA\n+GGCAGTTTATGAACGGCTGGACTGCTTAACCGTTTTCTTGGCACCTATGCCTAGTATGCACAATACGAGA\n+TGGGTTGCGGAGGGACGGAGAATGAGCCTAAGCGAGTTACCTCAGACCACCGGTTACGCTGACCCCTCGG\n+TTAAAGGCGATTCTCGATGCTTTCAGAACTACGGGTCTGTAATCAAAATCTCTATACGTTAGACGTCAGG\n+CGCCGTTCATTAGGAGT
CCGCCATAGGATAAACAAGTCGAGTGTTGTGGTCGCTACTAGTTGCATATTTG\n+AACAATTTGCCGCGGCGTTATGTCCTTCAGTATATTGGCAAATGTGGCTACTGCCATACC\n' |
b |
diff -r 000000000000 -r e6ef29001647 test-data/simulated.bam |
b |
Binary file test-data/simulated.bam has changed |