Previous changeset 8:b79bb8b09822 (2019-03-28) Next changeset 10:a57606054bd7 (2020-01-18) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit fcf5ac14629c694f0f64773fab0428b1e78fe156" |
modified:
macros.xml varscan.py varscan_somatic.xml |
b |
diff -r b79bb8b09822 -r 4e97191a1ff7 macros.xml --- a/macros.xml Thu Mar 28 18:19:00 2019 -0400 +++ b/macros.xml Fri Aug 16 15:49:54 2019 -0400 |
b |
@@ -56,9 +56,9 @@ <param argument="--min-coverage" name="min_coverage" type="integer" value="8" min="1" max="200" label="Minimum coverage" help="@HELP@"/> </xml> - <xml name="min_reads2"> + <xml name="min_reads2" token_help="Minimum number (default: 2) of variant-supporting reads at a position required to make a call"> <param argument="--min-reads2" name="min_reads2" type="integer" value="2" min="1" max="200" - label="Minimum supporting reads" help="Minimum number of variant-supporting reads at a position required to make a call"/> + label="Minimum supporting reads" help="@HELP@"/> </xml> <xml name="min_avg_qual"> <param argument="--min-avg-qual" name="min_avg_qual" type="integer" value="15" min="1" max="50" @@ -68,12 +68,12 @@ <xml name="min_var_freq" token_value="0.01"> <param argument="--min-var-freq" name="min_var_freq" type="float" value="@VALUE@" min="0" max="1" label="Minimum variant allele frequency" - help="Minimum variant allele frequency required for calling a variant"/> + help="Minimum variant allele frequency (default: @VALUE@) required for calling a variant"/> </xml> <xml name="min_freq_for_hom"> <param argument="--min-freq-for-hom" name="min_freq_for_hom" type="float" value="0.75" min="0" max="1" label="Minimum homozygous variant allele frequency" - help="Minimum variant allele frequency required for calling a homozygous genotype" /> + help="Minimum variant allele frequency (default: 0.75) required for calling a homozygous genotype" /> </xml> <xml name="p_value" token_value="0.01" token_label="p-value threshold for calling variants" |
b |
diff -r b79bb8b09822 -r 4e97191a1ff7 varscan.py --- a/varscan.py Thu Mar 28 18:19:00 2019 -0400 +++ b/varscan.py Fri Aug 16 15:49:54 2019 -0400 |
[ |
b'@@ -8,6 +8,7 @@\n import sys\n import tempfile\n import time\n+from collections import namedtuple\n from contextlib import ExitStack\n from functools import partial\n from threading import Thread\n@@ -15,6 +16,332 @@\n import pysam\n \n \n+AlleleStats = namedtuple(\n+ "AlleleStats",\n+ [\n+ \'reads_total\',\n+ \'reads_fw\',\n+ \'reads_rv\',\n+ \'avg_mapqual\',\n+ \'avg_basequal\',\n+ \'avg_dist_from_center\',\n+ \'avg_mismatch_fraction\',\n+ \'avg_mismatch_qualsum\',\n+ \'avg_clipped_len\',\n+ \'avg_dist_from_3prime\'\n+ ]\n+)\n+\n+\n+def _get_allele_specific_pileup_column_stats(pileups, ref_fetch,\n+ ignore_md, ignore_nm,\n+ mm_runs, detect_q2_runs):\n+ var_reads_plus = var_reads_minus = 0\n+ sum_mapping_qualities = 0\n+ sum_base_qualities = 0\n+ sum_dist_from_center = 0\n+ sum_dist_from_3prime = 0\n+ sum_clipped_length = 0\n+ sum_unclipped_length = 0\n+ sum_mismatch_fractions = 0\n+ sum_mismatch_qualities = 0\n+\n+ for p in pileups:\n+ if p.alignment.is_reverse:\n+ var_reads_minus += 1\n+ else:\n+ var_reads_plus += 1\n+ sum_mapping_qualities += p.alignment.mapping_quality\n+ sum_base_qualities += p.alignment.query_qualities[p.query_position]\n+ sum_clipped_length += p.alignment.query_alignment_length\n+ unclipped_length = p.alignment.query_length\n+ sum_unclipped_length += unclipped_length\n+ # The following calculations are all in 1-based coordinates\n+ # with respect to the physical 5\'-end of the read sequence.\n+ if p.alignment.is_reverse:\n+ read_base_pos = unclipped_length - p.query_position\n+ read_first_aln_pos = (\n+ unclipped_length - p.alignment.query_alignment_end + 1\n+ )\n+ read_last_aln_pos = (\n+ unclipped_length - p.alignment.query_alignment_start\n+ )\n+ qualities_3to5prime = p.alignment.query_alignment_qualities\n+ else:\n+ read_base_pos = p.query_position + 1\n+ read_first_aln_pos = p.alignment.query_alignment_start + 1\n+ read_last_aln_pos = p.alignment.query_alignment_end\n+ qualities_3to5prime = reversed(\n+ p.alignment.query_alignment_qualities\n+ )\n+\n+ read_last_effective_aln_pos = read_last_aln_pos\n+ if detect_q2_runs:\n+ # Note: the original bam-readcount algorithm always takes\n+ # terminal q2 runs into account when determining the\n+ # effective 3\'-ends of reads.\n+ # However, q2 runs have no special meaning since Illumina\n+ # pipeline version 1.8 so detecting them is optional\n+ # in this code.\n+ for qual in qualities_3to5prime:\n+ if qual != 2:\n+ break\n+ read_last_effective_aln_pos -= 1\n+\n+ # Note: the original bam-readcount algorithm defines the\n+ # read center as:\n+ # read_center = p.alignment.query_alignment_length / 2\n+ # This is less accurate than the implementation here.\n+ read_center = (read_first_aln_pos + read_last_aln_pos) / 2\n+ sum_dist_from_center += 1 - abs(\n+ read_base_pos - read_center\n+ ) / (read_center - 1)\n+ # To calculate the distance of base positions from the 3\'-ends\n+ # of reads bam-readcount uses the formula:\n+ # sum_dist_from_3prime += abs(\n+ # read_last_effective_aln_pos - read_base_pos\n+ # ) / (unclipped_length - 1)\n+ # , which treats base positions on both sides of the effective\n+ # 3\'-end equally. Since this seems hard to justify, we cap\n+ # the distance calculation at 0 for base positions past the\n+ # effective 3\'-end (which, in turn, should only be possible\n+ # with detection of q2 runs).\n+ if read_last_effective_aln_pos > read_b'..b' / ref_stats.avg_clipped_len\n ) > max_relative_len_diff:\n record.filter.add(\'ReadLenDiff\')\n else:\n@@ -994,6 +1233,8 @@\n instance_args = {\n k: args.pop(k) for k in [\n \'max_depth\',\n+ \'count_orphans\',\n+ \'detect_overlaps\',\n \'min_mapqual\',\n \'min_basequal\',\n \'threads\',\n@@ -1046,7 +1287,7 @@\n )\n p.add_argument(\n \'-s\', \'--split-output\',\n- dest=\'split_output\', action=\'store_true\', default=False,\n+ dest=\'split_output\', action=\'store_true\',\n help=\'indicate that separate output files for SNPs and indels \'\n \'should be generated (original VarScan behavior). If specified, \'\n \'%%T in the --ofile file name will be replaced with "snp" and \'\n@@ -1090,6 +1331,20 @@\n \'default: 8000)\'\n )\n call_group.add_argument(\n+ \'--count-orphans\',\n+ dest=\'count_orphans\', action=\'store_true\',\n+ help=\'Use anomalous read pairs in variant calling \'\n+ \'(samtools mpileup -A option; default: ignore anomalous pairs)\'\n+ )\n+ call_group.add_argument(\n+ \'--no-detect-overlaps\',\n+ dest=\'detect_overlaps\', action=\'store_false\',\n+ help=\'Disable automatic read-pair overlap detection by samtools \'\n+ \'mpileup. Overlap detection tries to avoid counting duplicated \'\n+ \'bases twice during variant calling. \'\n+ \'(samtools mpileup -x option; default: use overlap detection)\'\n+ )\n+ call_group.add_argument(\n \'--min-basequal\',\n dest=\'min_basequal\', type=int,\n default=13,\n@@ -1160,7 +1415,15 @@\n dest=\'min_var_count2_lc\', type=int,\n default=2,\n help=\'Minimum number of variant-supporting reads when depth below \'\n- \'--somatic-p-depth (default: 2)\'\n+ \'--min-var-count2-depth (default: 2)\'\n+ )\n+ filter_group.add_argument(\n+ \'--min-var-count2-depth\',\n+ dest=\'min_var_count2_depth\', type=int,\n+ default=10,\n+ help=\'Combined depth of ref- and variant-supporting reads required to \'\n+ \'apply the --min-var-count filter instead of --min-var-count-lc \'\n+ \'(default: 10)\'\n )\n filter_group.add_argument(\n \'--min-var-freq2\',\n@@ -1169,18 +1432,6 @@\n help=\'Minimum variant allele frequency (default: 0.05)\'\n )\n filter_group.add_argument(\n- \'--max-somatic-p\',\n- dest=\'max_somatic_p\', type=float,\n- default=0.05,\n- help=\'Maximum somatic p-value (default: 0.05)\'\n- )\n- filter_group.add_argument(\n- \'--max-somatic-p-depth\',\n- dest=\'max_somatic_p_depth\', type=int,\n- default=10,\n- help=\'Depth required to run --max-somatic-p filter (default: 10)\'\n- )\n- filter_group.add_argument(\n \'--min-ref-readpos\',\n dest=\'min_ref_readpos\', type=float,\n default=0.1,\n@@ -1209,6 +1460,13 @@\n \'3\\\'end of variant-supporting reads (default: 0.1)\'\n )\n filter_group.add_argument(\n+ \'--detect-q2-runs\',\n+ dest=\'detect_q2_runs\', action=\'store_true\',\n+ help=\'Look for 3\\\'-terminal q2 runs and take their lengths into \'\n+ \'account for determining the effective 3\\\'end of reads \'\n+ \'(default: off)\'\n+ )\n+ filter_group.add_argument(\n \'--min-ref-len\',\n dest=\'min_ref_len\', type=int,\n default=90,\n@@ -1309,5 +1567,13 @@\n default=50,\n help=\'Maximum mismatch quality sum difference (var - ref; default: 50)\'\n )\n+ filter_group.add_argument(\n+ \'--ignore-md\',\n+ dest=\'ignore_md\', action=\'store_true\',\n+ help=\'Do not rely on read MD tag, even if it is present on the mapped \'\n+ \'reads, and recalculate mismatch quality stats from ref \'\n+ \'alignments instead.\'\n+ )\n+\n args = vars(p.parse_args())\n varscan_call(**args)\n' |
b |
diff -r b79bb8b09822 -r 4e97191a1ff7 varscan_somatic.xml --- a/varscan_somatic.xml Thu Mar 28 18:19:00 2019 -0400 +++ b/varscan_somatic.xml Fri Aug 16 15:49:54 2019 -0400 |
b |
b'@@ -1,4 +1,4 @@\n-<tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.4">\n+<tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.5">\n <description>Call germline/somatic and LOH variants from tumor-normal sample pairs</description>\n <macros>\n <import>macros.xml</import>\n@@ -28,6 +28,16 @@\n text="##FILTER=<ID=RefDist3,Description=" />\n </assert_contents>\n </macro>\n+ <macro name="filter_compat_options">\n+ <section name="experts_only" title="Compatibility options for experts" expanded="false">\n+ <param name="compat_opts" type="select" display="checkboxes" multiple="true" optional="true"\n+ label="Compatibility Options for Posterior Variant Filtering"\n+ help="">\n+ <option value="--ignore-md" selected="true">Always determine mismatch quality statistics from recalculated read to reference alignments. Ignore read MD tags if present.</option>\n+ <option value="--detect-q2-runs" selected="false">Treat runs of base qualities of value 2 at the 3\' end of reads as quality control indicator (Illumina 1.5 compatibility setting)</option>\n+ </param>\n+ </section>\n+ </macro>\n </macros>\n <expand macro="requirements">\n <requirement type="package" version="3.6.7">python</requirement>\n@@ -63,8 +73,11 @@\n --threads \\${GALAXY_SLOTS:-2}\n #if str($call_params.settings) == "custom":\n ## samtools mpileup parameters\n- --min-basequal ${call_params.min_avg_qual}\n- --min-mapqual ${call_params.min_mapqual}\n+ --min-basequal ${call_params.read_selection.min_basequal}\n+ --min-mapqual ${call_params.read_selection.min_mapqual}\n+ ${call_params.read_selection.count_orphans}\n+ ${call_params.read_selection.detect_overlaps}\n+ --max-pileup-depth ${call_params.read_selection.max_pileup_depth}\n ## VarScan parameters\n --min-coverage ${call_params.min_coverage}\n --min-var-count ${call_params.min_reads2}\n@@ -75,56 +88,61 @@\n #end if\n #if str($filter_params.settings) == "no_filter":\n --no-filters\n- #elif str($filter_params.settings) == "dream3_settings":\n- --min-var-count2 3\n- --min-var-count2-lc 1\n- --min-var-freq2 0.05\n- --max-somatic-p 0.05\n- --max-somatic-p-depth 10\n- --min-ref-readpos 0.2\n- --min-var-readpos 0.15\n- --min-ref-dist3 0.2\n- --min-var-dist3 0.15\n- --min-ref-len 90\n- --min-var-len 90\n- --max-len-diff 0.05\n- --min-strandedness 0\n- --min-strand-reads 5\n- --min-ref-basequal 15\n- --min-var-basequal 30\n- --max-basequal-diff 50\n- --min-ref-mapqual 20\n- --min-var-mapqual 30\n- --max-mapqual-diff 10\n- --max-ref-mmqs 50\n- --max-var-mmqs 100\n- --min-mmqs-diff 0\n- --max-mmqs-diff 50\n- #elif str($filter_params.settings) == "custom":\n- --min-var-count2 ${filter_params.min_var_count}\n- --min-var-count2-lc ${filter_params.min_var_count_lc}\n- --min-var-freq2 ${filter_params.min_var_freq2}\n- --max-somatic-p ${filter_params.max_somatic_p}\n- --max-somatic-p-depth ${filter_params.max_somatic_p_depth}\n- --min-ref-readpos ${filter_params.min_ref_readpos}\n- --min-var-readpos ${filter_params.min_var_readpos}\n- --min-ref-dist3 ${filter_params.min_ref_dist3}\n- --min-var-dist3 ${filter_params.min_var_dist3}\n- '..b'ng the variant and the reference allele" />\n+ help="The maximum difference (default: 50) in the mismatch base quality sums (var - ref) allowed between reads supporting the variant and the reference allele" />\n+ <expand macro="filter_compat_options" />\n </when>\n </conditional>\n </inputs>\n@@ -286,6 +319,7 @@\n </outputs>\n <tests>\n <test expect_num_outputs="1">\n+ <!-- run with default settings and genome from history -->\n <conditional name="reference">\n <param name="source" value="history" />\n <param name="genome" value="hg19_chrM.fa" />\n@@ -305,6 +339,7 @@\n </output>\n </test>\n <test expect_num_outputs="1">\n+ <!-- run with default settings and cached genome -->\n <conditional name="reference">\n <param name="source" value="cached" />\n <param name="genome" value="hg19mito" />\n@@ -324,6 +359,7 @@\n </output>\n </test>\n <test expect_num_outputs="2">\n+ <!-- run with default settings and split output -->\n <conditional name="reference">\n <param name="source" value="history" />\n <param name="genome" value="hg19_chrM.fa" />\n@@ -347,6 +383,7 @@\n </output>\n </test>\n <test expect_num_outputs="1">\n+ <!-- run with custom params for variant calling -->\n <conditional name="reference">\n <param name="source" value="history" />\n <param name="genome" value="hg19_chrM.fa" />\n@@ -355,10 +392,12 @@\n <param name="tumor_bam" value="tumor_chrM.bam" />\n <param name="split_output" value="false" />\n <conditional name="call_params">\n+ <section name="read_selection">\n+ <param name="min_basequal" value="5" />\n+ </section>\n <param name="settings" value="custom" />\n <param name="min_coverage" value="2" />\n <param name="min_reads2" value="1" />\n- <param name="min_avg_qual" value="5" />\n <param name="min_var_freq" value="0.01" />\n <param name="min_freq_for_hom" value="0.66" />\n <param name="p_value" value="0.97" />\n@@ -395,6 +434,7 @@\n </output>\n </test>\n <test expect_num_outputs="1">\n+ <!-- run with preconfigured dream3 post-filter settings -->\n <conditional name="reference">\n <param name="source" value="history" />\n <param name="genome" value="hg19_chrM.fa" />\n@@ -414,6 +454,7 @@\n </output>\n </test>\n <test expect_num_outputs="1">\n+ <!-- run without post-filters -->\n <conditional name="reference">\n <param name="source" value="history" />\n <param name="genome" value="hg19_chrM.fa" />\n@@ -433,6 +474,7 @@\n </output>\n </test>\n <test expect_num_outputs="1">\n+ <!-- run with custom post-filters -->\n <conditional name="reference">\n <param name="source" value="history" />\n <param name="genome" value="hg19_chrM.fa" />\n@@ -490,13 +532,6 @@\n This tool wraps the functionality of the ``varscan somatic`` and the\n ``varscan fpfilter`` command line tools.\n \n-.. class:: infomark\n-\n- The wrapper aims at providing the same functionality as the\n- ``varscan fpfilter`` tool, but implements it using ``pysam`` internally.\n- Note that, as one limitation compared to the original ``varscan`` tool,\n- the current version does not apply filters to indels!\n-\n The tool is designed to detect genetic variants in a **pair of samples**\n representing normal and tumor tissue from the same individual. It classifies\n the variants, according to their most likely origin, as **somatic** (variant is\n' |