Previous changeset 8:45288fb1f3f5 (2019-03-28) Next changeset 10:db94eadb92c1 (2021-12-04) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit fcf5ac14629c694f0f64773fab0428b1e78fe156" |
modified:
macros.xml varscan.py |
b |
diff -r 45288fb1f3f5 -r e3f170cc4f95 macros.xml --- a/macros.xml Thu Mar 28 18:19:19 2019 -0400 +++ b/macros.xml Fri Aug 16 15:50:25 2019 -0400 |
b |
@@ -56,9 +56,9 @@ <param argument="--min-coverage" name="min_coverage" type="integer" value="8" min="1" max="200" label="Minimum coverage" help="@HELP@"/> </xml> - <xml name="min_reads2"> + <xml name="min_reads2" token_help="Minimum number (default: 2) of variant-supporting reads at a position required to make a call"> <param argument="--min-reads2" name="min_reads2" type="integer" value="2" min="1" max="200" - label="Minimum supporting reads" help="Minimum number of variant-supporting reads at a position required to make a call"/> + label="Minimum supporting reads" help="@HELP@"/> </xml> <xml name="min_avg_qual"> <param argument="--min-avg-qual" name="min_avg_qual" type="integer" value="15" min="1" max="50" @@ -68,12 +68,12 @@ <xml name="min_var_freq" token_value="0.01"> <param argument="--min-var-freq" name="min_var_freq" type="float" value="@VALUE@" min="0" max="1" label="Minimum variant allele frequency" - help="Minimum variant allele frequency required for calling a variant"/> + help="Minimum variant allele frequency (default: @VALUE@) required for calling a variant"/> </xml> <xml name="min_freq_for_hom"> <param argument="--min-freq-for-hom" name="min_freq_for_hom" type="float" value="0.75" min="0" max="1" label="Minimum homozygous variant allele frequency" - help="Minimum variant allele frequency required for calling a homozygous genotype" /> + help="Minimum variant allele frequency (default: 0.75) required for calling a homozygous genotype" /> </xml> <xml name="p_value" token_value="0.01" token_label="p-value threshold for calling variants" |
b |
diff -r 45288fb1f3f5 -r e3f170cc4f95 varscan.py --- a/varscan.py Thu Mar 28 18:19:19 2019 -0400 +++ b/varscan.py Fri Aug 16 15:50:25 2019 -0400 |
[ |
b'@@ -8,6 +8,7 @@\n import sys\n import tempfile\n import time\n+from collections import namedtuple\n from contextlib import ExitStack\n from functools import partial\n from threading import Thread\n@@ -15,6 +16,332 @@\n import pysam\n \n \n+AlleleStats = namedtuple(\n+ "AlleleStats",\n+ [\n+ \'reads_total\',\n+ \'reads_fw\',\n+ \'reads_rv\',\n+ \'avg_mapqual\',\n+ \'avg_basequal\',\n+ \'avg_dist_from_center\',\n+ \'avg_mismatch_fraction\',\n+ \'avg_mismatch_qualsum\',\n+ \'avg_clipped_len\',\n+ \'avg_dist_from_3prime\'\n+ ]\n+)\n+\n+\n+def _get_allele_specific_pileup_column_stats(pileups, ref_fetch,\n+ ignore_md, ignore_nm,\n+ mm_runs, detect_q2_runs):\n+ var_reads_plus = var_reads_minus = 0\n+ sum_mapping_qualities = 0\n+ sum_base_qualities = 0\n+ sum_dist_from_center = 0\n+ sum_dist_from_3prime = 0\n+ sum_clipped_length = 0\n+ sum_unclipped_length = 0\n+ sum_mismatch_fractions = 0\n+ sum_mismatch_qualities = 0\n+\n+ for p in pileups:\n+ if p.alignment.is_reverse:\n+ var_reads_minus += 1\n+ else:\n+ var_reads_plus += 1\n+ sum_mapping_qualities += p.alignment.mapping_quality\n+ sum_base_qualities += p.alignment.query_qualities[p.query_position]\n+ sum_clipped_length += p.alignment.query_alignment_length\n+ unclipped_length = p.alignment.query_length\n+ sum_unclipped_length += unclipped_length\n+ # The following calculations are all in 1-based coordinates\n+ # with respect to the physical 5\'-end of the read sequence.\n+ if p.alignment.is_reverse:\n+ read_base_pos = unclipped_length - p.query_position\n+ read_first_aln_pos = (\n+ unclipped_length - p.alignment.query_alignment_end + 1\n+ )\n+ read_last_aln_pos = (\n+ unclipped_length - p.alignment.query_alignment_start\n+ )\n+ qualities_3to5prime = p.alignment.query_alignment_qualities\n+ else:\n+ read_base_pos = p.query_position + 1\n+ read_first_aln_pos = p.alignment.query_alignment_start + 1\n+ read_last_aln_pos = p.alignment.query_alignment_end\n+ qualities_3to5prime = reversed(\n+ p.alignment.query_alignment_qualities\n+ )\n+\n+ read_last_effective_aln_pos = read_last_aln_pos\n+ if detect_q2_runs:\n+ # Note: the original bam-readcount algorithm always takes\n+ # terminal q2 runs into account when determining the\n+ # effective 3\'-ends of reads.\n+ # However, q2 runs have no special meaning since Illumina\n+ # pipeline version 1.8 so detecting them is optional\n+ # in this code.\n+ for qual in qualities_3to5prime:\n+ if qual != 2:\n+ break\n+ read_last_effective_aln_pos -= 1\n+\n+ # Note: the original bam-readcount algorithm defines the\n+ # read center as:\n+ # read_center = p.alignment.query_alignment_length / 2\n+ # This is less accurate than the implementation here.\n+ read_center = (read_first_aln_pos + read_last_aln_pos) / 2\n+ sum_dist_from_center += 1 - abs(\n+ read_base_pos - read_center\n+ ) / (read_center - 1)\n+ # To calculate the distance of base positions from the 3\'-ends\n+ # of reads bam-readcount uses the formula:\n+ # sum_dist_from_3prime += abs(\n+ # read_last_effective_aln_pos - read_base_pos\n+ # ) / (unclipped_length - 1)\n+ # , which treats base positions on both sides of the effective\n+ # 3\'-end equally. Since this seems hard to justify, we cap\n+ # the distance calculation at 0 for base positions past the\n+ # effective 3\'-end (which, in turn, should only be possible\n+ # with detection of q2 runs).\n+ if read_last_effective_aln_pos > read_b'..b' / ref_stats.avg_clipped_len\n ) > max_relative_len_diff:\n record.filter.add(\'ReadLenDiff\')\n else:\n@@ -994,6 +1233,8 @@\n instance_args = {\n k: args.pop(k) for k in [\n \'max_depth\',\n+ \'count_orphans\',\n+ \'detect_overlaps\',\n \'min_mapqual\',\n \'min_basequal\',\n \'threads\',\n@@ -1046,7 +1287,7 @@\n )\n p.add_argument(\n \'-s\', \'--split-output\',\n- dest=\'split_output\', action=\'store_true\', default=False,\n+ dest=\'split_output\', action=\'store_true\',\n help=\'indicate that separate output files for SNPs and indels \'\n \'should be generated (original VarScan behavior). If specified, \'\n \'%%T in the --ofile file name will be replaced with "snp" and \'\n@@ -1090,6 +1331,20 @@\n \'default: 8000)\'\n )\n call_group.add_argument(\n+ \'--count-orphans\',\n+ dest=\'count_orphans\', action=\'store_true\',\n+ help=\'Use anomalous read pairs in variant calling \'\n+ \'(samtools mpileup -A option; default: ignore anomalous pairs)\'\n+ )\n+ call_group.add_argument(\n+ \'--no-detect-overlaps\',\n+ dest=\'detect_overlaps\', action=\'store_false\',\n+ help=\'Disable automatic read-pair overlap detection by samtools \'\n+ \'mpileup. Overlap detection tries to avoid counting duplicated \'\n+ \'bases twice during variant calling. \'\n+ \'(samtools mpileup -x option; default: use overlap detection)\'\n+ )\n+ call_group.add_argument(\n \'--min-basequal\',\n dest=\'min_basequal\', type=int,\n default=13,\n@@ -1160,7 +1415,15 @@\n dest=\'min_var_count2_lc\', type=int,\n default=2,\n help=\'Minimum number of variant-supporting reads when depth below \'\n- \'--somatic-p-depth (default: 2)\'\n+ \'--min-var-count2-depth (default: 2)\'\n+ )\n+ filter_group.add_argument(\n+ \'--min-var-count2-depth\',\n+ dest=\'min_var_count2_depth\', type=int,\n+ default=10,\n+ help=\'Combined depth of ref- and variant-supporting reads required to \'\n+ \'apply the --min-var-count filter instead of --min-var-count-lc \'\n+ \'(default: 10)\'\n )\n filter_group.add_argument(\n \'--min-var-freq2\',\n@@ -1169,18 +1432,6 @@\n help=\'Minimum variant allele frequency (default: 0.05)\'\n )\n filter_group.add_argument(\n- \'--max-somatic-p\',\n- dest=\'max_somatic_p\', type=float,\n- default=0.05,\n- help=\'Maximum somatic p-value (default: 0.05)\'\n- )\n- filter_group.add_argument(\n- \'--max-somatic-p-depth\',\n- dest=\'max_somatic_p_depth\', type=int,\n- default=10,\n- help=\'Depth required to run --max-somatic-p filter (default: 10)\'\n- )\n- filter_group.add_argument(\n \'--min-ref-readpos\',\n dest=\'min_ref_readpos\', type=float,\n default=0.1,\n@@ -1209,6 +1460,13 @@\n \'3\\\'end of variant-supporting reads (default: 0.1)\'\n )\n filter_group.add_argument(\n+ \'--detect-q2-runs\',\n+ dest=\'detect_q2_runs\', action=\'store_true\',\n+ help=\'Look for 3\\\'-terminal q2 runs and take their lengths into \'\n+ \'account for determining the effective 3\\\'end of reads \'\n+ \'(default: off)\'\n+ )\n+ filter_group.add_argument(\n \'--min-ref-len\',\n dest=\'min_ref_len\', type=int,\n default=90,\n@@ -1309,5 +1567,13 @@\n default=50,\n help=\'Maximum mismatch quality sum difference (var - ref; default: 50)\'\n )\n+ filter_group.add_argument(\n+ \'--ignore-md\',\n+ dest=\'ignore_md\', action=\'store_true\',\n+ help=\'Do not rely on read MD tag, even if it is present on the mapped \'\n+ \'reads, and recalculate mismatch quality stats from ref \'\n+ \'alignments instead.\'\n+ )\n+\n args = vars(p.parse_args())\n varscan_call(**args)\n' |