# HG changeset patch # User Wolfgang Maier # Date 1441981549 -7200 # Node ID 85214e4428fde1cb7933c169d2eda7353ddbf151 # Parent bdd1995c9e662127a1a42a803f8b18ff05bea304 upgrade to mimodd version 0.1.7.0 diff -r bdd1995c9e66 -r 85214e4428fd annotate_variants.xml --- a/annotate_variants.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/annotate_variants.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,4 +1,4 @@ - + Predict the effects of SNPs and indels on known genes in the reference genome using SnpEff toolshed_macros.xml diff -r bdd1995c9e66 -r 85214e4428fd bamsort.xml --- a/bamsort.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/bamsort.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,4 +1,4 @@ - + Sort a BAM file by coordinates (or names) of the mapped reads toolshed_macros.xml diff -r bdd1995c9e66 -r 85214e4428fd cloudmap.xml --- a/cloudmap.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/cloudmap.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,54 +1,247 @@ - - with the CloudMap series of tools. + + Map causative mutations by multi-variant linkage analysis. 
toolshed_macros.xml mimodd version -q - mimodd cloudmap "$ifile" ${run.mode} "$sample" - - #if $str($run.related_parent_sample): - -r "${run.related_parent_sample}" + mimodd map "${opt.source.ifile}" ${opt.mode} + #if $str($opt.source.sample): + -m "${opt.source.sample}" + #end if + #if $str($opt.source.related_parent_sample): + -r "${opt.source.related_parent_sample}" + #end if + #if $str($opt.source.unrelated_parent_sample): + -u "${opt.source.unrelated_parent_sample}" + #end if + $opt.source.infer_missing + -o "$ofile" + #if $str($opt.source.seqdict_required.required) == "yes": + -s "${opt.source.seqdict_required.seqdict}" + #end if + $opt.source.norm + #if $len($opt.source.bin_sizes): + --bin-sizes + #for $size in $opt.source.bin_sizes: + "${size.bin_size}" + #end for + #end if + #if $str($opt.source.tabfile): + $str($opt.source.tabfile) $tfile #end if - #if $str($run.unrelated_parent_sample): - -u "${run.unrelated_parent_sample}" + #if $str($opt.source.plotopts.plots): + $str($opt.source.plotopts.plots) "$pfile" + $str($opt.source.plotopts.xlim) + #if $str($opt.source.plotopts.hylim): + --ylim-hist $str($opt.source.plotopts.hylim) + #end if + #if $str($opt.source.plotopts.hcols) and $len($opt.source.plotopts.hcols): + --hist-colors + #for $color in $opt.source.plotopts.hcols: + "${color.hcolor}" + #end for + #end if + #if $str($opt.source.plotopts.sylim): + --ylim-scatter $str($opt.source.plotopts.sylim) + #end if + #if $str($opt.source.plotopts.pcol): + --points-color "$str($opt.source.plotopts.pcol)" + #end if + #if $str($opt.source.plotopts.lcol): + --loess-color "$str($opt.source.plotopts.lcol)" + #end if + #if $str($opt.source.plotopts.span): + --loess-span "$str($opt.source.plotopts.span)" + #end if #end if - $run.infer_missing - -o "$ofile" - - #if $seqdict: - -s "$dictfile" - #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - + + - - - + + + + + + + + + + + + + + + + + + + + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - seqdict + + + (opt['source']['tabfile']) + + + (opt['source']['plotopts']['plots']) @@ -57,29 +250,93 @@ **What it does** -The purpose of this tool is to provide compatibility of the MiModD analysis workflow with the external `CloudMap`_ *EMS Variant Density Mapping*, *Variant Discovery Mapping* and *Hawaiian Variant Mapping* tools. These tools complement MiModD by providing easily interpreted visualizations of mapping-by-sequencing analysis workflows. +This tool is a complete rewrite of and improves the EMS Variant Density and Hawaiian Variant Mapping tools of `CloudMap`_. It is the most downstream tool in `mapping-by-sequencing analysis workflows in MiModD`_. + +It can be used to analyze and visualize the inheritance pattern of variants detected and selected by other MiModD tools or as an alternative (and more versatile) plotting engine for data generated with `CloudMap`_. + +------------- + +**Usage Modes:** -The tool converts a VCF file as generated by the *Extract Variant Sites* or *VCF Filter* tools to the format expected by the *CloudMap* series of tools. +This tool can be run in one of two different modes depending on the type of mapping analysis that should be performed: + +1) *Simple Variant Density (SVD) Mapping* mode analyzes the density of variants along the reference genome by dividing each chromosome into regions of user-defined size (bins) and counting the variants found in each bin. + + All variants listed in the input file are analyzed in this mode, which means that as input you will typically want to use filtered lists of variants (as produced by the VCF Filter tool). 
+ + The aim of SVD analysis is to identify clusters of variants in an outcrossed strain carrying a selectable unknown mutation, which is interpreted as linkage between the corresponding genomic region and the unknown mutation. + + This mode corresponds roughly to EMS Variant Density Mapping in CloudMap. -Optionally, it also extracts the chromosome names and sizes and reports them in the *CloudMap* *species configuration file* format. -Such a file is required as input to the current versions of the *CloudMap* *Hawaiian* and *Variant Density* mapping tools, if you are working with a species other than the natively supported ones (i.e., other than *C. elegans*, *A. thaliana* or *Brachypodium distachyon*). +2) *Variant Allele Frequency (VAF) Mapping* mode analyzes the inheritance pattern in cross-progeny at sites, at which the parents are homozygous for different alleles. + + The aim of VAF analysis is to identify clusters of variants with (near) homozygous inheritance in an F2 (or later generation) population obtained from a cross between a strain carrying a selectable unknown mutation and an unrelated mapping strain. Such a cluster is interpreted as linkage between the corresponding genomic region and the unknown mutation selected for in the F2 generation. + + This mode corresponds roughly to Hawaiian Variant Mapping in CloudMap, but can simultaneously take into account non-reference alleles found in either parent strain (CloudMap users may think of this as a combined Hawaiian Variant and Variant Discovery Mapping analysis). + +------------- + +**Input:** -To use the output datasets of the tool with *CloudMap*, you only have to upload them to any public Galaxy server that hosts *CloudMap* like, e.g., the main Galaxy server at https://usegalaxy.org . +Valid inputs for this tool are VCF files (any VCF file in SVD mode, a MiModD-generated multi-sample VCF file in VAF mode) or a CloudMap tabular report file as generated by the Hawaiian Variant Mapping tool. 
Alternatively, the tool can generate (in both modes) its own tabular report file, which can be used as input instead of the original VCF file when rerunning the tool with different plotting parameters to reduce analysis time. + +.. class:: infomark + + CloudMap-generated tabular input files require, as additional input, a CloudMap-style sequence dictionary (even if the original CloudMap analysis was possible without one) as described in the original CloudMap paper. This file has a simple two-column tab-delimited format, in which each line lists the chromosome name (as it appears in the input VCF file) and the up-rounded length of the chromosome in megabases. + +------------- + +**Output:** + +The tool produces up to three output files: -**Notes:** +1) a default tabular file of binned variant counts that can be used to plot the data with external software such as Excel, + + +2) an optional pdf containing linkage plots, which should look just like the plots produced by CloudMap, but are optimized for file size and display speed and offer more user-configurable parameters and + + +3) an optional tabular per-variant report file, which can be configured to be either a valid input file for the corresponding original CloudMap tool (for users who really, really want to continue using CloudMap for plotting) or to be reusable in fast reruns of the tool (which can be useful to experiment with different plotting parameters). -1) Simple Variant Density (SVD) Mapping mode generates output for use with the CloudMap EMS Variant Density Mapping tool. The aim of SVD analysis is to identify clusters of variants that appear linked to a mutant phenotype selected for during several rounds of outcrossing or backcrossing to a non-mutagenized strain. The "mapping sample" is the out-/backcrossed strain and only its variants are taken into account for the analysis. +------------- -.. 
class:: warningmark +**Settings:** + +1) Analysis settings - EMS Variant Density Mapping is currently limited to *C. elegans* and other species with six chromosomes on the *CloudMap* side. + *bin size to analyze variants in* - determines the width of the regions along each chromosome, in which variants are counted and analyzed together. + + Several bin sizes can be specified and for each size you will get a corresponding report section in the binned variant counts file and a histogram plot in the linkage plots file. + + *normalize variant counts to bin-width* - if selected (as per default) the variant counts for different bin sizes are not absolute, but normalized to the bin width + + *sample names (in VAF mode only)* - to analyze inheritance patterns, VAF mode needs information about the relationship between the samples defined in the input VCF file: + + The *mapping sample name* should be set to the name of the sample for which the inheritance pattern is to be analyzed (the pooled progeny population). + + The *name of the related sample* should be that of the parent sample that carried and brought in the unknown mutation to be mapped (or, alternatively, that of a closely related ancestor). + + Finally, the *name of the unrelated sample* should be that of the other parent strain used in the cross. + + At least one of the parent samples MUST be specified, but if the input file contains variant information for both parents, they can be analyzed together for higher mapping accuracy. If you are reanalyzing a tabular report file from a previous tool run or from CloudMap, the association between variants and samples is already incorporated into the input file and cannot be specified again. -2) Variant Allele Frequency (VAF) Mapping mode generates output for use with the CloudMap Variant Discovery or Hawaiian Variant Mapping tools. 
The aim of VAF analysis is to identify clusters of variants with (near) homozygous inheritance in a F2 population obtained from a cross between a mutant strain of interest and an unrelated mapping strain. Here, the "mapping sample" is the pooled F2 population. To analyze inheritance patterns this mode **requires either** a list of variants that could have been inherited through the mapping strain, i.e. the "unrelated parent strain", or through the mutant parent, i.e. through the "related parent strain". If variants are available for both parents, they can be analyzed together for higher mapping accuracy. +2) Graphical output settings + + .. class:: warningmark + + To be able to generate plots the system running MiModD needs to have the statistical programming environment R and its Python interface rpy2 installed. + -3) More information on combining MiModD and CloudMap in mapping-by-sequencing analyses can be found in the `corresponding section of the MiModD User Guide`_. + *y-axes scaling* - if you want to override the defaults + + *x-axis scaling* - choose *preserve relative contig sizes* if you want the largest chromosome to fit the page width and smaller chromosomes to appear according to their relative size or choose *scale each contig to fit the plot width* if all chromosomes should exploit the available space + + *span value to be used in calculating the Loess regression line* - this value determines the degree of smoothing of the regression line through the scatterplot data. Information on loess regression and the loess span parameter can be found at http://en.wikipedia.org/wiki/Local_regression. The default is 0.1 as in CloudMap. + + *colors used for plotting* - can be selected freely from the offered palette. For histogram colors, the list of selected colors will be used to provide the colors for the different histograms plotted. 
If fewer colors than histograms (determined by the number of bin sizes selected) are specified, colors from the list will be recycled. + .. _CloudMap: https://usegalaxy.org/u/gm2123/p/cloudmap -.. _corresponding section of the MiModD User Guide: http://mimodd.readthedocs.org/en/latest/cloudmap.html - +.. _mapping-by-sequencing analysis workflows in MiModD: http://mimodd.readthedocs.org/en/latest/cloudmap.html diff -r bdd1995c9e66 -r 85214e4428fd convert.xml --- a/convert.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/convert.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,4 +1,4 @@ - + between different sequence data formats toolshed_macros.xml diff -r bdd1995c9e66 -r 85214e4428fd covstats.xml --- a/covstats.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/covstats.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,4 +1,4 @@ - + Calculate coverage statistics for a BCF file as generated by the Variant Calling tool toolshed_macros.xml diff -r bdd1995c9e66 -r 85214e4428fd deletion_predictor.xml --- a/deletion_predictor.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/deletion_predictor.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,4 +1,4 @@ - + Predicts deletions in one or more aligned read samples based on coverage of the reference genome and on insert sizes toolshed_macros.xml diff -r bdd1995c9e66 -r 85214e4428fd fileinfo.xml --- a/fileinfo.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/fileinfo.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,4 +1,4 @@ - + for supported data formats. 
toolshed_macros.xml diff -r bdd1995c9e66 -r 85214e4428fd reheader.xml --- a/reheader.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/reheader.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,4 +1,4 @@ - + From a BAM file generate a new file with the original header (if any) replaced or modified by that found in a second SAM file mimodd version -q diff -r bdd1995c9e66 -r 85214e4428fd sam_header.xml --- a/sam_header.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/sam_header.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,4 +1,4 @@ - + Create a SAM format header from run metadata for sample annotation. toolshed_macros.xml diff -r bdd1995c9e66 -r 85214e4428fd snap_caller.xml --- a/snap_caller.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/snap_caller.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,4 +1,4 @@ - + Map sequence reads to a reference genome using SNAP toolshed_macros.xml @@ -18,7 +18,7 @@ #end if --ofile '$outputfile' --iformat ${i.mode_choose.input.iformat} --oformat $oformat --idx-seedsize '$set.seedsize' ---idx-slack '$set.slack' --maxseeds '$set.maxseeds' --maxhits '$set.maxhits' --clipping=$set.clipping --maxdist '$set.maxdist' --confdiff '$set.confdiff' --confadapt '$set.confadpt' +--idx-slack '$set.slack' --maxseeds '$set.maxseeds' --maxhits '$set.maxhits' --clipping $set.clipping --maxdist '$set.maxdist' --confdiff '$set.confdiff' --confadapt '$set.confadpt' #if $i.mode_choose.input.header: --header '${i.mode_choose.input.header}' #end if @@ -168,9 +168,9 @@ - - - + + + diff -r bdd1995c9e66 -r 85214e4428fd snp_caller_caller.xml --- a/snp_caller_caller.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/snp_caller_caller.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,4 +1,4 @@ - + From a reference and aligned reads generate a BCF file with position-specific variant likelihoods and coverage information toolshed_macros.xml diff -r bdd1995c9e66 -r 85214e4428fd snpeff_genomes.xml --- a/snpeff_genomes.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/snpeff_genomes.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,4 +1,4 @@ - + 
Checks the local SnpEff installation to compile a list of currently installed genomes toolshed_macros.xml diff -r bdd1995c9e66 -r 85214e4428fd tool_dependencies.xml --- a/tool_dependencies.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/tool_dependencies.xml Fri Sep 11 16:25:49 2015 +0200 @@ -7,10 +7,10 @@ - + - http://sourceforge.net/projects/mimodd/files/MiModD-0.1.6.1.tar.gz + http://sourceforge.net/projects/mimodd/files/MiModD-0.1.7.0.tar.gz diff -r bdd1995c9e66 -r 85214e4428fd toolshed_macros.xml --- a/toolshed_macros.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/toolshed_macros.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,7 +1,7 @@ - mimodd + mimodd diff -r bdd1995c9e66 -r 85214e4428fd varextract.xml --- a/varextract.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/varextract.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,4 +1,4 @@ - + from a BCF file toolshed_macros.xml diff -r bdd1995c9e66 -r 85214e4428fd vcf_filter.xml --- a/vcf_filter.xml Tue Jul 28 23:21:11 2015 +0200 +++ b/vcf_filter.xml Fri Sep 11 16:25:49 2015 +0200 @@ -1,4 +1,4 @@ - + Extracts lines from a vcf variant file based on field-specific filters toolshed_macros.xml