Mercurial > repos > fcaramia > jointsnvmix
diff joint_snv_mix.xml @ 0:a1034918ab9b draft
Uploaded
author | fcaramia |
---|---|
date | Thu, 20 Jun 2013 00:03:08 -0400 |
parents | |
children | 26953f1c8af2 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/joint_snv_mix.xml Thu Jun 20 00:03:08 2013 -0400 @@ -0,0 +1,277 @@ +<tool id="joint_snv_mix" name="Joint SNV Mix" version="0.7.5"> + <description>classify germline and somatic mutations</description> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="0.19.1">cython</requirement> + <requirement type="package" version="0.5">pysam</requirement> + <requirement type="package" version="0.1.18">samtools</requirement> + <requirement type="package" version="0.7.5">jointsnvmix</requirement> + </requirements> + <command interpreter="perl"> + + joint_snv_mix.pl + + "ACTION::${option.option}" + + "REFGENOME::$refFile.fields.path" + "BAMNORMAL::$normal_file" + "BAMTUMOR::$tumor_file" + + + #if str($option.option) == "classify": + #if ($option.parameters): + "OPTION::--parameters_file $option.parameters" + #end if + "OPTION::--out_file $output" + "OPTION::--somatic_threshold $option.somatic_threshold" + + #end if + + #if str($option.option) == "train": + #if ($option.priors): + "OPTION::--priors_file $option.priors" + #end if + "OUTPUT::$output" + "OPTION::--convergence_threshold $option.convergence_threshold" + "OPTION::--max_iters $option.max_iters" + + #end if + #if ($positions_file): + "OPTION::--positions_file $positions_file" + #end if + + "OPTION::--min_base_qual $min_base_quality" + "OPTION::--min_map_qual $min_map_quality" + "OPTION::--model $model" + #if ($chromosome): + "OPTION::--chromosome $chromosome" + #end if + + + + </command> + <inputs> + <param name="refFile" type="select" label="Select a reference genome" optional="false"> + <options from_data_table="all_fasta"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + </param> + <param name="normal_file" type="data" format="bam" label="Normal Sample " help="Bam" /> + <param name="tumor_file" type="data" format="bam" label="Tumor Sample" help="Bam" /> + <param name="model" type="select" label="Model" help="" optional="true"> + <option value="binomial">binomial</option> + <option value="snvmix2" selected="true">snvmix2</option> + <option value="beta_binomial">beta binomial</option> + </param> + <param name="positions_file" type="data" format="txt" label="Positions file" help="Filter positions" optional="true"/> + <param name="min_map_quality" type="text" label="Min map quality" help="Filter reads" value="0"/> + <param name="min_base_quality" type="text" label="Min base quality" help="Filter reads" value="0"/> + <param name="chromosome" type="text" label="Chromosome" help="a chromosome to analyse, leave blank for all"/> + + + <conditional name="option"> + <param name="option" type="select" label="Action" help="" optional="true"> + <option value="train" selected="true">Train</option> + <option value="classify">Classify</option> + </param> + + <when value="train"> + + <param name="priors" type="data" format="txt" label="Prior Probabilities" optional="true"/> + <param name="initial_parameters" type="data" format="txt" label="Initial Parameters" optional="true"/> + <param name="convergence_threshold" type="text" label="Convergence Threshold" value="1e-6"/> + <param name="max_iters" type="text" label="Max number of training iterations" value="1000"/> + + </when> + <when value="classify"> + + <param name="parameters" type="data" format="txt" label="Classify Parameters" help="" optional="true" /> + <param name="somatic_threshold" type="text" label="Somatic Threshold" help="filter by probability" value="0.0"/> + </when> + + </conditional> + + + </inputs> + <outputs> + <data type="data" format="txt" name="output" label="${tool.name} result on ${on_string}"/> + </outputs> + + <help> + +.. class:: infomark + +**What it does** + +:: + + JointSNVMix implements a probabilistic graphical model to analyse sequence data + from tumour/normal pairs. The model draws statistical strength by analysing both + genome jointly to more accurately classify germline and somatic mutations. + + + Train + + The SnvMix family of models are complete generative models of the data. + As such the model parameters can be learned using the Expectation Maximisation + (EM) algorithm. The train command allows this to be done. + + All methods require that a file with the parameters for the prior densities, + and an initial set of parameters be passed in. Templates for these files can + be found in the config/ directory which ships with the package. If you are + unsure about setting the priors or parameter values these files should suffice. + + The train command will produce a parameters file suitable for use with the + classification command. Training is highly recommended to achieve optimal + performance when using SnvMix based model. + + To reduce memory consumption all subcommands of train take an optional --skip-size flag. + This is the number of positions to skip over before sampling a position for the training set. + Smaller values will lead to larger training sets which will require more memory, + but should yield better parameter estimates. + + All subcommands of train also take optional parameters for minimum depth a + position has in the tumour and normal to be used for training. Higher depth + sites should give more robust estimates of the parameters. The default values + of these are likely fine. + + + Classify + + The classify command is used for analysing tumour/normal paired data and + computing the posterior probability for each of the nine joint genotypes for + a pair of diploid genomes. + + + +**Models** + +:: + + There are currently three models supported by both the train and classify commands. + All models use the JointSNVMix mixture model which jointly analyses the normal and tumour genomes. + By default snvmix2 is used but other models can be specified. + + binomial + + Uses binomial densities in the mixture model this was previously referred to as the JointSnvMix1 mode. + + snvmix2 + + Uses snvmix2 densities in the mixture as described in the original SNVMix paper previously referred to as JointSnvMix2. + + beta_binomial + + Uses beta-binomial densities in the mixture model new in version 0.8. The beta-binomial is a robust (in the statistical sense) + alternative to binomial model. It can be beneficial when dealing with over-dispersed data. This is useful in cancer genomes + since allelic frequencies at somatic mutations sites may deviate significantly from those expected under diploid model. + + +**Input** + + Bam files containing normal and tumor reads. + + +**Parameters** + + + Classify + + chromosome CHROMOSOME + Chromosome to analyse. If not set all chromosomes will + be analysed. + + min_base_qual MIN_BASE_QUAL + Remove bases with base quality lower than this. + Default is 0. + + min_map_qual MIN_MAP_QUAL + Remove bases with mapping quality lower than this. + Default is 0. + + positions_file POSITIONS_FILE + Path to a file containing a list of positions to + create use for analysis. Should be space separated + chrom pos. Additionally for each chromosome the + positions should be sorted. The same format as + samtools. + + parameters_file PARAMETERS_FILE + Path to a file with custom parameters values for the + model. + + somatic_threshold SOMATIC_THRESHOLD + Only sites with P(Somatic) = p_AA_AB + p_AA_BB greater + than equal this value will be printed. Default is 0. + + + Train + + chromosome CHROMOSOME + Chromosome to analyse. If not set all chromosomes will + be analysed. + + min_base_qual MIN_BASE_QUAL + Remove bases with base quality lower than this. + Default is 0. + + min_map_qual MIN_MAP_QUAL + Remove bases with mapping quality lower than this. + Default is 0. + + positions_file POSITIONS_FILE + Path to a file containing a list of positions to + create use for analysis. Should be space separated + chrom pos. Additionally for each chromosome the + positions should be sorted. The same format as + samtools. + + priors_file PRIORS_FILE + Path to a file with priors for the model parameters. + + initial_parameters_file INITIAL_PARAMETERS_FILE + Path to a file with initial parameter values for the + model. + + min_normal_depth MIN_NORMAL_DEPTH + Minimum depth of coverage in normal sample for a site + to be eligible for use in training set. Default 10 + + min_tumour_depth MIN_TUMOUR_DEPTH + Minimum depth of coverage in tumour sample for a site + to be eligible for use in training set. Default 10 + + max_normal_depth MAX_NORMAL_DEPTH + Maximum depth of coverage in normal sample for a site + to be eligible for use in training set. Default 100 + + max_tumour_depth MAX_TUMOUR_DEPTH + Maximum depth of coverage in tumour sample for a site + to be eligible for use in training set. Default 100 + + max_iters MAX_ITERS + Maximum number of iterations to used for training + model. Default 1000 + + skip_size SKIP_SIZE + When subsampling will skip over this number of + position before adding a site to the subsample. Larger + values lead to smaller subsample data sets with faster + training and less memory. Smaller values should lead + to better parameter estimates. Default 1. + + convergence_threshold CONVERGENCE_THRESHOLD + Convergence threshold for EM training. Once the change + in objective function is below this value training + will end. Default 1e-6 + + + + + </help> +</tool> + + + +