comparison freebayes.xml @ 0:afb722f09cc1

Version 0.8.9.a
author lparsons
date Fri, 11 Nov 2011 17:02:45 -0500
parents
children c70c9dc2efb7
comparison
equal deleted inserted replaced
-1:000000000000 0:afb722f09cc1
1 <?xml version="1.0"?>
2 <tool id="freebayes_wrapper_princeton" name="Call SNPS with Freebayes (beta)" version="0.8.9.a">
3 <requirements> <!-- External binaries invoked by the command template -->
4 <requirement type="binary">freebayes</requirement>
5 <requirement type="binary">samtools</requirement>
6 </requirements>
7 <description>Bayesian genetic variant detector</description>
8 <version_string>freebayes -h | grep '[Vv]ersion'</version_string> <!-- pattern is quoted so the shell cannot glob-expand it against files in the working directory -->
9 <command>
10 if [ "`freebayes -h | grep '[Vv]ersion' | awk '{print \$2}'`" != "0.9.0" ]; then
11 echo "Freebayes version installed is not compatible with this version of the Galaxy wrapper (requires '0.9.0')." >&amp;2;
12 exit 2; fi;
13 ## Stage the reference FASTA as localref.fa (history dataset or built-in all_fasta entry), then index it.
14 #if $genomeSource.refGenomeSource == "history":
15 ln -s "$genomeSource.ownFile" localref.fa;
16 #else:
17 ln -s "${ filter( lambda x: str( x[0] ) == str( $genomeSource.index ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }" localref.fa;
18 #end if
19 samtools faidx localref.fa;
20 ## Stage the primary BAM as localbam.bam; it is rewritten by samtools fillmd when BAQ adjustment is selected, otherwise symlinked.
21 #if ($params.source_select == "full") and (str($params.baqAdjustment) == 'true')
22 samtools fillmd -Arb "${bamfile}" localref.fa > localbam.bam;
23 #else
24 ln -s "${bamfile}" localbam.bam;
25 #end if
26 samtools index localbam.bam;
27 ## Stage every additional BAM from the bamfiles repeat the same way, numbered by its position in the repeat.
28 #for $i, $b in enumerate($bamfiles)
29 #if ($params.source_select == "full") and (str($params.baqAdjustment) == 'true')
30 samtools fillmd -Arb "${b.additional_bamfile}" localref.fa > localbam_${i}.bam;
31 #else
32 ln -s "${b.additional_bamfile}" localbam_${i}.bam;
33 #end if
34 samtools index localbam_${i}.bam;
35 #end for
36 ## Run freebayes over all staged BAMs; full mode forwards every user parameter, otherwise only left-align-indels is set.
37 freebayes localbam*.bam --fasta-reference localref.fa --vcf "$output"
38 #if $params.source_select == "full":
39 -T $params.theta
40 -p $params.ploidy
41 $params.pooled
42 --pvar $params.pvar
43 $params.showRefRepeats
44 $params.nosnps
45 $params.noindels
46 $params.nomnps
47 $params.nocomplex
48 -n $params.bestAlleles
49 $params.leftAlignIndels
50 $params.useRefAllele
51 $params.diploidReference
52 --reference-quality $params.refMapQuality,$params.refBaseQuality
53 $params.duplicateReads
54 -m $params.minMapQuality
55 -q $params.minBaseQuality
56 $params.noFilters
57 -x $params.indelExclusionWindow
58 -F $params.minAltFraction
59 -C $params.minAltCount
60 -G $params.minAltTotal
61 --min-coverage $params.minCoverage
62 -W $params.postIntegLimitN,$params.postIntegLimitM
63 #else
64 --left-align-indels
65 #end if
66 </command>
67 <inputs>
68 <!-- <param format="fasta" name="reference" type="data" metadata_name="dbkey" label="Reference File"/> -->
69 <param format="bam" name="bamfile" type="data" label="Bam Alignment File"
70 help="Additional BAM files will be considered to be from the same sample unless they have a different read group assigned. Read groups can be assigned during mapping with BWA (advanced options) or after mapping with Picard tools."/>
71 <repeat name="bamfiles" title="Additional BAM Files"> <!-- Optional extra BAM inputs; the command template loops over this repeat -->
72 <param format="bam" name="additional_bamfile" type="data" label="Additional Bam Alignment File"/>
73 </repeat>
74
75 <conditional name="genomeSource"> <!-- Where the reference FASTA comes from: the input's assigned build, another built-in genome, or a history dataset -->
76 <param name="refGenomeSource" type="select" label="Select Reference Genome">
77 <option value="default" selected="true">Use the assigned data genome/build</option>
78 <option value="indexed">Select a different built-in genome</option>
79 <option value="history">Use a genome (fasta format) from my history</option>
80 </param>
81 <when value="default">
82 <param name="index" type="select" label="Check the assigned reference genome" help="Galaxy thinks that the reads in your dataset were aligned against this reference. If this is not correct, use the 'Select a different built-in genome' option of the 'Select Reference Genome' dropdown to select the appropriate reference.">
83 <options from_data_table="all_fasta">
84 <filter type="data_meta" ref="bamfile" key="dbkey" column="dbkey" multiple="True" separator="," />
85 <validator type="no_options" message="No reference build available for selected input" />
86 </options>
87 </param>
88 </when>
89 <when value="indexed">
90 <param name="index" type="select" label="Select a built-in reference genome" help="This list contains genomes cached at this Galaxy instance. If your genome of interest is not present here request it by using 'Help' link at the top of Galaxy interface or use the 'Use a genome (fasta format) from my history' option of the 'Select Reference Genome' dropdown.">
91 <options from_data_table="all_fasta">
92 </options>
93 </param>
94 </when>
95 <when value="history">
96 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome from history" help="This option works best for relatively small genomes. If you are working with large human-sized genomes, send request to Galaxy team for adding your reference to this Galaxy instance by using 'Help' link at the top of Galaxy interface."/>
97 </when>
98 </conditional>
99
100 <conditional name="params"> <!-- Either run with freebayes defaults (pre_set) or expose the full parameter list (full) -->
101 <param name="source_select" type="select" label="Freebayes Settings to Use" help="Default settings use freebayes defaults for options (except that left-align-indels is set). The defaults are generally not recommended, however. For best results use Full Parameter List and set options appropriate to your data.">
102 <option value="pre_set">Default Settings</option>
103 <option value="full">Full Parameter List</option>
104 </param>
105 <when value="pre_set"/>
106 <when value="full">
107
108 <!-- Population Model -->
109 <param name="theta" size="5" type="float" value="0.01" label="Theta" help="The expected mutation rate or pairwise nucleotide diversity among the population under analysis. This serves as the single parameter to the Ewens Sampling Formula prior model"/>
110 <param name="ploidy" size="5" type="integer" value="2" label="Ploidy" help="Sets the default ploidy for the analysis"/>
111 <param name="pooled" type="boolean" label="Pooled" help="Assume that samples result from pooled sequencing. When using this flag, set 'Ploidy' to the number of alleles in each sample" truevalue="-J" falsevalue="" />
112
113 <!-- Reporting -->
114 <param name="pvar" type="float" value="0.0001" label="Probability of variant threshold" help="Report sites if the probability that there is a polymorphism at the site is greater than N." />
115 <param name="showRefRepeats" type="boolean" label="Show Reference Repeats" help="Calculate and show information about reference repeats in the VCF output" truevalue="--show-reference-repeats" falsevalue="" />
116
117 <!-- Allele Scope -->
118 <param name="nosnps" type="boolean" label="Ignore SNP alleles" truevalue="--no-snps" falsevalue="" />
119 <param name="noindels" type="boolean" label="Ignore insertion and deletion alleles" truevalue="--no-indels" falsevalue="" />
120 <param name="nomnps" type="boolean" label="Ignore multi-nucleotide polymorphisms, MNPs" truevalue="--no-mnps" falsevalue="" />
121 <param name="nocomplex" type="boolean" label="Ignore complex events (composites of other classes)" truevalue="--no-complex" falsevalue="" />
122 <param name="bestAlleles" size="5" type="integer" value="0" label="Use Best N Alleles" help="Evaluate only the best N alleles, ranked by sum of supporting quality scores. Set to 0 to use all." />
123
124 <!-- Indel realignment -->
125 <param name="leftAlignIndels" type="boolean" label="Left align indels" help="Left-realign and merge gaps embedded in reads, recommended when calling indels" checked="true" truevalue="--left-align-indels" falsevalue="" />
126 <param name="baqAdjustment" type="boolean" label="Base alignment quality (BAQ) adjustment" help="A quality smoothing filter which applies a hidden markov model of read genesis to each alignment independently. This is currently implemented by samtools fillmd." />
127
128 <!-- Reference Allele -->
129 <param name="useRefAllele" type="boolean" label="Use Reference Allele" help="Include the reference allele in the analysis as if it is another sample from the same population" truevalue="--use-reference-allele" falsevalue=""/>
130 <param name="diploidReference" type="select" label="Reference Ploidy" help="If using the reference sequence as a sample, it can be treated as haploid (default) or diploid.">
131 <option value="">Haploid</option>
132 <option value="--diploid-reference">Diploid</option>
133 </param>
134 <param name="refMapQuality" size="5" type="integer" value="100" label="Reference Mapping Quality" help="Assign mapping quality of Q to the reference allele at each site"/>
135 <param name="refBaseQuality" size="5" type="integer" value="60" label="Reference Base Quality" help="Assign a base quality of Q to the reference allele at each site"/>
136
137 <!-- Input Filters -->
138 <param name="duplicateReads" type="boolean" label="Use duplicate reads" help="Include duplicate-marked alignments in the analysis" truevalue="--use-duplicate-reads" falsevalue="" />
139 <param name="minMapQuality" size="5" type="integer" value="30" label="Minimum Mapping Quality" help="Exclude alignments from analysis if they have a mapping quality less than Q"/>
140 <param name="minBaseQuality" size="5" type="integer" value="20" label="Minimum Base Quality" help="Exclude alleles from analysis if their supporting base quality is less than Q"/>
141 <param name="noFilters" type="boolean" label="No Filters" help="Do not use any input base and mapping quality filters. Equivalent to setting Minimum Mapping Quality, Minimum Base Quality, and Minimum Supporting Quality to 0" truevalue="--no-filters" falsevalue="" />
142 <param name="indelExclusionWindow" size="5" type="integer" value="-1" label="Indel Exclusion Window" help="Ignore portions of alignments N bases from a putative insertion or deletion allele"/>
143 <param name="minAltFraction" size="5" type="float" min="0" max="1" value="0" label="Minimum Alternative Fraction" help="Require at least this fraction of observations supporting an alternate allele within a single individual in order to evaluate the position"/>
144 <param name="minAltCount" size="5" type="integer" value="1" label="Minimum Alternative Count" help="Require at least this count of observations supporting an alternate allele within a single individual in order to evaluate the position"/>
145 <param name="minAltTotal" size="5" type="integer" value="1" label="Minimum Alternative Total" help="Require at least this count of observations supporting an alternate allele within the total population in order to use the allele in analysis"/>
146 <param name="minCoverage" size="5" type="integer" value="0" label="Minimum Coverage" help="Require at least this coverage to process a site"/>
147
148 <!-- Algorithmic Features -->
149 <!-- <param name="readDependenceFactor" size="5" type="float" value="0.9" label="Read Dependence Factor" help="Incorporate non-independence of reads by scaling successive observations by this factor during data likelihood calculations"/> -->
150 <!-- <param name="diffusionPriorScalar" size="5" type="float" value="1" label="Diffusion Prior Scalar" help="Downgrade the significance of P(genotype combo | allele frequency) by taking the Nth root of this component of the prior"/> -->
151 <param name="postIntegLimitN" size="5" type="integer" value="1" label="Posterior Integration Limit N" help="See Limit M below."/>
152 <param name="postIntegLimitM" size="5" type="integer" value="3" label="Posterior Integration Limit M" help="Integrate all genotype combinations in our posterior space which include no more than N samples with their Mth best data likelihood."/>
153 <!-- <param name="postIntegBandwidth" size="5" type="integer" value="2" label="Posterior Integration Bandwidth" help="Integrate all genotype combinations in our posterior space which lie no more than N steps from the most likely combination in terms of data likelihoods, taking the N steps from the most to least likely genotype for each individual"/>
154 <param name="postIntegBanddepth" size="5" type="integer" value="2" label="Posterior Integration Banddepth" help="Generate all genotype combinations for which up to this number of samples have up to their -W'th worst genotype according to data likelihood"/> -->
155 </when>
156 </conditional>
157 </inputs>
158 <outputs> <!-- Single VCF dataset; its path is what the command template passes to freebayes as the vcf option -->
159 <data format="vcf" name="output" metadata_source="bamfile" />
160 </outputs>
161 <tests>
162 <test> <!-- Default-settings run with the reference FASTA supplied through the genomeSource "history" branch -->
163 <param name="refGenomeSource" value="history"/><param name="ownFile" ftype="fasta" value="mosaik_test_ref.fasta"/>
164 <param name="bamfile" ftype="bam" value="freebayes_in.bam"/>
165 <param name="source_select" value="pre_set"/>
166 <output name="output" file="freebayes_out.vcf" lines_diff="4"/>
167 </test>
168 </tests>
169 <help>
170 This tool uses `Freebayes`__ to call SNPS given a reference sequence and a BAM alignment file.
171
172 Base alignment quality (BAQ) adjustment is a quality smoothing filter which applies a hidden
173 markov model of read genesis to each alignment independently. This is currently implemented
174 by samtools fillmd. (See "Biological Sequence Analysis: Probabilistic Models of Proteins and
175 Nucleic Acids" by Durbin et al. for more details.)
176
177 Requires a coordinate-sorted BAM file. Galaxy currently coordinate-sorts all BAM files.
178
179 .. __: http://bioinformatics.bc.edu/marthlab/FreeBayes
180 </help>
181 </tool>