comparison cgatools/tools/cgatools_1.5/snpdiff.xml @ 0:182426b32995 draft default tip

Uploaded
author completegenomics
date Mon, 18 Jun 2012 20:15:00 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:182426b32995
1 <tool id="cg_snpdiff" name="snpdiff 1.5" version="1.0.0">
2 <!--
3 This tool creates a GUI for the snpdiff function of cgatools from Complete Genomics, Inc.
4 written 6-18-2012 by bcrain@completegenomics.com
5 -->
6
7 <description>compares snp calls to a Complete Genomics variant file.</description> <!--adds description in toolbar-->
8
9 <requirements> <!--external dependency: the cgatools executable invoked in the command section-->
10 <requirement type="binary">cgatools</requirement>
11 </requirements>
12
13 <command> <!--log the first line of cgatools output, then run snpdiff; varfile lives inside the data_sources conditional, so it is referenced through it-->
14 cgatools | head -1;
15 cgatools snpdiff
16 --reference $crr.fields.path
17 --variants $data_sources.varfile
18 --genotypes $genotype
19 --output-prefix cg_
20 --reports `echo ${report1} ${report2} ${report3} | tr -s ' ' ','`
21 </command>
22
23 <outputs> <!--one tabular dataset per report; each filter keeps the dataset only when the matching report select holds that report's name-->
24 <data name="output1" format="tabular" label="${tool.name} on ${on_string}: Output" from_work_dir="cg_Output.tsv">
25 <filter>(report1 == 'Output')</filter>
26 </data>
27 <data name="output2" format="tabular" label="${tool.name} on ${on_string}: Verbose" from_work_dir="cg_Verbose.tsv">
28 <filter>(report2 == 'Verbose')</filter>
29 </data>
30 <data name="output3" format="tabular" label="${tool.name} on ${on_string}: Stats" from_work_dir="cg_Stats.tsv">
31 <filter>(report3 == 'Stats')</filter>
32 </data>
33 </outputs>
34
35 <inputs>
36 <!--form field to select the reference crr file; choices come from the cg_crr_files data table-->
37 <param name="crr" type="select" label="Reference genome (.crr file)">
38 <options from_data_table="cg_crr_files" />
39 </param>
40
41 <!--conditional to select the variant file input: a dataset imported into Galaxy or a path outside Galaxy-->
42 <conditional name="data_sources">
43 <param name="data_source" type="select" label="Where is the input varfile?">
44 <option value="in" selected="true">imported into Galaxy</option>
45 <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
46 </param>
47 <when value="in">
48 <!--form field to select a variant file dataset imported into Galaxy-->
49 <param name="varfile" type="data" format="cg_var" label="Var file">
50 <validator type="unspecified_build" />
51 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
52 metadata_name="dbkey" metadata_column="1"
53 message="cgatools is not currently available for this build."/>
54 </param>
55 </when>
56 <when value="out">
57 <!--form field to type the path of a variant file located outside Galaxy; NOTE(review): this free text ends up in the shell command, confirm it is sanitized-->
58 <param name="varfile" type="text" label="Variant file (/path/varfile)" size="40" help="Variant file can be compressed (gz, bz2), e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01_2000/ASM/var-GS00000YYYY-ASM.tsv.bz2"/>
59 </when>
60 </conditional>
61
62 <!--form field to select the genotypes file (tabular dataset with the SNP calls to compare)-->
63 <param name="genotype" type="data" format="tabular" label="Genotypes file with SNP calls" help="The genotypes file is a tab-delimited file with at
64 least the following columns (additional columns may be given): Chromosome (Required), Offset0Based (Required), GenotypesStrand (Optional), Genotypes (Optional)">
65 <validator type="unspecified_build" />
66 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
67 metadata_name="dbkey" metadata_column="1"
68 message="cgatools is not currently available for this build."/>
69 </param>
70 <!--report toggles: "no" yields an empty value, "yes" yields the report name used by the reports option and the output filters-->
71 <param name="report1" type="select" label="Create report Output">
72 <option value="">no</option>
73 <option value="Output">yes</option>
74 </param>
75 <param name="report2" type="select" label="Create report Verbose">
76 <option value="">no</option>
77 <option value="Verbose">yes</option>
78 </param>
79 <param name="report3" type="select" label="Create report Stats">
80 <option value="">no</option>
81 <option value="Stats">yes</option>
82 </param>
83
84 </inputs>
85
86 <help>
87
88 **What it does**
89
90 This tool compares snp calls to a Complete Genomics variant file.
91
92 **cgatools 1.5.0 Documentation**
93
94 User guide: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-user-guide.pdf
95
96 Release notes: http://cgatools.sourceforge.net/docs/1.5.0/cgatools-release-notes.pdf
97
98 **Command line reference**::
99
100 COMMAND NAME
101 snpdiff - Compares snp calls to a Complete Genomics variant file.
102
103 DESCRIPTION
104 Compares the snp calls in the "genotypes" file to the calls in a Complete
105 Genomics variant file. The genotypes file is a tab-delimited file with at
106 least the following columns (additional columns may be given):
107
108 Chromosome (Required) The name of the chromosome.
109 Offset0Based (Required) The 0-based offset in the chromosome.
110 GenotypesStrand (Optional) The strand of the calls in the Genotypes
111 column (+ or -, defaults to +).
112 Genotypes (Optional) The calls, one per allele. The following
113 calls are recognized:
114 A,C,G,T A called base.
115 N A no-call.
116 - A deleted base.
117 . A non-snp variation.
118
119 The output is a tab-delimited file consisting of the columns of the
120 original genotypes file, plus the following additional columns:
121
122 Reference The reference base at the given position.
123 VariantFile The calls made by the variant file, one per allele.
124 The character codes are the same as is described for
125 the Genotypes column.
126 DiscordantAlleles (Only if Genotypes is present) The number of
127 Genotypes alleles that are discordant with calls in
128 the VariantFile. If the VariantFile is described as
129 haploid at the given position but the Genotypes is
130 diploid, then each genotype allele is compared
131 against the haploid call of the VariantFile.
132 NoCallAlleles (Only if Genotypes is present) The number of
133 Genotypes alleles that were no-called by the
134 VariantFile. If the VariantFile is described as
135 haploid at the given position but the Genotypes is
136 diploid, then a VariantFile no-call is counted twice.
137
138 The verbose output is a tab-delimited file consisting of the columns of the
139 original genotypes file, plus the following additional columns:
140
141 Reference The reference base at the given position.
142 VariantFile The call made by the variant file for one allele (there is
143 a line in this file for each allele). The character codes
144 are the same as is described for the Genotypes column.
145 [CALLS] The rest of the columns are pasted in from the VariantFile,
146 describing the variant file line used to make the call.
147
148 The stats output is a comma-separated file with several tables describing
149 the results of the snp comparison, for each diploid genotype. The tables
150 all describe the comparison result (column headers) versus the genotype
151 classification (row labels) in different ways. The "Locus classification"
152 tables have the most detailed match classifications, while the "Locus
153 concordance" tables roll these match classifications up into "discordance"
154 and "no-call". A locus is considered discordant if it is discordant for
155 either allele. A locus is considered no-call if it is concordant for both
156 alleles but has a no-call on either allele. The "Allele concordance"
157 describes the comparison result on a per-allele basis.
158
159 OPTIONS
160 -h [ --help ]
161 Print this help message.
162
163 --reference arg
164 The input crr file.
165
166 --variants arg
167 The input variant file.
168
169 --genotypes arg
170 The input genotypes file.
171
172 --output-prefix arg
173 The path prefix for all output reports.
174
175 --reports arg (=Output,Verbose,Stats)
176 Comma-separated list of reports to generate. A report is one of:
177 Output The output genotypes file.
178 Verbose The verbose output file.
179 Stats The stats output file.
180
181 SUPPORTED FORMAT_VERSION
182 0.3 or later
183 </help>
184 </tool>