diff VCFGandalfTools/VCFCarto_wrapper.xml @ 2:6bebeb76fa8d draft

Uploaded
author urgi-team
date Tue, 05 Apr 2016 08:33:41 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFGandalfTools/VCFCarto_wrapper.xml	Tue Apr 05 08:33:41 2016 -0400
@@ -0,0 +1,315 @@
+<tool id="VCFCarto" name="VCFCarto" version="0.01">
+    <description>VCFcarto can convert a tabulated marker file into a file with only the markers from 2 parents </description>
+    <requirements>
+        <!-- Dependency declared for this tool: package VCF_Gandalf_Tools, version 1.0. -->
+        <requirement type="package" version="1.0">VCF_Gandalf_Tools</requirement>
+    </requirements>
+    <!-- Version probe: calls VCFCarto.py directly, whereas the job command below runs VCFCarto_wrapper.py. -->
+    <version_command>
+  VCFCarto.py --version
+    </version_command>
+    <!-- Command line for VCFCarto_wrapper.py.
+         Every Galaxy-substituted value is double-quoted so that a parent name or a
+         path containing spaces reaches the script as a single argument.
+         The graph options are added for the "carto" and "MergedCarto" output types;
+         "MergedCarto" additionally passes the merge options and the bed output.
+         Nothing extra is added for "raw". -->
+    <command interpreter="python">
+    VCFCarto_wrapper.py -f "$inputTabular" -o "$outputVCFCarto" -A "$parentA" -H "$parentH"
+    #if str($outputType) == "carto"
+     -p -g --graphHTML "$output_html" --dirGraphs "$output_html.files_path"
+    #end if
+    #if str($outputType) == "MergedCarto"
+     -p -g --graphHTML "$output_html" --dirGraphs "$output_html.files_path" -m --mergeFile "$output_bed"
+    #end if
+    </command>
+    <!-- User inputs: the tabulated marker file, the names of the two parents,
+         and the kind of output to generate. -->
+    <inputs>
+        <param name="inputTabular" type="data" format="tabular" label="indicate your tabulated marker file"/>
+        <param name="parentA" size="20" type="text" value="V1" label="indicate parent 1 name (A)"/>
+        <param name="parentH" size="20" type="text" value="V2" label="indicate parent 2 name (H)"/>
+        <!-- The option values (raw / carto / MergedCarto) are what the command template,
+             the output filters and the tests compare against; only the displayed text
+             of the first option was corrected ("caracter" -> "character"). -->
+        <param name="outputType" type="select" display="radio" label="select type of output" multiple="False">
+            <option value="raw" >7 character code</option>
+            <option value="carto" >A - H code</option>
+            <option value="MergedCarto" >A - H code and merge</option>
+        </param>
+    </inputs>
+    <!-- Datasets produced by the tool. The html and bed datasets are filtered on the
+         selected outputType, so they only appear for the modes listed below. -->
+    <outputs>
+        <!-- Always created: the tabular result. -->
+        <data name="outputVCFCarto" format="tabular" label="${tool.name} on ${on_string} (tabular)"/>
+        <!-- Graph page: created for every output type except "raw". -->
+        <data name="output_html" format="html" label="${tool.name} graphs on ${on_string} (html)">
+            <filter>outputType != "raw"</filter>
+        </data>
+        <!-- Bed file: created only for the "MergedCarto" output type. -->
+        <data name="output_bed" format="bed" label="${tool.name} markers on ${on_string} (bed)">
+            <filter>outputType == "MergedCarto"</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Test 1: "raw" output type with parents REF1 / REF2; only the tabular
+             output is compared, against VCFCarto_output.tab. -->
+        <test>
+            <param name="inputTabular" value="VCFCarto_input.tab"/>
+            <param name="parentA" value="REF1"/>
+            <param name="parentH" value="REF2"/>
+            <param name="outputType" value="raw"/>
+            <output name="outputVCFCarto" file="VCFCarto_output.tab" ftype="tabular"/>
+        </test>
+        <!-- Test 2: "MergedCarto" output type on the same input; the tabular and bed
+             outputs are compared. The html output is not checked by this test. -->
+        <test>
+            <param name="inputTabular" value="VCFCarto_input.tab"/>
+            <param name="parentA" value="REF1"/>
+            <param name="parentH" value="REF2"/>
+            <param name="outputType" value="MergedCarto"/>
+            <output name="outputVCFCarto" file="VCFCarto_output_merged.tab" ftype="tabular"/>
+            <output name="output_bed" file="VCFCarto_output_merged.bed" ftype="bed"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+      
+**VCFcarto converts a tabulated marker file into a file with only the markers from 2 parents**
+
+.. class:: infomark
+
+expected input format is the output from VCFStorage.
+
+-----
+
+**what it does :**
+
+VCFcarto converts a tabulated marker file into a file with only the markers from 2 parents, refA and refH. 
+
+2 formats are possible, either the input format is conserved, or the format is changed into a 3 letter format
+
+-----
+
+**input format :**
+
+.. class:: infomark
+
+expected input format is the output from VCFStorage.
+
+the expected format is a tab delimited format file where all genomic positions are in rows, and all strains are in columns
+
+For each position and each genome, a code is attributed : 
+
+- for the reference : ::
+
+    A,T,G,C for the corresponding nucleotide
+
+- for the genomes : ::
+
+    U if the position was not referenced in the VCF file
+    R if the base is similar to the reference 
+    F if the base has been filtered out
+    A,T,G,C if the genome has a validated SNP at the position
+
+-----
+
+**output format :**
+
+for the main output, 2 formats are possible : 
+
+- The first format is similar to the input format (same columns and codes), but only the lines where the 2 parents have different alleles are kept.
+
+- The second format (A - H format) will have a much simpler code ::
+
+    "A" when the strain allele is the same as parent A
+    "H" when the strain allele is the same as parent H
+    "-" in any other case (base filtered out, different base, base unmapped etc...)
+
+the second format may be used as an input for a cartographic tool. 
+
+If you choose the A - H format, you can also merge consecutive markers that carry the same information (all strains are identical between the two markers). If you do so, new markers will be generated and a bed file will link the input markers to the output markers.
+
+Finally, graphical output will be displayed to visualise the result. 
+
+-----
+
+**example :**
+
+input : ::
+
+	CHROM	POS	reference	REF1	G01	REF2	G02	G03	G04	G05	G06	G07	G08	G09	G10	G11	G12
+	Chr1	1	A	R	R	R	R	U	R	R	R	R	R	R	R	R	R
+	Chr1	2	T	R	R	R	R	R	U	R	R	R	R	R	R	R	R
+	Chr1	3	G	R	R	R	R	R	R	R	R	R	R	R	R	R	R
+	Chr1	4	G	R	R	R	R	R	R	R	R	R	R	R	R	F	R
+	Chr1	5	G	R	R	R	R	R	R	U	F	R	R	R	R	R	R
+	Chr1	6	C	R	R	R	R	R	R	R	R	R	R	R	R	R	U
+	Chr1	7	A	G	C	C	C	F	C	C	C	C	C	G	C	G	G
+	Chr1	8	G	R	R	R	R	R	R	R	R	R	R	R	R	R	R
+	Chr1	9	C	R	T	T	R	T	T	T	U	R	T	R	T	T	T
+	Chr1	10	T	R	R	R	R	R	R	R	R	R	R	R	R	R	U
+	Chr1	11	T	R	R	R	R	R	R	R	R	R	R	R	F	R	R
+	Chr1	12	A	R	R	R	R	U	R	R	R	R	F	R	R	R	R
+	Chr1	13	A	R	R	G	G	R	F	R	F	G	R	G	R	R	F
+	Chr1	14	A	R	R	R	R	R	R	R	R	F	R	R	R	R	R
+	Chr1	15	G	R	R	R	U	R	F	R	R	R	R	R	R	U	U
+	Chr1	16	G	A	R	R	A	R	R	U	F	R	R	A	A	R	A
+	Chr1	17	A	R	G	G	R	U	R	R	G	G	R	G	U	R	G
+	Chr1	18	C	R	R	R	R	R	U	R	R	R	R	R	R	R	R
+	Chr1	19	G	C	U	R	C	R	C	U	R	R	C	C	C	R	C
+	Chr1	20	G	A	U	R	A	R	A	U	R	R	A	A	A	R	A
+	Chr1	21	G	T	U	R	T	R	T	U	R	R	T	T	T	R	T
+	Chr1	22	A	T	U	R	T	R	T	U	R	R	T	T	T	R	T
+	Chr1	23	C	T	T	R	T	R	R	R	T	R	U	T	R	T	T
+	Chr1	24	T	R	R	R	R	R	U	R	R	R	R	R	R	R	F
+	Chr1	25	G	R	F	R	R	R	R	R	U	R	F	R	R	R	R
+	Chr1	26	T	R	R	C	C	C	C	C	R	R	C	R	C	R	U
+	Chr1	27	C	R	R	G	G	G	G	R	G	R	G	R	G	R	R
+	Chr1	28	C	G	T	T	T	G	G	T	T	F	T	G	T	T	G
+	Chr1	29	G	T	R	R	R	R	T	R	T	R	T	T	R	T	R
+	Chr1	30	T	R	R	R	R	R	R	R	R	R	R	R	R	R	R
+	Chr1	31	A	R	R	R	R	F	R	R	R	R	F	R	R	R	R
+	Chr1	32	A	G	G	R	G	G	G	R	R	G	G	G	G	G	R
+	Chr1	33	G	R	R	R	R	R	R	R	R	R	R	R	R	R	R
+	Chr1	34	C	R	R	R	R	R	R	R	R	R	R	R	R	R	R
+	Chr1	35	C	R	R	R	R	R	F	R	R	R	R	R	R	R	U
+	Chr2	1	T	R	R	R	F	R	R	R	R	R	R	R	R	R	R
+	Chr2	2	A	C	R	R	C	C	U	R	R	R	R	C	C	C	U
+	Chr2	3	C	R	R	R	R	R	R	U	R	R	R	R	R	R	R
+	Chr2	4	C	R	R	R	R	R	R	R	U	R	R	R	R	F	R
+	Chr2	5	T	R	R	R	R	R	R	R	R	R	R	R	R	R	R
+	Chr2	6	C	R	R	R	R	R	R	R	R	R	R	R	R	R	R
+	Chr2	7	A	T	F	R	U	R	T	T	T	R	T	T	F	T	T
+	Chr2	8	T	R	R	R	R	R	R	R	R	R	R	R	R	R	R
+	Chr2	9	C	R	R	R	R	R	R	R	R	R	R	R	R	R	R
+	Chr2	10	G	R	T	T	T	T	R	T	R	R	R	R	R	U	R
+	Chr2	11	C	R	A	A	A	A	R	A	R	R	R	R	R	U	R
+	Chr2	12	A	R	T	T	T	T	R	T	R	R	R	R	R	U	R
+	Chr2	13	T	R	C	C	C	C	R	C	R	R	R	R	R	U	R
+	Chr2	14	C	T	A	A	T	A	T	A	T	A	T	T	A	A	A
+	Chr2	15	T	R	R	R	F	R	R	R	R	R	R	R	R	R	R
+	Chr2	16	A	R	R	R	R	R	R	R	U	R	R	R	R	R	R
+	Chr2	17	A	R	U	R	R	R	R	R	R	R	R	R	R	R	F
+	Chr2	18	G	R	R	R	R	R	R	R	R	R	R	R	R	R	R
+	Chr2	19	A	R	R	R	R	R	R	F	R	R	R	R	R	R	R
+	Chr2	20	C	R	R	R	R	R	R	R	F	R	R	R	R	R	R
+	Chr2	21	G	A	R	R	A	A	A	R	R	R	A	A	R	R	R
+	Chr2	22	A	R	R	R	R	R	R	F	R	R	R	R	R	R	R
+	Chr2	23	A	R	R	T	T	R	R	T	T	T	T	T	R	R	R
+	Chr2	24	T	R	R	R	R	R	R	U	R	R	R	R	R	R	F
+	Chr2	25	T	R	A	A	R	R	A	R	A	R	R	A	R	R	A
+	Chr2	26	G	R	R	R	R	R	R	R	R	R	R	R	R	R	R
+	Chr2	27	A	R	R	R	R	R	R	R	R	R	R	R	R	U	R
+	Chr2	28	C	R	U	R	R	F	F	R	R	F	R	F	U	R	R
+	Chr2	29	G	R	R	R	R	R	R	F	R	R	R	R	R	R	R
+	Chr2	30	T	A	A	G	A	G	G	A	A	G	F	G	G	G	U
+	Chr2	31	A	R	R	R	R	R	R	R	R	U	U	R	R	R	R
+	Chr2	32	G	R	R	R	R	R	R	U	U	R	R	R	R	R	R
+	Chr2	33	G	R	U	R	R	R	R	U	R	R	R	R	R	R	R
+	Chr2	34	A	R	R	R	U	R	R	R	R	R	R	R	R	R	R
+	Chr2	35	G	R	R	R	R	R	R	R	R	R	R	R	R	R	R
+	Chr2	36	T	R	R	R	R	R	R	U	R	R	R	R	R	R	R
+	Chr3	1	T	U	R	R	R	R	R	U	R	R	R	R	R	R	R
+	Chr3	2	T	R	R	U	R	R	R	U	R	R	R	R	R	R	R
+	Chr3	3	T	F	R	R	R	R	R	U	R	R	R	R	R	R	R
+	Chr3	4	T	R	R	F	R	R	R	U	R	R	R	R	R	R	R
+
+
+output :
+
+- without A - H code : ::
+
+	CHROM	POS	reference	REF1	G01	REF2	G02	G03	G04	G05	G06	G07	G08	G09	G10	G11	G12
+	Chr1	7	A	G	C	C	C	F	C	C	C	C	C	G	C	G	G
+	Chr1	9	C	R	T	T	R	T	T	T	U	R	T	R	T	T	T
+	Chr1	13	A	R	R	G	G	R	F	R	F	G	R	G	R	R	F
+	Chr1	16	G	A	R	R	A	R	R	U	F	R	R	A	A	R	A
+	Chr1	17	A	R	G	G	R	U	R	R	G	G	R	G	U	R	G
+	Chr1	19	G	C	U	R	C	R	C	U	R	R	C	C	C	R	C
+	Chr1	20	G	A	U	R	A	R	A	U	R	R	A	A	A	R	A
+	Chr1	21	G	T	U	R	T	R	T	U	R	R	T	T	T	R	T
+	Chr1	22	A	T	U	R	T	R	T	U	R	R	T	T	T	R	T
+	Chr1	23	C	T	T	R	T	R	R	R	T	R	U	T	R	T	T
+	Chr1	26	T	R	R	C	C	C	C	C	R	R	C	R	C	R	U
+	Chr1	27	C	R	R	G	G	G	G	R	G	R	G	R	G	R	R
+	Chr1	28	C	G	T	T	T	G	G	T	T	F	T	G	T	T	G
+	Chr1	29	G	T	R	R	R	R	T	R	T	R	T	T	R	T	R
+	Chr1	32	A	G	G	R	G	G	G	R	R	G	G	G	G	G	R
+	Chr2	2	A	C	R	R	C	C	U	R	R	R	R	C	C	C	U
+	Chr2	7	A	T	F	R	U	R	T	T	T	R	T	T	F	T	T
+	Chr2	10	G	R	T	T	T	T	R	T	R	R	R	R	R	U	R
+	Chr2	11	C	R	A	A	A	A	R	A	R	R	R	R	R	U	R
+	Chr2	12	A	R	T	T	T	T	R	T	R	R	R	R	R	U	R
+	Chr2	13	T	R	C	C	C	C	R	C	R	R	R	R	R	U	R
+	Chr2	14	C	T	A	A	T	A	T	A	T	A	T	T	A	A	A
+	Chr2	21	G	A	R	R	A	A	A	R	R	R	A	A	R	R	R
+	Chr2	23	A	R	R	T	T	R	R	T	T	T	T	T	R	R	R
+	Chr2	25	T	R	A	A	R	R	A	R	A	R	R	A	R	R	A
+	Chr2	30	T	A	A	G	A	G	G	A	A	G	F	G	G	G	U
+
+- with A - H code but without merged markers : ::
+
+	CHROM	POS	reference	REF1	G01	REF2	G02	G03	G04	G05	G06	G07	G08	G09	G10	G11	G12
+	Chr1	7	-	A	H	H	H	-	H	H	H	H	H	A	H	A	A
+	Chr1	9	-	A	H	H	A	H	H	H	-	A	H	A	H	H	H
+	Chr1	13	-	A	A	H	H	A	-	A	-	H	A	H	A	A	-
+	Chr1	16	-	A	H	H	A	H	H	-	-	H	H	A	A	H	A
+	Chr1	17	-	A	H	H	A	-	A	A	H	H	A	H	-	A	H
+	Chr1	19	-	A	-	H	A	H	A	-	H	H	A	A	A	H	A
+	Chr1	20	-	A	-	H	A	H	A	-	H	H	A	A	A	H	A
+	Chr1	21	-	A	-	H	A	H	A	-	H	H	A	A	A	H	A
+	Chr1	22	-	A	-	H	A	H	A	-	H	H	A	A	A	H	A
+	Chr1	23	-	A	A	H	A	H	H	H	A	H	-	A	H	A	A
+	Chr1	26	-	A	A	H	H	H	H	H	A	A	H	A	H	A	-
+	Chr1	27	-	A	A	H	H	H	H	A	H	A	H	A	H	A	A
+	Chr1	28	-	A	H	H	H	A	A	H	H	-	H	A	H	H	A
+	Chr1	29	-	A	H	H	H	H	A	H	A	H	A	A	H	A	H
+	Chr1	32	-	A	A	H	A	A	A	H	H	A	A	A	A	A	H
+	Chr2	2	-	A	H	H	A	A	-	H	H	H	H	A	A	A	-
+	Chr2	7	-	A	-	H	-	H	A	A	A	H	A	A	-	A	A
+	Chr2	10	-	A	H	H	H	H	A	H	A	A	A	A	A	-	A
+	Chr2	11	-	A	H	H	H	H	A	H	A	A	A	A	A	-	A
+	Chr2	12	-	A	H	H	H	H	A	H	A	A	A	A	A	-	A
+	Chr2	13	-	A	H	H	H	H	A	H	A	A	A	A	A	-	A
+	Chr2	14	-	A	H	H	A	H	A	H	A	H	A	A	H	H	H
+	Chr2	21	-	A	H	H	A	A	A	H	H	H	A	A	H	H	H
+	Chr2	23	-	A	A	H	H	A	A	H	H	H	H	H	A	A	A
+	Chr2	25	-	A	H	H	A	A	H	A	H	A	A	H	A	A	H
+	Chr2	30	-	A	A	H	A	H	H	A	A	H	-	H	H	H	-
+
+- with A - H code and merge  : 
+ 
+ - tab file : ::
+ 
+	CHROM	POS	reference	REF1	G01	REF2	G02	G03	G04	G05	G06	G07	G08	G09	G10	G11	G12
+	Chr1	*M_00001	-	A	H	H	H	-	H	H	H	H	H	A	H	A	A
+	Chr1	*M_00002	-	A	H	H	A	H	H	H	-	A	H	A	H	H	H
+	Chr1	*M_00003	-	A	A	H	H	A	-	A	-	H	A	H	A	A	-
+	Chr1	*M_00004	-	A	H	H	A	H	H	-	-	H	H	A	A	H	A
+	Chr1	*M_00005	-	A	H	H	A	-	A	A	H	H	A	H	-	A	H
+	Chr1	*M_00006	-	A	-	H	A	H	A	-	H	H	A	A	A	H	A
+	Chr1	*M_00007	-	A	A	H	A	H	H	H	A	H	-	A	H	A	A
+	Chr1	*M_00008	-	A	A	H	H	H	H	H	A	A	H	A	H	A	-
+	Chr1	*M_00009	-	A	A	H	H	H	H	A	H	A	H	A	H	A	A
+	Chr1	*M_00010	-	A	H	H	H	A	A	H	H	-	H	A	H	H	A
+	Chr1	*M_00011	-	A	H	H	H	H	A	H	A	H	A	A	H	A	H
+	Chr1	*M_00012	-	A	A	H	A	A	A	H	H	A	A	A	A	A	H
+	Chr2	*M_00013	-	A	H	H	A	A	-	H	H	H	H	A	A	A	-
+	Chr2	*M_00014	-	A	-	H	-	H	A	A	A	H	A	A	-	A	A
+	Chr2	*M_00015	-	A	H	H	H	H	A	H	A	A	A	A	A	-	A
+	Chr2	*M_00016	-	A	H	H	A	H	A	H	A	H	A	A	H	H	H
+	Chr2	*M_00017	-	A	H	H	A	A	A	H	H	H	A	A	H	H	H
+	Chr2	*M_00018	-	A	A	H	H	A	A	H	H	H	H	H	A	A	A
+	Chr2	*M_00019	-	A	H	H	A	A	H	A	H	A	A	H	A	A	H
+	Chr2	*M_00020	-	A	A	H	A	H	H	A	A	H	-	H	H	H	-
+
+ - bed file : :: 
+    
+	Chr1	7	7	*M_00001
+	Chr1	9	9	*M_00002
+	Chr1	13	13	*M_00003
+	Chr1	16	16	*M_00004
+	Chr1	17	17	*M_00005
+	Chr1	19	22	*M_00006
+	Chr1	23	23	*M_00007
+	Chr1	26	26	*M_00008
+	Chr1	27	27	*M_00009
+	Chr1	28	28	*M_00010
+	Chr1	29	29	*M_00011
+	Chr1	32	32	*M_00012
+	Chr2	2	2	*M_00013
+	Chr2	7	7	*M_00014
+	Chr2	10	13	*M_00015
+	Chr2	14	14	*M_00016
+	Chr2	21	21	*M_00017
+	Chr2	23	23	*M_00018
+	Chr2	25	25	*M_00019
+	Chr2	30	30	*M_00020
+
+
+-----
+
+**reference :**
+
+]]>
+    </help>
+</tool>