comparison multispecies_MicrosatDataGenerator_interrupted_GALAXY.xml @ 0:275433d3a395 draft

Uploaded tool tarball.
author devteam
date Wed, 25 Sep 2013 11:26:57 -0400
parents
children f2aeaacb43c2
comparison
equal deleted inserted replaced
-1:000000000000 0:275433d3a395
1 <tool id="multispecies_orthologous_microsats" name="Extract orthologous microsatellites" version="1.0.0">
2 <description> for multiple (>2) species alignments</description>
3 <command interpreter="perl">
4 multispecies_MicrosatDataGenerator_interrupted_GALAXY.pl
5 $input1
6 $input2
7 $out_file1
8 $thresholds
9 $species
10 "$treedefinition"
11 $separation
12
13 </command> <!-- Runs the Perl script with positional arguments in this order: input1 (unfiltered MAF), input2 (filtered MAF), out_file1, thresholds, species, treedefinition, separation. Only treedefinition is double-quoted; presumably this keeps the shell from interpreting the parentheses in the tree string (confirm). -->
14 <inputs> <!-- Parameters: two MAF datasets (unfiltered and filtered), minimum separation, repeat thresholds, species selection and the species tree definition. -->
15 <page>
16 <param format="maf" name="input1" type="data" label="Select unfiltered MAF alignments" help="NOTE: Currently users are requested to select only the alignments that contain five, four or three species' genomes."/>
17 <param format="maf" name="input2" type="data" label="Select the filtered version of above MAF alignments" help="NOTE: Please use the tool 'Filter nucleotides' to filter nucleotides based on quality, in multiple species. When using the Filter nucleotide tool, ensure that you click 'Select All' for the option 'Mask Species'."/>
18 <param name="separation" size="10" type="integer" value="10" label="Minimum base pair distance between adjacent microsatellite blocks"
19 help="A value of 10 means: Adjacent microsatellites separated by less than 10 base pairs will be excluded from the output."/>
20 <param name="thresholds" size="15" type="text" value="9,10,12,12" label="Minimum Threshold for the number of repeats for microsatellites"
21 help="A value of 9,10,12,12 means: All monos having fewer than 9 repeats, dis having fewer than 5 repeats, tris having fewer than 4 repeats, tetras having fewer than 3 repeats will be excluded from the output."/> <!-- NOTE(review): the help text maps 10,12,12 to 5, 4 and 3 repeats, which suggests the values are lengths in base pairs rather than repeat counts as the label says. Confirm against the Perl script before rewording. -->
22 <param name="species" type="select" label="Select species" display="checkboxes" multiple="true" help="NOTE: Currently users are requested to select one of these three combinations: hg18-panTro2-ponAbe2, hg18-panTro2-ponAbe2-rheMac2 or hg18-panTro2-ponAbe2-rheMac2-calJac1">
23 <options> <!-- Choices are filtered by the "species" metadata key of input1. -->
24 <filter type="data_meta" ref="input1" key="species" />
25 </options>
26 </param>
27 <param name="treedefinition" size="200" type="text" value="((((hg18,panTro2),ponAbe2),rheMac2),calJac1)" label="Tree definition of all species above whether or not selected for microsatellite extraction"
28 help="For example: ((((hg18,panTro2),ponAbe2),rheMac2),calJac1)"/>
29 </page>
30 </inputs>
31 <outputs> <!-- One text-format output, out_file1; metadata_source names input1 as the dataset its metadata is taken from. -->
32 <data format="txt" name="out_file1" metadata_source="input1"/>
33 </outputs>
34 <requirements> <!-- Declares a single requirement of type "binary": bx-sputnik. -->
35 <requirement type="binary">bx-sputnik</requirement>
36 </requirements>
37 <tests> <!-- One functional test: the unmasked and masked .maf.gz fixtures, all five species, and the same thresholds, tree definition and separation as the parameter defaults; the expected output is Galaxy17_short_raw.txt. -->
38 <test>
39 <param name="input1" value="regVariation/microsatellite/Galaxy17_unmasked_short.maf.gz"/>
40 <param name="input2" value="regVariation/microsatellite/Galaxy17_masked_short.maf.gz"/>
41 <param name="thresholds" value="9,10,12,12"/>
42 <param name="species" value="hg18,panTro2,ponAbe2,rheMac2,calJac1"/>
43 <param name="treedefinition" value="((((hg18,panTro2),ponAbe2),rheMac2),calJac1)"/>
44 <param name="separation" value="10"/>
45 <output name="out_file1" file="regVariation/microsatellite/Galaxy17_short_raw.txt"/>
46 </test>
47 </tests>
48
49 <help>
50
51 .. class:: infomark
52
53 **What it does**
54
55 This tool finds orthologous microsatellite blocks between aligned species.
56
57 -----
58
59 .. class:: warningmark
60
61 **Note**
62
63 A non-tabular format is created in which each row contains all information pertaining to a microsatellite locus from multiple species in the alignment.
64 The rows read like this:
65
66 >hg18 15 hg18 chr22 16092941 16093413 panTro2 chr22 16103944 16104421 ponAbe2 chr22 13797750 13798215 rheMac2 chr10 61890946 61891409 calJac1 Contig6986 140254 140728 mononucleotide A 0 13 + 29 aaaaa------aaaAAA >rheMac2 15 hg18 chr22 16092941 16093413 panTro2 chr22 16103944 16104421 ponAbe2 chr22 13797750 13798215 rheMac2 chr10 61890946 61891409 calJac1 Contig6986 140254 140728 mononucleotide A 0 13 + 29 aaaaaaaa---AAAAAA
67
68 Information from each species starts with a ">" followed by the species name, for instance, ">rheMac2". Below we describe all information listed for a microsatellite sequence in each species.
69
70 After the species tag the alignment number is listed.
71 What follows are the details of the alignment block from all the species, including the chromosome number, start and end coordinates in each species. For instance:
72
73 hg18 chr22 16092941 16093413 panTro2 chr22 16103944 16104421 ponAbe2 chr22 13797750 13798215 rheMac2 chr10 61890946 61891409 calJac1 Contig6986 140254 140728
74
75 suggests that the alignment block has five species: hg18, panTro2, ponAbe2, rheMac2 and calJac1.
76
77 Then the type of microsatellite is written, for instance, "mononucleotide".
78
79 Then the microsatellite motif.
80
81 Then the number of gaps in the alignment, in the respective species (as noted above, rheMac2 in this case).
82
83 Then the start coordinate, the strand, and the end coordinate WITHIN the alignment block.
84
85 At the end is listed the microsatellite sequence.
86
87 If the microsatellite contains interruptions (which are not important for this tool), then the interruptions' information will be written out after the microsatellite sequence.
88
89
90 </help>
91
92
93 </tool>