Previous changeset 0:3e19d0dfcf3e (2015-03-23) Next changeset 2:feb40a9a8eae (2015-07-10) |
Commit message:
Uploaded |
modified:
MDSplot/mdsplot.xml tool_dependencies.xml |
added:
MDSplot/MDSbasedOnIBSmatrix.pl.org PedToFasta/Ped2Fasta.pl PedToFasta/pedToFasta.xml PedToFasta/test-data/result.fa PedToFasta/test-data/sample.ped Rooting/Rooting.pl Rooting/Rootings_54.jar Rooting/rooting.xml Rooting/test-data/newick Rooting/test-data/out_tree Rooting/test-data/out_tree.log SNP_density/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl SNP_density/calculateSlidingWindowsSNPdensitiesFromHapMap_wrapper.xml SNP_density/calculateSlidingWindowsSNPdensitiesFromHapmap.sh SNP_density/test-data/.svn/entries SNP_density/test-data/.svn/text-base/hapmap.svn-base SNP_density/test-data/.svn/text-base/result.txt.svn-base SNP_density/test-data/.svn/text-base/result_bysample.txt.svn-base SNP_density/test-data/hapmap SNP_density/test-data/result.txt SNP_density/test-data/result_bysample.txt VCF2Hapmap/VCF2FastaAndHapmap.pl VCF2Hapmap/vcf2FastaAndHapmap.sh VCF2Hapmap/vcf2FastaAndHapmap.xml check_gwas_inputs/CheckGWASInputs.pl check_gwas_inputs/CheckGWASInputs.sh check_gwas_inputs/CheckGWASInputs.xml egglib/CalculateDiversityIndexes.pl egglib/CalculateDiversityIndexes.sh egglib/CalculateDiversityIndexes.xml egglib/egglib-2.1.5/bin/eggstats egglib/egglib-2.1.5/include/egglib-cpp/Align.hpp egglib/egglib-2.1.5/include/egglib-cpp/Arg.hpp egglib/egglib-2.1.5/include/egglib-cpp/BaseDiversity.hpp egglib/egglib-2.1.5/include/egglib-cpp/ChangeTypes.hpp egglib/egglib-2.1.5/include/egglib-cpp/CharMatrix.hpp egglib/egglib-2.1.5/include/egglib-cpp/Consensus.hpp egglib/egglib-2.1.5/include/egglib-cpp/Container.hpp egglib/egglib-2.1.5/include/egglib-cpp/Controller.hpp egglib/egglib-2.1.5/include/egglib-cpp/Convert.hpp egglib/egglib-2.1.5/include/egglib-cpp/Current.hpp egglib/egglib-2.1.5/include/egglib-cpp/DataMatrix.hpp egglib/egglib-2.1.5/include/egglib-cpp/Edge.hpp egglib/egglib-2.1.5/include/egglib-cpp/EggException.hpp egglib/egglib-2.1.5/include/egglib-cpp/FStatistics.hpp egglib/egglib-2.1.5/include/egglib-cpp/Fasta.hpp egglib/egglib-2.1.5/include/egglib-cpp/HFStatistics.hpp 
egglib/egglib-2.1.5/include/egglib-cpp/HaplotypeDiversity.hpp egglib/egglib-2.1.5/include/egglib-cpp/LinkageDisequilibrium.hpp egglib/egglib-2.1.5/include/egglib-cpp/MicrosatelliteDiversity.hpp egglib/egglib-2.1.5/include/egglib-cpp/Ms.hpp egglib/egglib-2.1.5/include/egglib-cpp/Mutation.hpp egglib/egglib-2.1.5/include/egglib-cpp/Mutator.hpp egglib/egglib-2.1.5/include/egglib-cpp/NucleotideDiversity.hpp egglib/egglib-2.1.5/include/egglib-cpp/ParamSet.hpp egglib/egglib-2.1.5/include/egglib-cpp/Population.hpp egglib/egglib-2.1.5/include/egglib-cpp/Random.hpp egglib/egglib-2.1.5/include/egglib-cpp/SitePolymorphism.hpp egglib/egglib-2.1.5/include/egglib-cpp/Staden.hpp egglib/egglib-2.1.5/include/egglib-cpp/config.h egglib/egglib-2.1.5/lib/libegglib-cpp.a hapmap2mlmm/HapmapToMLMMFiles.pl hapmap2mlmm/HapmapToMLMMFiles.sh hapmap2mlmm/HapmapToMLMMFiles.xml hapmap2mlmm/transpose.awk ped2bed/ped2bed.sh ped2bed/ped2bed.xml |
removed:
VCFToolFilter/VCFToolsFilter.pl VCFToolFilter/test-data/result.log VCFToolFilter/test-data/result.vcf VCFToolFilter/test-data/sample.vcf VCFToolFilter/vcfToolsFilter.sh VCFToolFilter/vcfToolsFilter.xml VCFToolsStats/VCFToolsStats.pl VCFToolsStats/test-data/result.TsTv.summary VCFToolsStats/test-data/result.annotation VCFToolsStats/test-data/result.het VCFToolsStats/test-data/result.imiss VCFToolsStats/test-data/result.log VCFToolsStats/test-data/sample.vcf VCFToolsStats/vcfToolsStats.sh VCFToolsStats/vcfToolsStats.xml admixture/Admixture.pl admixture/admixture.sh admixture/admixture.xml admixture/transpose.awk snpEff/SnpEff.pl snpEff/snpEff-pipe.sh snpEff/snpEff.xml tassel/tassel.sh tassel/tassel.xml tassel/tool-data/tool_dependencies.xml |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 MDSplot/MDSbasedOnIBSmatrix.pl.org --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MDSplot/MDSbasedOnIBSmatrix.pl.org Fri Jul 10 04:39:30 2015 -0400 |
[ |
#!/usr/bin/perl

# MDSbasedOnIBSmatrix.pl
#
# Runs PLINK on a PED/MAP pair (--cluster --matrix --mds-plot 2) and
# reformats two of its outputs:
#   <out>.mds   ->  <out>.mds_plot.txt    (one line per individual: id, C1, C2)
#   <out>.mibs  ->  <out>.ibs_matrix.txt  (matrix labelled with the individuals)
#
# Arguments:
#   -i, --in   prefix of the PLINK input (reads <in>.ped; PLINK gets --file <in>)
#   -o, --out  prefix used for every output file
#
# Dies with a message when PLINK fails or when a file cannot be opened, so a
# failed run no longer ends "successfully" with empty result files.

use strict;
use warnings;
# Switch and Bio::SeqIO are loaded but nothing in this script uses them; they
# are kept so the module requirements of the script stay unchanged.
use Switch;
use Getopt::Long;
use Bio::SeqIO;

my $PLINK_EXE = "/apps/www/sniplay.cirad.fr/tools/plink/plink-1.07-x86_64/plink";

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
    -i, --in <input>
    -o, --out <output>
~;
$usage .= "\n";

my ($in, $out);

GetOptions(
    "in=s"  => \$in,
    "out=s" => \$out
);

die $usage
  if ( !$in || !$out );

run_plink($in, $out);
my @individuals = read_individuals("$in.ped");
write_mds_plot("$out.mds", "$out.mds_plot.txt");
write_ibs_matrix("$out.mibs", "$out.ibs_matrix.txt", \@individuals);

# Wrap a string in single quotes so the shell passes it through as one literal
# word, whatever characters (spaces, quotes, metacharacters) it contains.
sub shell_quote {
    my ($text) = @_;
    my $sq = "'";
    $text =~ s/$sq/$sq\\$sq$sq/g;
    return $sq . $text . $sq;
}

# Return the value unchanged, or an empty string when it is undefined.
sub field_or_empty {
    my ($value) = @_;
    return '' unless defined $value;
    return $value;
}

# Run PLINK on the given input prefix, appending its output to
# <input prefix>.plink.log. Dies when PLINK cannot be started or exits with a
# non-zero status.
sub run_plink {
    my ($prefix_in, $prefix_out) = @_;

    # The shell is still needed for the log redirection, so every path is
    # quoted instead of being interpolated raw into the command line.
    my $plink_command = join(" ",
        shell_quote($PLINK_EXE),
        "--file", shell_quote($prefix_in),
        "--noweb --cluster --matrix --mds-plot 2",
        "--out", shell_quote($prefix_out),
        ">>" . shell_quote("$prefix_in.plink.log"),
        "2>&1");

    my $status = system($plink_command);
    if ($status == -1) {
        die "Cannot run plink: $!\n";
    }
    if ($status != 0) {
        die "plink failed with status " . ($status >> 8)
          . " (see $prefix_in.plink.log)\n";
    }
    return;
}

# Return the first whitespace-separated field of every non-blank line of the
# PED file, in file order. Replaces the former call to an external awk.
sub read_individuals {
    my ($ped_path) = @_;

    open(my $ped, '<', $ped_path) or die "Cannot read $ped_path: $!\n";
    my @ids;
    while (my $row = <$ped>) {
        # split ' ' ignores leading whitespace, like awk's $1.
        my ($first) = split(' ', $row);
        next unless defined $first;
        push @ids, $first;
    }
    close($ped);
    return @ids;
}

# Copy fields 1, 4 and 5 of every line that follows the header (the first line
# containing "C1") of the .mds file to the plot file.
sub write_mds_plot {
    my ($mds_path, $plot_path) = @_;

    open(my $mds,  '<', $mds_path)  or die "Cannot read $mds_path: $!\n";
    open(my $plot, '>', $plot_path) or die "Cannot write $plot_path: $!\n";

    my $past_header = 0;
    while (my $row = <$mds>) {
        if ($past_header) {
            $row =~ s/[\r\n]//g;
            # NOTE(review): presumably the lines start with whitespace, so
            # index 0 is an empty field and 1/4/5 are FID/C1/C2 - confirm
            # against the PLINK .mds layout.
            my @cols = split(/\s+/, $row);
            # NOTE(review): the separator is reproduced as the single space
            # visible in this view of the file; the original may have been a
            # literal tab - confirm.
            my $text = field_or_empty($cols[1]) . " "
              . field_or_empty($cols[4]) . " "
              . field_or_empty($cols[5]) . "\n";
            print $plot $text;
        }
        if ($row =~ /C1/) { $past_header = 1; }
    }
    close($mds);
    close($plot) or die "Cannot finish writing $plot_path: $!\n";
    return;
}

# Write the .mibs matrix with a header line listing all individuals and each
# row prefixed by the individual at the same position in the PED file.
sub write_ibs_matrix {
    my ($mibs_path, $matrix_path, $ids) = @_;

    open(my $mibs,   '<', $mibs_path)   or die "Cannot read $mibs_path: $!\n";
    open(my $matrix, '>', $matrix_path) or die "Cannot write $matrix_path: $!\n";

    # NOTE(review): the space after the row label is reproduced as seen in
    # this view; the original may have been a literal tab - confirm.
    my $header = "Individuals " . join("\t", @{$ids}) . "\n";
    print $matrix $header;

    my $row_index = 0;
    while (my $row = <$mibs>) {
        $row =~ s/[\r\n]//g;
        my @values = split(/\s+/, $row);
        my $text = field_or_empty($ids->[$row_index]) . " "
          . join("\t", @values) . "\n";
        print $matrix $text;
        $row_index++;
    }
    close($mibs);
    close($matrix) or die "Cannot finish writing $matrix_path: $!\n";
    return;
}
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 MDSplot/mdsplot.xml --- a/MDSplot/mdsplot.xml Mon Mar 23 05:57:27 2015 -0400 +++ b/MDSplot/mdsplot.xml Fri Jul 10 04:39:30 2015 -0400 |
[ |
@@ -6,7 +6,7 @@ <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> <requirements> <requirement type="binary">perl</requirement> - <requirement type="package" version="0.1.13">plink</requirement> + <requirement type="package" version="1.07">plink</requirement> </requirements> <!-- [OPTIONAL] Command to be executed to get the tool's version string --> @@ -63,17 +63,31 @@ <!-- [OPTIONAL] Help displayed in Galaxy --> <help> + .. class:: infomark -**Authors** +**Authors** plink_ + +.. _plink: http://pngu.mgh.harvard.edu/purcell/plink/ + + | "PLINK: a toolset for whole-genome association and population-based linkage analysis.", **Purcell S, Neale B, Todd-Brown K, Thomas L, Ferreira MAR, Bender D, Maller J, Sklar P, de Bakker PIW, Daly MJ, Sham PC.**, American Journal of Human Genetics, 81, 2007. + + +.. class:: infomark + +**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr + +.. class:: infomark + +**Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). --------------------------------------------------- -.. class:: infomark -**Please cite** If you use this tool, please cite dereeper et al. 2015 in prep. 
- ---------------------------------------------------- ======== MDS plot @@ -90,20 +104,14 @@ Workflow position ----------------- -**Upstream tools** - -=========== ========================== ======= -Name output file(s) format -=========== ========================== ======= -=========== ========================== ======= +**Upstream tool** - -**Downstream tools** +=============== ========================== =============== +Name output file(s) format +=============== ========================== =============== +VCFtools Filter PED and MAP file tabular and MAP +=============== ========================== =============== -=========== ========================== ======= -Name output file(s) format -=========== ========================== ======= -=========== ========================== ======= ---------- @@ -137,6 +145,11 @@ Output_name.log Log file +------------ +Dependencies +------------ +plink + version 1.07 --------------------------------------------------- @@ -199,5 +212,21 @@ </help> + <citations> + <!-- [HELP] As DOI or BibTex entry --> + <citation type="bibtex">@article{Dereeper03062015, +author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, +title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, +year = {2015}, +doi = {10.1093/nar/gkv351}, +abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. 
Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, +URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, +eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, +journal = {Nucleic Acids Research} +} + + </citation> + + </citations> </tool> |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 PedToFasta/Ped2Fasta.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PedToFasta/Ped2Fasta.pl Fri Jul 10 04:39:30 2015 -0400 |
[ |
#!/usr/bin/perl

# Ped2Fasta.pl
#
# Converts a tab-delimited PED file into a FASTA file: one record per PED
# line, named after the first column, with one IUPAC letter per genotype
# (allele pairs start at the 7th column).
#
# Arguments:
#   -i, --in   PED input file
#   -o, --out  FASTA output file
#
# Dies when the input cannot be read or the output cannot be written.

use strict;
use warnings;
use Getopt::Long;

my $usage = qq~Usage:$0 <args> [<opts>]

where <args> are:

    -i, --in <PED input>
    -o, --out <Fasta output>
~;
$usage .= "\n";

my ($input, $out);

GetOptions(
    "in=s"  => \$input,
    "out=s" => \$out,
);

die $usage
  if ( !$input || !$out );

# Genotype (two concatenated alleles) -> IUPAC nucleotide code.
my %IUPAC =
(
    '00' => "?",
    'AA' => "A",
    'CC' => "C",
    'GG' => "G",
    'TT' => "T",
    'AG' => "R",
    'GA' => "R",
    'CT' => "Y",
    'TC' => "Y",
    'TG' => "K",
    'GT' => "K",
    'CG' => "S",
    'GC' => "S",
    'AT' => "W",
    'TA' => "W",
    'AC' => "M",
    'CA' => "M",
);

# Open the input first so that a bad input path does not truncate an existing
# output file.
open(my $ped, '<', $input) or die "File does not exist: $input ($!)\n";
open(my $fasta, '>', $out) or die "Cannot write $out: $!\n";

while (my $line = <$ped>) {
    $line =~ s/[\r\n]//g;
    next if $line eq '';

    # Split the cleaned line, not $_: otherwise the last allele keeps its line
    # terminator, its genotype is not found in %IUPAC and the final site of
    # every sequence is silently dropped.
    my @infos = split("\t", $line);
    my $ind = $infos[0];
    print $fasta ">$ind\n";

    my $sequence = '';
    for (my $i = 6; $i <= $#infos; $i += 2) {
        # An odd number of allele columns leaves the last pair incomplete.
        my $second = defined $infos[$i + 1] ? $infos[$i + 1] : '';
        my $code = $infos[$i] . $second;
        # Unknown genotypes become "?" (the symbol already used for missing
        # data) so that every sequence keeps one character per site.
        $sequence .= exists $IUPAC{$code} ? $IUPAC{$code} : '?';
    }
    print $fasta "$sequence\n";
}

close($ped);
close($fasta) or die "Cannot finish writing $out: $!\n";
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 PedToFasta/pedToFasta.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PedToFasta/pedToFasta.xml Fri Jul 10 04:39:30 2015 -0400 |
[ |
@@ -0,0 +1,171 @@ +<tool id="sniplay_pedToFasta" name="Ped2Fasta" version="1.0.0"> + + <!-- [REQUIRED] Tool description displayed after the tool name --> + <description> Convert PED file to Fasta File </description> + + <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> + <requirements> + <requirement type="binary">perl</requirement> + </requirements> + + <!-- [OPTIONAL] Command to be executed to get the tool's version string --> + <version_command> +<!-- + tool_binary -v +--> + </version_command> + + <!-- [REQUIRED] The command to execute --> + <command interpreter="perl"> + Ped2Fasta.pl --in $filein --out $fileout + </command> + + <!-- [REQUIRED] Input files and tool parameters --> + <inputs> + <param name="filein" type="data" format="txt" optional="false" label="PED input" /> + <param name="fileout_label" type="text" value="filtered" optional="false" label="Output file name"/> + </inputs> + + <!-- [REQUIRED] Output files --> + <outputs> + <data name="fileout" type="data" format="fasta" label="${fileout_label}.fa" /> + </outputs> + + <!-- [STRONGLY RECOMMANDED] Exit code rules --> + <stdio> + <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> + <exit_code range="1:" level="fatal" /> + </stdio> + + <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> + <tests> + <!-- [HELP] Test files have to be in the ~/test-data directory --> + <test> + <param name="filein" value="sample.ped" /> + <output name="fileout" file="result.fa" /> + </test> + </tests> + + <!-- [OPTIONAL] Help displayed in Galaxy --> + <help> + +.. class:: infomark + +**Authors** Dereeper Alexis (alexis.dereeper@ird.fr), IRD, South Green platform + + | **Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). + +.. 
class:: infomark + +**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. + +.. class:: infomark + +**Support** For any questions, please send an e-mail to support.abims@sb-roscoff.fr + +--------------------------------------------------- + +========= +Ped2Fasta +========= + +----------- +Description +----------- + + Convert PED file to Fasta File + +----------------- +Workflow position +----------------- + +**Upstream tool** + +=============== ========================== ======= +Name output file(s) format +=============== ========================== ======= +VCFtools Filter VCF file VCF +=============== ========================== ======= + + +**Downstream tool** + +=========== ========================== ======= +Name input file(s) format +=========== ========================== ======= +Readseq Fasta alignment fasta +=========== ========================== ======= + + +---------- +Input file +---------- + +PED file + PED file usually from VCF tools + +---------- +Parameters +---------- + +Output file name + Prefix for the output fasta file + +------------ +Output files +------------ + +Fasta file + PED file conversion + +--------------------------------------------------- + +--------------- +Working example +--------------- + +Input files +=========== + +PED file +--------- + +:: + + CATB1 CATB1 0 0 0 0 C T T A C T A T A T A G G A + +Parameters +========== + +Output name -> pedFile + +Output files +============ + +pedFile.fa +---------- + +:: + + YWYWWRRSYYMKRRKMYRKSRKYRYRYKRKRSKKSYRWYSYRRYRRRWYWWYYWRRYRSRWSSRMYRRKSWMSKWRRYYWMYKYWRSYRWRYMWYYYMKYKYWRYRYRY + + + </help> + + <citations> + <!-- [HELP] As DOI or BibTex entry --> + <citation type="bibtex">@article{Dereeper03062015, +author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, +title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic 
variations}, +year = {2015}, +doi = {10.1093/nar/gkv351}, +abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, +URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, +eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, +journal = {Nucleic Acids Research} +} + + </citation> + + </citations> +</tool> |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 PedToFasta/test-data/result.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PedToFasta/test-data/result.fa Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,2 @@ +>CATB1 +YWYWWRRSYYMKRRKMYRKSRKYRYRYKRKRSKKSYRWYSYRRYRRRWYWWYYWRRYRSRWSSRMYRRKSWMSKWRRYYWMYKYWRSYRWRYMWYYYMKYKYWRYRYRYYKKYWRMMWKYYSYYWRYSKRWRSRRRYYWYYYKYYKYRMKKRYYRKYKKWWKRYWRMYRSWYWYYWRRRRRYRSRKRWKWRKKYYYKMKYYYSWSSYRRKRMSKMKRRKRWRRRRYWRKKYWRKRYWWWKMRRRYRYYWWRYYRKKWYYRRWYYRKYSRRYWRYWSWYYWWKYYKYWYRMRYRYYRRWYYYSRYRRKRRYMMSSRRYYYSRMWRRRRSWRYYMYYRYWYRRSSYYMYRKKRKYWKRRYYRYRMMWYWRYSSWYYYRYWWWWWRYRRYRYWWMMYRYYYRSYYRRRRRYYYRYYRRKKMYMRRRRSYKYRRRYWYRRYWRYSYYRYRRYSRMMKRKYYYMMRWYKWYMRYRSWRYRYMYMMRRYSRYRRRSRRSRRMKRKYYYRRYMYYRYRRKYRRRRRYYRYYWYYSRRMKRYYYSYRRYYWRMYSRYYYMMRRYYRWKMWWWYRRWYYYSWRRYRYRYRRRYRYRYRYYYYRRRWMYKMWRRYYYYRMYSSRRRWYRWRRRWYRWYRKWYYMKRYRMYRYMRRYRYYYYYRMRRWYYRSYYSYRYYYYSWKYMRYYYYYYYRRKWYYMYRYMWYYYSYYMWYMWSYWWWYRYYYSYRYMMRRRYKYRWRYYRRRKYYYKRYYRYYMYRMRYYYKWYWWYYKYRYWWRYRMSSYYYYYKYRYRRYYRYYRYRKRYYKYYYWYRYRRYYYRMYYRRRYWRRKRYYYYRRRYRYYKYYRRRRWYKWRRRMYWRYRRSWSSYYYYRYYYYYSRYYMRRYSRWMYRYYYRRYRYWYYYMWWRRYMRYYYRRMYRRSKRRYYSRYKRRRMRWYYMWYYMRWYYWWYYYKSYKYYYYYYYWYYSRWSYYYYRRYSRMYYYRRYSWYYYYWRYYSRYYKRRYYYMWYYYRWWYKYMYRRRMYYSRKYMMYYRRRWRYRRRMYKYRKRYRYRSYYRYSRMYWWWWWWYKYRSYYWKMRWWYWSMMSMRKYRSWWMKWYYYWKRRKRWRWYWWWYSYRSYYYYYRRWMMRWYRYYWYKRMWRYRYYRRWWRYMYYYRYYYMRYYYRRYRWRYRYWWKWMRWRYYRRRYWSYWRSRYWYSRYKRRYRRKRYRYWSWWYRWRRRKYKYRYYWYKKRYYYRRYYYYYRKRYKYMYYRWYKRWMYRRKYRRRYRYRWYSYRYKYMSYYRWRMWKSRSYYRWSRRRRYSMSRRWKRRRYYYRRYWWYYRRYMYMRRYWWRYYYRMRKKYKWMYYRMYYKMWMYRWRKYRSRMKSYRSYYRWMMSSKRYMYKYRWRRRRRYYSRKYRYYYYRYRRKRYYYWYYWYYWRYRKMRKRRYRYWWMMRRRYYYYWSWSRMWWYRWYSYYWRYYRRYRRYYRRRWYSSRYRYYSRRKWYSMYSSKYYRMRRRYWMRYRKRRRRKYYWSYRYYYYRRMRRYWRRRRYSYRYYRRWSRMSRYSRWYRSRYRKKWYRYYWRWRRYMKRKRKKRKYMMKYRMRRRYRRYYRMWWRRRYYWWYRKRYYYRYYYYYRYYWRRYYMYSSWRRKYRRYYRRKYSMWMWRYRMRRYRMYRRWRWRKRYSKYSSRYSRYYMRRRRWKYRSRRKKYRRMMYRMRWRRRKRWRRYYKWYRKRRRMKRRSYYWSRKRRRMYWWWWKRYRRYYMWMYRRYRRMYYSWYMMYRYSKWWRYWYRWYWWYRRYRSSMYKMSYRSRRYYYYMKWWMKSRMRYRSRYMRRRYWRYMKYRYSWWMSKYYSWYRYRRRMYYWSRWWYYRYMSSYYYRYYWYYMRYSMWRYRYYRKRRRRMRYRRWYSYYWYYMWYRYYRWRYYYSRRYRRRMMMRSYKRWWKRYYRRKYRYWRRRYWRRRYMYYYWYWYYWRRRYRYRSRWMRRRWYYYKWYRRY
SRKMRRRRSKYMWKRYMKKRSMKYYRRRRKRYRKYRYMYYYYRRRMKKMRWYYKWYMWYSWWRYRKYKWWRKYRRYMYRWRYWWRYWMRRRRRRRKRSYMKYRRKRKWRYYWKSSYYWYMSYSYRWRRRMRWMRWSWYSRYYYWWRRYRSWWSKSRYYYYYYMYRYSRWSWYYYMYWYRRYWRYYYWSYRYYRKWRYYRYRWKYMSYRRRKRRWYYWKSKSYMSRRYYYYYYYYRYRWSWMYWRKYMRWYRKYRRMMYYRMYMWYKWYYRWRRRYRYYRYYYRYSYYWWYYYRYRRRSRRYRMYMKYKYRYSYKYWRWRRYSYRWKRYYWRYYYYKYMWYMWRYMWSRKRWYRMRYYRYKRSRRRWRMRRKYRYRYRYRRYRKYYRRYRRMSKRKRWKRWMYMYKRWRSRYYYKRWYRRRKRYRRYWWKYYYRMRYRRMRWSYSYWWRYMWYKRKSYRMYWKSKSRWYYYRRRSRYRYKRYWWWRMWYYSYRRYKYYYSWYKRMYRKRRRYYWSKYSKYYMYYYYKRKYYYSSRRRYRSWYRYRKMSYRRWMRSYKRYKKWYYYYMKRYWYYRYRRMRSMMYSMKYYYSRKYRYMYSRKYKYYYKRKYWYKYYYRYRRKYWRRRYYSRYRWWRRRWYMRWRRRRRYSRWKYRMWYYYRYRYRRRRRYRRRYYYYWYYRRYYYRMKYYYWWMWRRRRYRYRRRWMKRRYWKKRYRYMYMYYRYYWRYRRWRWRMYYSRWRYRYYRMKRMWSRRKRRYYRYRKWWMSYRRRRSWKYSMKYWRSYSYSSYRKRRKSYYYRWRYWYYRYRRRYYYRYRYRRYRWWYYRYRSYRRYRMRKYSYWYMWYRYYRYYYSWYYRMRKRYSYWYRWYRYRYRYYYYRRMYRYWYYYYRYRSMRYKRYRYKMYYYYYRSWSMRRYRSWYYWWYYWYRYRRYSRRYYRYKYRYRYYYRRRSYKRMYRMYWWSRWYYMYKYRRMYWRKYRYRRYYRKRYYWYYWYRRRRSRYSYYRYYRWYRRWKRRYRWSYRRYYRYYRYRRRYRKMYWYMSRYRYYWSWKRWYYMSMRWRRYKYYYRYRKWYYRRKYRRMWMWYRSKKKKYRSKWWMYYSMYRRMWRKMKRYYRRKMRKKYYWRKWKYYRRRRMYRWRRWKSRKKYMRWYRSRRKYKSWRSRRRYKYWYWYYMYRYYRRYSRRRYRYKWRRYYKMWWKYYWMRMYRRRYSYYRWMYYRYSSYYYRSRWWRSKRYRRMKYWRRSRRYYSRYRYYRYRYMSYRRYMRYRYYRMRWYRYMWKWRYKYRRKRRKSYYMRRYYYYYYMYRRRRKYYKRRWYYWMRYYRRRYYYKRYYRYYRYMKRRYRYWWYRRRRYRYYSWMMWYWMYWSYYRWWWYRRRRYWYKSKRYSSYYYWRRRMMRSYYKYRKYYMYKRYYYKRYSMYYWYRSWRRWRYKRYRYYYRYRKYYYRYKYYYYRSYKWYMWSMRRYRMKRMRYSRYSRYYYRMYKRRRWRYYRRRRYYWYYWRRKWKKRYRWRWYSMYRRSMRWWWSMRMSRKYWRRYYYWYRYSYRMWMRSWRYKKRRWSSYYYYYRYYYMYYYYMRYKYYRWYYYRYYYYYYRMYYYRRRYYYYRRYKWRYRRRYWWYKMRKSYYYYYRYWRRRRKSYSYM |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 PedToFasta/test-data/sample.ped --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PedToFasta/test-data/sample.ped Fri Jul 10 04:39:30 2015 -0400 |
b |
b'@@ -0,0 +1,1 @@\n+CATB1\tCATB1\t0\t0\t0\t0\tC\tT\tT\tA\tC\tT\tA\tT\tA\tT\tA\tG\tG\tA\tC\tG\tC\tT\tT\tC\tA\tC\tT\tG\tA\tG\tA\tG\tT\tG\tA\tC\tC\tT\tA\tG\tT\tG\tC\tG\tA\tG\tT\tG\tT\tC\tA\tG\tC\tT\tG\tA\tT\tC\tG\tT\tG\tA\tG\tT\tG\tA\tG\tC\tT\tG\tT\tG\tC\tG\tC\tT\tA\tG\tA\tT\tC\tT\tG\tC\tT\tC\tG\tA\tA\tG\tC\tT\tG\tA\tA\tG\tG\tA\tA\tT\tT\tC\tA\tT\tA\tT\tC\tT\tC\tT\tA\tT\tA\tG\tG\tA\tC\tT\tA\tG\tG\tC\tA\tG\tA\tT\tG\tC\tC\tG\tG\tA\tC\tA\tT\tC\tA\tG\tA\tG\tT\tG\tG\tC\tA\tT\tA\tC\tG\tC\tG\tT\tA\tT\tG\tA\tG\tA\tC\tT\tC\tT\tA\tT\tC\tA\tC\tT\tT\tG\tT\tC\tA\tT\tG\tA\tC\tG\tT\tC\tG\tA\tT\tA\tA\tG\tC\tT\tC\tA\tT\tA\tC\tT\tC\tT\tT\tC\tA\tC\tG\tT\tC\tT\tT\tG\tC\tT\tA\tT\tA\tG\tC\tT\tA\tG\tC\tT\tA\tG\tC\tT\tT\tC\tT\tG\tG\tT\tC\tT\tT\tA\tA\tG\tC\tA\tA\tC\tA\tT\tT\tG\tC\tT\tC\tT\tC\tG\tC\tT\tT\tC\tT\tA\tA\tG\tT\tC\tC\tG\tT\tG\tA\tG\tA\tT\tG\tA\tG\tC\tA\tG\tA\tG\tG\tA\tC\tT\tT\tC\tT\tA\tC\tT\tT\tC\tT\tC\tT\tG\tT\tC\tT\tC\tG\tT\tT\tC\tA\tG\tA\tC\tT\tG\tG\tT\tG\tA\tC\tT\tC\tT\tA\tG\tT\tG\tT\tC\tT\tG\tT\tG\tA\tT\tT\tA\tT\tG\tG\tA\tC\tT\tT\tA\tA\tG\tC\tA\tT\tC\tG\tA\tC\tG\tA\tT\tC\tT\tT\tA\tT\tC\tT\tC\tT\tA\tG\tA\tA\tG\tA\tG\tA\tG\tA\tG\tT\tC\tG\tA\tG\tC\tA\tG\tG\tT\tG\tA\tA\tT\tT\tG\tA\tT\tA\tG\tG\tT\tT\tG\tC\tT\tT\tC\tT\tC\tG\tT\tA\tC\tG\tT\tC\tT\tT\tC\tT\tC\tG\tC\tT\tA\tC\tG\tC\tG\tC\tT\tA\tG\tG\tA\tT\tG\tG\tA\tA\tC\tC\tG\tG\tT\tA\tC\tT\tG\tA\tG\tG\tA\tG\tT\tA\tG\tA\tT\tG\tA\tG\tA\tA\tG\tG\tA\tT\tC\tA\tT\tA\tG\tG\tT\tT\tG\tC\tT\tT\tA\tA\tG\tT\tG\tG\tA\tT\tC\tT\tA\tA\tT\tT\tA\tG\tT\tA\tC\tG\tA\tG\tA\tA\tG\tC\tT\tA\tG\tC\tT\tT\tC\tT\tA\tA\tT\tA\tG\tT\tC\tT\tC\tA\tG\tT\tG\tG\tT\tT\tA\tT\tC\tT\tC\tA\tG\tG\tA\tT\tA\tT\tC\tT\tC\tA\tG\tT\tG\tC\tT\tC\tG\tA\tG\tG\tA\tC\tT\tA\tT\tA\tG\tT\tC\tA\tT\tG\tC\tA\tT\tT\tC\tC\tT\tA\tT\tA\tT\tG\tT\tT\tC\tT\tC\tG\tT\tT\tC\tA\tT\tT\tC\tG\tA\tC\tA\tA\tG\tC\tT\tA\tG\tT\tC\tC\tT\tA\tG\tA\tG\tA\tT\tC\tT\tC\tT\tT\tC\tG\tC\tG\tA\tC\tT\tA\tG\tG\tA\tT\tG\tA\tG\tG\tA\tC\tT\tA\tC\tC\tA\tG\tC\tC\tG\tG\tA\tA\tG\tC\tT\tC\tT\tC\tT\tC\tG\tA\tG\tA\tC\tA\tT\tG\tA\tG\tA\tA\tG
\tA\tG\tG\tC\tA\tT\tG\tA\tC\tT\tT\tC\tC\tA\tC\tT\tT\tC\tA\tG\tT\tC\tA\tT\tC\tT\tG\tA\tG\tA\tG\tC\tC\tG\tC\tT\tC\tT\tA\tC\tT\tC\tA\tG\tT\tG\tG\tT\tA\tG\tT\tG\tC\tT\tT\tA\tG\tT\tA\tG\tG\tA\tC\tT\tT\tC\tG\tA\tT\tC\tA\tG\tC\tA\tA\tC\tA\tT\tT\tC\tT\tA\tG\tA\tT\tC\tG\tC\tG\tC\tT\tA\tC\tT\tT\tC\tT\tC\tA\tG\tT\tC\tA\tT\tA\tT\tT\tA\tA\tT\tT\tA\tG\tA\tT\tC\tG\tA\tA\tG\tT\tC\tG\tA\tC\tT\tA\tT\tA\tT\tA\tC\tA\tC\tC\tT\tA\tG\tT\tC\tT\tC\tT\tC\tA\tG\tC\tG\tC\tT\tC\tT\tG\tA\tG\tA\tG\tA\tG\tA\tG\tA\tC\tT\tT\tC\tC\tT\tA\tG\tC\tT\tT\tC\tA\tG\tA\tG\tG\tT\tG\tT\tC\tA\tC\tT\tC\tA\tG\tA\tG\tA\tG\tA\tA\tG\tC\tG\tT\tC\tT\tG\tT\tC\tG\tA\tA\tG\tG\tA\tT\tC\tT\tA\tT\tC\tA\tG\tG\tA\tT\tC\tT\tA\tA\tG\tC\tT\tG\tC\tC\tT\tC\tT\tG\tA\tT\tC\tG\tA\tG\tA\tC\tT\tG\tC\tA\tG\tA\tC\tA\tC\tG\tT\tG\tA\tG\tT\tT\tC\tT\tC\tT\tC\tC\tA\tC\tA\tG\tA\tA\tT\tC\tT\tT\tG\tT\tA\tT\tC\tA\tC\tA\tG\tT\tC\tG\tA\tC\tG\tA\tT\tA\tG\tT\tC\tA\tG\tT\tC\tA\tC\tC\tT\tC\tA\tA\tC\tG\tA\tA\tG\tT\tC\tG\tC\tG\tA\tC\tT\tG\tA\tG\tA\tA\tG\tG\tC\tA\tG\tA\tG\tG\tC\tA\tG\tA\tG\tA\tC\tG\tT\tG\tA\tG\tT\tC\tT\tC\tT\tT\tC\tG\tA\tA\tG\tC\tT\tA\tC\tC\tT\tC\tT\tA\tG\tC\tT\tG\tA\tG\tA\tG\tT\tC\tT\tG\tA\tG\tA\tG\tA\tG\tA\tG\tA\tC\tT\tT\tC\tG\tA\tC\tT\tT\tC\tA\tT\tC\tT\tT\tC\tG\tC\tG\tA\tG\tA\tA\tC\tG\tT\tG\tA\tC\tT\tC\tT\tT\tC\tC\tG\tT\tC\tG\tA\tA\tG\tT\tC\tT\tC\tA\tT\tG\tA\tC\tA\tT\tC\tG\tC\tG\tA\tC\tT\tC\tT\tC\tT\tC\tA\tC\tA\tG\tA\tG\tA\tC\tT\tC\tT\tG\tA\tT\tA\tT\tG\tA\tC\tA\tT\tT\tA\tA\tT\tC\tT\tA\tG\tG\tA\tT\tA\tC\tT\tT\tC\tT\tC\tC\tG\tT\tA\tG\tA\tG\tA\tT\tC\tA\tG\tT\tC\tG\tA\tC\tT\tA\tG\tA\tG\tG\tA\tT\tC\tG\tA\tT\tC\tA\tG\tC\tT\tG\tA\tT\tC\tC\tT\tC\tT\tT\tC\tG\tA\tA\tG\tG\tA\tA\tT\tC\tA\tT\tC\tT\tG\tC\tA\tA\tT\tA\tG\tG\tA\tC\tT\tC\tT\tC\tT\tC\tT\tG\tA\tC\tA\tT\tC\tC\tG\tG\tC\tA\tG\tG\tA\tG\tA\tT\tA\tC\tT\tG\tA\tT\tA\tA\tG\tG\tA\tA\tG\tA\tT\tT\tC\tA\tG\tA\tT\tT\tC\tA\tG\tT\tG\tA\tT\tC\tT\tC\tT\tC\tA\tG\tT\tG\tA\tC\tT\tG\tA\tC\tA\tC\tT\tG\tA\tC\tT\tC\tA\tG\tA\tG\tA\tC\tT\tA\tG\tT\tC\tT\tC\tC\tT\tC\tT\tC\tT\tG\tA\tA\tC\tA\tG\tA\tG\tA\tT\tC\tT\tC\tT\t
G\tA\tG\tC\tC\tT\tT\tC\tC\tG\tT\tC\tA\tG\tT\tC\tC\tT\tT\tC\tC\tT\tG\tC\tA\tT\tG\tT\tC\tT\tA\tC\tA\tG\tC\tT\tT\tC\tC\tT\tC\tT\tC\tT\tT\tC\tC\tT\tG\tA\tG\tA\tG\tT\tA\tT\tC\tT\tT\tC\tC\tA\tC\tT\tG\tA\tC\tT\tC\tA\tT\tA\tC\tT\tC\tT\tT\tC\tC\tG\tC\tT\tT\tC\tA\tC\tT\tA\tC\tT\tA\tC\tA\tT\tC\tG\tT\tC\tT\tA\tA\tT\tT\tA\tT\tC\tG\tA\tC\tT\tC\tT\tT\tC\tC\tG\tT\tC\tG\tA\tC\tT\tA\tC\tC\tA\tG\tA\tG\tA\tG\tA\tT\tC\tT\tG\tT\tC\tA\tG\tA\tT\tG\tA\tT\tC\tC\tT\tG\tA\tA\tG\tA\tG\tT\tG\tC\tT\tC\tT\tT\tC\tT\tG\tG\tA\tC\tT\tC\tT\tG\tA\tC\tT\tC\tT\tC\tA\tT\tC\tG\tA\tC\tA\tG\tA\tC\tT\tT\tC\tT\tC\tG\tT\tA\tT\tC\tT\tT\tA\tT\tA\tT\tC\tC\tT\tT\tG\tC\tT\tG\tA\tT\tC\tA\tT\tT\tA\tA\tG\tT\tC\tG\tA\tA\tC\tG\tC\tG\tC\tC\tT\tT\tC\tC\tT\tC\tT\tC\tT\tT\tG\tT\tC\tA\tG\tT\tC\tA\tG\tA\tG\tC\tT\tC\tT\tA\tG\tC\tT\tT\tC\tG\tA\tC\tT\tG\tA\tG\tT\tG\tA\tC\tT\tC\tT\tG\tT\tC\tT\tC\tT\tC\tT\tT\tA\tC\tT\tA\tG\tT\tC\tG\tA\tG\tA\tC\tT\tT\tC\tC\tT\tG\tA\tA\tC\tC\tT\tC\tT\tA\tG\tG\tA\tG\tA\tC\tT\tA\tT\tA\tG\tA\tG\tG\tT\tA\tG\tC\tT\tC\tT\tC\tT\tC\tT\tA\tG\tA\tG\tG\tA\tC\tT\tG\tA\tC\tT\tC\tT\tG\tT\tC\tT\tT\tC\tG\tA\tG\tA\tG\tA\tG\tA\tT\tA\tC\tT\tG\tT\tT\tA\tG\tA\tG\tA\tG\tA\tC\tA\tT\tC\tA\tT\tG\tA\tT\tC\tA\tG\tG\tA\tC\tG\tA\tT\tC\tG\tG\tC\tC\tT\tC\tT\tC\tT\tC\tT\tG\tA\tT\tC\tC\tT\tC\tT\tT\tC\tC\tT\tC\tG\tA\tG\tT\tC\tT\tC\tC\tA\tG\tA\tA\tG\tC\tT\tC\tG\tG\tA\tA\tT\tA\tC\tT\tC\tA\tG\tC\tT\tC\tT\tC\tT\tG\tA\tG\tA\tC\tT\tA\tG\tC\tT\tT\tA\tC\tT\tT\tC\tC\tT\tA\tC\tA\tT\tA\tT\tG\tA\tG\tA\tC\tT\tA\tC\tG\tA\tT\tC\tC\tT\tC\tT\tG\tA\tG\tA\tA\tC\tC\tT\tG\tA\tG\tA\tG\tC\tT\tG\tG\tA\tA\tG\tC\tT\tC\tT\tG\tC\tG\tA\tT\tC\tG\tT\tG\tA\tG\tA\tG\tA\tA\tC\tA\tG\tT\tA\tT\tC\tT\tC\tC\tA\tT\tA\tC\tT\tC\tT\tA\tC\tG\tA\tT\tA\tC\tT\tC\tT\tT\tA\tT\tA\tT\tC\tT\tC\tT\tC\tG\tT\tC\tG\tT\tC\tT\tG\tT\tC\tC\tT\tC\tT\tT\tC\tC\tT\tC\tT\tT\tC\tT\tA\tC\tT\tT\tC\tC\tG\tA\tG\tT\tA\tC\tG\tC\tT\tC\tT\tC\tT\tT\tC\tG\tA\tG\tA\tT\tC\tG\tC\tA\tG\tA\tC\tT\tC\tC\tT\tT\tC\tG\tA\tG\tA\tT\tC\tC\tG\tA\tT\tC\tT\tC\tT\tT\tC\tC\tT\tA\tT\tG\tA\tC\tT\tC\tT\tC\tG\tA\tG\tT\t'..b'\tG\tG
\tA\tA\tG\tG\tA\tC\tT\tC\tG\tG\tA\tT\tA\tG\tT\tT\tC\tA\tG\tA\tC\tT\tA\tC\tT\tC\tT\tT\tC\tA\tG\tT\tC\tA\tG\tT\tC\tG\tA\tA\tG\tA\tG\tA\tG\tG\tA\tC\tT\tA\tG\tG\tA\tA\tG\tC\tT\tC\tT\tT\tC\tC\tT\tA\tT\tT\tC\tC\tT\tA\tG\tG\tA\tC\tT\tT\tC\tT\tC\tG\tA\tC\tA\tG\tT\tT\tC\tT\tC\tT\tC\tT\tA\tA\tT\tC\tA\tA\tT\tA\tG\tA\tG\tA\tG\tA\tG\tC\tT\tA\tG\tC\tT\tA\tG\tA\tG\tA\tG\tA\tT\tA\tC\tT\tG\tG\tA\tA\tG\tT\tC\tT\tA\tG\tT\tT\tG\tA\tG\tT\tC\tA\tG\tT\tC\tC\tA\tC\tT\tC\tA\tT\tC\tT\tC\tA\tG\tC\tT\tT\tC\tT\tA\tA\tG\tC\tT\tA\tG\tG\tA\tT\tA\tG\tA\tA\tT\tA\tG\tA\tC\tT\tC\tC\tT\tG\tC\tG\tA\tT\tA\tG\tA\tC\tT\tA\tG\tT\tC\tC\tT\tG\tA\tC\tA\tG\tT\tG\tA\tC\tA\tA\tT\tC\tG\tG\tA\tA\tG\tG\tT\tA\tG\tG\tA\tC\tT\tT\tC\tG\tA\tC\tT\tA\tG\tG\tT\tA\tT\tT\tA\tA\tC\tC\tG\tC\tT\tG\tA\tG\tA\tA\tG\tA\tG\tC\tG\tT\tA\tG\tT\tC\tT\tG\tC\tA\tC\tT\tG\tT\tC\tT\tA\tA\tG\tC\tG\tC\tT\tG\tC\tT\tC\tC\tG\tG\tC\tC\tT\tG\tA\tT\tG\tG\tA\tA\tG\tT\tG\tC\tG\tC\tT\tC\tT\tT\tC\tA\tG\tA\tT\tA\tG\tT\tC\tA\tT\tC\tT\tC\tT\tG\tA\tC\tT\tG\tA\tG\tA\tA\tG\tC\tT\tT\tC\tT\tC\tA\tG\tT\tC\tG\tA\tT\tC\tA\tG\tA\tG\tC\tT\tG\tA\tA\tT\tA\tT\tT\tC\tT\tC\tG\tA\tT\tC\tA\tG\tG\tC\tC\tT\tG\tA\tA\tG\tT\tC\tG\tA\tA\tC\tA\tG\tG\tT\tT\tC\tG\tC\tC\tT\tT\tA\tC\tT\tC\tA\tA\tT\tC\tT\tG\tA\tC\tT\tT\tC\tG\tA\tT\tC\tT\tC\tT\tC\tC\tG\tA\tT\tT\tC\tC\tT\tG\tA\tA\tC\tG\tA\tT\tG\tG\tA\tT\tC\tG\tC\tC\tT\tA\tT\tC\tT\tA\tG\tA\tT\tT\tC\tA\tG\tC\tT\tG\tA\tC\tT\tG\tA\tC\tT\tT\tC\tC\tT\tC\tT\tG\tA\tG\tA\tA\tC\tC\tT\tG\tA\tC\tT\tT\tA\tC\tT\tC\tT\tT\tC\tT\tC\tG\tA\tC\tT\tG\tA\tG\tC\tA\tC\tG\tA\tT\tC\tT\tG\tG\tA\tC\tT\tG\tA\tT\tC\tT\tG\tC\tA\tC\tT\tC\tT\tT\tC\tC\tT\tT\tC\tA\tG\tC\tG\tT\tA\tC\tG\tA\tC\tA\tG\tA\tG\tC\tT\tG\tA\tG\tC\tA\tT\tT\tC\tC\tT\tT\tA\tA\tT\tT\tC\tC\tT\tT\tA\tC\tT\tG\tA\tC\tT\tG\tA\tA\tG\tT\tC\tC\tG\tG\tA\tG\tA\tT\tC\tT\tC\tA\tG\tC\tT\tT\tG\tC\tT\tA\tG\tC\tT\tA\tG\tT\tC\tT\tC\tT\tC\tA\tG\tG\tA\tG\tA\tC\tG\tC\tT\tT\tG\tA\tG\tC\tA\tC\tT\tA\tG\tC\tA\tT\tC\tA\tT\tT\tA\tG\tC\tA\tG\tT\tA\tT\tC\tC\tT\tC\tA\tT\tC\tT\tG\tT\tC\tA\tG\tA\tG\tC\tA\tC\tT\tA\tT\tA\tG\tG\tT\tT\t
C\tG\tA\tC\tT\tG\tA\tG\tA\tC\tT\tC\tT\tG\tA\tG\tT\tG\tA\tT\tC\tT\tC\tT\tA\tC\tT\tC\tT\tA\tT\tC\tT\tG\tA\tA\tG\tG\tA\tG\tA\tG\tC\tA\tG\tT\tC\tC\tG\tC\tT\tC\tT\tG\tA\tC\tT\tC\tT\tG\tA\tA\tT\tC\tT\tG\tA\tA\tG\tT\tA\tT\tG\tG\tA\tG\tA\tC\tT\tG\tA\tT\tA\tG\tC\tC\tT\tG\tA\tG\tA\tT\tC\tC\tT\tG\tA\tT\tC\tT\tC\tG\tA\tC\tT\tG\tA\tG\tA\tG\tA\tT\tC\tA\tG\tT\tG\tA\tC\tC\tT\tT\tA\tT\tC\tC\tA\tG\tC\tG\tA\tC\tT\tG\tA\tC\tT\tC\tT\tT\tA\tC\tG\tT\tA\tT\tG\tG\tA\tT\tA\tC\tT\tC\tT\tA\tC\tG\tC\tA\tC\tG\tA\tA\tT\tA\tG\tA\tG\tT\tC\tG\tT\tC\tT\tC\tT\tT\tC\tA\tG\tT\tC\tA\tG\tT\tG\tA\tT\tT\tC\tC\tT\tG\tA\tA\tG\tG\tT\tT\tC\tA\tG\tG\tA\tA\tC\tA\tT\tA\tC\tT\tA\tC\tT\tA\tG\tG\tC\tT\tG\tG\tT\tG\tT\tG\tT\tT\tC\tA\tG\tG\tC\tT\tG\tT\tA\tA\tT\tA\tC\tT\tC\tC\tT\tG\tC\tC\tA\tC\tT\tA\tG\tA\tG\tA\tC\tT\tA\tA\tG\tT\tG\tA\tC\tT\tG\tG\tA\tT\tC\tT\tC\tA\tG\tG\tA\tT\tG\tC\tA\tA\tG\tG\tT\tG\tT\tC\tT\tC\tT\tT\tA\tG\tA\tT\tG\tA\tT\tT\tG\tC\tT\tC\tT\tA\tG\tG\tA\tG\tA\tG\tA\tA\tC\tC\tT\tG\tA\tT\tA\tA\tG\tG\tA\tA\tT\tT\tG\tG\tC\tA\tG\tT\tG\tG\tT\tT\tC\tA\tC\tG\tA\tT\tA\tT\tC\tG\tA\tC\tG\tA\tG\tG\tA\tG\tT\tC\tT\tG\tT\tC\tG\tA\tT\tG\tA\tG\tC\tG\tA\tA\tG\tG\tA\tT\tC\tG\tT\tC\tT\tA\tT\tT\tC\tA\tT\tT\tC\tT\tC\tA\tC\tT\tC\tG\tA\tT\tC\tC\tT\tG\tA\tG\tA\tC\tT\tC\tG\tG\tA\tG\tA\tA\tG\tC\tT\tG\tA\tT\tC\tT\tG\tA\tT\tA\tG\tA\tG\tC\tT\tT\tC\tG\tT\tA\tC\tT\tA\tT\tA\tG\tT\tC\tT\tC\tT\tT\tA\tA\tC\tG\tA\tC\tA\tC\tT\tA\tG\tG\tA\tA\tG\tC\tT\tC\tG\tC\tT\tC\tT\tG\tA\tT\tA\tC\tA\tT\tC\tC\tT\tA\tG\tT\tC\tG\tC\tC\tG\tC\tT\tC\tT\tC\tT\tG\tA\tC\tG\tG\tA\tA\tT\tA\tT\tG\tA\tC\tG\tG\tT\tA\tG\tC\tT\tA\tG\tA\tG\tC\tA\tG\tT\tC\tT\tA\tT\tA\tG\tA\tG\tC\tG\tG\tA\tA\tG\tC\tT\tC\tT\tG\tC\tG\tA\tC\tT\tA\tG\tT\tC\tT\tC\tA\tG\tT\tC\tG\tA\tC\tT\tA\tC\tC\tG\tC\tT\tA\tG\tA\tG\tC\tT\tC\tA\tG\tA\tT\tC\tA\tG\tC\tT\tT\tC\tG\tA\tC\tA\tA\tG\tA\tT\tT\tC\tA\tG\tC\tT\tC\tA\tT\tA\tG\tT\tT\tA\tG\tA\tT\tC\tT\tG\tC\tT\tG\tA\tG\tA\tG\tT\tG\tA\tG\tA\tG\tT\tG\tC\tC\tT\tC\tT\tA\tC\tA\tG\tG\tA\tC\tT\tC\tT\tC\tT\tC\tT\tC\tT\tT\tC\tC\tA\tC\tT\tG\tA\tG\tA\tG\tA\tG\tA\tG\tT\tT\tC\
tT\tC\tG\tT\tA\tG\tG\tA\tA\tT\tT\tC\tT\tC\tA\tT\tA\tC\tA\tG\tT\tC\tC\tT\tG\tA\tG\tA\tA\tG\tC\tT\tT\tC\tT\tC\tG\tT\tG\tA\tT\tC\tC\tT\tA\tG\tT\tC\tT\tC\tA\tG\tC\tT\tA\tC\tT\tG\tG\tA\tA\tG\tC\tT\tA\tG\tC\tT\tT\tA\tT\tA\tC\tT\tG\tA\tA\tG\tA\tG\tG\tA\tC\tT\tG\tA\tC\tT\tT\tC\tC\tG\tA\tT\tA\tC\tC\tA\tT\tA\tC\tT\tT\tA\tC\tA\tT\tC\tA\tT\tC\tG\tT\tC\tT\tC\tG\tA\tT\tA\tT\tA\tT\tA\tT\tC\tG\tA\tA\tG\tA\tG\tG\tA\tC\tT\tT\tA\tC\tT\tG\tT\tG\tC\tT\tG\tG\tA\tT\tC\tG\tC\tG\tC\tC\tT\tC\tT\tT\tC\tA\tT\tG\tA\tG\tA\tG\tA\tA\tC\tA\tC\tA\tG\tC\tG\tT\tC\tT\tC\tT\tG\tC\tT\tA\tG\tG\tT\tC\tT\tC\tT\tC\tA\tC\tT\tT\tG\tA\tG\tT\tC\tC\tT\tT\tC\tT\tG\tG\tA\tC\tT\tG\tC\tC\tA\tT\tC\tT\tC\tT\tA\tT\tC\tG\tA\tG\tC\tA\tT\tG\tA\tA\tG\tT\tA\tA\tG\tC\tT\tG\tT\tA\tG\tC\tT\tA\tG\tT\tC\tT\tC\tT\tC\tA\tG\tT\tC\tA\tG\tG\tT\tC\tT\tT\tC\tT\tC\tA\tG\tC\tT\tT\tG\tC\tT\tC\tT\tC\tT\tC\tT\tG\tA\tG\tC\tC\tT\tG\tT\tT\tA\tC\tT\tA\tC\tT\tA\tG\tC\tA\tC\tG\tA\tA\tG\tC\tT\tA\tG\tC\tA\tT\tG\tG\tA\tA\tC\tA\tG\tT\tC\tG\tC\tG\tA\tC\tT\tC\tG\tA\tG\tC\tT\tC\tT\tT\tC\tG\tA\tC\tA\tT\tC\tT\tG\tA\tG\tG\tA\tG\tA\tA\tT\tA\tG\tC\tT\tC\tT\tG\tA\tG\tA\tG\tA\tA\tG\tT\tC\tT\tC\tA\tT\tT\tC\tT\tC\tA\tT\tA\tG\tG\tA\tT\tG\tA\tT\tG\tT\tT\tG\tA\tG\tT\tC\tA\tG\tA\tT\tA\tG\tA\tT\tC\tT\tC\tG\tC\tA\tC\tT\tG\tA\tG\tA\tG\tC\tC\tA\tG\tA\tA\tT\tT\tA\tA\tT\tC\tG\tC\tA\tA\tG\tC\tA\tC\tG\tG\tA\tG\tT\tC\tT\tT\tA\tA\tG\tG\tA\tC\tT\tT\tC\tC\tT\tA\tT\tC\tT\tA\tG\tT\tC\tC\tG\tC\tT\tA\tG\tA\tC\tT\tA\tA\tC\tA\tG\tG\tC\tA\tT\tA\tG\tC\tT\tT\tG\tT\tG\tA\tG\tA\tG\tT\tA\tC\tG\tG\tC\tC\tT\tT\tC\tT\tC\tC\tT\tT\tC\tG\tA\tT\tC\tT\tC\tT\tC\tC\tA\tC\tT\tC\tT\tT\tC\tT\tC\tA\tC\tA\tG\tC\tT\tG\tT\tT\tC\tT\tC\tA\tG\tT\tA\tC\tT\tT\tC\tC\tT\tA\tG\tC\tT\tC\tT\tC\tT\tC\tT\tC\tT\tC\tT\tG\tA\tA\tC\tC\tT\tC\tT\tC\tT\tG\tA\tG\tA\tG\tA\tC\tT\tT\tC\tC\tT\tT\tC\tA\tG\tA\tG\tC\tT\tT\tG\tT\tA\tG\tA\tC\tT\tG\tA\tA\tG\tG\tA\tC\tT\tA\tT\tA\tT\tT\tC\tT\tG\tA\tC\tG\tA\tG\tT\tC\tG\tC\tT\tT\tC\tT\tC\tT\tC\tT\tC\tG\tA\tC\tT\tT\tA\tG\tA\tG\tA\tA\tG\tA\tG\tT\tG\tG\tC\tT\tC\tC\tG\tT\tC\tC\tA\tG\tA\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 Rooting/Rooting.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Rooting/Rooting.pl Fri Jul 10 04:39:30 2015 -0400 |
[ |
#!/usr/bin/perl

# Rooting.pl -- midpoint rooting of a Newick tree with Rootings_54.jar.
#
# Usage: Rooting.pl -i <newick input> -o <newick output>
#
# The script
#   1. writes a copy of the input tree, "<input>.2", in which negative
#      branch lengths are replaced by 0;
#   2. runs Rootings_54.jar (looked up next to this script) on that copy,
#      appending everything the program prints to "<input>.rooting.log";
#   3. moves "<input>.midpoint" to the requested output file and removes the
#      intermediate "<input>.2" and "<input>.all" files.
#
# "<input>.rooting.log" is left in place on purpose: the Galaxy wrapper moves
# it to its log dataset once this script has exited successfully.
#
# Switch and Bio::SeqIO used to be loaded here although nothing in the script
# uses them and the Galaxy wrapper does not declare them as requirements;
# they are no longer loaded.

use strict;
use warnings;
use Getopt::Long;
use Cwd;
use File::Copy qw(move);
use FindBin qw ( $Bin $Script );

# Directory holding this script; Rootings_54.jar is expected there.
my $CURRENT_DIR = $Bin;

# Command prefix that starts the rooting program.  The jar path is
# shell-quoted so an installation directory containing spaces still works.
my $ROOTING_EXE = "java -jar " . _shell_quote( $CURRENT_DIR . "/Rootings_54.jar" );

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
    -i, --input          <newick input>
    -o, --output         <newick output>
~;
$usage .= "\n";

# Run the command-line program only when executed as a script, so the
# helpers below can be loaded (e.g. by a test) without side effects.
main() unless caller;

# Quote $text so that /bin/sh reads it back as one literal word.
sub _shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'" . $text . "'";
}

# Return $newick with every negative branch length replaced by 0.
#
# Only a number that directly follows ':' and is directly followed by ',',
# ')' or ';' is rewritten.  Anchoring on ':' keeps the substitution away from
# leaf labels ending in "-<digits>" and from the exponent of lengths written
# in scientific notation (e.g. "8.8E-5"), both of which an unanchored
# "-<number>," pattern would damage.
sub zero_negative_branch_lengths {
    my ($newick) = @_;
    $newick =~ s/:-(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?(?=[,);])/:0/g;
    return $newick;
}

# Parse the options, clean the tree, run the rooting program and deliver the
# midpoint-rooted tree.  Dies (non-zero exit status) when the arguments are
# missing, a file cannot be read or written, or no rooted tree was produced.
sub main {
    my ( $input, $outfile );

    GetOptions(
        "input=s"  => \$input,
        "output=s" => \$outfile
    );

    die $usage
      if ( !$input || !$outfile );

    my $treefile = $input;
    my $cleaned  = "$treefile.2";
    my $all      = "$treefile.all";
    my $midpoint = "$treefile.midpoint";
    my $log      = "$treefile.rooting.log";

    # replace negative values by 0
    open( my $in, '<', $treefile ) or die "Cannot read $treefile: $!\n";
    open( my $out, '>', $cleaned ) or die "Cannot write $cleaned: $!\n";
    while ( my $line = <$in> ) {
        print {$out} zero_negative_branch_lengths($line);
    }
    close($in);
    # Buffered write errors only surface when the handle is closed.
    close($out) or die "Cannot write $cleaned: $!\n";

    # The command goes through the shell because of the log redirection, so
    # every path is quoted before being interpolated.
    my $rooting_command =
        $ROOTING_EXE
      . " -input " . _shell_quote($cleaned)
      . " -output " . _shell_quote($all)
      . " -midpoint " . _shell_quote($midpoint)
      . " >>" . _shell_quote($log) . " 2>&1";
    my $status = system($rooting_command);
    if ( $status == -1 ) {
        unlink($cleaned);
        die "Cannot start the rooting program: $!\n";
    }
    if ( $status != 0 ) {
        # Only reported, not fatal: whether a rooted tree exists is checked
        # when the result is moved below.
        warn "Rooting program ended with status " . ( $status >> 8 ) . ", see $log\n";
    }

    unlink($all);
    unlink($cleaned);

    # move() falls back to copy + delete when the output lives on another
    # filesystem, a case in which rename() fails.
    move( $midpoint, $outfile )
      or die "No rooted tree could be written to $outfile ($!), see $log\n";

    return;
}
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 Rooting/Rootings_54.jar |
b |
Binary file Rooting/Rootings_54.jar has changed |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 Rooting/rooting.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Rooting/rooting.xml Fri Jul 10 04:39:30 2015 -0400 |
[ |
@@ -0,0 +1,193 @@ +<tool id="sniplay_rooting" name="Rooting" version="1.0.2"> + + <!-- [REQUIRED] Tool description displayed after the tool name --> + <description> Midpoint rooting of newick tree </description> + + <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> + <requirements> + <requirement type="binary">perl</requirement> + <requirement type="package">Rootings_54.jar</requirement> + </requirements> + + <!-- [OPTIONAL] Command to be executed to get the tool's version string --> + <version_command> +<!-- + tool_binary -v +--> + </version_command> + + <!-- [REQUIRED] The command to execute --> + <command interpreter="perl"> + Rooting.pl -i $filein -o $fileout && mv ${filein}.rooting.log $fileout_log + </command> + + <!-- [REQUIRED] Input files and tool parameters --> + <inputs> + <param name="filein" type="data" format="txt" optional="false" label="Newick tree input" /> + <param name="fileout_label" type="text" value="out tree" label="Output name" help="Output name for files" /> + </inputs> + + <!-- [REQUIRED] Output files --> + <outputs> + <data name="fileout_log" type="data" format="txt" label="${fileout_label}.log" /> + <data name="fileout" type="data" format="txt" label="${fileout_label}" /> + </outputs> + + <!-- [STRONGLY RECOMMENDED] Exit code rules --> + <stdio> + <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> + <exit_code range="1:" level="fatal" /> + </stdio> + + <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> + <tests> + <!-- [HELP] Test files have to be in the ~/test-data directory --> + + <test> + <param name="filein" value="newick" /> + <output name="fileout" file="out_tree" /> + <output name="fileout_log" file="out_tree.log" /> + </test> + + <!-- [HELP] Multiple tests can be defined with different parameters --> +<!-- + <test> + </test> +--> + </tests> + + <!-- [OPTIONAL] Help displayed in Galaxy --> + <help> + + +.. 
class:: infomark + +**Authors** Jean-François Dufayard, CIRAD, South Green platform + + +.. class:: infomark + +**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr + +.. class:: infomark + +**Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). + +--------------------------------------------------- + + + + +======= +Rooting +======= + +----------- +Description +----------- + + Compute a midpoint-rooted Newick tree. + + +----------------- +Workflow position +----------------- + +**Upstream tool** + +=========== ========================== ======= +Name output file(s) format +=========== ========================== ======= +fastme Newick tree Newick +=========== ========================== ======= + + + +---------- +Input file +---------- + +Newick file + + +---------- +Parameters +---------- + +Output name + Output base name for the output files + + +------------ +Output files +------------ + +Output_name + Resulting tree rooted in newick format + +Output_name.log + Log file + +------------ +Dependencies +------------ +Rooting + CIRAD tool. Contact jean-francois.dufayard@cirad.fr + +--------------------------------------------------- + +--------------- +Working example +--------------- + +Input files +=========== + +Newick file +----------- + +:: + + (((((((((((((((((((((((((GOGOLEMPUK:0.001198,GOGOLEMPAK:0.002128):0.030378,TREMBESE:0.013258):0.055246,(((JIMBRUKJOL:0.045219,KETANKONIR:0.035298):0.006267, ... + + +Parameters +========== + +Output name -> out tree + + +Output files +============ + +out tree +-------- + +:: + + (ref:0.9384270000000001,(((((((((((((((((((((((((((((((((((IRAT257:0.044246,IRAT112:0.023421):0.009006,ARAGUAIA:0.093061):0.004662... 
+ + + </help> + <citations> + <!-- [HELP] As DOI or BibTex entry --> + <citation type="bibtex">@article{Dereeper03062015, +author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, +title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, +year = {2015}, +doi = {10.1093/nar/gkv351}, +abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. 
SNiPlay is accessible at http://sniplay.southgreen.fr.}, +URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, +eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, +journal = {Nucleic Acids Research} +} + + </citation> + + </citations> + +</tool> |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 Rooting/test-data/newick --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Rooting/test-data/newick Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,2 @@ +(((((((((((((((((((((((((GOGOLEMPUK:0.001198,GOGOLEMPAK:0.002128):0.030378,TREMBESE:0.013258):0.055246,(((JIMBRUKJOL:0.045219,KETANKONIR:0.035298):0.006267,KANIRANGA:0.042518):0.020121,(VARYMADINI:0.039903,BENGALYVAK:0.032305):0.012408):0.004740):0.001841,(((YANCAOUSSA:0.006364,PATEBLANCM:0.008832):0.000858,MOROBEREKA:0.004615):0.013114,VARYMALADY:0.026086):0.024153):0.015663,WAB706-3-4:0.050598):0.004174,(MAINTIMOLO:0.044761,(FOHISOMOTR:0.005656,VARYSOMOTR:0.006949):0.030574):0.009217):0.006248,IRAT364:0.047033):0.003654,((IRAT366:0.033773,CIRAD358:0.093382):0.015822,(((MANANELATR:0.040437,(ARROZCEBAD:0.027110,GRAZI:0.022678):0.000763):0.009870,KOMOJAMANI:0.049216):0.006713,((TSIPALA89:-0.000887,VARYLAVADE:0.037778):0.044182,MITSANGANA:0.082664):0.068968):0.000971):0.009442):0.002985,(((((((((JUMALI:0.077650,LAMBAYQUE1:0.170057):0.027742,(GOMPA2:0.049418,KAKANI2:0.052752):0.024526):0.081123,JUMULA2:0.075284):0.027607,ref:0.722923):0.015245,((((NPE826:0.019317,NPE253:0.017161):0.033863,((GIZA171:0.023894,NIPPONBARE:0.027998):0.013954,M202:0.053155):0.016691):0.020398,NEPHOAVANG:0.097913):0.011042,BABER:0.155657):0.024483):0.043699,((((((((DAM:0.054359,((((VIETNAM3:0.030951,SENG:0.038876):0.007914,(NABESHI:0.010252,KARASUKARA:0.010553):0.032650):0.012575,VIETNAM1:0.049323):0.003110,(PEHPINUO:0.070737,(CHUAN3:0.020631,CHUAN4:0.024135):0.014450):0.010970):0.020849):0.002227,(((((JAOHAW:0.044517,KHAODAM:0.031174):0.017389,MAHAE:0.041633):0.006695,(KHAOKAPXAN:0.005767,HAWMOM:0.009429):0.031367):0.015885,CHAPHUMA:0.071266):0.005503,KU115:0.043819):0.007631):0.009768,NHTA10:0.061745):0.006241,INDANE:0.079646):0.007679,(((GEMJYAJYAN:0.012403,YANGKUMRED:0.014267):0.008479,DANGREY:0.024493):0.020552,DAWASANRED:0.030369):0.065249):0.006240,CHALOYOE:0.058075):0.012844,NHTA5:0.138764):0.005686,YUNLU7:0.079119):0.012627):0.020665,CUIABANA:0.066182):0.018877,((GANIGI:0.040448,PULULAPA:0.039840):0.020258,GOGO:0.060904):0.005609):0.001416,(((((GUNDILKUNI:0.019719,(C
ICIHBETON:0.021620,BULUPANDAK:0.016924):0.008554):0.028704,PADIBOENAR:0.052396):0.002882,(REKETMAUN:0.059430,((POENOETHIT:0.042637,KETANLUMBU:0.035696):0.016617,MOLOK:0.055674):0.015506):0.001164):0.024179,PADIKASALL:0.093595):0.063014,RATHAL:0.090729):0.015609):0.008281):0.005079,RT1031-69:0.043520):0.002593,MANDRIRAVI:0.049833):0.005647,IRAT234:0.037316):0.007912,((((EARLYMUTAN:0.015218,(IAC25:0.007986,(DOURADOAGU:0.001563,DOURADOPRE:0.001763):0.010062):0.002688):0.011250,(((IAC47:0.007321,(IAC165:0.000500,VARYLAVA90:0.001650):0.006263):0.003063,HD1-4:0.015932):0.006876,GUARANI:0.013831):0.005817):0.007843,IGUAPECATE:0.036421):0.002898,BICOBRANCO:0.031026):0.008082):0.001457,((((((((IR60080-46:0.102166,IR71525-19:0.037083):0.071364,IR68704-14:0.144370):0.009188,((IR66421-10:0.104309,IR66421-09:0.112363):0.044923,CNA-7_BO_1:0.097756):0.030658):0.043160,(((((((CIRAD403:0.043568,MARAVILHA:0.105095):0.009135,CURINCA:0.140813):0.023139,CIRAD409:0.038172):0.045565,PCT4_SA_4_:0.177389):0.024197,(PCT11_0_0_:0.075858,P5589-1-1-:0.044714):0.022844):0.006949,ESPERANZA:0.077139):0.022033,((CT13582-15:0.086985,((((((IRAT216:0.017956,IR47684-05:0.006483):0.014180,((IR63372-08:0.008257,IR65907-18:0.049347):0.060968,IR65907-20:0.025334):0.004085):0.028888,COLOMBIA1:0.106747):0.009488,WAB56-50:0.048646):0.019308,IDSA77:0.026889):0.022776,WAB56-125:0.057970):0.025793):0.017149,IRAT362:0.030612):0.021338):0.005273):0.007474,((IR65261-19:0.088569,IR63380-16:0.060828):0.017377,ORYZICASAB:0.059793):0.020771):0.010205,((((IRAT2:0.006656,(62667:0.002564,(IRAT177:0.003065,IRAT13:0.006564):0.001422):0.001435):0.014034,IRAT109:0.079089):0.006041,IRAT170:0.013368):0.020262,63-104:0.022825):0.010789):0.003440,(((((KUROKA:0.096901,IRAT144:0.032372):0.040630,CIRAD488:0.062971):0.026055,(IRAT212:0.041268,PRIMAVERA:0.059109):0.012806):0.027141,((CIRAD392:0.025487,IRAT380:0.024721):0.028822,CIRAD394:0.055046):0.034180):0.011060,CAIAPO:0.072220):0.009469):0.006119,((ARAGUAIA:0.084755,(IRAT335:0.077
784,IRAT112:0.027346):0.006445):0.007072,IRAT257:0.036873):0.020075):0.002575):0.004673,((IR47686-09:0.045565,IRAT104:0.041351):0.019777,IR71524-44:0.058741):0.008141):0.006303,((OS4:0.021016,OS6:0.015670):0.010953,(CANAROXA:0.018385,KINANDANGP:0.047364):0.022379):0.007502):0.001209,TRESMESES:0.028972):0.002988,PACHOLINHA:0.042509):0.002897,CANELADEFE:0.050931):0.011439,CAAWA/FORT:0.055630):0.006838,((LUDAN:0.054230,((CUBA65:0.037658,(IR53236-27:0.026784,AZUCENA:0.025318):0.020859):0.017556,DAVAO:0.045931):0.005774):0.010335,(BINULAWAN:0.059589,MALAGKITPI:0.062856):0.004216):0.000088):0.004898,DINORADO:0.065206):0.004869,(BAGANANASA:0.049455,BAKUNGH:0.048193):0.021502):0.004581,((SPEAKER:0.057882,(KEDAYAN:0.065817,ARIAS:0.074161):0.019154):0.012584,(TANDUI:0.078177,KETANMENAH:0.079797):0.013169):0.003598):0.031721,PALAWAN:0.032789,KENDINGA5H:0.028863); + |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 Rooting/test-data/out_tree --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Rooting/test-data/out_tree Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,1 @@ +(ref:0.575788,(((((((((((((((((((((((PALAWAN:0.032789,KENDINGA5H:0.028863):0.031721,((SPEAKER:0.057882,(KEDAYAN:0.065817,ARIAS:0.074161):0.019154):0.012584,(TANDUI:0.078177,KETANMENAH:0.079797):0.013169):0.003598):0.004581,(BAGANANASA:0.049455,BAKUNGH:0.048193):0.021502):0.004869,DINORADO:0.065206):0.004898,((LUDAN:0.05423,((CUBA65:0.037658,(IR53236-27:0.026784,AZUCENA:0.025318):0.020859):0.017556,DAVAO:0.045931):0.005774):0.010335,(BINULAWAN:0.059589,MALAGKITPI:0.062856):0.004216):8.8E-5):0.006838,CAAWA/FORT:0.05563):0.011439,CANELADEFE:0.050931):0.002897,PACHOLINHA:0.042509):0.002988,TRESMESES:0.028972):0.001209,((OS4:0.021016,OS6:0.01567):0.010953,(CANAROXA:0.018385,KINANDANGP:0.047364):0.022379):0.007502):0.006303,((IR47686-09:0.045565,IRAT104:0.041351):0.019777,IR71524-44:0.058741):0.008141):0.004673,((((((((IR60080-46:0.102166,IR71525-19:0.037083):0.071364,IR68704-14:0.14437):0.009188,((IR66421-10:0.104309,IR66421-09:0.112363):0.044923,CNA-7_BO_1:0.097756):0.030658):0.04316,(((((((CIRAD403:0.043568,MARAVILHA:0.105095):0.009135,CURINCA:0.140813):0.023139,CIRAD409:0.038172):0.045565,PCT4_SA_4_:0.177389):0.024197,(PCT11_0_0_:0.075858,P5589-1-1-:0.044714):0.022844):0.006949,ESPERANZA:0.077139):0.022033,((CT13582-15:0.086985,((((((IRAT216:0.017956,IR47684-05:0.006483):0.01418,((IR63372-08:0.008257,IR65907-18:0.049347):0.060968,IR65907-20:0.025334):0.004085):0.028888,COLOMBIA1:0.106747):0.009488,WAB56-50:0.048646):0.019308,IDSA77:0.026889):0.022776,WAB56-125:0.05797):0.025793):0.017149,IRAT362:0.030612):0.021338):0.005273):0.007474,((IR65261-19:0.088569,IR63380-16:0.060828):0.017377,ORYZICASAB:0.059793):0.020771):0.010205,((((IRAT2:0.006656,(62667:0.002564,(IRAT177:0.003065,IRAT13:0.006564):0.001422):0.001435):0.014034,IRAT109:0.079089):0.006041,IRAT170:0.013368):0.020262,63-104:0.022825):0.010789):0.00344,(((((KUROKA:0.096901,IRAT144:0.032372):0.04063,CIRAD488:0.062971):0.026055,(IRAT212:0.041268,PRIMAVERA:0.059109):0.012806):0.027141,((CIRAD392:0.
025487,IRAT380:0.024721):0.028822,CIRAD394:0.055046):0.03418):0.01106,CAIAPO:0.07222):0.009469):0.006119,((ARAGUAIA:0.084755,(IRAT335:0.077784,IRAT112:0.027346):0.006445):0.007072,IRAT257:0.036873):0.020075):0.002575):0.001457,((((EARLYMUTAN:0.015218,(IAC25:0.007986,(DOURADOAGU:0.001563,DOURADOPRE:0.001763):0.010062):0.002688):0.01125,(((IAC47:0.007321,(IAC165:5.0E-4,VARYLAVA90:0.00165):0.006263):0.003063,HD1-4:0.015932):0.006876,GUARANI:0.013831):0.005817):0.007843,IGUAPECATE:0.036421):0.002898,BICOBRANCO:0.031026):0.008082):0.007912,IRAT234:0.037316):0.005647,MANDRIRAVI:0.049833):0.002593,RT1031-69:0.04352):0.005079,((((((((GOGOLEMPUK:0.001198,GOGOLEMPAK:0.002128):0.030378,TREMBESE:0.013258):0.055246,(((JIMBRUKJOL:0.045219,KETANKONIR:0.035298):0.006267,KANIRANGA:0.042518):0.020121,(VARYMADINI:0.039903,BENGALYVAK:0.032305):0.012408):0.00474):0.001841,(((YANCAOUSSA:0.006364,PATEBLANCM:0.008832):8.58E-4,MOROBEREKA:0.004615):0.013114,VARYMALADY:0.026086):0.024153):0.015663,WAB706-3-4:0.050598):0.004174,(MAINTIMOLO:0.044761,(FOHISOMOTR:0.005656,VARYSOMOTR:0.006949):0.030574):0.009217):0.006248,IRAT364:0.047033):0.003654,((IRAT366:0.033773,CIRAD358:0.093382):0.015822,(((MANANELATR:0.040437,(ARROZCEBAD:0.02711,GRAZI:0.022678):7.63E-4):0.00987,KOMOJAMANI:0.049216):0.006713,((TSIPALA89:0.0,VARYLAVADE:0.037778):0.044182,MITSANGANA:0.082664):0.068968):9.71E-4):0.009442):0.002985):0.008281,(((((GUNDILKUNI:0.019719,(CICIHBETON:0.02162,BULUPANDAK:0.016924):0.008554):0.028704,PADIBOENAR:0.052396):0.002882,(REKETMAUN:0.05943,((POENOETHIT:0.042637,KETANLUMBU:0.035696):0.016617,MOLOK:0.055674):0.015506):0.001164):0.024179,PADIKASALL:0.093595):0.063014,RATHAL:0.090729):0.015609):0.001416,((GANIGI:0.040448,PULULAPA:0.03984):0.020258,GOGO:0.060904):0.005609):0.018877,CUIABANA:0.066182):0.020665,((((((((DAM:0.054359,((((VIETNAM3:0.030951,SENG:0.038876):0.007914,(NABESHI:0.010252,KARASUKARA:0.010553):0.03265):0.012575,VIETNAM1:0.049323):0.00311,(PEHPINUO:0.070737,(CHUAN3:0.020631,CHUAN4
:0.024135):0.01445):0.01097):0.020849):0.002227,(((((JAOHAW:0.044517,KHAODAM:0.031174):0.017389,MAHAE:0.041633):0.006695,(KHAOKAPXAN:0.005767,HAWMOM:0.009429):0.031367):0.015885,CHAPHUMA:0.071266):0.005503,KU115:0.043819):0.007631):0.009768,NHTA10:0.061745):0.006241,INDANE:0.079646):0.007679,(((GEMJYAJYAN:0.012403,YANGKUMRED:0.014267):0.008479,DANGREY:0.024493):0.020552,DAWASANRED:0.030369):0.065249):0.00624,CHALOYOE:0.058075):0.012844,NHTA5:0.138764):0.005686,YUNLU7:0.079119):0.012627):0.043699,((((NPE826:0.019317,NPE253:0.017161):0.033863,((GIZA171:0.023894,NIPPONBARE:0.027998):0.013954,M202:0.053155):0.016691):0.020398,NEPHOAVANG:0.097913):0.011042,BABER:0.155657):0.024483):0.015245,(((JUMALI:0.07765,LAMBAYQUE1:0.170057):0.027742,(GOMPA2:0.049418,KAKANI2:0.052752):0.024526):0.081123,JUMULA2:0.075284):0.027607):0.14713500000000007); |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 SNP_density/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNP_density/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl Fri Jul 10 04:39:30 2015 -0400 |
[ |
#!/usr/bin/perl

# CalculateSlidingWindowsSNPdensitiesFromHapmap.pl
#
# Count the SNPs of a Hapmap file in consecutive windows of <step> bp.
#
# Usage: CalculateSlidingWindowsSNPdensitiesFromHapmap.pl -i <hapmap> -o <out> -s <step>
#
# Two tab-separated files are written:
#   <out>            one row per chromosome and window: chromosome, end
#                    position of the window, number of SNPs in the window;
#   <out>.by_sample  one row per chromosome and window: chromosome, then for
#                    every sample the number of SNPs of the window at which
#                    the sample's genotype differs from the "ref" column.
#
# The separators are written as explicit tabs, in line with the header of
# the by-sample file and with the "tabular" format the Galaxy wrapper
# declares for both outputs.
#
# Switch and Bio::SeqIO used to be loaded here although nothing in the script
# uses them and the Galaxy wrapper only requires perl; they are no longer
# loaded.

use strict;
use warnings;
use Getopt::Long;
use Scalar::Util qw(looks_like_number);

# 0-based Hapmap columns read by this script.
use constant CHROM_COLUMN        => 2;
use constant POSITION_COLUMN     => 3;
use constant REF_COLUMN          => 11;
use constant FIRST_SAMPLE_COLUMN => 12;

# At most this many chromosomes (in sorted name order) are reported.
use constant MAX_CHR_NUM => 100;

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
    -i, --input          <Hapmap input>
    -o, --out            <output in tabular format>
    -s, --step           <step (in bp)>
~;
$usage .= "\n";

# Run the command-line program only when executed as a script, so the
# helpers below can be loaded (e.g. by a test) without side effects.
main() unless caller;

# Read a Hapmap table from the open handle $hapmap and count its SNPs per
# window of $step bp.
#
# Returns four references:
#   \@individual_names  header fields from column 12 on (the sample names);
#   \%counts            {chrom}{window}         => number of SNPs;
#   \%counts_by_ind     {chrom}{window}{column} => number of SNPs at which
#                       the genotype in that column differs from "ref";
#   \%maximums          {chrom}                 => highest position seen.
# Windows are numbered int(position / step).
sub read_hapmap_counts {
    my ( $hapmap, $step ) = @_;

    my $headers = <$hapmap>;
    $headers = '' unless defined $headers;    # empty input: no samples
    $headers =~ s/[\r\n]//g;
    my @columns          = split( /\t/, $headers );
    my @individual_names = @columns[ FIRST_SAMPLE_COLUMN .. $#columns ];

    my ( %counts, %counts_by_ind, %maximums );
    while ( my $line = <$hapmap> ) {
        $line =~ s/[\r\n]//g;
        my @infos = split( /\t/, $line );

        # A blank or truncated line has no chromosome/position to count.
        next if @infos <= POSITION_COLUMN;

        my $chrom    = $infos[CHROM_COLUMN];
        my $position = $infos[POSITION_COLUMN];
        if ( !defined $maximums{$chrom} || $position > $maximums{$chrom} ) {
            $maximums{$chrom} = $position;
        }
        my $classe_position = int( $position / $step );
        $counts{$chrom}{$classe_position}++;

        # NOTE(review): any genotype string different from "ref" is counted,
        # which includes missing calls such as "NN" -- confirm this is the
        # intended definition of a per-sample SNP.
        my $ref_allele = $infos[REF_COLUMN];
        for my $i ( FIRST_SAMPLE_COLUMN .. $#infos ) {
            $counts_by_ind{$chrom}{$classe_position}{$i} ||= 0;
            if ( $infos[$i] ne $ref_allele ) {
                $counts_by_ind{$chrom}{$classe_position}{$i}++;
            }
        }
    }

    return ( \@individual_names, \%counts, \%counts_by_ind, \%maximums );
}

# Print the global table (chromosome, window end position, SNP count) to the
# open handle $out.  Every window from 0 up to the one holding the highest
# position of the chromosome gets a row, empty windows with a count of 0.
# Reporting stops after $max_chr_num chromosomes.
sub write_global_densities {
    my ( $out, $counts, $maximums, $step, $max_chr_num ) = @_;

    print {$out} "Chromosome\tPosition\tSNPs\n";
    my $chr_num = 0;
    foreach my $chrom ( sort keys %{$counts} ) {
        $chr_num++;
        my $x = 0;
        for ( my $classe_position = 0 ; $classe_position <= $maximums->{$chrom} / $step ; $classe_position++ ) {
            my $nb = $counts->{$chrom}{$classe_position} || 0;
            $x += $step;
            print {$out} "$chrom\t$x\t$nb\n";
        }
        last if $chr_num >= $max_chr_num;
    }
    return;
}

# Print the per-sample table to the open handle $out: a header naming the
# samples, then one row per chromosome and window with one count per sample.
# Sample k of the header is looked up under column FIRST_SAMPLE_COLUMN + k.
# Reporting stops after $max_chr_num chromosomes.
sub write_sample_densities {
    my ( $out, $individual_names, $counts_by_ind, $maximums, $step, $max_chr_num ) = @_;

    print {$out} "Chromosome\t" . join( "\t", @{$individual_names} ) . "\n";
    my $chr_num = 0;
    foreach my $chrom ( sort keys %{$counts_by_ind} ) {
        $chr_num++;
        for ( my $classe_position = 0 ; $classe_position <= $maximums->{$chrom} / $step ; $classe_position++ ) {
            my @row = ($chrom);
            for my $offset ( 0 .. $#{$individual_names} ) {
                my $num_ind = FIRST_SAMPLE_COLUMN + $offset;
                push @row, ( $counts_by_ind->{$chrom}{$classe_position}{$num_ind} || 0 );
            }
            print {$out} join( "\t", @row ) . "\n";
        }
        last if $chr_num >= $max_chr_num;
    }
    return;
}

# Parse the options, read the Hapmap file and write both output files.
# Dies (non-zero exit status) on missing arguments, on a step that is not a
# positive number, or when a file cannot be read or written.
sub main {
    my ( $input, $out, $step );

    GetOptions(
        "input=s" => \$input,
        "out=s"   => \$out,
        "step=s"  => \$step,
    );

    die $usage
      if ( !$input || !$step || !$out );

    # The step is used as a divisor; anything else than a positive number
    # would end in a division by zero or meaningless windows.
    die "Step must be a positive number (got '$step')\n\n$usage"
      unless looks_like_number($step) && $step > 0;

    open( my $HAPMAP, '<', $input ) or die "Cannot read $input: $!\n";
    my ( $individual_names, $counts, $counts_by_ind, $maximums ) =
      read_hapmap_counts( $HAPMAP, $step );
    close($HAPMAP);

    #######################################################
    # global
    #######################################################
    open( my $OUT, '>', $out ) or die "Cannot write $out: $!\n";
    write_global_densities( $OUT, $counts, $maximums, $step, MAX_CHR_NUM );
    close($OUT) or die "Cannot write $out: $!\n";

    #######################################################
    # For each individual
    #######################################################
    open( my $OUT2, '>', "$out.by_sample" ) or die "Cannot write $out.by_sample: $!\n";
    write_sample_densities( $OUT2, $individual_names, $counts_by_ind, $maximums, $step, MAX_CHR_NUM );
    close($OUT2) or die "Cannot write $out.by_sample: $!\n";

    return;
}
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 SNP_density/calculateSlidingWindowsSNPdensitiesFromHapMap_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNP_density/calculateSlidingWindowsSNPdensitiesFromHapMap_wrapper.xml Fri Jul 10 04:39:30 2015 -0400 |
[ |
@@ -0,0 +1,207 @@ +<tool id="sniplay_density" name="SNP density" version="1.2.0"> + + <!-- [REQUIRED] Tool description displayed after the tool name --> + <description> Calculate SNP densities along chromosome from HapMap</description> + + <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> + <requirements> + <requirement type="binary">perl</requirement> + </requirements> + + <!-- [OPTIONAL] Command to be executed to get the tool's version string --> + <version_command> +<!-- + tool_binary -v +--> + </version_command> + + <!-- [REQUIRED] The command to execute --> + <command interpreter="bash"> + calculateSlidingWindowsSNPdensitiesFromHapmap.sh $filein $fileout $fileout_bysample $step + </command> + + <!-- [REQUIRED] Input files and tool parameters --> + <inputs> + <param name="filein" type="data" format="txt" optional="false" label="Hapmap input" /> + <param name="step" type="integer" value="200000" label="Step" help="Step in bp"/> + <param name="fileout_label" type="text" value="densities" label="Output name" help="Output name for tabular files" /> + </inputs> + + <!-- [REQUIRED] Output files --> + <outputs> + <data name="fileout" type="data" format="tabular" label="${fileout_label}" /> + <data name="fileout_bysample" type="data" format="tabular" label="${fileout_label}.by_sample" /> + </outputs> + + <!-- [STRONGLY RECOMMANDED] Exit code rules --> + <stdio> + <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> + <exit_code range="1:" level="fatal" /> + </stdio> + + <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> + <tests> + <!-- [HELP] Test files have to be in the ~/test-data directory --> +<!-- + <test> + <param name="input" value="input_test.txt" /> + <output name="output" file="output_test.txt" /> + </test> +--> + <!-- [HELP] Multiple tests can be defined with different parameters --> +<!-- + <test> + </test> +--> + </tests> + + <!-- [OPTIONAL] Help displayed 
in Galaxy --> + <help> + +.. class:: infomark + +**Authors** Dereeper Alexis (alexis.dereeper@ird.fr), IRD, South Green platform + + | **Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). + + +.. class:: infomark + +**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr + +--------------------------------------------------- + + + +============= +SNP densities +============= + +----------- +Description +----------- + + Calculate SNP densities along chromosome from HapMap + + +----------------- +Workflow position +----------------- + +**Upstream tool** + +=============== ====================== =========== +Name output file(s) format +=============== ====================== =========== +VCF to Hapmap Hapmap file hapmap +=============== ====================== =========== + + +---------- +Input file +---------- + +Hapmap file + File with SNPs + + +---------- +Parameters +---------- + +Step + Step in bp for the window to calculate SNP density + +Output name + Output base name for the two output files + + +------------ +Output files +------------ + +Output_name + Tabular file with SNP density in each position + +Output_name.by_sample + Tabular file with SNP density for each sample + + +--------------------------------------------------- + +--------------- +Working example +--------------- + +Input files +=========== + +hapmap file +----------- + +:: + + rs# alleles chrom pos strand assembly# center protLSID assayLSID panelLSID QCcode ref BA58 BA59 BD54 + chr1:1774 [G/T] chr1 1774 Cc01_g00010:47 exon NON_SYNONYMOUS_CODING gCg/gAg A/E 25.0% 4 GG TT TT TT + chr1:1640 [G/A] chr1 1640 Cc01_g00010:127 exon NON_SYNONYMOUS_CODING Ccg/Tcg P/S 37.5% 4 GG GG AA GA + chr1:1629 [A/C] chr1 1629 
Cc01_g00010:138 exon SYNONYMOUS_CODING ctT/ctG L/L 37.5% 4 AA CC CC AC + chr1:1628 [C/G] chr1 1628 Cc01_g00010:139 exon NON_SYNONYMOUS_CODING Ggg/Cgg G/R 12.5% 4 CC CC CC CG + chr1:1619 [T/G] chr1 1619 Cc01_g00010:148 exon NON_SYNONYMOUS_CODING Aaa/Caa K/Q 37.5% 4 TT TT GG TG + chr1:1405 [C/T] chr1 1405 Cc01_g00010:362 exon NON_SYNONYMOUS_CODING cGg/cAg R/Q 16.7% 3 CC CC NN CT + +Parameters +========== + +Step -> 200000 + +Output name -> densities + + +Output files +============ + +densities +--------- + +:: + + Chromosome Position SNPs + chr1 200000 355 + chr1 400000 228 + chr1 600000 63 + chr1 800000 191 + + +densities.by_sample +------------------- + +:: + + Chromosome BA58 BA59 BD54 + chr1 220 197 225 + chr1 130 119 133 + chr1 43 43 40 + chr1 139 167 141 + + </help> + <citations> + <!-- [HELP] As DOI or BibTex entry --> + <citation type="bibtex">@article{Dereeper03062015, +author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, +title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, +year = {2015}, +doi = {10.1093/nar/gkv351}, +abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. 
Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, +URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, +eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, +journal = {Nucleic Acids Research} +} + + </citation> + + </citations> +</tool> |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 SNP_density/calculateSlidingWindowsSNPdensitiesFromHapmap.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNP_density/calculateSlidingWindowsSNPdensitiesFromHapmap.sh Fri Jul 10 04:39:30 2015 -0400 |
b |
#!/bin/bash

# Galaxy entry point of the SNP density tool.
#
# Usage: calculateSlidingWindowsSNPdensitiesFromHapmap.sh <hapmap> <fileout> <fileout_bysample> <step>
#
# Runs CalculateSlidingWindowsSNPdensitiesFromHapmap.pl (located next to this
# script), which writes <fileout> and <fileout>.by_sample, then copies the
# by-sample table to <fileout_bysample> and removes <fileout>.by_sample.
# Exits with the status of the first step that fails.

# Every expansion is quoted so that paths containing spaces stay one word.
tool_path=$(dirname "$0")
hapmap=$1
fileout=$2
fileout_bysample=$3
step=$4

# Stop here when the computation fails instead of copying a missing file.
perl "$tool_path/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl" -i "$hapmap" -o "$fileout" -s "$step" || exit $?

cp "$fileout.by_sample" "$fileout_bysample" || exit $?
rm "$fileout.by_sample"
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 SNP_density/test-data/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNP_density/test-data/.svn/entries Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,130 @@ +10 + +dir +39 +svn+ssh://svn.sb-roscoff.fr/svn/logiciel/galaxy/galaxy4sniplay/SNP_density/test-data +svn+ssh://svn.sb-roscoff.fr/svn/logiciel/galaxy + + + +2014-12-05T15:19:39.834618Z +39 +gandres + + + + + + + + + + + + + + +0f93037e-e277-4375-988d-e0ab8f9fda44 + +hapmap +file + + + + +2014-10-10T14:09:09.000000Z +65d1c4316f6d483a62c8bea0f5b9ef2a +2014-12-05T15:19:39.834618Z +39 +gandres + + + + + + + + + + + + + + + + + + + + + +376118 + +result_bysample.txt +file + + + + +2014-12-05T14:21:25.000000Z +4c3d447481c9e55ef17337836c42fc38 +2014-12-05T15:19:39.834618Z +39 +gandres + + + + + + + + + + + + + + + + + + + + + +336 + +result.txt +file + + + + +2014-12-05T14:21:32.000000Z +436c5356d4acc99f8ed023bc681bb787 +2014-12-05T15:19:39.834618Z +39 +gandres + + + + + + + + + + + + + + + + + + + + + +357 + |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 SNP_density/test-data/.svn/text-base/hapmap.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNP_density/test-data/.svn/text-base/hapmap.svn-base Fri Jul 10 04:39:30 2015 -0400 |
[ |
b'@@ -0,0 +1,3584 @@\n+rs#\talleles\tchrom\tpos\tstrand\tassembly#\tcenter\tprotLSID\tassayLSID\tpanelLSID\tQCcode\tref\tBA58\tBA59\tBD54\n+chr1:1774\t[G/T]\tchr1\t1774\tCc01_g00010:47\texon\tNON_SYNONYMOUS_CODING\tgCg/gAg\tA/E\t25.0%\t4\tGG\tTT\tTT\tTT\n+chr1:1640\t[G/A]\tchr1\t1640\tCc01_g00010:127\texon\tNON_SYNONYMOUS_CODING\tCcg/Tcg\tP/S\t37.5%\t4\tGG\tGG\tAA\tGA\n+chr1:1629\t[A/C]\tchr1\t1629\tCc01_g00010:138\texon\tSYNONYMOUS_CODING\tctT/ctG\tL/L\t37.5%\t4\tAA\tCC\tCC\tAC\n+chr1:1628\t[C/G]\tchr1\t1628\tCc01_g00010:139\texon\tNON_SYNONYMOUS_CODING\tGgg/Cgg\tG/R\t12.5%\t4\tCC\tCC\tCC\tCG\n+chr1:1619\t[T/G]\tchr1\t1619\tCc01_g00010:148\texon\tNON_SYNONYMOUS_CODING\tAaa/Caa\tK/Q\t37.5%\t4\tTT\tTT\tGG\tTG\n+chr1:1405\t[C/T]\tchr1\t1405\tCc01_g00010:362\texon\tNON_SYNONYMOUS_CODING\tcGg/cAg\tR/Q\t16.7%\t3\tCC\tCC\tNN\tCT\n+chr1:1398\t[T/G]\tchr1\t1398\tCc01_g00010:369\texon\tSYNONYMOUS_CODING\tgcA/gcC\tA/A\t16.7%\t3\tTT\tTT\tNN\tTG\n+chr1:1394\t[G/C]\tchr1\t1394\tCc01_g00010:373\texon\tNON_SYNONYMOUS_CODING\tCgt/Ggt\tR/G\t16.7%\t3\tGG\tGG\tNN\tGC\n+chr1:1379\t[A/T]\tchr1\t1379\tCc01_g00010:388\texon\tNON_SYNONYMOUS_CODING\tTac/Aac\tY/N\t16.7%\t3\tAA\tAA\tNN\tAT\n+chr1:1352\t[T/C]\tchr1\t1352\tCc01_g00010:415\texon\tNON_SYNONYMOUS_CODING\tAct/Gct\tT/A\t33.3%\t3\tTT\tCC\tNN\tCC\n+chr1:1252\t[T/A]\tchr1\t1252\tCc01_g00010:515\texon\tNON_SYNONYMOUS_CODING\taAg/aTg\tK/M\t12.5%\t4\tTT\tTT\tTT\tTA\n+chr1:17176\t[T/G]\tchr1\t17176\tCc01_g00030:504\texon\tSYNONYMOUS_CODING\tggT/ggG\tG/G\t12.5%\t4\tTT\tTT\tTG\tTT\n+chr1:18041\t[A/G]\tchr1\t18041\tCc01_g00030:661\texon\tNON_SYNONYMOUS_CODING\tAtc/Gtc\tI/V\t37.5%\t4\tAA\tAA\tGG\tAG\n+chr1:18442\t[C/A]\tchr1\t18442\tCc01_g00030:929\texon\tNON_SYNONYMOUS_CODING\ttCc/tAc\tS/Y\t25.0%\t4\tCC\tCC\tCA\tCA\n+chr1:18478\t[T/C]\tchr1\t18478\tCc01_g00030:965\texon\tNON_SYNONYMOUS_CODING\tcTa/cCa\tL/P\t25.0%\t4\tTT\tTT\tTC\tTC\n+chr1:18554\t[T/G]\tchr1\t18554\tCc01_g00030:1041\texon\tSYNONYMOUS_CODING\ttcT/tcG\tS/S\t25.0%\t4\tTT\tTT\tTG\
tTG\n+chr1:19062\t[T/A]\tchr1\t19062\tCc01_g00030:1151\texon\tNON_SYNONYMOUS_CODING\ttTc/tAc\tF/Y\t33.3%\t3\tTT\tNN\tTT\tAA\n+chr1:19078\t[A/G]\tchr1\t19078\tCc01_g00030:1167\texon\tSYNONYMOUS_CODING\taaA/aaG\tK/K\t12.5%\t4\tAA\tAA\tAG\tAA\n+chr1:19138\t[T/C]\tchr1\t19138\tCc01_g00030:1227\texon\tSYNONYMOUS_CODING\ttaT/taC\tY/Y\t25.0%\t4\tTT\tTT\tCT\tTC\n+chr1:22125\t[C/T]\tchr1\t22125\tCc01_g00030:1773\texon\tSYNONYMOUS_CODING\tccC/ccT\tP/P\t25.0%\t4\tCC\tCC\tCT\tCT\n+chr1:22212\t[T/C]\tchr1\t22212\tCc01_g00030:1860\texon\tSYNONYMOUS_CODING\tagT/agC\tS/S\t12.5%\t4\tTT\tTT\tTC\tTT\n+chr1:22342\t[T/A]\tchr1\t22342\tCc01_g00030:1990\texon\tNON_SYNONYMOUS_CODING\tTca/Aca\tS/T\t25.0%\t4\tTT\tTT\tTA\tTA\n+chr1:22404\t[C/T]\tchr1\t22404\tCc01_g00030:2052\texon\tSYNONYMOUS_CODING\ttgC/tgT\tC/C\t25.0%\t4\tCC\tCC\tCT\tCT\n+chr1:22478\t[C/G]\tchr1\t22478\tCc01_g00030:2126\texon\tNON_SYNONYMOUS_CODING\taCt/aGt\tT/S\t25.0%\t4\tCC\tGG\tCC\tCC\n+chr1:22613\t[C/T]\tchr1\t22613\tCc01_g00030:2261\texon\tNON_SYNONYMOUS_CODING\tgCg/gTg\tA/V\t25.0%\t4\tCC\tTT\tCC\tCC\n+chr1:22657\t[C/T]\tchr1\t22657\tCc01_g00030:2305\texon\tSYNONYMOUS_CODING\tCta/Tta\tL/L\t12.5%\t4\tCC\tCC\tCT\tCC\n+chr1:22758\t[A/G]\tchr1\t22758\tCc01_g00030:2406\texon\tSYNONYMOUS_CODING\tgaA/gaG\tE/E\t25.0%\t4\tAA\tAA\tAG\tAG\n+chr1:22903\t[C/A]\tchr1\t22903\tCc01_g00030:2551\texon\tNON_SYNONYMOUS_CODING\tCtt/Att\tL/I\t25.0%\t4\tCC\tCC\tCA\tCA\n+chr1:22965\t[G/T]\tchr1\t22965\tCc01_g00030:2613\texon\tNON_SYNONYMOUS_CODING\tatG/atT\tM/I\t12.5%\t4\tGG\tGG\tTG\tGG\n+chr1:23037\t[A/G]\tchr1\t23037\tCc01_g00030:2685\texon\tSYNONYMOUS_CODING\taaA/aaG\tK/K\t25.0%\t4\tAA\tAA\tAG\tAG\n+chr1:24151\t[A/G]\tchr1\t24151\tCc01_g00030:3063\texon\tSYNONYMOUS_CODING\tgcA/gcG\tA/A\t25.0%\t4\tAA\tAA\tAG\tAG\n+chr1:24436\t[T/C]\tchr1\t24436\tCc01_g00030:3240\texon\tSYNONYMOUS_CODING\tagT/agC\tS/S\t25.0%\t4\tTT\tTT\tTC\tTC\n+chr1:25763\t[G/C]\tchr1\t25763\tCc01_g00030:3610\texon\tNON_SYNONYMOUS_CODING\tGta/Cta\tV/L\t25.0%\t4\tGG\tGG\tGC\
tGC\n+chr1:26910\t[T/G]\tchr1\t26910\tCc01_g00030:3889\texon\tNON_SYNONYMOUS_CODING\tTct/Gct\tS/A\t25.0%\t4\tTT\tTT\tTG\tTG\n+chr1:26996\t[C/T]\tchr1\t26996\tCc01_g00030:3975\texon\tSYNONYMOUS_CODING\tgaC/gaT\tD/D\t25.0%\t4\tCC\tCC\tCT\tCT\n+chr1:27095\t[A/T]\tchr1\t27095\tCc01_g00030:4074\texon\tSYNONYMOUS_CODING\tccA/ccT\tP/P\t37.5%\t4\tAA\tAA\tTT\tAT\n+chr1:27755\t[C/T]\tchr1\t27755\tCc01_g00030:4734\texon\tSYNONYMOUS_CODING\tagC/agT\tS/S\t25.0%\t4\tCC\tCC\tCT\tCT\n+chr1:30493\t[C/T]\tchr1\t30493\tCc01_g00030:4992\texon\tSYNONYMOUS_CODING\tgaC/gaT\tD/D\t12.5%\t4\tCC\tCC\tCT\tC'..b'n\tSYNONYMOUS_CODING\tacG/acA\tT/T\t25.0%\t4\tCC\tTT\tCC\tCC\n+chr1:3940278\t[G/A]\tchr1\t3940278\tCc01_g02140:861\texon\tSYNONYMOUS_CODING\taaC/aaT\tN/N\t25.0%\t4\tGG\tAA\tGG\tGG\n+chr1:3940190\t[T/C]\tchr1\t3940190\tCc01_g02140:949\texon\tNON_SYNONYMOUS_CODING\tAtc/Gtc\tI/V\t12.5%\t4\tTT\tTT\tTC\tTT\n+chr1:3940062\t[G/A]\tchr1\t3940062\tCc01_g02140:1077\texon\tSYNONYMOUS_CODING\taaC/aaT\tN/N\t37.5%\t4\tGG\tAA\tGA\tAA\n+chr1:3939962\t[C/A]\tchr1\t3939962\tCc01_g02140:1177\texon\tNON_SYNONYMOUS_CODING\tGcc/Tcc\tA/S\t12.5%\t4\tCC\tCA\tCC\tCC\n+chr1:3939959\t[G/T]\tchr1\t3939959\tCc01_g02140:1180\texon\tNON_SYNONYMOUS_CODING\tCtt/Att\tL/I\t25.0%\t4\tGG\tTT\tGG\tGG\n+chr1:3939916\t[C/T]\tchr1\t3939916\tCc01_g02140:1223\texon\tNON_SYNONYMOUS_CODING\taGt/aAt\tS/N\t25.0%\t4\tCC\tTT\tCC\tCC\n+chr1:3938977\t[G/C]\tchr1\t3938977\tCc01_g02140:1335\texon\tSYNONYMOUS_CODING\ttcC/tcG\tS/S\t12.5%\t4\tGG\tGC\tGG\tGG\n+chr1:3938798\t[T/G]\tchr1\t3938798\tCc01_g02140:1369\texon\tNON_SYNONYMOUS_CODING\tAgt/Cgt\tS/R\t37.5%\t4\tTT\tGG\tTG\tTT\n+chr1:3938797\t[C/G]\tchr1\t3938797\tCc01_g02140:1370\texon\tNON_SYNONYMOUS_CODING\taGt/aCt\tS/T\t12.5%\t4\tCC\tCC\tCC\tCG\n+chr1:3938792\t[G/A]\tchr1\t3938792\tCc01_g02140:1375\texon\tSYNONYMOUS_CODING\tCta/Tta\tL/L\t12.5%\t4\tGG\tGG\tGG\tAG\n+chr1:3938784\t[C/T]\tchr1\t3938784\tCc01_g02140:1383\texon\tSYNONYMOUS_CODING\tccG/ccA\tP/P\t25.0%\t4\tCC\tTC\tCT\tCC\n+chr1
:3938738\t[C/T]\tchr1\t3938738\tCc01_g02140:1429\texon\tNON_SYNONYMOUS_CODING\tGtt/Att\tV/I\t12.5%\t4\tCC\tCC\tCT\tCC\n+chr1:3938734\t[T/C]\tchr1\t3938734\tCc01_g02140:1433\texon\tNON_SYNONYMOUS_CODING\tcAa/cGa\tQ/R\t25.0%\t4\tTT\tTT\tTC\tCT\n+chr1:3938731\t[A/G]\tchr1\t3938731\tCc01_g02140:1436\texon\tNON_SYNONYMOUS_CODING\tgTg/gCg\tV/A\t12.5%\t4\tAA\tAA\tAG\tAA\n+chr1:3938674\t[C/T]\tchr1\t3938674\tCc01_g02140:1493\texon\tNON_SYNONYMOUS_CODING\taGt/aAt\tS/N\t12.5%\t4\tCC\tCC\tCT\tCC\n+chr1:3950327\t[A/C]\tchr1\t3950327\tCc01_g02150:300\texon\tNON_SYNONYMOUS_CODING\tcaT/caG\tH/Q\t37.5%\t4\tAA\tCC\tAC\tAA\n+chr1:3949508\t[G/T]\tchr1\t3949508\tCc01_g02150:438\texon\tSYNONYMOUS_CODING\ttcC/tcA\tS/S\t12.5%\t4\tGG\tGT\tGG\tGG\n+chr1:3949500\t[T/C]\tchr1\t3949500\tCc01_g02150:446\texon\tNON_SYNONYMOUS_CODING\ttAt/tGt\tY/C\t12.5%\t4\tTT\tTT\tTC\tTT\n+chr1:3949478\t[G/A]\tchr1\t3949478\tCc01_g02150:468\texon\tSYNONYMOUS_CODING\tcgC/cgT\tR/R\t12.5%\t4\tGG\tGG\tGA\tGG\n+chr1:3949470\t[C/T]\tchr1\t3949470\tCc01_g02150:476\texon\tNON_SYNONYMOUS_CODING\taGg/aAg\tR/K\t25.0%\t4\tCC\tTT\tCC\tCC\n+chr1:3949465\t[C/G]\tchr1\t3949465\tCc01_g02150:481\texon\tNON_SYNONYMOUS_CODING\tGgt/Cgt\tG/R\t37.5%\t4\tCC\tGG\tCG\tCC\n+chr1:3949086\t[C/T]\tchr1\t3949086\tCc01_g02150:541\texon\tNON_SYNONYMOUS_CODING\tGtc/Atc\tV/I\t12.5%\t4\tCC\tCC\tCT\tCC\n+chr1:3967535\t[C/T]\tchr1\t3967535\tCc01_g02160:57\texon\tSYNONYMOUS_CODING\tcgG/cgA\tR/R\t12.5%\t4\tCC\tCT\tCC\tCC\n+chr1:3967530\t[A/T]\tchr1\t3967530\tCc01_g02160:62\texon\tNON_SYNONYMOUS_CODING\tcTg/cAg\tL/Q\t12.5%\t4\tAA\tAA\tAA\tAT\n+chr1:3967524\t[G/A]\tchr1\t3967524\tCc01_g02160:68\texon\tNON_SYNONYMOUS_CODING\tcCa/cTa\tP/L\t37.5%\t4\tGG\tAG\tGA\tAG\n+chr1:3967512\t[A/G]\tchr1\t3967512\tCc01_g02160:80\texon\tNON_SYNONYMOUS_CODING\taTa/aCa\tI/T\t12.5%\t4\tAA\tAA\tAG\tAA\n+chr1:3967435\t[A/G]\tchr1\t3967435\tCc01_g02160:157\texon\tNON_SYNONYMOUS_CODING\tTcc/Ccc\tS/P\t25.0%\t4\tAA\tAG\tAA\tAG\n+chr1:3967421\t[G/A]\tchr1\t3967421\tCc01_g02160:
171\texon\tSYNONYMOUS_CODING\tacC/acT\tT/T\t12.5%\t4\tGG\tAG\tGG\tGG\n+chr1:3967412\t[A/C]\tchr1\t3967412\tCc01_g02160:180\texon\tSTOP_GAINED\ttaT/taG\tY/0\t37.5%\t4\tAA\tAC\tAC\tAC\n+chr1:3967382\t[A/G]\tchr1\t3967382\tCc01_g02160:210\texon\tSYNONYMOUS_CODING\tccT/ccC\tP/P\t25.0%\t4\tAA\tGA\tGA\tAA\n+chr1:3967354\t[G/T]\tchr1\t3967354\tCc01_g02160:238\texon\tNON_SYNONYMOUS_CODING\tCaa/Aaa\tQ/K\t12.5%\t4\tGG\tGG\tGG\tGT\n+chr1:3967352\t[T/C]\tchr1\t3967352\tCc01_g02160:240\texon\tSYNONYMOUS_CODING\tcaA/caG\tQ/Q\t37.5%\t4\tTT\tCC\tCT\tCC\n+chr1:3967310\t[G/C]\tchr1\t3967310\tCc01_g02160:282\texon\tNON_SYNONYMOUS_CODING\tatC/atG\tI/M\t37.5%\t4\tGG\tGC\tGC\tGC\n+chr1:3967300\t[C/T]\tchr1\t3967300\tCc01_g02160:292\texon\tNON_SYNONYMOUS_CODING\tGat/Aat\tD/N\t12.5%\t4\tCC\tCC\tCC\tTC\n+chr1:3967295\t[G/A]\tchr1\t3967295\tCc01_g02160:297\texon\tSYNONYMOUS_CODING\tatC/atT\tI/I\t12.5%\t4\tGG\tGG\tGA\tGG\n+chr1:3966909\t[T/A]\tchr1\t3966909\tCc01_g02160:683\texon\tNON_SYNONYMOUS_CODING\ttAt/tTt\tY/F\t12.5%\t4\tTT\tTA\tTT\tTT\n+chr1:3966784\t[G/A]\tchr1\t3966784\tCc01_g02160:808\texon\tNON_SYNONYMOUS_CODING\tCtt/Ttt\tL/F\t16.7%\t3\tGG\tNN\tGA\tGG\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 SNP_density/test-data/.svn/text-base/result.txt.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNP_density/test-data/.svn/text-base/result.txt.svn-base Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,21 @@ +Chromosome Position SNPs +chr1 200000 355 +chr1 400000 228 +chr1 600000 63 +chr1 800000 191 +chr1 1000000 88 +chr1 1200000 90 +chr1 1400000 190 +chr1 1600000 267 +chr1 1800000 226 +chr1 2000000 163 +chr1 2200000 357 +chr1 2400000 132 +chr1 2600000 158 +chr1 2800000 104 +chr1 3000000 146 +chr1 3200000 331 +chr1 3400000 181 +chr1 3600000 36 +chr1 3800000 167 +chr1 4000000 110 |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 SNP_density/test-data/.svn/text-base/result_bysample.txt.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNP_density/test-data/.svn/text-base/result_bysample.txt.svn-base Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,21 @@ +Chromosome BA58 BA59 BD54 +chr1 220 197 225 +chr1 130 119 133 +chr1 43 43 40 +chr1 139 167 141 +chr1 62 72 78 +chr1 35 76 38 +chr1 98 123 82 +chr1 133 205 135 +chr1 94 141 151 +chr1 112 116 117 +chr1 228 238 204 +chr1 86 95 96 +chr1 60 106 82 +chr1 60 74 64 +chr1 106 112 115 +chr1 199 220 228 +chr1 151 94 64 +chr1 18 32 27 +chr1 87 90 100 +chr1 54 75 47 |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 SNP_density/test-data/hapmap --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNP_density/test-data/hapmap Fri Jul 10 04:39:30 2015 -0400 |
[ |
b'@@ -0,0 +1,3584 @@\n+rs#\talleles\tchrom\tpos\tstrand\tassembly#\tcenter\tprotLSID\tassayLSID\tpanelLSID\tQCcode\tref\tBA58\tBA59\tBD54\n+chr1:1774\t[G/T]\tchr1\t1774\tCc01_g00010:47\texon\tNON_SYNONYMOUS_CODING\tgCg/gAg\tA/E\t25.0%\t4\tGG\tTT\tTT\tTT\n+chr1:1640\t[G/A]\tchr1\t1640\tCc01_g00010:127\texon\tNON_SYNONYMOUS_CODING\tCcg/Tcg\tP/S\t37.5%\t4\tGG\tGG\tAA\tGA\n+chr1:1629\t[A/C]\tchr1\t1629\tCc01_g00010:138\texon\tSYNONYMOUS_CODING\tctT/ctG\tL/L\t37.5%\t4\tAA\tCC\tCC\tAC\n+chr1:1628\t[C/G]\tchr1\t1628\tCc01_g00010:139\texon\tNON_SYNONYMOUS_CODING\tGgg/Cgg\tG/R\t12.5%\t4\tCC\tCC\tCC\tCG\n+chr1:1619\t[T/G]\tchr1\t1619\tCc01_g00010:148\texon\tNON_SYNONYMOUS_CODING\tAaa/Caa\tK/Q\t37.5%\t4\tTT\tTT\tGG\tTG\n+chr1:1405\t[C/T]\tchr1\t1405\tCc01_g00010:362\texon\tNON_SYNONYMOUS_CODING\tcGg/cAg\tR/Q\t16.7%\t3\tCC\tCC\tNN\tCT\n+chr1:1398\t[T/G]\tchr1\t1398\tCc01_g00010:369\texon\tSYNONYMOUS_CODING\tgcA/gcC\tA/A\t16.7%\t3\tTT\tTT\tNN\tTG\n+chr1:1394\t[G/C]\tchr1\t1394\tCc01_g00010:373\texon\tNON_SYNONYMOUS_CODING\tCgt/Ggt\tR/G\t16.7%\t3\tGG\tGG\tNN\tGC\n+chr1:1379\t[A/T]\tchr1\t1379\tCc01_g00010:388\texon\tNON_SYNONYMOUS_CODING\tTac/Aac\tY/N\t16.7%\t3\tAA\tAA\tNN\tAT\n+chr1:1352\t[T/C]\tchr1\t1352\tCc01_g00010:415\texon\tNON_SYNONYMOUS_CODING\tAct/Gct\tT/A\t33.3%\t3\tTT\tCC\tNN\tCC\n+chr1:1252\t[T/A]\tchr1\t1252\tCc01_g00010:515\texon\tNON_SYNONYMOUS_CODING\taAg/aTg\tK/M\t12.5%\t4\tTT\tTT\tTT\tTA\n+chr1:17176\t[T/G]\tchr1\t17176\tCc01_g00030:504\texon\tSYNONYMOUS_CODING\tggT/ggG\tG/G\t12.5%\t4\tTT\tTT\tTG\tTT\n+chr1:18041\t[A/G]\tchr1\t18041\tCc01_g00030:661\texon\tNON_SYNONYMOUS_CODING\tAtc/Gtc\tI/V\t37.5%\t4\tAA\tAA\tGG\tAG\n+chr1:18442\t[C/A]\tchr1\t18442\tCc01_g00030:929\texon\tNON_SYNONYMOUS_CODING\ttCc/tAc\tS/Y\t25.0%\t4\tCC\tCC\tCA\tCA\n+chr1:18478\t[T/C]\tchr1\t18478\tCc01_g00030:965\texon\tNON_SYNONYMOUS_CODING\tcTa/cCa\tL/P\t25.0%\t4\tTT\tTT\tTC\tTC\n+chr1:18554\t[T/G]\tchr1\t18554\tCc01_g00030:1041\texon\tSYNONYMOUS_CODING\ttcT/tcG\tS/S\t25.0%\t4\tTT\tTT\tTG\
tTG\n+chr1:19062\t[T/A]\tchr1\t19062\tCc01_g00030:1151\texon\tNON_SYNONYMOUS_CODING\ttTc/tAc\tF/Y\t33.3%\t3\tTT\tNN\tTT\tAA\n+chr1:19078\t[A/G]\tchr1\t19078\tCc01_g00030:1167\texon\tSYNONYMOUS_CODING\taaA/aaG\tK/K\t12.5%\t4\tAA\tAA\tAG\tAA\n+chr1:19138\t[T/C]\tchr1\t19138\tCc01_g00030:1227\texon\tSYNONYMOUS_CODING\ttaT/taC\tY/Y\t25.0%\t4\tTT\tTT\tCT\tTC\n+chr1:22125\t[C/T]\tchr1\t22125\tCc01_g00030:1773\texon\tSYNONYMOUS_CODING\tccC/ccT\tP/P\t25.0%\t4\tCC\tCC\tCT\tCT\n+chr1:22212\t[T/C]\tchr1\t22212\tCc01_g00030:1860\texon\tSYNONYMOUS_CODING\tagT/agC\tS/S\t12.5%\t4\tTT\tTT\tTC\tTT\n+chr1:22342\t[T/A]\tchr1\t22342\tCc01_g00030:1990\texon\tNON_SYNONYMOUS_CODING\tTca/Aca\tS/T\t25.0%\t4\tTT\tTT\tTA\tTA\n+chr1:22404\t[C/T]\tchr1\t22404\tCc01_g00030:2052\texon\tSYNONYMOUS_CODING\ttgC/tgT\tC/C\t25.0%\t4\tCC\tCC\tCT\tCT\n+chr1:22478\t[C/G]\tchr1\t22478\tCc01_g00030:2126\texon\tNON_SYNONYMOUS_CODING\taCt/aGt\tT/S\t25.0%\t4\tCC\tGG\tCC\tCC\n+chr1:22613\t[C/T]\tchr1\t22613\tCc01_g00030:2261\texon\tNON_SYNONYMOUS_CODING\tgCg/gTg\tA/V\t25.0%\t4\tCC\tTT\tCC\tCC\n+chr1:22657\t[C/T]\tchr1\t22657\tCc01_g00030:2305\texon\tSYNONYMOUS_CODING\tCta/Tta\tL/L\t12.5%\t4\tCC\tCC\tCT\tCC\n+chr1:22758\t[A/G]\tchr1\t22758\tCc01_g00030:2406\texon\tSYNONYMOUS_CODING\tgaA/gaG\tE/E\t25.0%\t4\tAA\tAA\tAG\tAG\n+chr1:22903\t[C/A]\tchr1\t22903\tCc01_g00030:2551\texon\tNON_SYNONYMOUS_CODING\tCtt/Att\tL/I\t25.0%\t4\tCC\tCC\tCA\tCA\n+chr1:22965\t[G/T]\tchr1\t22965\tCc01_g00030:2613\texon\tNON_SYNONYMOUS_CODING\tatG/atT\tM/I\t12.5%\t4\tGG\tGG\tTG\tGG\n+chr1:23037\t[A/G]\tchr1\t23037\tCc01_g00030:2685\texon\tSYNONYMOUS_CODING\taaA/aaG\tK/K\t25.0%\t4\tAA\tAA\tAG\tAG\n+chr1:24151\t[A/G]\tchr1\t24151\tCc01_g00030:3063\texon\tSYNONYMOUS_CODING\tgcA/gcG\tA/A\t25.0%\t4\tAA\tAA\tAG\tAG\n+chr1:24436\t[T/C]\tchr1\t24436\tCc01_g00030:3240\texon\tSYNONYMOUS_CODING\tagT/agC\tS/S\t25.0%\t4\tTT\tTT\tTC\tTC\n+chr1:25763\t[G/C]\tchr1\t25763\tCc01_g00030:3610\texon\tNON_SYNONYMOUS_CODING\tGta/Cta\tV/L\t25.0%\t4\tGG\tGG\tGC\
tGC\n+chr1:26910\t[T/G]\tchr1\t26910\tCc01_g00030:3889\texon\tNON_SYNONYMOUS_CODING\tTct/Gct\tS/A\t25.0%\t4\tTT\tTT\tTG\tTG\n+chr1:26996\t[C/T]\tchr1\t26996\tCc01_g00030:3975\texon\tSYNONYMOUS_CODING\tgaC/gaT\tD/D\t25.0%\t4\tCC\tCC\tCT\tCT\n+chr1:27095\t[A/T]\tchr1\t27095\tCc01_g00030:4074\texon\tSYNONYMOUS_CODING\tccA/ccT\tP/P\t37.5%\t4\tAA\tAA\tTT\tAT\n+chr1:27755\t[C/T]\tchr1\t27755\tCc01_g00030:4734\texon\tSYNONYMOUS_CODING\tagC/agT\tS/S\t25.0%\t4\tCC\tCC\tCT\tCT\n+chr1:30493\t[C/T]\tchr1\t30493\tCc01_g00030:4992\texon\tSYNONYMOUS_CODING\tgaC/gaT\tD/D\t12.5%\t4\tCC\tCC\tCT\tC'..b'n\tSYNONYMOUS_CODING\tacG/acA\tT/T\t25.0%\t4\tCC\tTT\tCC\tCC\n+chr1:3940278\t[G/A]\tchr1\t3940278\tCc01_g02140:861\texon\tSYNONYMOUS_CODING\taaC/aaT\tN/N\t25.0%\t4\tGG\tAA\tGG\tGG\n+chr1:3940190\t[T/C]\tchr1\t3940190\tCc01_g02140:949\texon\tNON_SYNONYMOUS_CODING\tAtc/Gtc\tI/V\t12.5%\t4\tTT\tTT\tTC\tTT\n+chr1:3940062\t[G/A]\tchr1\t3940062\tCc01_g02140:1077\texon\tSYNONYMOUS_CODING\taaC/aaT\tN/N\t37.5%\t4\tGG\tAA\tGA\tAA\n+chr1:3939962\t[C/A]\tchr1\t3939962\tCc01_g02140:1177\texon\tNON_SYNONYMOUS_CODING\tGcc/Tcc\tA/S\t12.5%\t4\tCC\tCA\tCC\tCC\n+chr1:3939959\t[G/T]\tchr1\t3939959\tCc01_g02140:1180\texon\tNON_SYNONYMOUS_CODING\tCtt/Att\tL/I\t25.0%\t4\tGG\tTT\tGG\tGG\n+chr1:3939916\t[C/T]\tchr1\t3939916\tCc01_g02140:1223\texon\tNON_SYNONYMOUS_CODING\taGt/aAt\tS/N\t25.0%\t4\tCC\tTT\tCC\tCC\n+chr1:3938977\t[G/C]\tchr1\t3938977\tCc01_g02140:1335\texon\tSYNONYMOUS_CODING\ttcC/tcG\tS/S\t12.5%\t4\tGG\tGC\tGG\tGG\n+chr1:3938798\t[T/G]\tchr1\t3938798\tCc01_g02140:1369\texon\tNON_SYNONYMOUS_CODING\tAgt/Cgt\tS/R\t37.5%\t4\tTT\tGG\tTG\tTT\n+chr1:3938797\t[C/G]\tchr1\t3938797\tCc01_g02140:1370\texon\tNON_SYNONYMOUS_CODING\taGt/aCt\tS/T\t12.5%\t4\tCC\tCC\tCC\tCG\n+chr1:3938792\t[G/A]\tchr1\t3938792\tCc01_g02140:1375\texon\tSYNONYMOUS_CODING\tCta/Tta\tL/L\t12.5%\t4\tGG\tGG\tGG\tAG\n+chr1:3938784\t[C/T]\tchr1\t3938784\tCc01_g02140:1383\texon\tSYNONYMOUS_CODING\tccG/ccA\tP/P\t25.0%\t4\tCC\tTC\tCT\tCC\n+chr1
:3938738\t[C/T]\tchr1\t3938738\tCc01_g02140:1429\texon\tNON_SYNONYMOUS_CODING\tGtt/Att\tV/I\t12.5%\t4\tCC\tCC\tCT\tCC\n+chr1:3938734\t[T/C]\tchr1\t3938734\tCc01_g02140:1433\texon\tNON_SYNONYMOUS_CODING\tcAa/cGa\tQ/R\t25.0%\t4\tTT\tTT\tTC\tCT\n+chr1:3938731\t[A/G]\tchr1\t3938731\tCc01_g02140:1436\texon\tNON_SYNONYMOUS_CODING\tgTg/gCg\tV/A\t12.5%\t4\tAA\tAA\tAG\tAA\n+chr1:3938674\t[C/T]\tchr1\t3938674\tCc01_g02140:1493\texon\tNON_SYNONYMOUS_CODING\taGt/aAt\tS/N\t12.5%\t4\tCC\tCC\tCT\tCC\n+chr1:3950327\t[A/C]\tchr1\t3950327\tCc01_g02150:300\texon\tNON_SYNONYMOUS_CODING\tcaT/caG\tH/Q\t37.5%\t4\tAA\tCC\tAC\tAA\n+chr1:3949508\t[G/T]\tchr1\t3949508\tCc01_g02150:438\texon\tSYNONYMOUS_CODING\ttcC/tcA\tS/S\t12.5%\t4\tGG\tGT\tGG\tGG\n+chr1:3949500\t[T/C]\tchr1\t3949500\tCc01_g02150:446\texon\tNON_SYNONYMOUS_CODING\ttAt/tGt\tY/C\t12.5%\t4\tTT\tTT\tTC\tTT\n+chr1:3949478\t[G/A]\tchr1\t3949478\tCc01_g02150:468\texon\tSYNONYMOUS_CODING\tcgC/cgT\tR/R\t12.5%\t4\tGG\tGG\tGA\tGG\n+chr1:3949470\t[C/T]\tchr1\t3949470\tCc01_g02150:476\texon\tNON_SYNONYMOUS_CODING\taGg/aAg\tR/K\t25.0%\t4\tCC\tTT\tCC\tCC\n+chr1:3949465\t[C/G]\tchr1\t3949465\tCc01_g02150:481\texon\tNON_SYNONYMOUS_CODING\tGgt/Cgt\tG/R\t37.5%\t4\tCC\tGG\tCG\tCC\n+chr1:3949086\t[C/T]\tchr1\t3949086\tCc01_g02150:541\texon\tNON_SYNONYMOUS_CODING\tGtc/Atc\tV/I\t12.5%\t4\tCC\tCC\tCT\tCC\n+chr1:3967535\t[C/T]\tchr1\t3967535\tCc01_g02160:57\texon\tSYNONYMOUS_CODING\tcgG/cgA\tR/R\t12.5%\t4\tCC\tCT\tCC\tCC\n+chr1:3967530\t[A/T]\tchr1\t3967530\tCc01_g02160:62\texon\tNON_SYNONYMOUS_CODING\tcTg/cAg\tL/Q\t12.5%\t4\tAA\tAA\tAA\tAT\n+chr1:3967524\t[G/A]\tchr1\t3967524\tCc01_g02160:68\texon\tNON_SYNONYMOUS_CODING\tcCa/cTa\tP/L\t37.5%\t4\tGG\tAG\tGA\tAG\n+chr1:3967512\t[A/G]\tchr1\t3967512\tCc01_g02160:80\texon\tNON_SYNONYMOUS_CODING\taTa/aCa\tI/T\t12.5%\t4\tAA\tAA\tAG\tAA\n+chr1:3967435\t[A/G]\tchr1\t3967435\tCc01_g02160:157\texon\tNON_SYNONYMOUS_CODING\tTcc/Ccc\tS/P\t25.0%\t4\tAA\tAG\tAA\tAG\n+chr1:3967421\t[G/A]\tchr1\t3967421\tCc01_g02160:
171\texon\tSYNONYMOUS_CODING\tacC/acT\tT/T\t12.5%\t4\tGG\tAG\tGG\tGG\n+chr1:3967412\t[A/C]\tchr1\t3967412\tCc01_g02160:180\texon\tSTOP_GAINED\ttaT/taG\tY/0\t37.5%\t4\tAA\tAC\tAC\tAC\n+chr1:3967382\t[A/G]\tchr1\t3967382\tCc01_g02160:210\texon\tSYNONYMOUS_CODING\tccT/ccC\tP/P\t25.0%\t4\tAA\tGA\tGA\tAA\n+chr1:3967354\t[G/T]\tchr1\t3967354\tCc01_g02160:238\texon\tNON_SYNONYMOUS_CODING\tCaa/Aaa\tQ/K\t12.5%\t4\tGG\tGG\tGG\tGT\n+chr1:3967352\t[T/C]\tchr1\t3967352\tCc01_g02160:240\texon\tSYNONYMOUS_CODING\tcaA/caG\tQ/Q\t37.5%\t4\tTT\tCC\tCT\tCC\n+chr1:3967310\t[G/C]\tchr1\t3967310\tCc01_g02160:282\texon\tNON_SYNONYMOUS_CODING\tatC/atG\tI/M\t37.5%\t4\tGG\tGC\tGC\tGC\n+chr1:3967300\t[C/T]\tchr1\t3967300\tCc01_g02160:292\texon\tNON_SYNONYMOUS_CODING\tGat/Aat\tD/N\t12.5%\t4\tCC\tCC\tCC\tTC\n+chr1:3967295\t[G/A]\tchr1\t3967295\tCc01_g02160:297\texon\tSYNONYMOUS_CODING\tatC/atT\tI/I\t12.5%\t4\tGG\tGG\tGA\tGG\n+chr1:3966909\t[T/A]\tchr1\t3966909\tCc01_g02160:683\texon\tNON_SYNONYMOUS_CODING\ttAt/tTt\tY/F\t12.5%\t4\tTT\tTA\tTT\tTT\n+chr1:3966784\t[G/A]\tchr1\t3966784\tCc01_g02160:808\texon\tNON_SYNONYMOUS_CODING\tCtt/Ttt\tL/F\t16.7%\t3\tGG\tNN\tGA\tGG\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 SNP_density/test-data/result.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNP_density/test-data/result.txt Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,21 @@ +Chromosome Position SNPs +chr1 200000 355 +chr1 400000 228 +chr1 600000 63 +chr1 800000 191 +chr1 1000000 88 +chr1 1200000 90 +chr1 1400000 190 +chr1 1600000 267 +chr1 1800000 226 +chr1 2000000 163 +chr1 2200000 357 +chr1 2400000 132 +chr1 2600000 158 +chr1 2800000 104 +chr1 3000000 146 +chr1 3200000 331 +chr1 3400000 181 +chr1 3600000 36 +chr1 3800000 167 +chr1 4000000 110 |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 SNP_density/test-data/result_bysample.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNP_density/test-data/result_bysample.txt Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,21 @@ +Chromosome BA58 BA59 BD54 +chr1 220 197 225 +chr1 130 119 133 +chr1 43 43 40 +chr1 139 167 141 +chr1 62 72 78 +chr1 35 76 38 +chr1 98 123 82 +chr1 133 205 135 +chr1 94 141 151 +chr1 112 116 117 +chr1 228 238 204 +chr1 86 95 96 +chr1 60 106 82 +chr1 60 74 64 +chr1 106 112 115 +chr1 199 220 228 +chr1 151 94 64 +chr1 18 32 27 +chr1 87 90 100 +chr1 54 75 47 |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCF2Hapmap/VCF2FastaAndHapmap.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCF2Hapmap/VCF2FastaAndHapmap.pl Fri Jul 10 04:39:30 2015 -0400 |
[ |
b'@@ -0,0 +1,525 @@\n+\n+#!/usr/bin/perl\n+\n+use strict;\n+use Getopt::Long;\n+\n+my $usage = qq~Usage:$0 <args> [<opts>]\n+\n+where <args> are:\n+\n+ -v, --vcf <VCF input>\n+ -o, --out <Output basename>\n+ \n+<opts> are:\n+\n+ -r, --reference <Reference fasta file>\n+ -g, --gff <GFF input file to create alignments of genes>\n+~;\n+$usage .= "\\n";\n+\n+my ($input,$out,$reference,$gff);\n+\n+\n+\n+GetOptions(\n+\t"vcf=s" => \\$input,\n+\t"out=s" => \\$out,\n+\t"reference=s" => \\$reference,\n+\t"gff=s" => \\$gff\n+);\n+\n+\n+die $usage\n+ if ( !$input || !$out);\n+ \n+if ($gff && !$reference)\n+{\n+\tdie "You must provide a Fasta reference file when providing GFF annotation\\n";\n+}\n+\n+ \n+my %ref_sequences; \n+if ($reference)\n+{\n+\tmy $id;\n+\tmy $sequence = "";\n+\topen(my $R,$reference) or die "cannot open file: $reference";\n+\twhile(<$R>)\n+\t{\n+\t\tmy $line =$_;\n+\t\t$line =~s/\\n//g;\n+\t\t$line =~s/\\r//g;\n+\t\tif ($line =~ />([^\\s]+)/){\n+\t\t\t$ref_sequences{$id} = $sequence;\n+\t\t\t$id=$1;$sequence="";\n+\t\t}\n+\t\telse\n+\t\t{\n+\t\t\t$sequence .= $line;\n+\t\t}\n+\t}\n+\tclose($R);\n+\t$ref_sequences{$id} = $sequence;\n+}\n+\n+\n+my %chr_of_gene;\n+my %ann;\n+if ($gff)\n+{\n+\topen(my $G,$gff) or die "cannot open file: $gff";\n+\twhile(<$G>)\n+\t{\n+\t\tmy $line =$_;\n+\t\t$line =~s/\\n//g;\n+\t\t$line =~s/\\r//g;\n+\t\tmy @i = split(/\\t/,$line);\n+\t\tmy $chr = $i[0];\n+\t\tmy $feature = $i[2];\n+\t\tmy $strand = $i[6];\n+\t\tmy $start = $i[3];\n+\t\tmy $stop = $i[4];\n+\t\tmy $inf = $i[8];\n+\t\tif ($feature eq \'gene\')\n+\t\t{\n+\t\t\t if ($inf =~/Name=([\\w\\-\\.]+)[;\\s]*/){$inf = $1;}\n+\t\t\t$ann{$inf}{"start"}=$start;\n+\t\t\t$ann{$inf}{"stop"}=$stop;\n+\t\t\t$ann{$inf}{"strand"}=$strand;\n+\t\t\t$chr_of_gene{$inf} = $chr;\n+\t\t}\n+\t}\n+\tclose($G);\n+}\n+\n+\n+\n+my %IUPAC =\n+(\n+ \'[A/G]\'=> "R",\n+ \'[G/A]\'=> "R",\n+ \'[C/T]\'=> "Y",\n+ \'[T/C]\'=> "Y",\n+ \'[T/G]\'=> "K",\n+ \'[G/T]\'=> "K",\n+ \'[C/G]\'=> "S",\n+ 
\'[G/C]\'=> "S",\n+ \'[A/T]\'=> "W",\n+ \'[T/A]\'=> "W",\n+ \'[A/C]\'=> "M",\n+ \'[C/A]\'=> "M",\n+ \'[C/A/T]\'=> "H",\n+ \'[A/T/C]\'=> "H",\n+ \'[A/C/T]\'=> "H",\n+ \'[C/T/A]\'=> "H",\n+ \'[T/C/A]\'=> "H",\n+ \'[T/A/C]\'=> "H",\n+ \'[C/A/G]\'=> "V",\n+ \'[A/G/C]\'=> "V",\n+ \'[A/C/G]\'=> "V",\n+ \'[C/G/A]\'=> "V",\n+ \'[G/C/A]\'=> "V",\n+ \'[G/A/C]\'=> "V",\n+ \'[C/T/G]\'=> "B",\n+ \'[T/G/C]\'=> "B",\n+ \'[T/C/G]\'=> "B",\n+ \'[C/G/T]\'=> "B",\n+ \'[G/C/T]\'=> "B",\n+ \'[G/T/C]\'=> "B",\n+ \'[T/A/G]\'=> "D",\n+ \'[A/G/T]\'=> "D",\n+ \'[A/T/G]\'=> "D",\n+ \'[T/G/A]\'=> "D",\n+ \'[G/T/A]\'=> "D",\n+ \'[G/A/T]\'=> "D",\n+);\n+\n+my %snps_of_gene;\n+my %snps_of_gene2;\n+my %indiv_order;\n+my $indiv_list;\n+my %genotyping_infos;\n+my $num_line = 0;\n+my $genename_rank_in_snpeff = 4;\n+\n+my $find_annotations = `grep -c \'EFF=\' $input`;\n+\n+open(my $HAPMAP,">$out.hapmap");\n+print $HAPMAP "rs#\talleles\tchrom\tpos\tgene\tfeature\teffect\tcodon_change\tamino_acid_change\tMAF\tmissing_data";\n+open(my $VCF,$input);\n+while(<$VCF>)\n+{\n+\tmy $line = $_;\n+\tchomp($line);\n+\tmy @infos = split(/\\t/,$line);\n+\t\n+\tif (/^##INFO=\\<ID=EFF/ && /Amino_Acid_length \\| Gene_Name \\| Transcript_BioType \\| Gene_Coding/)\n+\t{\n+\t\t$genename_rank_in_snpeff = 8;\n+\t}\n+\n+\tif (scalar @infos > 9)\n+\t{\n+\t\tif (/#CHROM/)\n+\t\t{\n+\t\t\tfor (my $j=9;$j<=$#infos;$j++)\n+\t\t\t{\n+\t\t\t\tmy $individu = $infos[$j];\n+\t\t\t\t$indiv_list .= "\t$individu";\n+\t\t\t\t$indiv_order{$j} = $individu;\n+\t\t\t}\n+\t\t\tprint $HAPMAP "$indiv_list\\n";\n+\t\t}\n+\t\telsif (!/^#/)\n+\t\t{\n+\t\t\t$num_line++;\n+\n+\t\t\tmy $chromosome = $infos[0];\n+\t\t\tmy $chromosome_position = $infos[1];\n+\t\t\tmy $ref_allele = $infos[3];\n+\t\t\tmy $alt_allele = $infos[4];\n+ \t\n+\t\t\tif ($ref_allele =~/\\w\\w+/)\n+\t\t\t{\n+\t\t\t\t$ref_allele = "A";\n+\t\t\t\t$alt_allele = "T";\n+\t\t\t}\n+\t\t\telsif ($alt_allele =~/\\w\\w+/)\n+\t\t\t{\n+\t\t\t\t$ref_allele = "T";\n+\t\t\t\t$alt_allele = 
"A";\n+\t\t\t}\n+\t\t\t\n+\t\t\tmy $info = $infos[7];\n+\t\t\tmy $is_in_exon = "#";\n+\t\t\tmy $is_synonyme = "#";\n+\t\t\tmy $gene;\n+\t\t\tif ($find_annotations > 1)\n+\t\t\t{\n+'..b'\n+\t\t\tmy $stop = $ann{$seq}{"stop"};\n+\t\t\tmy $strand = $ann{$seq}{"strand"};\n+\t\t\tmy $genelength = $stop - $start+1;\n+\t\t\tmy $chr = $chr_of_gene{$seq};\n+\t\tmy $refseq = substr($ref_sequences{$chr},$start-1,$genelength);\n+\t\tif ($strand eq \'-\')\n+\t\t{\n+\t\t\t$refseq =~ tr /atcgATCG/tagcTAGC/; $refseq = reverse($refseq);\n+\t\t}\t\n+\t\t#print "$seq $chr $start $stop $refseq \\n";\n+\t\tmy $previous = 0;\n+\t\tforeach my $pos(sort {$a<=>$b} keys(%hashreal))\n+\t\t{\n+\t\t\tmy $length = $pos - $previous - 1;\n+\t\t\t$flanking5{$pos} = substr($refseq,$previous,$length);\n+\t\t\t$previous = $pos;\n+\t\t}\n+\t\tmy $length = length($refseq) - $previous;\n+\t\tmy $flanking3 = substr($refseq,$previous,$length);\n+\t\tforeach my $ind(@individuals_list)\n+\t\t{\n+\t\t\tmy $nb_missing_data_for_this_individual = 0;\n+\t\t\tif ($ind)\n+\t\t\t{\n+ my $alignment_for_ind = "";\n+ my $seq_without_underscore = $seq;\n+ $seq_without_underscore =~s/_//g;\n+ $alignment_for_ind .= ">$seq_without_underscore" . "_$ind" . "_1\\n";\n+ foreach my $pos(sort {$a<=>$b} keys(%hashreal))\n+ {\n+ $alignment_for_ind .= $flanking5{$pos};\n+ my $geno = $snps_of_gene{$seq}{$pos}{$ind};\n+ $geno =~s/N/?/g;\n+ if ($geno =~/\\?/){$nb_missing_data_for_this_individual++;}\n+ my @alleles = split("",$geno);\n+ $alignment_for_ind .= $alleles[0];\n+ if ($alleles[0] eq $alleles[1])\n+ {\n+ $alignments_ind{$ind} .= $alleles[1];\n+ }\n+ else\n+ {\n+ my $snp_type = "[" . $alleles[0] . "/" . $alleles[1] . "]";\n+ $alignments_ind{$ind} .= $IUPAC{$snp_type};\n+ }\n+ }\n+ $alignment_for_ind .= $flanking3;\n+\t\t\t\t\t\t$alignment_for_ind .= "\\n";\n+\t\t\t\n+\t\t\t\n+ $alignment_for_ind .= ">$seq_without_underscore" . "_$ind" . 
"_2\\n";\n+ foreach my $pos(sort {$a<=>$b} keys(%hashreal))\n+ {\n+ $alignment_for_ind .= $flanking5{$pos};\n+ my $geno = $snps_of_gene{$seq}{$pos}{$ind};\n+ $geno =~s/N/?/g;\n+ my @alleles = split("",$geno);\n+ $alignment_for_ind .= $alleles[1];\n+ }\n+ $alignment_for_ind .= $flanking3;\n+\t\t\t\t\t\t$alignment_for_ind .= "\\n";\n+ if (keys(%hashreal) != $nb_missing_data_for_this_individual)\n+ {\n+ print $ALIGN_EGGLIB $alignment_for_ind;\n+ }\n+\t\t\t}\n+\t\t}\n+\t}\n+}\n+close($ALIGN_EGGLIB);\n+\n+\n+\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCF2Hapmap/vcf2FastaAndHapmap.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCF2Hapmap/vcf2FastaAndHapmap.sh Fri Jul 10 04:39:30 2015 -0400 |
[ |
@@ -0,0 +1,45 @@ +#!/bin/bash + +tool_path=$(dirname $0) + + + +filein=$1 +fileout_label=$2 +fileout=$3 +option=$4 + +option_text='' + + +if [ "$option" != "none" ] +then fileout_seq=$5 + fileout_fa1=$6 + filefasta=$7 + if [ "$option" == "fasta_gff" ] + then filegff=$8 + fi +fi + +if [ "$option" == "fasta" ] +then option_text="--reference $filefasta" +fi + +if [ "$option" == "fasta_gff" ] +then option_text="--reference $filefasta --gff $filegff" +fi + + +perl $tool_path/VCF2FastaAndHapmap.pl --vcf $filein --out $fileout_label $option_text + + +cp $fileout_label.hapmap $fileout ; rm $fileout_label.hapmap + +if [ "$option" == "fasta_gff" ] +then cp $fileout_label.flanking.txt $fileout_seq ; rm $fileout_label.flanking.txt ; cp $fileout_label.gene_alignment.fas $fileout_fa1 ; rm $fileout_label.gene_alignment.fas ; +fi + +if [ "$option" == "fasta" ] +then cp $fileout_label.flanking.txt $fileout_seq ; rm $fileout_label.flanking.txt ; +fi + |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCF2Hapmap/vcf2FastaAndHapmap.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCF2Hapmap/vcf2FastaAndHapmap.xml Fri Jul 10 04:39:30 2015 -0400 |
[ |
b'@@ -0,0 +1,251 @@\n+<tool id="sniplay_vcf2fastaandhapmap" name="VCF to Hapmap" version="1.1.0">\n+ \n+ <!-- [REQUIRED] Tool description displayed after the tool name -->\n+ <description> Convert VCF to Hapmap </description>\n+ \n+ <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->\n+ <requirements>\n+ <requirement type="binary">perl</requirement>\n+ </requirements>\n+ \n+ <!-- [OPTIONAL] Command to be executed to get the tool\'s version string -->\n+ <version_command>\n+<!--\n+ tool_binary -v\n+-->\n+ </version_command>\n+ \n+ <!-- [REQUIRED] The command to execute -->\n+ <command interpreter="bash">\n+\tvcf2FastaAndHapmap.sh $filein $fileout_label $fileout $optional.file_opt \n+\t#if str( $optional.file_opt ) != "none":\n+\t\t$fileout_seq $fileout_fa1 $filefasta \n+\t\t#if str( $optional.file_opt ) == "fasta_gff":\n+\t\t$filegff\n+\t\t#end if\n+\t#end if\n+ </command>\n+ \n+ <!-- [REQUIRED] Input files and tool parameters -->\n+ <inputs>\n+\t<param name="filein" type="data" format="vcf" optional="false" label="VCF input" />\n+\t<param name="fileout_label" type="text" value="input" optional="false" label="Output file basename"/>\n+\t<conditional name="optional" >\n+\t <param name="file_opt" type="select" label="Optional files" >\n+\t \t<option value="none" selected="true">No</option>\n+\t \t<option value="fasta">Fasta</option>\n+ \t<option value="fasta_gff">Fasta and GFF</option>\n+ </param>\n+\t <when value="none" />\n+ <when value="fasta">\n+\t\t<param name="filefasta" type="data" format="fasta" optional="false" label="Fasta file input" />\n+ </when>\n+\t <when value="fasta_gff">\n+\t\t<param name="filefasta" type="data" format="fasta" optional="false" label="Fasta file input" />\n+\t\t<param name="filegff" type="data" format="gff" optional="false" label="GFF file input" help="VCF file must be annotated" />\n+ </when>\n+ </conditional>\n+ </inputs>\n+ \n+ <!-- [REQUIRED] Output files -->\n+ <outputs>\n+\t<data name="fileout" 
format="txt" label="${fileout_label}.hapmap" />\n+\t<data name="fileout_seq" format="txt" label="${fileout_label}.flanking.txt">\n+\t\t<filter>(optional[\'file_opt\'] != \'none\')</filter>\n+\t</data>\n+\t<data name="fileout_fa1" format="fasta" label="${fileout_label}.gene_alignment.fas">\n+\t\t<filter>(optional[\'file_opt\'] == \'fasta_gff\')</filter>\n+\t</data>\n+ </outputs>\n+ \n+ <!-- [STRONGLY RECOMMANDED] Exit code rules -->\n+ <stdio>\n+ <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->\n+ <exit_code range="1:" level="fatal" />\n+ </stdio>\n+ \n+ <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->\n+ <tests>\n+ <!-- [HELP] Test files have to be in the ~/test-data directory -->\n+ <test>\n+ <param name="filein" value="sample.vcf" />\n+ <param name="otpional.file_opt" value="none" />\n+ <output name="fileout" file="result1.hapmap" />\n+ </test>\n+\t<test>\n+ <param name="filein" value="sample.vcf" />\n+\t <param name="otpional.file_opt" value="fasta" />\n+ <param name="filefasta" value="reference.fa" />\n+ <output name="fileout" file="result2.hapmap" />\n+ <output name="fileout_seq" file="result2.flanking.txt" />\n+ <output name="fileout_fa1" file="result2.gene_alignment.fas" />\n+ </test>\n+ </tests>\n+ \n+ <!-- [OPTIONAL] Help displayed in Galaxy -->\n+ <help>\n+\n+\n+.. class:: infomark\n+\n+**Authors** Dereeper Alexis (alexis.dereeper@ird.fr), IRD, South Green platform\n+\n+ | **Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1).\n+\n+.. class:: infomark\n+\n+**Galaxy integration** Andres Gwendoline, Institut Fran\xc3\xa7ais de Bioinformatique. \n+\n+.. 
class:: infomark\n+\n+**Support** For any questions, please send an e-mail '..b's fasta file and GFF file.\n+\n+------------\n+Output files\n+------------\n+\n+Hapmap file \n+\tHapmap converted file\n+\n+Additional files \n+\tIf you add fasta and/or GFF file as reference, you obtain 3 more files : One with flanking sequence and a fasta file\n+\n+---------------------------------------------------\n+\n+---------------\n+Working example\n+---------------\n+\n+Input files\n+===========\n+\n+VCF file\n+---------\n+\n+::\n+\n+\t#fileformat=VCFv4.1\n+\t#FILTER=<ID=LowQual,Description="Low quality">\n+\t#FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+\t[...]\n+\tCHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tCATB1\n+\tchr1\t2209\t.\tG\tT\t213.84\t.\tAC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)\tGT:AD:DP:GQ:PL\t1/1:0,7:7:18:242,18,0\n+\n+Fasta file\n+----------\n+\n+\n+::\n+\n+\t>chr1\n+\tCAGTAAAGTTTGCAAAGAGATTCTGGCAAAGTT\n+\n+Parameters\n+==========\n+\n+Output name -> input\n+\n+Optional files -> Fasta\n+\n+\n+Output files\n+============\n+\n+input.hapmap\n+------------\n+\n+::\n+\n+ 
rs#\talleles\tchrom\tpos\tstrand\tassembly#\tcenter\tprotLSID\tassayLSID\tpanelLSID\tQCcode\tCATB1\n+\tchr1:2209\tG/T\tchr1\t2209\t+\tNA\tNA\tNA\tNA\tNA\tNA\tGG\tTT\n+\tchr1:2232\tA/C\tchr1\t2232\t+\tNA\tNA\tNA\tNA\tNA\tNA\tAA\tCC\n+\n+input.flanking.txt\n+------------------\n+\n+::\n+\n+\tchr1-2209,GTCGCATCTGCAGCATATAGCCAACCTTCAACTTGCAGCTAAAACTCATCATCTCTTTCT[G/T]ACTGGCTTAACGATATTGTAAGMTGACTCAGAGGCCCACTTTTTTTTTAAAAATYAGCCT,0,0,0,Project_name,0,diploid,Other,Forward\n+\tchr1-2232,ACCTTCAACTTGCAGCTAAAACTCATCATCTCTTTCTKACTGGCTTAACGATATTGTAAG[A/C]TGACTCAGAGGCCCACTTTTTTTTTAAAAATYAGCCTGTCCCCAGCCGTGCTGACTGGGC,0,0,0,Project_name,0,diploid,Other,Forward\n+\n+input.gene_alignment.fas\n+------------------------\n+\n+::\n+\n+\t>chr1_CATB1_1\n+\tTCCTCAAACTTTCTTCAGCGCCTATGAATACAGCGTGCTATAGTTACGTGGGGCGTTT\n+\n+\t\n+ </help>\n+\n+ <citations>\n+ <!-- [HELP] As DOI or BibTex entry -->\n+ \t<citation type="bibtex">@article{Dereeper03062015,\n+author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-Fran\xc3\xa7ois and Ruiz, Manuel}, \n+title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations},\n+year = {2015}, \n+doi = {10.1093/nar/gkv351}, \n+abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. 
Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, \n+URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, \n+eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, \n+journal = {Nucleic Acids Research} \n+}\n+\n+ \t}</citation>\n+\n+ </citations>\n+ \n+</tool>\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolFilter/VCFToolsFilter.pl --- a/VCFToolFilter/VCFToolsFilter.pl Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,214 +0,0 @@ - -#!/usr/bin/perl - -use strict; -use Switch; -use Getopt::Long; -use Bio::SeqIO; - -my $usage = qq~Usage:$0 <args> [<opts>] - -where <args> are: - - -i, --input <VCF input> - -o, --out <Output basename> - - <opts> are: - - -s, --samples <Samples to be analyzed. Comma separated list> - -c, --chromosomes <Chromosomes to be analyzed. Comma separated list> - -e, --export <Output format (VCF/freq/plink. Default: VCF> - -f, --frequency <Minimum MAF. Default: 0.001> - -m, --max_freq <Maximum MAF. Default: 0.5> - -a, --allow_missing <Allowed missing data proportion per site. Must be comprised between 0 and 1. Default: 0> - -n, --nb_alleles <Accepted number of alleles (min,max). Default: 2,4> - -t, --type <Type of polymorphisms to keep (ALL/SNP/INDEL). Default: ALL> - -b, --bounds <Lower bound and upper bound for a range of sites to be processed (start,end). Default: 1, 100000000> -~; -$usage .= "\n"; - -my ($input,$out); - - -#my $indel_size_max = 500; -#my $indel_size_min = 1; -my $frequency_max = 0.5; -my $frequency_min = 0.001; -my $pos_max = 100000000000; -my $pos_min = 0; -my $filter_snp_type = "all"; - -my $missing_data = 0; -my $export = "VCF"; -my $type = "ALL"; -my $nb_alleles; -my $bounds; -my $samples; -my $chromosomes; - -GetOptions( - "input=s" => \$input, - "out=s" => \$out, - "samples=s" => \$samples, - "chromosomes=s" => \$chromosomes, - "frequency=s" => \$frequency_min, - "max_freq=s" => \$frequency_max, - "allow_missing=s"=> \$missing_data, - "export=s" => \$export, - "type=s" => \$type, - "nb_alleles=s" => \$nb_alleles, - "bounds=s" => \$bounds, -); - - -die $usage - if ( !$input || !$out); - -if ($samples && $samples =~/^([\w\,]+)\s*$/){ - $samples = $1; -} -elsif ($samples){ - die "Error: Samples must be a comma separated list of string\n"; -} -if ($chromosomes && $chromosomes =~/^([\w\,]+)\s*$/){ - $chromosomes = $1; -} -elsif($chromosomes){ - die "Error: Chromosomes must be a comma separated list of string\n"; -} -if ($bounds && 
$bounds =~/^([\d\,]+)\s*$/){ - $bounds = $1; -} -elsif($bounds){ - die "Error: Bounds must be a comma separated list of integers\n"; -} - -if ($frequency_min && $frequency_min =~/^([\d\.]+)\s*$/){ - $frequency_min = $1; -} -elsif ($frequency_min){ - die "Error: frequency must be an integer\n"; -} -if ($frequency_max && $frequency_max =~/^([\d\.]+)\s*$/){ - $frequency_max = $1; -} -elsif($frequency_max){ - die "Error: frequency must be an integer\n"; -} -if ($missing_data && $missing_data =~/^([\d\.]+)\s*$/){ - $missing_data = $1; -} -elsif ($missing_data){ - die "Error: Missing data must be an integer\n"; -} -if ($nb_alleles && $nb_alleles =~/^([\d\.\,]+)\s*$/){ - $nb_alleles = $1; -} -elsif($nb_alleles){ - die "Error: Nb alleles must be two integers\n"; -} -if ($export && $export =~/^([\w]+)\s*$/){ - $export = $1; -} -elsif($export){ - die "Error: Export must be a string\n"; -} -if ($type && $type =~/^([\w]+)\s*$/){ - $type = $1; -} -elsif($type){ - die "Error: Type must be a string\n"; -} - - -my @dnasamples; -if ($samples) -{ - @dnasamples = split(",",$samples); -} -my @nalleles; -if ($nb_alleles) -{ - @nalleles = split(",",$nb_alleles); -} -my @boundaries; -if ($bounds) -{ - @boundaries = split(",",$bounds); -} -my @chromosomes_list; -if ($chromosomes) -{ - @chromosomes_list = split(",",$chromosomes); -} - - -my $experiment = "chromosomes"; -my $table = ""; -my %genes; -my @snp_ids; -my @snp_ids_and_positions; -my @snp_ids_and_positions_all; -my $gene; -my $snp_num = 0; -my %ref_sequences; -my %snps_of_gene; - - - - -my $indiv_cmd = ""; -if (@dnasamples) -{ - $indiv_cmd = "--indv " . join(" --indv ",@dnasamples); -} - -my $chrom_cmd = ""; -if (@chromosomes_list) -{ - $chrom_cmd = "--chr " . 
join(" --chr ",@chromosomes_list); -} - -my $export_cmd = "--recode"; -if ($export eq "freq") -{ - $export_cmd = "--freq"; -} -if ($export eq "plink") -{ - $export_cmd = "--plink"; -} - - - -my $nb_alleles_cmd = "--min-alleles 1 --max-alleles 4"; -if (@nalleles) -{ - $nb_alleles_cmd = "--min-alleles $nalleles[0] --max-alleles $nalleles[1]"; -} -my $bounds_cmd = "--from-bp 1 --to-bp 100000000"; -if (@boundaries) -{ - $bounds_cmd = "--from-bp $boundaries[0] --to-bp $boundaries[1]"; -} - - -my $type_cmd = ""; -if ($type eq "INDEL") -{ - $type_cmd = "--keep-only-indels"; -} -if ($type eq "SNP") -{ - $type_cmd = "--remove-indels"; -} - - -system("vcftools --vcf $input --out $out --keep-INFO-all --remove-filtered-all $type_cmd $export_cmd $chrom_cmd $indiv_cmd $nb_alleles_cmd --maf $frequency_min --max-maf $frequency_max --max-missing $missing_data >>vcftools.log 2>&1"); - - - - - - - |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolFilter/test-data/result.log --- a/VCFToolFilter/test-data/result.log Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,21 +0,0 @@ - -VCFtools - v0.1.12b -(C) Adam Auton and Anthony Marcketta 2009 - -Parameters as interpreted: - --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat - --chr chr1 - --recode-INFO-all - --maf 0.001 - --max-alleles 4 - --max-maf 0.5 - --min-alleles 2 - --max-missing 1 - --out filtered - --recode - --remove-filtered-all - -After filtering, kept 1 out of 1 Individuals -Outputting VCF file... -After filtering, kept 3616 out of a possible 4955 Sites -Run Time = 0.00 seconds |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolFilter/test-data/result.vcf --- a/VCFToolFilter/test-data/result.vcf Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,3661 +0,0 @@\n-##fileformat=VCFv4.1\n-##FILTER=<ID=LowQual,Description="Low quality">\n-##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n-##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n-##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n-##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n-##INFO=<ID=AF,Number=A,Type'..b'0012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:30,25:55:99:802,0,993\n-chr1\t188173\t.\tG\tA\t697.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.617;DP=42;Dels=0.00;FS=2.786;HaplotypeScore=1.9991;MLEAC=1;MLEAF=0.500;MQ=59.09;MQ0=0;MQRankSum=-0.013;QD=16.61;ReadPosRankSum=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n-chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n-chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n-chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n-chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n-chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n-chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n-chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n-chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n-chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolFilter/test-data/sample.vcf --- a/VCFToolFilter/test-data/sample.vcf Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,5000 +0,0 @@\n-##fileformat=VCFv4.1\n-##FILTER=<ID=LowQual,Description="Low quality">\n-##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n-##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n-##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n-##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n-##INFO=<ID=AF,Number=A,Type'..b'm=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n-chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n-chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n-chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n-chr1\t188357\t.\tC\tT\t1327.77\t.\tAC=2;AF=1.00;AN=2;DP=36;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=24.46;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t1/1:0,36:36:99:1356,105,0\n-chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n-chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n-chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n-chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n-chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n-chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolFilter/vcfToolsFilter.sh --- a/VCFToolFilter/vcfToolsFilter.sh Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,46 +0,0 @@ -#!/bin/bash - -tool_path=$(dirname $0) - -filein=$1 -fileout_label=$2 -fileout=$3 -filelog=$4 -export=$5 -frequency=$6 -max_freq=$7 -allow_missing=$8 -nb_alleles_min=$9 -nb_alleles_max=${10} -type=${11} -bound_start=${12} -bound_end=${13} - - -if [ "${14}" != "None" ] -then samples="--samples ${14}" -fi - -if [ "${15}" != "None" ] -then chromosomes="--chromosomes ${15}" -fi - -if [ "$bound_start" -gt "$bound_end" ] -then tmp=$bound_start ; bound_start=$bound_end ; bound_end=$tmp ; echo "Warning : Lower bound must be lower than greater bound!" >&2 -fi - -if [ "$nb_alleles_min" -gt "$nb_alleles_max" ] -then tmp=$nb_alleles_min ; nb_alleles_min=$nb_alleles_max ; nb_alleles_max=$tmp ; echo "Warning : Minimum number of alleles must be lower than maximum number of allele!" >&2 -fi - -perl $tool_path/VCFToolsFilter.pl --input $filein --out $fileout_label --export $export --frequency $frequency --max_freq $max_freq --allow_missing $allow_missing --nb_alleles $nb_alleles_min','$nb_alleles_max --type $type --bounds $bound_start','$bound_end $samples $chromosomes - -if [ "$export" = "VCF" ] -then cp $fileout_label.recode.vcf $fileout ; rm $fileout_label.recode.vcf -elif [ "$export" = "freq" ] -then cp $fileout_label.frq $fileout ; rm $fileout_label.frq -else cp $fileout_label.ped $fileout; cp $fileout_label.map ${16} ; rm $fileout_label.ped $fileout_label.map -fi - -cp vcftools.log $filelog -rm vcftools.log |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolFilter/vcfToolsFilter.xml --- a/VCFToolFilter/vcfToolsFilter.xml Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,268 +0,0 @@\n-<tool id="sniplay_vcftoolsfilter" name="VCF tools filter" version="1.1.1">\n- \n- <!-- [REQUIRED] Tool description displayed after the tool name -->\n- <description> Filter VCF using VCFtools</description>\n- \n- <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->\n- <requirements>\n- <requirement type="binary">perl</requirement>\n-\t<requirement type="package" version="0.1.12b">VCFtools</requirement>\n- </requirements>\n- \n- <!-- [OPTIONAL] Command to be executed to get the tool\'s version string -->\n- <version_command>\n-<!--\n- tool_binary -v\n--->\n- </version_command>\n- \n- <!-- [REQUIRED] The command to execute -->\n- <command interpreter="perl">\n-\tvcfToolsFilter.sh $filein $fileout_label $fileout $filelog $export $frequency $max_freq $allow_missing $nb_alleles_min $nb_alleles_max $type_p $bound_start $bound_end\n-\t#if str( $samples ) == "":\n-\t\'None\'\n-\t#else\n-\t$samples\n-\t#end if\n-\t#if str( $chromosomes ) == "":\n-\t\'None\'\n-\t#else\n-\t$chromosomes\n-\t#end if\n-\t#if str( $export ) == "plink":\n-\t$fileout_map\n-\t#else\n-\t\'\'\n-\t#end if\n- </command>\n- \n- <!-- [REQUIRED] Input files and tool parameters -->\n- <inputs>\n-\t<param name="filein" type="data" format="vcf" optional="false" label="VCF input" />\n-\t<param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>\n-\t<param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">\n-\t\t<validator type="regex" message="Please enter a comma separated list.">^\\w+(,\\w+)*$</validator>\n-\t</param>\n-\t<param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. 
Comma separated list">\n-\t <validator type="regex" message="Please enter a comma separated list.">^\\w+(,\\w+)*$</validator>\n- </param>\n-\t<param name="export" type="select" label="Output format" >\n-\t <option value="VCF" selected="true">VCF</option>\n-\t <option value="freq">freq</option>\n- <option value="plink">plink</option>\n- </param>\n-\t<param name="frequency" type="float" value="0.001" label="Minimum MAF." help="Minimum frequency." />\n-\t<param name="max_freq" type="float" value="0.5" label="Maximum MAF." help="Maximum frequency." />\n-\t<param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />\n-\t<param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />\n-\t<param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />\n- <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >\n- <option value="ALL" selected="true">All</option>\n- <option value="SNP">SNP</option>\n- <option value="INDEL">Indel</option>\n- </param>\n-\t<param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />\n-\t<param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." 
/>\n- </inputs>\n- \n- <!-- [REQUIRED] Output files -->\n- <outputs>\n-\t<data name="fileout" format="vcf" label="${fileout_label}.#if str($export)==\'plink\' then \'ped\' else \'\' # #if str($export)==\'freq\' then \'frq\' else \'\' # #if str($export)==\'VCF\' then \'vcf\' else \'\' #" >\n-\t\t<change_format>\n- \t<when input="export" value="freq" format="tabular" />\n-\t\t\t<when input="export" value="plink" format="txt" />\n-\t\t</change_format>\t\n-\t</data>\n-\t<data name="fileout_map" format="txt" label="${fileout_label}.map">\n-\t\t<filter>(export == \'plink\')</filter>\n-\t</data>\n-\t<data name="filelog" format="txt" label='..b'alue="0.001" />\n- <param name="max_freq" value="0.5" />\n- <param name="allow_missing" value="0" />\n- <param name="nb_alleles_min" value="2" />\n-\t<param name="nb_alleles_max" value="4" />\n- <param name="type_p" value="ALL" />\n- <param name="bound_start" value="1" />\n- <param name="bound_end" value="100000000" />\n- <output name="fileout" file="result.vcf" />\n- <output name="filelog" file="result.log" />\n- </test>\n- </tests>\n- \n- <!-- [OPTIONAL] Help displayed in Galaxy -->\n- <help>\n-\n-.. class:: infomark\n-\n-**Authors** \n-\n----------------------------------------------------\n-\n-.. class:: infomark\n-\n-**Please cite** If you use this tool, please cite Dereeper et al. 
2015 in prep.\n-\n----------------------------------------------------\n-\n-================\n-VCF tools filter\n-================\n-\n------------\n-Description\n------------\n-\n- Filter VCF file \n-\n------------------\n-Workflow position\n------------------\n-\n-**Upstream tools**\n-\n-=========== ========================== =======\n-Name output file(s) format \n-=========== ========================== =======\n-=========== ========================== =======\n-\n-\n-**Downstream tools**\n-\n-=========== ========================== =======\n-Name output file(s) format\n-=========== ========================== =======\n-=========== ========================== =======\n-\n-\n-----------\n-Input file\n-----------\n-\n-VCF file\n-\tVCF file with all SNPs\n-\n-----------\n-Parameters\n-----------\n-\n-Output file basename\n-\tPrefix for the output VCF file\n-\n-Samples\n- Samples to be analyzed. Comma separated list\n-\n-Chromosomes\n-\tChromosomes to be analyzed. Comma separated list\n-\n-Output format\n-\tVCF/freq/plink\n-\n-Minimum MAF\n-\tMinimum frequency\n-\n-Maximum MAF\n-\tMaximum frequency\n-\n-Missing data proportion\n-\tAllowed missing data proportion per site. 
Must be comprised between 0 and 1.\n-\n-Number of alleles\n-\tAccepted number of alleles min and max.\n-\n-Polymorphisms\n-\tType of polymorphisms to keep (ALL/SNP/INDEL).\n-Bounds\n-\tLower bound and upper bound for a range of sites to be processed.\n-\n-------------\n-Output files\n-------------\n-\n-VCF file\n-\tVCF file filtered \n-\n-Log file\n-\n----------------------------------------------------\n-\n----------------\n-Working example\n----------------\n-\n-Input files\n-===========\n-\n-VCF file\n----------\n-\n-::\n-\n-\t#fileformat=VCFv4.1\n-\t#FILTER=<ID=LowQual,Description="Low quality">\n-\t#FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n-\t[...]\n-\tCHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tCATB1\n-\tchr1\t2209\t.\tG\tT\t213.84\t.\tAC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)\tGT:AD:DP:GQ:PL\t1/1:0,7:7:18:242,18,0\n-\n-\n-Parameters\n-==========\n-\n-Output name -> filtered_chr1\n-\n-Chromosomes -> chr1\n-\n-Output format -> VCF\n-\n-Minimum MAF -> 0.001\n-\n-Maximum MAF -> 0.5\n-\n-Missing data proportion -> 1\n-\n-Number of alleles min -> 2\n-\n-Number of alleles max -> 4\n-\n-Polymorphisms -> All\n-\n-Lower bound -> 1\n-\n-Upper bound -> 100000000\n-\n-\n-Output files\n-============\n-\n-filtered_genelist_intron.vcf\n----------\n-\n-::\n-\n- #fileformat=VCFv4.1\n- #FILTER=<ID=LowQual,Description="Low quality">\n- #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n- [...]\n- CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 
CATB1\n-\tchr1\t5059\t.\tC\tG\t146.84\t.\tAC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)\tGT:AD:DP:GQ:PL\t1/1:0,8:8:18:175,18,0\n-\n-\n- </help>\n- \n-</tool>\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolsStats/VCFToolsStats.pl --- a/VCFToolsStats/VCFToolsStats.pl Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,71 +0,0 @@ - -#!/usr/bin/perl - -use strict; -use Switch; -use Getopt::Long; -use Bio::SeqIO; - -my $usage = qq~Usage:$0 <args> [<opts>] - -where <args> are: - - -i, --input <VCF input> - -o, --out <output basename> -~; -$usage .= "\n"; - -my ($input,$out); - -GetOptions( - "input=s" => \$input, - "out=s" => \$out -); - - -die $usage - if ( !$input); - - - -my $nb_gene = `grep -c mRNA $input`; -$nb_gene =~s/\n//g; -my $nb_intergenic = `grep -c INTERGENIC $input`; -$nb_intergenic =~s/\n//g; - -my $nb_intron = `grep -c INTRON $input`; -$nb_intron =~s/\n//g; -my $nb_UTR = `grep -c UTR $input`; -$nb_UTR =~s/\n//g; -my $nb_exon = $nb_gene - $nb_intron - $nb_UTR; - -my $nb_ns = `grep -c NON_SYNONYMOUS_CODING $input`; -$nb_ns =~s/\n//g; -my $nb_s = $nb_exon - $nb_ns; - - - - -#system("$VCFTOOLS_EXE --vcf $input --remove-filtered-all --out $out --hardy >>vcftools.log 2>&1"); -system("vcftools --vcf $input --remove-filtered-all --out $out --het >>vcftools.log 2>&1"); -system("vcftools --vcf $input --remove-filtered-all --out $out --TsTv-summary >>vcftools.log 2>&1"); -system("vcftools --vcf $input --remove-filtered-all --out $out --missing-indv >>vcftools.log 2>&1"); - -open(my $G,">$out.annotation"); -print $G "Genic $nb_gene\n"; -print $G "Intergenic $nb_intergenic\n"; -print $G "========\n"; -print $G "Intron $nb_intron\n"; -print $G "Exon $nb_exon\n"; -print $G "UTR $nb_UTR\n"; -print $G "========\n"; -print $G "Non-syn $nb_ns\n"; -print $G "Synonym $nb_s\n"; -close($G); - - - - - - - |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolsStats/test-data/result.TsTv.summary --- a/VCFToolsStats/test-data/result.TsTv.summary Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,9 +0,0 @@ -MODEL COUNT -AC 371 -AG 1467 -AT 562 -CG 330 -CT 1659 -GT 397 -Ts 3126 -Tv 1660 |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolsStats/test-data/result.annotation --- a/VCFToolsStats/test-data/result.annotation Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,9 +0,0 @@ -Genic 4489 -Intergenic 466 -======== -Intron 960 -Exon 3248 -UTR 281 -======== -Non-syn 226 -Synonym 3022 |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolsStats/test-data/result.het --- a/VCFToolsStats/test-data/result.het Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -INDV O(HOM) E(HOM) N_SITES F -CATB1 0 0.0 3616 0.00000 |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolsStats/test-data/result.imiss --- a/VCFToolsStats/test-data/result.imiss Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -INDV N_DATA N_GENOTYPES_FILTERED N_MISS F_MISS -CATB1 4813 0 0 0 |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolsStats/test-data/result.log --- a/VCFToolsStats/test-data/result.log Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,44 +0,0 @@ - -VCFtools - v0.1.12b -(C) Adam Auton and Anthony Marcketta 2009 - -Parameters as interpreted: - --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat - --het - --out vcf_stats - --remove-filtered-all - -After filtering, kept 1 out of 1 Individuals -Outputting Individual Heterozygosity - Individual Heterozygosity: Only using biallelic SNPs. -After filtering, kept 4813 out of a possible 4955 Sites -Run Time = 0.00 seconds - -VCFtools - v0.1.12b -(C) Adam Auton and Anthony Marcketta 2009 - -Parameters as interpreted: - --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat - --out vcf_stats - --TsTv-summary - --remove-filtered-all - -After filtering, kept 1 out of 1 Individuals -Outputting Ts/Tv summary -Ts/Tv ratio: 1.883 -After filtering, kept 4813 out of a possible 4955 Sites -Run Time = 0.00 seconds - -VCFtools - v0.1.12b -(C) Adam Auton and Anthony Marcketta 2009 - -Parameters as interpreted: - --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat - --missing-indv - --out vcf_stats - --remove-filtered-all - -After filtering, kept 1 out of 1 Individuals -Outputting Individual Missingness -After filtering, kept 4813 out of a possible 4955 Sites -Run Time = 0.00 seconds |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolsStats/test-data/sample.vcf --- a/VCFToolsStats/test-data/sample.vcf Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,5000 +0,0 @@\n-##fileformat=VCFv4.1\n-##FILTER=<ID=LowQual,Description="Low quality">\n-##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n-##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n-##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n-##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n-##INFO=<ID=AF,Number=A,Type'..b'm=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n-chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n-chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n-chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n-chr1\t188357\t.\tC\tT\t1327.77\t.\tAC=2;AF=1.00;AN=2;DP=36;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=24.46;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t1/1:0,36:36:99:1356,105,0\n-chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n-chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n-chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n-chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n-chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n-chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolsStats/vcfToolsStats.sh --- a/VCFToolsStats/vcfToolsStats.sh Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,23 +0,0 @@ -#!/bin/bash - -tool_path=$(dirname $0) - -filein=$1 -fileout_label=$2 -fileout_annot=$3 -fileout_het=$4 -fileout_imiss=$5 -fileout_sum=$6 -filelog=$7 - - - -perl $tool_path/VCFToolsStats.pl --input $filein --out $fileout_label - -cp $fileout_label.annotation $fileout_annot ; rm $fileout_label.annotation -cp $fileout_label.het $fileout_het ; rm $fileout_label.het -cp $fileout_label.imiss $fileout_imiss ; rm $fileout_label.imiss -cp $fileout_label.TsTv.summary $fileout_sum ; rm $fileout_label.TsTv.summary - -cp vcftools.log $filelog -rm vcftools.log |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 VCFToolsStats/vcfToolsStats.xml --- a/VCFToolsStats/vcfToolsStats.xml Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,214 +0,0 @@ -<tool id="sniplay_vcftoolsstats" name="VCF tools Stats" version="1.0.0"> - - <!-- [REQUIRED] Tool description displayed after the tool name --> - <description> Various statistics from VCF using VCFtools</description> - - <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> - <requirements> - <requirement type="binary">perl</requirement> - <requirement type="package" version="0.1.13">VCFtools</requirement> - </requirements> - - <!-- [OPTIONAL] Command to be executed to get the tool's version string --> - <version_command> -<!-- - tool_binary -v ---> - </version_command> - - <!-- [REQUIRED] The command to execute --> - <command interpreter="perl"> - vcfToolsStats.sh $filein $fileout_label $fileout_annot $fileout_het $fileout_imiss $fileout_sum $filelog - </command> - - <!-- [REQUIRED] Input files and tool parameters --> - <inputs> - <param name="filein" type="data" format="vcf" optional="false" label="VCF input" /> - <param name="fileout_label" type="text" value="vcf_stats" optional="false" label="Output file basename"/> - </inputs> - - <!-- [REQUIRED] Output files --> - <outputs> - <data name="fileout_annot" format="txt" label="${fileout_label}.annotation" /> - <data name="fileout_het" format="txt" label="${fileout_label}.het" /> - <data name="fileout_imiss" format="txt" label="${fileout_label}.imiss" /> - <data name="fileout_sum" format="txt" label="${fileout_label}.TsTv.summary" /> - <data name="filelog" format="txt" label="${fileout_label}.log" /> - </outputs> - - <!-- [STRONGLY RECOMMANDED] Exit code rules --> - <stdio> - <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> - <exit_code range="1:" level="fatal" /> - </stdio> - - <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> - <tests> - <!-- [HELP] Test files have to be in the ~/test-data directory --> - <test> - <param name="filein" value="sample.vcf" /> - <output name="fileout_annot" 
file="result.annotation" /> - <output name="fileout_het" file="result.het" /> - <output name="fileout_imiss" file="result.imiss" /> - <output name="fileout_sum" file="result.TsTv.summary" /> - <output name="filelog" file="result.log" /> - </test> - </tests> - - <!-- [OPTIONAL] Help displayed in Galaxy --> - <help> - -.. class:: infomark - -**Authors** - ---------------------------------------------------- - -.. class:: infomark - -**Please cite** If you use this tool, please cite Dereeper et al. 2015 in prep. - ---------------------------------------------------- - -================ -VCF tools filter -================ - ------------ -Description ------------ - - Compute statistics on VCF file - ------------------ -Workflow position ------------------ - -**Upstream tools** - -=========== ========================== ======= -Name output file(s) format -=========== ========================== ======= -=========== ========================== ======= - - -**Downstream tools** - -=========== ========================== ======= -Name output file(s) format -=========== ========================== ======= -=========== ========================== ======= - - ----------- -Input file ----------- - -VCF file - VCF file with all SNPs - ----------- -Parameters ----------- - -Output file basename - Prefix for the output VCF file - ------------- -Output files ------------- - -.annotation file - Statistics on annotation/location along genome - -.het file - Statistics on heterozygosity of the individuals - -.imiss - Statistics on missing data of the inidividuals -.TsTv.summary - Statistics on mutation types and transition/transvertion number - -.log file - ---------------------------------------------------- - ---------------- -Working example ---------------- - -Input files -=========== - -VCF file ---------- - -:: - - #fileformat=VCFv4.1 - #FILTER=<ID=LowQual,Description="Low quality"> - #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the 
order listed"> - [...] - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1 - chr1 2209 . G T 213.84 . AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0 - - -Parameters -========== - -Output name -> vcf_stat - - -Output files -============ - -.annotation file ----------------- - -:: - - Genic 4489 - Intergenic 466 - ======== - Intron 960 - Exon 3248 - UTR 281 - ======== - Non-syn 226 - Synonym 3022 - -.het file ---------- - -:: - - INDV O(HOM) E(HOM) N_SITES F - CATB1 0 0.0 3616 0.00000 - -.imiss file ------------ - -:: - - INDV N_DATA N_GENOTYPES_FILTERED N_MISS F_MISS - CATB1 4813 0 0 0 - -.TsTv.summary file ------------------- - -:: - - MODEL COUNT - AC 371 - AG 1467 - AT 562 - CG 330 - CT 1659 - GT 397 - Ts 3126 - Tv 1660 - - - </help> - -</tool> |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 admixture/Admixture.pl --- a/admixture/Admixture.pl Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,159 +0,0 @@ -#!/usr/bin/perl - -use strict; -use Switch; -use Getopt::Long; -use Bio::SeqIO; - -my $usage = qq~Usage:$0 <args> [<opts>] -where <args> are: - -i, --input <input HAPMAP> - -o, --output <output> - -k, --kmin <K min. int> - -m, --maxK <K max. int> - -d, --directory <temporary directory> - -p, --path <path to executables> -~; -$usage .= "\n"; - -my ($input,$output,$kmin,$kmax,$directory,$path); - - -GetOptions( - "input=s" => \$input, - "output=s" => \$output, - "kmin=s" => \$kmin, - "maxK=s" => \$kmax, - "directory=s" => \$directory, - "path=s" => \$path -); - - -die $usage - if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$path); - -if ($kmin =~/^(\d+)\s*$/){ - $kmin = $1; -} -else{ - die "Error: kmin must be an integer\n"; -} -if ($kmax =~/^(\d+)\s*$/){ - $kmax = $1; -} -else{ - die "Error: kmax must be an integer\n"; -} - - -###################### -# create map file -###################### -open(my $M,">$directory/input.map"); -open(my $H,$input); -<$H>; -while(<$H>) -{ - my @infos = split(/\t/,$_); - print $M $infos[2] . "\t" . $infos[0] . "\t" . "0" . "\t" . $infos[3] . "\n"; -} -close($H); -close($M); - -###################### -# create ped file -###################### -system("$path/transpose.awk $input >$directory/input.ped.2"); - -open(my $P,">$directory/input.ped"); -open(my $P2,"$directory/input.ped.2"); -my $n = 0; -my $ind_num = 0; -my @individus; -while(<$P2>) -{ - $n++; - if ($n > 11) - { - my $line = $_; - $line =~s/N/0/g; - if (/^([^\s]+)\s+(.*)$/) - { - $ind_num++; - my $ind = $1; - push(@individus,$ind); - my $genoyping_line = $2; - print $P "$ind $ind_num 0 0 1 2"; - my @genotypes = split(/\s/,$genoyping_line); - foreach my $genotype(@genotypes) - { - $genotype =~s/N/0/g; - my @alleles = split("",$genotype); - print $P " " . 
join(" ",@alleles); - } - - print $P "\n"; - } - } -} -close($P2); -close($P); - -unlink("$directory/input.ped.2"); - -system("plink --file $directory/input --out $directory/out --make-bed --noweb >>$directory/plink.log 2>&1"); - - -################################### -# launch admixture for different K -################################### -my %errors; -for (my $k = $kmin; $k <= $kmax; $k++) -{ - system("admixture --cv $directory/out.bed $k >>$directory/log.$k 2>&1"); - my $cv_error_line = `grep -h CV $directory/log.$k`; - if ($cv_error_line =~/: (\d+\.*\d*)$/) - { - $errors{$1} = $k; - } - system("cat $directory/log.$k >>$directory/logs"); - system("echo '\n\n====================================\n\n' >>$directory/logs"); - system("cat out.$k.Q >>$directory/outputs.Q"); - system("echo '\n\n====================================\n\n' >>$directory/outputs.Q"); - system("cat out.$k.P >>$directory/outputs.P"); - system("echo '\n\n====================================\n\n' >>$directory/outputs.P"); -} - -my @sorted_errors = sort {$a<=>$b} keys(%errors); -my $best_K = $errors{@sorted_errors[0]}; - - -#system("cp -rf out.$best_K.Q $directory/output"); - -open(BEST1,"out.$best_K.Q"); -open(BEST2,">$directory/output"); -print BEST2 "<Covariate>\n"; -print BEST2 "<Trait>"; -for (my $j=1;$j<=$best_K;$j++) -{ - print BEST2 " Q" . $j; -} -print BEST2 "\n"; -my $i = 0; -while(<BEST1>) -{ - my $line = $_; - $line =~s/ /\t/g; - my $ind = $individus[$i]; - print BEST2 "$ind "; - print BEST2 $line; - $i++; -} -close(BEST1); -close(BEST2); - -system("cp -rf $directory/log.$best_K $directory/log"); - - - - |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 admixture/admixture.sh --- a/admixture/admixture.sh Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
#!/bin/bash
# Galaxy wrapper around Admixture.pl.
#
# Positional arguments:
#   $1  input HapMap file
#   $2  destination of the concatenated Q matrices ("All Outputs")
#   $3  destination of the concatenated logs ("All Logs")
#   $4  destination of the best-K covariate table
#   $5  destination of the best-K log file
#   $6  K min
#   $7  K max
input=$1
outputs=$2
logs=$3
best_k_output=$4
best_k_logfile=$5
kmin=$6
kmax=$7

directory=$(dirname "$0")
tmpdir="tmpdir$$"
mkdir "$tmpdir" || exit 1
cp -rf "$input" "$tmpdir/input" || exit 1

# Stop here when the analysis fails instead of moving missing files around.
/usr/bin/perl "$directory/Admixture.pl" -i "$input" -o "$outputs" -k "$kmin" -m "$kmax" -d "$tmpdir" -p "$directory" || exit $?

mv "$tmpdir/output" "$best_k_output" || exit 1
mv "$tmpdir/log" "$best_k_logfile" || exit 1
mv "$tmpdir/outputs.Q" "$outputs" || exit 1
mv "$tmpdir/logs" "$logs" || exit 1
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 admixture/admixture.xml --- a/admixture/admixture.xml Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,55 +0,0 @@ -<tool id="admixture" name="Admixture" version="1.23"> - <description>a population structure from large SNP genotype datasets</description> - <requirements> - <requirement type="package" version="1.07">plink</requirement> - <requirement type="package" version="1.23">admixture</requirement> - </requirements> - <command interpreter="bash">./admixture.sh $input $outputs $logs $best_k_output $best_k_logfile $kmin $kmax - </command> - <inputs> - <param format="txt" name="input" type="data" label="Allelic file in Hapmap format" help="Allelic file in Hapmap format"/> - <param type="text" name="kmin" label="K min" value="1"/> - <param type="text" name="kmax" label="K max" value="5"/> - </inputs> - <outputs> - <data format="txt" name="best_k_output" label="Best K Output"/> - <data format="txt" name="best_k_logfile" label="Best K Logfile"/> - <data format="txt" name="outputs" label="All Outputs"/> - <data format="txt" name="logs" label="All Logs"/> - </outputs> - <help> - - -.. class:: infomark - -**Program encapsulated in Galaxy by Southgreen** - -.. class:: infomark - -**Admixture version 1.23** - ------ - -============== - Please cite: -============== - -"Fast model-based estimation of ancestry in unrelated individuals.", **D.H. Alexander, J. Novembre, and K. Lange.**, Genome Research, 19:1655-1664, 2009. - ------ - -=========== - Overview: -=========== - -ADMIXTURE is a program for estimating ancestry in a model-based manner from large autosomal SNP genotype datasets, where the individuals are unrelated (for example, the individuals in a case-control association study). - ------ - -For further information, please visit the Admixture_ website. - - -.. _Admixture: http://www.genetics.ucla.edu/software/admixture/index.html - </help> - -</tool> |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 admixture/transpose.awk --- a/admixture/transpose.awk Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
#!/usr/bin/gawk -f

# Transpose a whitespace-delimited table: input column c becomes output
# row c. Output fields are separated by one space; a cell that is absent
# from a short input row is written as an empty field.

BEGIN {
    width = 0
    height = 0
}

{
    height += 1
    if (NF > width)
        width = NF
    for (col = 1; col <= NF; col++)
        cell[col, height] = $col
}

END {
    for (col = 1; col <= width; col++) {
        for (row = 1; row <= height; row++) {
            if ((col, row) in cell)
                printf "%s", cell[col, row]
            if (row < height)
                printf " "
        }
        printf "\n"
    }
}
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 check_gwas_inputs/CheckGWASInputs.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/check_gwas_inputs/CheckGWASInputs.pl Fri Jul 10 04:39:30 2015 -0400 |
[ |
#!/usr/bin/perl

# CheckGWASInputs.pl - reconcile a HapMap genotype file with a trait file.
#
# Outputs:
#   <out>.trait   header line of the trait file followed by the lines of the
#                 individuals that are also columns of the HapMap file, in
#                 trait-file order;
#   <out>.hapmap  the 11 annotation columns plus the genotype columns of the
#                 shared individuals (tab delimited). Only markers showing
#                 exactly two alleles among those individuals are written,
#                 and the "alleles" column is set to the observed alleles;
#   STDOUT        summary counts.
#
# "NN" genotypes and "N" alleles are treated as missing data and never count
# as an observed allele.

use strict;
use warnings;
use Getopt::Long;

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
    -h, --hapmap         <Hapmap input file>
    -t, --trait          <Trait input file>
    -o, --out            <Output base name>
~;
$usage .= "\n";

my ( $hapmap, $trait, $out );

GetOptions(
    "trait=s"  => \$trait,
    "out=s"    => \$out,
    "hapmap=s" => \$hapmap
);

die $usage
  if ( !$trait || !$out || !$hapmap );

# A HapMap row starts with 11 annotation columns (rs# ... QCcode); one
# genotype column per individual follows.
my $NB_ANNOTATION_COLUMNS = 11;
my $LAST_ANNOTATION_INDEX = $NB_ANNOTATION_COLUMNS - 1;

#######################################
# get individuals in trait file
#######################################
my %trait_line_for;    # individual => its full trait line (last one wins)
my @trait_order;       # individuals in order of first appearance
open( my $trait_fh, '<', $trait )
  or die "Cannot read trait file $trait: $!\n";
my $head_trait = <$trait_fh>;
$head_trait = '' unless defined $head_trait;
while ( my $trait_line = <$trait_fh> ) {
    my ($ind) = split( /\t/, $trait_line );
    next unless defined $ind;

    # A line holding only a name still carries its line ending.
    $ind =~ s/[\r\n]+\z//;
    next if $ind eq '';
    push( @trait_order, $ind ) unless exists $trait_line_for{$ind};
    $trait_line_for{$ind} = $trait_line;
}
close($trait_fh);
my $nb_ind_trait = scalar keys(%trait_line_for);

#######################################
# get individuals in hapmap file
#######################################
open( my $hapmap_fh, '<', $hapmap )
  or die "Cannot read hapmap file $hapmap: $!\n";
my $header = <$hapmap_fh>;
die "Error: hapmap file $hapmap is empty\n" unless defined $header;

# Strip line endings exactly as for the data rows so that the last
# individual of a CRLF file is matched too.
$header =~ s/[\r\n]//g;
my @titles = split( /\t/, $header );
my $nb_ind_hapmap =
  @titles > $NB_ANNOTATION_COLUMNS ? @titles - $NB_ANNOTATION_COLUMNS : 0;

# Genotype columns whose individual also has a trait line.
my @kept_columns =
  grep { exists $trait_line_for{ $titles[$_] } }
  $NB_ANNOTATION_COLUMNS .. $#titles;
my %is_in_hapmap = map { $titles[$_] => 1 } @kept_columns;

#################################################################
# create trait output by keeping individuals found in both files
#################################################################
my @common_individuals = grep { $is_in_hapmap{$_} } @trait_order;
my $nb_common          = scalar @common_individuals;

my $trait_out = "$out.trait";
open( my $trait_out_fh, '>', $trait_out )
  or die "Cannot write $trait_out: $!\n";
print {$trait_out_fh} $head_trait;
foreach my $ind (@common_individuals) {
    print {$trait_out_fh} $trait_line_for{$ind};
}
close($trait_out_fh) or die "Cannot close $trait_out: $!\n";

#####################################################################
# create hapmap output after keeping individuals found in both files
# and removing monomorphic positions
#####################################################################
my $hapmap_out = "$out.hapmap";
open( my $hapmap_out_fh, '>', $hapmap_out )
  or die "Cannot write $hapmap_out: $!\n";

my @header_annotations =
  map { defined $titles[$_] ? $titles[$_] : '' } 0 .. $LAST_ANNOTATION_INDEX;
print {$hapmap_out_fh}
  join( "\t", @header_annotations, @titles[@kept_columns] ), "\n";

my $nb_monomorphic = 0;
my $not_biallelic  = 0;
my $diff_variation = 0;
while ( my $row = <$hapmap_fh> ) {
    $row =~ s/\n//g;
    $row =~ s/\r//g;
    next if $row eq '';
    my @fields = split( /\t/, $row );

    my @annotations =
      map { defined $fields[$_] ? $fields[$_] : '' }
      0 .. $LAST_ANNOTATION_INDEX;
    my @genotypes =
      map { defined $fields[$_] ? $fields[$_] : '' } @kept_columns;

    # Collect the alleles observed among the kept individuals.
    my %seen_allele;
    foreach my $genotype (@genotypes) {
        next if $genotype eq 'NN';
        foreach my $allele ( ( split( //, $genotype ) )[ 0, 1 ] ) {
            next unless defined $allele;
            next if $allele eq 'N';
            $seen_allele{$allele} = 1;
        }
    }
    my @alleles = sort keys(%seen_allele);

    # print only if polymorphic and biallelic
    if ( @alleles < 2 ) {
        $nb_monomorphic++;
        next;
    }
    if ( @alleles > 2 ) {
        $not_biallelic++;
        next;
    }

    # Column 2 ("alleles") must describe what is actually observed; it is
    # replaced directly so that no other column can be altered by accident.
    my $variation_obs = join( "/", @alleles );
    if ( $annotations[1] ne $variation_obs ) {
        $annotations[1] = $variation_obs;
        $diff_variation++;
    }

    print {$hapmap_out_fh} join( "\t", @annotations, @genotypes ), "\n";
}
close($hapmap_fh);
close($hapmap_out_fh) or die "Cannot close $hapmap_out: $!\n";

print "==============================================\n";
print "Individuals\n";
print "==============================================\n";
print "Individuals in hapmap file: $nb_ind_hapmap\n";
print "Individuals in trait file: $nb_ind_trait\n";
print "Individuals found in both files: $nb_common\n";
print "==============================================\n";
print "Markers\n";
print "==============================================\n";
print "Discarded markers:\n";
print "Monomorphic: $nb_monomorphic\n";
print "Not biallelic: $not_biallelic\n";
print "Modified markers:\n";
print "Difference in variation: $diff_variation\n";
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 check_gwas_inputs/CheckGWASInputs.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/check_gwas_inputs/CheckGWASInputs.sh Fri Jul 10 04:39:30 2015 -0400 |
b |
#!/bin/bash
# Galaxy wrapper around CheckGWASInputs.pl.
#
# Positional arguments:
#   $1  HapMap input file
#   $2  trait input file
#   $3  destination of the filtered HapMap file
#   $4  destination of the filtered trait file
#   $5  destination of the statistics / log file
hapmap=$1
trait=$2
out_hapmap=$3
out_trait=$4
stats=$5

directory=$(dirname "$0")
tmpdir="tmpdir$$"
mkdir "$tmpdir" || exit 1

# Both STDOUT (statistics) and STDERR (errors) end up in the stats file.
# The Perl exit status is propagated so that the tool is reported as failed.
perl "$directory/CheckGWASInputs.pl" -h "$hapmap" -t "$trait" -o "$tmpdir/out" >>"$stats" 2>&1 || exit $?

mv "$tmpdir/out.hapmap" "$out_hapmap" || exit 1
mv "$tmpdir/out.trait" "$out_trait" || exit 1

# Nothing is left in the scratch directory once both results are moved.
rmdir "$tmpdir"
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 check_gwas_inputs/CheckGWASInputs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/check_gwas_inputs/CheckGWASInputs.xml Fri Jul 10 04:39:30 2015 -0400 |
[ |
@@ -0,0 +1,184 @@ +<tool id="check_GWAS_inputs" name="Check GWAS Inputs" version="1.0"> + <description>checks concordance between input files for GWAS analysis</description> + <command interpreter="bash">./CheckGWASInputs.sh $hapmap $trait $out_hapmap $out_trait $stats + </command> + <inputs> + <param format="text" name="hapmap" type="data" label="Allelic file in Hapmap format" help="Allelic file in Hapmap format"/> + <param format="text" name="trait" type="data" label="Trait file" help="Phenotypic file"/> + </inputs> + <outputs> + <data format="txt" name="out_hapmap" label="Hapmap output"/> + <data format="txt" name="out_trait" label="Trait output"/> + <data format="txt" name="stats" label="Logfile and statistics"/> + </outputs> + <!-- [STRONGLY RECOMMANDED] Exit code rules --> + <stdio> + <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> + <exit_code range="1:" level="fatal" /> + </stdio> + + <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> + <tests> + <!-- [HELP] Test files have to be in the ~/test-data directory --> + <!-- [HELP] Multiple tests can be defined with different parameters --> +<!-- + <test> + </test> +--> + </tests> + <help> + + <![CDATA[ + + +.. class:: infomark + +**Authors** South Green + + | **Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). + +.. class:: infomark + +**Galaxy integration** South Green. + +--------------------------------------------------- + + +=============== +CheckGWASInputs +=============== + +----------- +Description +----------- + + | CheckGWASInputs checks concordance between input files for GWAS analysis. 
+ + +----------------- +Workflow position +----------------- + +**Upstream tool** + +=============== ====================== =========== +Name output file(s) format +=============== ====================== =========== +VCF to Hapmap Hapmap file hapmap +=============== ====================== =========== + + + + +---------- +Input file +---------- + +Hapmap file + Allelic file in Hapmap format + +Trait file + Phenotypic file + +------------ +Output files +------------ + +Hapmap output + +Trait output + +Logfile and statistics + +--------------------------------------------------- + +--------------- +Working example +--------------- + +Input files +=========== + +Hapmap file +----------- + +:: + + rs# alleles chrom pos strand assembly# center protLSID assayLSID panel QCcode Ind1 Ind2 + SNP1 A/T 1 3102 + assembly NA NA NA speciesname NA AA AA AA + SNP2 A/T 1 4648 + assembly NA NA NA speciesname NA AA AA AA + +Trait file +---------- + +:: + + <Trait> Test + Ind1 -2.9985936006411 + Ind2 -2.68669426456267 + +Output files +============ + +Hapmap output +------------- + +:: + + rs# alleles chrom pos strand assembly# center protLSID assayLSID panel QCcode Ind1 Ind2 Ind3 Ind4 + SNP1 A/T 1 3102 + assembly NA NA NA speciesname NA AA AA AA AA AA + SNP2 A/T 1 4648 + assembly NA NA NA speciesname NA AA AA AA AA TT + + +Trait output +------------ + +:: + + <Trait> Test + Ind429 -26.2142525264157 + Ind373 12.0306115988504 + Ind81 1.98118654229534 + +Logfile and statistics +---------------------- + +:: + + + ============================================== + Individuals + ============================================== + Individuals in hapmap file: 500 + Individuals in trait file: 500 + Individuals found in both files: 500 + ============================================== + Markers + ============================================== + Discarded markers: + Monomorphic: 0 + Not biallelic: 0 + Modified markers: + Difference in variation: 0 + + ]]> + + + </help> + <citations> + <!-- [HELP] As DOI 
or BibTex entry --> + <citation type="bibtex">@article{Dereeper03062015, +author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, +title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, +year = {2015}, +doi = {10.1093/nar/gkv351}, +abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole genome re-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, +URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, +eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, +journal = {Nucleic Acids Research} +} + + </citation> + + </citations> +</tool> |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/CalculateDiversityIndexes.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/CalculateDiversityIndexes.pl Fri Jul 10 04:39:30 2015 -0400 |
[ |
#!/usr/bin/perl

# CalculateDiversityIndexes.pl - per-gene diversity statistics with EggLib.
#
# The FASTA input holds sequences named <gene>_<individual>_<allele>. The
# sequences are grouped by gene (text before the first "_"), each group is
# written to a temporary FASTA file and passed to the bundled "eggstats"
# program. For every gene for which eggstats printed something, one
# semicolon-terminated line is written to the output file:
#
#   gene;sequences;sites;analyzed sites;S;thetaW;Pi;D;haplotypes;
#   haplotype diversity;Fay and Wu H;Fst;Snn;
#
# Genes are reported in sorted order so that the output is reproducible.

use strict;
use warnings;
use Getopt::Long;
use Bio::SeqIO;

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
    -i, --input          <FASTA input>
    -o, --output         <output filename>
    -d, --directory      <directory of egglib package>
~;
$usage .= "\n";

my ( $infile, $outfile, $dir_exe );

GetOptions(
    "input=s"     => \$infile,
    "output=s"    => \$outfile,
    "directory=s" => \$dir_exe
);

die $usage
  if ( !$infile || !$outfile || !$dir_exe );

my $EGGSTATS_EXE = "$dir_exe/egglib-2.1.5/bin/eggstats";

# eggstats labels reported for every gene, in output column order.
my @REPORTED_STATS = (
    "Total number of sequences",
    "Total number of sites",
    "Number of analyzed sites",
    "S",
    "thetaW",
    "Pi",
    "D",
    "number of haplotypes",
    "haplotypes diversity",
    "Fay and Wu H",
    "Fst",
    "Snn",
);

# Run eggstats on one FASTA file and return everything it printed on STDOUT.
# The program is started without a shell, so the file name is never
# interpreted as shell syntax.
sub run_eggstats {
    my ($fasta_file) = @_;
    my $egg_fh;
    if ( !open( $egg_fh, '-|', $EGGSTATS_EXE, $fasta_file ) ) {
        warn "Cannot run $EGGSTATS_EXE: $!\n";
        return '';
    }
    my $report = '';
    while ( my $report_line = <$egg_fh> ) {
        $report .= $report_line;
    }
    close($egg_fh);    # exit status deliberately ignored, output decides
    return $report;
}

# Turn the "label: value" lines of an eggstats report into a hash.
sub parse_eggstats {
    my ($report) = @_;
    my %value_for;
    foreach my $report_line ( split( /^/, $report ) ) {
        chomp($report_line);
        my ( $label, $value ) = split( /: /, $report_line );
        next unless defined $label;
        $value_for{$label} = defined $value ? $value : '';
    }
    return %value_for;
}

# Group the sequences by gene.
my %gene_alignments;
my $in = Bio::SeqIO->new( -file => $infile, '-format' => 'Fasta' );
while ( my $seq = $in->next_seq() ) {
    my $id       = $seq->id();
    my $sequence = $seq->seq();
    next unless defined $id && $id ne '';
    my ($gene) = split( /_/, $id );
    next unless defined $gene && $gene ne '';
    $sequence = '' unless defined $sequence;
    $gene_alignments{$gene} .= ">$id\n$sequence\n";
}

open( my $out_fh, '>', $outfile ) or die "Cannot write $outfile: $!\n";
foreach my $gene ( sort keys(%gene_alignments) ) {

    # Gene names come from FASTA headers: keep only characters that are
    # harmless in a file name, and never start with "-" (option look-alike).
    ( my $file_stem = $gene ) =~ s/[^\w.\-]/_/g;
    $file_stem =~ s/\A-/_/;
    my $gene_fasta = "$file_stem.egglib_input.fa";

    open( my $fasta_fh, '>', $gene_fasta )
      or die "Cannot write $gene_fasta: $!\n";
    print {$fasta_fh} $gene_alignments{$gene};
    close($fasta_fh) or die "Cannot close $gene_fasta: $!\n";

    my $results_egglib = run_eggstats($gene_fasta);

    # The temporary file is removed whether or not eggstats produced output.
    unlink($gene_fasta);

    next unless $results_egglib;

    my %egglib_stats = parse_eggstats($results_egglib);
    my @values =
      map { defined $egglib_stats{$_} ? $egglib_stats{$_} : '' }
      @REPORTED_STATS;
    print {$out_fh} join( ";", $gene, @values ), ";\n";
}
close($out_fh) or die "Cannot close $outfile: $!\n";
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/CalculateDiversityIndexes.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/CalculateDiversityIndexes.sh Fri Jul 10 04:39:30 2015 -0400 |
b |
#!/bin/bash
# Galaxy wrapper around CalculateDiversityIndexes.pl.
#
# Positional arguments:
#   $1  FASTA alignment
#   $2  destination of the diversity table
#   $3  destination of the log file (STDOUT and STDERR of the Perl script)
input=$1
output=$2
log=$3

directory=$(dirname "$0")

# Last command of the script: its exit status is the status of the tool.
perl "$directory/CalculateDiversityIndexes.pl" -i "$input" -o "$output" -d "$directory" >>"$log" 2>&1
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/CalculateDiversityIndexes.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/CalculateDiversityIndexes.xml Fri Jul 10 04:39:30 2015 -0400 |
[ |
@@ -0,0 +1,175 @@ +<tool id="calculate_diversity" name="Diversity by gene" version="2.1.6"> + <description>calculates various diversity indexes with EggLib.</description> + <command interpreter="bash">./CalculateDiversityIndexes.sh $input $output $log + </command> + <inputs> + <param format="fasta" name="input" type="data" label="Fasta alignment" help="..."/> + </inputs> + <outputs> + <data format="txt" name="output" label="Diversity"/> + <data format="txt" name="log" label="Logfile"/> + </outputs> + <!-- [STRONGLY RECOMMANDED] Exit code rules --> + <stdio> + <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> + <exit_code range="1:" level="fatal" /> + </stdio> + + <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> + <tests> + <!-- [HELP] Test files have to be in the ~/test-data directory --> + <!-- [HELP] Multiple tests can be defined with different parameters --> +<!-- + <test> + </test> +--> + </tests> + <help> + + + +.. class:: infomark + +**Authors** EggLib_ + +.. _EggLib: http://egglib.sourceforge.net/ + + | "EggLib: processing, analysis and simulation tools for population genetics and genomics.", **De Mita S. and M. Siol.**, BMC Genet. 2012. 13:27. + +.. class:: infomark + +**Galaxy integration** South Green. + +.. class:: infomark + +**Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). + +--------------------------------------------------- + + + +================== +Diversity by genes +================== + +----------- +Description +----------- + + | Provides various diversity indexes using EggLib library. + | For further informations, please visite the EggLib website_. + +.. 
_website: http://egglib.sourceforge.net/ + + +----------------- +Workflow position +----------------- + +**Upstream tool** + +=============== ====================== =========== +Name output file(s) format +=============== ====================== =========== +VCF to Hapmap Fasta alignment fasta +=============== ====================== =========== + + + +---------- +Input file +---------- + +Fasta file + Fasta alignment + + + +------------ +Output files +------------ + +Diversity + +Log file + + +------------ +Dependencies +------------ +EggLib + version 2.1.5 + +--------------------------------------------------- + +--------------- +Working example +--------------- + +Input files +=========== + +Fasta file +---------- + +:: + + >LOCOs11g09160_AZUCENA_1 + ATGTGGCGGGCGAGGCGGTGGCCGTGGCCGTGGCGGTCGCAGCGGCGCGCGGCGAGGATGCTCTCGTCGGGTGAGCCAGCGGCGGGGCGGCGGAGGGTGGCGGCGCTGTGGGGGAACGGGGA + CTACGGGCGGCTGGGGATGGGGGCGCTGGAGTCGCGGTGGAGCCCCACGGCGTGCCCCTTCTTCCTCACCGGCCGCCCCGGCGACGACGACGACGACCCGCCCGCCTCCCTCGCCTGCGGCG + GCGCCCACACCCTCTTCCTCACCGGTACTCCTGAATTGAAATCGCCTGCTTGTGTAATTACTCGAGCGAGAGAGAGATTGTGTTGAATCGAGATTAATGTGGGAGTATGTGATTTTTGGCCT + GCTGGTTTTGCTCAGAGAGCGGGCGCGTGTTCGCCGCGGGTCTCAACGACTTCGGGCAGCTCGGGATAGGCTCCTCCGTGACTCATTCCCTGGTACTGAGCTTCTTGTACATCATGCCTCCA + TGTGAAATTTTCATCTACATTGTGAGCCAGCCTACTTTTACACAGTAAGCGAAAGCTGGCTGGACATATCAGAGTTGCAATGGGGATTGACCAAATCAATTCTGACTCCTGTTACATGTTGC + >LOCOs11g09160_AZUCENA_2 + ATGAGGCGGGCGAGGCGGTGGCCGTGGCCGTGGCGGTCGCAGCGGCGAGCGGCGAGGATGCTCTCGTCGGGTGAGCCAGCGGCGGGGCGGCGGAGGGTGGCGGCGCTGTGGGGGAACGGGGA + CTACGGGCGGCTGGGGATGGGGGCGCTGGAGTCGCGGTGGAGCCCCACGGCGTGCCCCTTCTTCCTCACCGGCCGCCCCGGCGACGACGACGACGACCCGCCCGCCTCCCTCGCCTGCGGCG + GCGCCCACACCCTCTTCCTCACCGGTACTCCTGAATTGAAATCGCCAGCTTGTGTAATTACTCGAGCGAGAGAGAGATTGTGTTGAATCGAGATTAATGTGGGAGTATGTGATTTTTGGCCT + GCTGGTTTTGCTCAGAGAGCGGGCGCGTGTTCGCCGCGGGTCTCAACGACTTCGGGCAGCTCGGGATAGGCTCCTCCGTGACTCATTCCCTGGTACTGAGCTTCTTGTACATCATGCCTCCA + 
TGTGAAATTTTCATCTACATTGTGAGCCAGCCTACTTTTACACAGTAAGCGAAAGCTGGCTGGACATATCAGAGTTGCAATGGGGATTGACCAAATCAATTCTGACTCCTGTTACATGTTGC + >LOCOs11g09160_BULUPANDAK_1 + ATGTGGCGGGCGAGGCGGTGGCCGTGGCCGTGGCGGTCGCAGCGGCGCGCGGCGAGGATGCTCTCGTCGGGTGAGCCAGCGGCGGGGCGGCGGAGGGTGGCGGCGCTGTGGGGGAACGGGGA + CTACGGGCGGCTGGGGATGGGGGCGCTGGAGTCGCGGTGGAGCCCCACGGCGTGCCCCTTCTTCCTCACCGGCCGCCCCGGCGACGACGACGACGACCCGCCCGCCTCCCTCGCCTGCGGCG + GCGCCCACACCCTCTTCCTCACCGGTACTCCTGAATTGAAATCGCCTGCTTGTGTAATTACTCGAGCGAGAGAGAGATTGTGTTGAATCGAGATTAATGTGGGAGTATGTGATTTTTGGCCT + GCTGGTTTTGCTCAGAGAGCGGGCGCGTGTTCGCCGCGGGTCTCAACGACTTCGGGCAGCTCGGGATAGGCTCCTCCGTGACTCATTCCCTGGTACTGAGCTTCTTGTACATCATGCCTCCA + TGTGAAATTTTCATCTACATTGTGAGCCAGCCTACTTTTACACAGTAAGCGAAAGCTGGCTGGACATATCAGAGTTGCAATGGGGATTGACCAAATCAATTCTGACTCCTGTTACATGTTGC + >LOCOs11g09160_BULUPANDAK_2 + ATGAGGCGGGCGAGGCGGTGGCCGTGGCCGTGGCGGTCGCAGCGGCGAGCGGCGAGGATGCTCTCGTCGGGTGAGCCAGCGGCGGGGCGGCGGAGGGTGGCGGCGCTGTGGGGGAACGGGGA + CTACGGGCGGCTGGGGATGGGGGCGCTGGAGTCGCGGTGGAGCCCCACGGCGTGCCCCTTCTTCCTCACCGGCCGCCCCGGCGACGACGACGACGACCCGCCCGCCTCCCTCGCCTGCGGCG + GCGCCCACACCCTCTTCCTCACCGGTACTCCTGAATTGAAATCGCCAGCTTGTGTAATTACTCGAGCGAGAGAGAGATTGTGTTGAATCGAGATTAATGTGGGAGTATGTGATTTTTGGCCT + GCTGGTTTTGCTCAGAGAGCGGGCGCGTGTTCGCCGCGGGTCTCAACGACTTCGGGCAGCTCGGGATAGGCTCCTCCGTGACTCATTCCCTGGTACTGAGCTTCTTGTACATCATGCCTCCA + TGTGAAATTTTCATCTACATTGTGAGCCAGCCTACTTTTACACAGTAAGCGAAAGCTGGCTGGACATATCAGAGTTGCAATGGGGATTGACCAAATCAATTCTGACTCCTGTTACATGTTGC + >LOCOs07g23169_AZUCENA_1 + ACTAGTGAAGTGTTGCCCGTTGGGTTTGGTGGAGTTTGAAGAGCTCTGTCGCCGTCTTCACTCCTCGCAGTCAGACGGGAGTGGACTACTGGAGGGAGAGAGAGGGTGAGCGAGGTGTGGGA + GCTGGTTTTGCTCAGAGAGCGGGCGCGTGTTCGCCGCGGGTCTCAACGACTTCGGGCAGCTCGGGATAGGCTCCTCCGTGACTCATTCCCTGGTACTGAGCTTCTTGTACATCATGCCTCCA + TGTGAAATTTTCATCTACATTGTGAGCCAGCCTACTTTTACACAGTAAGCGAAAGCTGGCTGGACATATCAGAGTTGCAATGGGGATTGACCAAATCAATTCTGACTCCTGTTACATGTTGC + +Output files +============ + +Diversity +--------- + +:: + + LOCOs07g23169;8;10494;10494;2;7.35039e-05;8.16793e-05;0.414213;2;0.428571;0.857143;0;1; + 
LOCOs11g09160;8;6577;6577;2;0.00011728;0.000130324;0.414213;2;0.428571;0.857143;0;1; + + + </help> + <citations> + <!-- [HELP] As DOI or BibTex entry --> + <citation type="bibtex">@article{Dereeper03062015, +author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, +title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, +year = {2015}, +doi = {10.1093/nar/gkv351}, +abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole genome re-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. 
SNiPlay is accessible at http://sniplay.southgreen.fr.}, +URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, +eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, +journal = {Nucleic Acids Research} +} + + </citation> + + </citations> +</tool> |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/bin/eggstats |
b |
Binary file egglib/egglib-2.1.5/bin/eggstats has changed |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Align.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Align.hpp Fri Jul 10 04:39:30 2015 -0400 |
[ |
b'@@ -0,0 +1,378 @@\n+/*\r\n+ Copyright 2008-2009 St\xc3\xa9phane De Mita, Mathieu Siol\r\n+\r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+\r\n+#ifndef EGGLIB_ALIGN_HPP\r\n+#define EGGLIB_ALIGN_HPP\r\n+\r\n+#include "Container.hpp"\r\n+#include "CharMatrix.hpp"\r\n+#include <vector>\r\n+\r\n+/** \\mainpage Summary\r\n+ * \r\n+ * This is the automatically-generated reference manual of the C++\r\n+ * egglib-cpp library. The library is presented as several modules, but\r\n+ * note that they are only used to structure the documentation.\r\n+ * \r\n+ * There is a single namespace (egglib) in which all classes are\r\n+ * defined. See an example of programming with egglib-cpp in the\r\n+ * EggLib package main documentation. Use "Modules" or "Classes" above\r\n+ * to navigate in the library reference manual.\r\n+ * \r\n+ */\r\n+\r\n+\r\n+/** \\defgroup core core\r\n+ *\r\n+ * \\brief Central core of the C++ library of Egglib\r\n+ *\r\n+ * Data storage classes, parsers/formatters and tools, plus exception\r\n+ * types.\r\n+ * \r\n+ */\r\n+\r\n+namespace egglib {\r\n+\r\n+\r\n+ /** \\brief Handles a sequence alignment\r\n+ *\r\n+ * \\ingroup core\r\n+ * \r\n+ * Creation from a file or string stream should be performed using\r\n+ * the class Fasta. 
Align objects can be created by deep copy from\r\n+ * both Align and Container type. In the latter case, the length are\r\n+ * artificially equalized by "?" characters. Align objects can be\r\n+ * created from a DataMatrix object (and all the way arround) using\r\n+ * the specific class DMAConverter.\r\n+ *\r\n+ * Sequences are represented by two strings (name and sequence) and\r\n+ * an integer (group) that can be accessed or modified by index.The\r\n+ * order of sequences is guaranteed to be conserved, as if Align was\r\n+ * a list of triplets (name, sequence, group).\r\n+ *\r\n+ * The data matrix is implemented as continuous array (char**) and\r\n+ * allows efficient access and modification of data. For very large\r\n+ * data matrices you might claim immediately the required memory\r\n+ * using the constructor Align(unsigned int, char**).\r\n+ * \r\n+ */\r\n+ class Align : public Container, public CharMatrix {\r\n+ public:\r\n+ \r\n+ /** \\brief Creates an empty alignment\r\n+ * \r\n+ */\r\n+ Align();\r\n+\r\n+\r\n+ /** \\brief Creates an alignment from a data matrix.\r\n+ * \r\n+ * Allows you to create an object from data stored in a char*\r\n+ * array. 
The array\'s dimensions must be passed to the\r\n+ * constructor, and as a result there is not need to\r\n+ * terminate each sequence by a NULL character.\r\n+ * \r\n+ * \\param number_of_sequences the number of sequences (the\r\n+ * length of the first dimension of the array).\r\n+ * \r\n+ * \\param alignment_length the length of sequences (the\r\n+ * length of all lines of the array).\r\n+ * \r\n+ * \\param cstring_array the pointer to the data matrix.\r\n+ * \r\n+ */\r\n+ Align(unsigned int number_of_sequences, unsigned int alignment_length, char const * const * const cstring_array);\r\n+\r\n+\r\n+ /** \\brief Creates an alignment with given dimensions\r\n+ * \r\n+ * Allows you to allocate directly a data matrix of a given\r\n+ '..b'm position the position in the alignment (column).\r\n+ * \\param ch the new character value.\r\n+ */\r\n+ virtual void set(unsigned int sequence, unsigned position, char ch);\r\n+\r\n+\r\n+ /** \\brief Reverse a given column in binary data\r\n+ *\r\n+ * The specified column must contain only "0" ans "1" characters.\r\n+ * "0" is replaced by "1" and all the way around\r\n+ * \r\n+ */\r\n+ void binSwitch(unsigned int pos);\r\n+\r\n+\r\n+ /** \\brief Extracts specified positions (columns) of the alignment\r\n+ *\r\n+ * All the specified sites are extracted in the specified\r\n+ * order. 
This function is suitable for bootstrap (resample\r\n+ * allowing redrawing the same site) and permutations.\r\n+ * \r\n+ * This function doesn\'t perform out-of-bound checking.\r\n+ * \r\n+ * \\param list_of_sites a vector containing alignment\r\n+ * positions.\r\n+ * \r\n+ * \\return A copy of the object containing the specified\r\n+ * set of positions.\r\n+ * \r\n+ */\r\n+ Align vslice(std::vector<unsigned int> list_of_sites);\r\n+\r\n+\r\n+ /** \\brief Extracts a range of positions (columns)\r\n+ * \r\n+ * \\param a the first position.\r\n+ * \r\n+ * \\param b the index immediately passed the last sequence to\r\n+ * extract.\r\n+ * \r\n+ * \\return A copy of the object containing the specified\r\n+ * range of sequences.\r\n+ * \r\n+ * Positions a to b-1 are extracted, provided that the\r\n+ * indices fit in the current length of sequences. To extract\r\n+ * all sequences, use align.vslice(0, align.ls()).\r\n+ * \r\n+ * Note: invalid ranges will be silently supported. If\r\n+ * a>=ls or b<=a, an empty object is returned. 
If b>ns,\r\n+ * ls will be substituted to a.\r\n+ */\r\n+ Align vslice(unsigned int a, unsigned int b);\r\n+\r\n+\r\n+ /** \\brief Deletes all the content of the object\r\n+ * \r\n+ */\r\n+ virtual void clear();\r\n+\r\n+\r\n+ /** \\brief Same as ns()\r\n+ * \r\n+ */\r\n+ inline unsigned int numberOfSequences() const {\r\n+ return _ns;\r\n+ }\r\n+\r\n+\r\n+ /** \\brief Same as ls()\r\n+ * \r\n+ */\r\n+ inline unsigned int numberOfSites() const {\r\n+ return _ls;\r\n+ }\r\n+\r\n+\r\n+ /** \\brief Gets a group label (insecure)\r\n+ * \r\n+ */\r\n+ inline unsigned int populationLabel(unsigned int sequenceIndex) const {\r\n+ return groups[sequenceIndex];\r\n+ }\r\n+ \r\n+ \r\n+ /** \\brief Just return the passed value\r\n+ *\r\n+ */\r\n+ inline double sitePosition(unsigned int position) const {\r\n+ return (double) position;\r\n+ }\r\n+\r\n+\r\n+ protected:\r\n+ \r\n+ /// This function is not available for alignments\r\n+ virtual void appendSequence(unsigned int pos, const char* sequence) {}\r\n+\r\n+ // Initializer (creates a valid empty alignment)\r\n+ virtual void init();\r\n+ \r\n+ // Makes a deep copy of the specified data matrix - if cstring_array is NULL, then ignores it and pads with ?\'s\r\n+ virtual void setFromSource(unsigned int number_of_sequences, unsigned int alignment_length, const char* const * const cstring_array);\r\n+\r\n+ // Copies from a Container\r\n+ virtual void copyObject(const Container&);\r\n+ \r\n+ // Copies from an Align\r\n+ virtual void copyObject(const Align&);\r\n+ \r\n+ // Alignment length\r\n+ unsigned int _ls;\r\n+ };\r\n+}\r\n+\r\n+#endif\r\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Arg.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Arg.hpp Fri Jul 10 04:39:30 2015 -0400 |
[ |
b'@@ -0,0 +1,300 @@\n+/*\r\n+ Copyright 2009-2010 St\xe9phane De Mita, Mathieu Siol\r\n+\r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+\r\n+#ifndef EGGLIB_ARG_HPP\r\n+#define EGGLIB_ARG_HPP\r\n+\r\n+\r\n+#include "Current.hpp"\r\n+#include "Edge.hpp"\r\n+#include <string>\r\n+\r\n+\r\n+/** \\defgroup coalesce coalesce\r\n+ *\r\n+ * \\brief Coalescent simulator\r\n+ *\r\n+ * The set of classes implements a three-scale coalescent simulator with\r\n+ * recombination, and a flexible mutation model. The main classes are\r\n+ * Controller (the starting point for generating genealogies), ParamSet\r\n+ * (that centralizes parameter specification), the Change hierarchy\r\n+ * (that implements demographic change specifications), Arg (ancestral\r\n+ * recombination graph; the result of generation a genealogy) and\r\n+ * Mutator (that generates genotype data from an ARG).\r\n+ * \r\n+ */\r\n+\r\n+\r\n+namespace egglib {\r\n+ \r\n+ class Random;\r\n+\r\n+ /** \\brief Ancestral recombination graph\r\n+ *\r\n+ * \\ingroup coalesce\r\n+ * \r\n+ * This class stores the ARG (genealogical information). It is\r\n+ * progressively built by appropriate (especially regarding to the\r\n+ * timing) calls to coal() and recomb() methods. 
Then it can be\r\n+ * used by a mutator class to generates data, or it can also\r\n+ * generate newick trees (one tree by non-recombining segment).\r\n+ *\r\n+ */\r\n+ class Arg {\r\n+\r\n+ public:\r\n+ \r\n+ /** \\brief Default constructor\r\n+ *\r\n+ * Creates a null, useless, object.\r\n+ *\r\n+ */\r\n+ Arg();\r\n+\r\n+ \r\n+ /** \\brief Object initialization\r\n+ * \r\n+ * \\param current address of the Current instance used by\r\n+ * the simulator.\r\n+ * \r\n+ * \\param numberOfSegments number of recombining segments.\r\n+ * \r\n+ */\r\n+ void set(Current* current, unsigned int numberOfSegments);\r\n+\r\n+ \r\n+ /** \\brief Object reset method\r\n+ * \r\n+ * This method doesn\'t reset all parameters (the number of\r\n+ * segments and associated tables are retained, as well as\r\n+ * the Edge object pool).\r\n+ * \r\n+ * \\param current address of the Current instance used by\r\n+ * the simulator.\r\n+ * \r\n+ */\r\n+ void reset(Current* current);\r\n+\r\n+\r\n+ /** \\brief Standard constructor\r\n+ * \r\n+ * \\param current address of the Current instance used by\r\n+ * the simulator.\r\n+ * \r\n+ * \\param numberOfSegments number of recombining segments\r\n+ *\r\n+ */\r\n+ Arg(Current* current, unsigned int numberOfSegments);\r\n+\r\n+ \r\n+ /** \\brief Destructor\r\n+ * \r\n+ * Clears all Edge instances referenced in the object.\r\n+ * \r\n+ */\r\n+ virtual ~Arg();\r\n+ \r\n+\r\n+ /** \\brief Gets the current value of the time counter\r\n+ * \r\n+ */ \r\n+ double time() const;\r\n+ \r\n+\r\n+ /** \\brief Increments the time counter\r\n+ * \r\n+ */\r\n+ void addTime(double increment);\r\n+ \r\n+\r\n+ /** \\brief Performs a coalescence ev'..b'\n+ * \r\n+ * The uMRCA is the ultimate Most Recent Common Ancestor,\r\n+ * that is the point where the last segment finds its most\r\n+ * recent common ancestor. 
This member will have a meaningful\r\n+ * value only if the coalescent process is completed.\r\n+ * \r\n+ */\r\n+ inline double ageUltimateMRCA() const {\r\n+ return _time;\r\n+ }\r\n+ \r\n+\r\n+ /** \\brief Age of the MRCA for a given segment\r\n+ * \r\n+ * The MRCA is the Most Recent Common Ancestor, that is the\r\n+ * point where the coalescent process is over (all lineages\r\n+ * have coalesced). This member will have a meaningful\r\n+ * value only if the coalescent process is completed.\r\n+ * \r\n+ * Note that the value is cached; it is computed only one\r\n+ * upon first call and no again, even if the Arg is modified<\r\n+ * \r\n+ */\r\n+ inline double ageMRCA(unsigned int segmentIndex) {\r\n+ return _MRCA[segmentIndex]->bottom;\r\n+ }\r\n+\r\n+ /** \\brief MRCA for each segment\r\n+ * \r\n+ * The MRCA is the Most Recent Common Ancestor, that is the\r\n+ * point where the coalescent process is over (all lineages\r\n+ * have coalesced). This member will have a meaningful\r\n+ * value only if the coalescent process is completed.\r\n+ * \r\n+ * Note that the value is cached; it is computed only one\r\n+ * upon first call and no again, even if the Arg is modified\r\n+ * \r\n+ */\r\n+ inline const Edge* MRCA(unsigned int segmentIndex) {\r\n+ return _MRCA[segmentIndex];\r\n+ }\r\n+\r\n+ /// Ultimate MRCA\r\n+ \r\n+ inline const Edge* uMRCA() {\r\n+ return edges[numberOfEdges-1];\r\n+ }\r\n+ \r\n+ \r\n+ /// the number of recombining segments\r\n+ unsigned int numberOfSegments;\r\n+\r\n+ /** \\brief Formats the newick-formatted tree for a segment\r\n+ * \r\n+ */\r\n+ std::string newick(unsigned int segment);\r\n+ \r\n+\r\n+ /// Number of initial lineages\r\n+ unsigned int numberOfSamples;\r\n+\r\n+\r\n+ /** \\brief Total tree length (summed over all segments)\r\n+ * \r\n+ */\r\n+ double totalLength;\r\n+\r\n+ /** \\brief Segment-specific tree length\r\n+ * \r\n+ */\r\n+ double* segmentLengths;\r\n+\r\n+ /// Current number of Edges in the tree (including the MRCA 
node)\r\n+ unsigned int numberOfEdges;\r\n+\r\n+ /// Total number of recombination events that occurred\r\n+ unsigned int numberOfRecombinationEvents;\r\n+ \r\n+ /// Set the number of actual sites in all branches\r\n+ void set_actualNumberOfSites(unsigned int actualNumberOfSites);\r\n+ \r\n+ \r\n+ private:\r\n+ \r\n+ /// Copy constructor not available\r\n+ Arg(const Arg&) { }\r\n+ \r\n+ /// Assignment operator not available\r\n+ Arg& operator=(const Arg&) { return *this; }\r\n+\r\n+ void init_stable_parameters();\r\n+ void init_variable_parameters();\r\n+ void clear();\r\n+ void addEdge(Edge*);\r\n+ std::string rnewick(Edge* edge, unsigned int segment, double cache);\r\n+\r\n+ Current* current;\r\n+ double _time;\r\n+ Edge** edges;\r\n+ \r\n+ void findMRCA(unsigned int segmentIndex);\r\n+ void computeTotalLength();\r\n+ void computeSegmentLength(unsigned int segmentIndex);\r\n+\r\n+ unsigned int* numberOfEdgesPerSegment;\r\n+ Edge** _MRCA;\r\n+ \r\n+ EdgePool edgePool;\r\n+ };\r\n+\r\n+}\r\n+\r\n+#endif\r\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/BaseDiversity.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/BaseDiversity.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,169 @@ +/* + Copyright 2009 St�phane De Mita, Mathieu Siol + + This file is part of the EggLib library. + + EggLib is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + EggLib is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with EggLib. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef EGGLIB_BASEDIVERSITY_HPP +#define EGGLIB_BASEDIVERSITY_HPP + +#include "CharMatrix.hpp" +#include "SitePolymorphism.hpp" +#include <string> + +/** \defgroup polymorphism polymorphism + * + * \brief Diversity analyses + * + * Two classes are contained in this module: NucleotideDiversity, that + * performs site-centered polymorphism analyses, and HaplotypeDiversity, + * that performs haplotype-centered analyses. The detection of + * polymorphic sites is common to both, through the base class + * BaseDiversity. However this phase must be repeated when stats from + * the two classes are needed. To reduce the computational burden, the + * function reserve() can be use, that directly allocates needed memory + * when the eventual number of polymorphic sites is known prior to + * analysis (even if not precisely). For both classes, a set of + * statistics are computed immediately upon load of a data set. For + * NucleotideDiversity, additional statistics are computed per group + * upon use of the corresponding accessors. This number of operations + * performed several times is strictly limited. This is particularly + * useful when different statistics are needed for a given alignment. 
+ * However, this system allows not computing unnecessary statistics to + * a certain extend. + * + */ + +namespace egglib { + + /** \brief Base class of diversity classes + * + * Mutualizes the analysis of polymorphic sites through the method + * importSites() and related accessors. + * + * \ingroup polymorphism + * + */ + class BaseDiversity { + + public: + + /** \brief Constructor + * + */ + BaseDiversity(); + + /** \brief Destructor + * + */ + virtual ~BaseDiversity(); + + /** \brief Reserve sufficient memory for a given number of + * polymorphic sites. + * + * This method makes importSite function faster when you + * already know how many polymorphic sites to expect, since + * the necessary memory will be allocated prior the screening + * of data. It is possible to use reserve() even if with a + * number of sites that is not matching what importSites() + * will find. + * + * \param numberOfSites a strictly positive integer. + * + */ + virtual void reserve(unsigned int numberOfSites); + + /// Gets a site + const SitePolymorphism* get_site(unsigned int index) const; + + /// Gets a site position + unsigned int get_position(unsigned int index) const; + + /** \brief Predefined mapping string for DNA data + * + */ + static const std::string dnaMapping; + + + /** \brief Predefined mapping string for RNA data + * + */ + static const std::string rnaMapping; + + + /** \brief Predefined mapping string for amino acid data + * + */ + static const std::string aaMapping; + + + /// Clears and re-initializes object + virtual void reset(); + + + protected: + + virtual void init(); + virtual void clear(); + + // + void importSites(CharMatrix& data, bool allowMultipleMutations, + double minimumExploitableData, unsigned int ignoreFrequency, + std::string characterMapping, bool useZeroAsAncestral, + bool ignoreOutgroup); + + // + void analyzeSite(CharMatrix& data, unsigned int index, double maxMissingData, bool ignoreOutgroup); // analyzes a site, adds a Site to the Site container 
if the site is polymorphic + unsigned int getPopIndex(unsigned int label) const; // returns v_npop if not found + + SitePolymorphism** v_sites; // holder of polymorphic site addresses + bool* v_orientables; // stores whether the sites are orientable or not + unsigned int* v_sitePositions; // stores position of sites + + unsigned int v_reserved; + unsigned int v_ns; // maximum number of sequences analyzed (max of sites' ns) + unsigned int v_S; // number of polymorphic sites + unsigned int v_So; // number of orientable sites + unsigned int v_eta; // number of mutation (whatever multiple) + double v_nseff; // average number of analyzed sequence + unsigned int v_lseff; // number of analyzed sites + double v_nseffo; // average number of analyzed sequences for analyzes with outgroup + unsigned int v_lseffo; // number of analyzed sites for analyzes with outgroup + unsigned int v_npop; // number of populations + unsigned int *v_popLabel; // label of each pop + + // options + bool p_allowMultipleMutations; + double p_minimumExploitableData; + std::string p_characterMapping; + unsigned int p_pos_sep_mapping; + bool p_useZeroAsAncestral; + unsigned int p_ignoreFrequency; + + + + private: + + BaseDiversity(const BaseDiversity& source) { } + + BaseDiversity& operator=(const BaseDiversity& source) { + return *this; + } + + }; +} + +#endif |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/ChangeTypes.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/ChangeTypes.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
b'@@ -0,0 +1,428 @@\n+/*\r\n+ Copyright 2009-2010 St\xe9phane De Mita, Mathieu Siol\r\n+\r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+#ifndef EGGLIB_CHANGETYPES_HPP\r\n+#define EGGLIB_CHANGETYPES_HPP\r\n+\r\n+#include "ParamSet.hpp"\r\n+#include "Controller.hpp"\r\n+\r\n+namespace egglib {\r\n+\r\n+/**********************************************************************/\r\n+\r\n+ /** \\brief Pure virtual base class for parameter changes\r\n+ *\r\n+ * \\ingroup coalesce\r\n+ *\r\n+ */\r\n+ class Change {\r\n+ public:\r\n+ \r\n+ /** \\brief Default constructor\r\n+ *\r\n+ * The default date is 0.\r\n+ *\r\n+ */\r\n+ Change();\r\n+ \r\n+ /** \\brief Standard constructor\r\n+ *\r\n+ * \\param date the event date.\r\n+ *\r\n+ */\r\n+ Change(double date);\r\n+\r\n+ /// Gets the event date value\r\n+ double date() const;\r\n+ \r\n+ /// Sets the event date value\r\n+ void date(double value);\r\n+ \r\n+ /** \\brief Applies the event\r\n+ *\r\n+ * \\param paramSet the ParamSet instance to which the Change\r\n+ * instance is attached.\r\n+ * \\param controller the Controller instance of the\r\n+ * simulation.\r\n+ *\r\n+ */\r\n+ virtual void apply(ParamSet* paramSet, Controller* controller) const = 0;\r\n+ \r\n+ protected:\r\n+ double _date;\r\n+ \r\n+ 
};\r\n+\r\n+/**********************************************************************/\r\n+\r\n+ /** \\brief Pure virtual base class for single parameter changes\r\n+ *\r\n+ * \\ingroup coalesce\r\n+ *\r\n+ */\r\n+ class SingleParamChange : public Change {\r\n+ public:\r\n+ \r\n+ /** \\brief Default constructor\r\n+ *\r\n+ * The default date is 0., the default parameter value is 0.\r\n+ *\r\n+ */\r\n+ SingleParamChange();\r\n+ \r\n+ /** \\brief Standard constructor\r\n+ *\r\n+ * \\param date the event date.\r\n+ * \\param value the parameter value.\r\n+ *\r\n+ */\r\n+ SingleParamChange(double date, double value);\r\n+\r\n+ /// Gets the parameter value\r\n+ double value() const;\r\n+ \r\n+ /// Sets the parameter value\r\n+ void value(double value);\r\n+\r\n+ protected:\r\n+ double _value;\r\n+\r\n+ };\r\n+\r\n+/**********************************************************************/\r\n+\r\n+ /** \\brief Single parameter changes applied to a single population\r\n+ *\r\n+ * \\ingroup coalesce\r\n+ *\r\n+ */\r\n+ class PopulationParamChange : public SingleParamChange {\r\n+ public:\r\n+\r\n+ /** \\brief Default constructor\r\n+ *\r\n+ * The default date is 0., the default parameter value is 0.,\r\n+ * the default population is 0\r\n+ *\r\n+ */\r\n+ PopulationParamChange();\r\n+ \r\n+ /** \\brief Standard constructor\r\n+ *\r\n+ * \\param date the event date.\r\n+ * \\param population the population index.\r\n+ * \\param value the parameter value.\r\n+ *\r\n+ */\r\n+ PopulationParamCha'..b' */\r\n+ PopulationSplit();\r\n+\r\n+ /** \\brief Standard constructor\r\n+ *\r\n+ * A the time given by date, the specified population is\r\n+ * split in two. An additional population (whose index is\r\n+ * incremented from the current total number of population)\r\n+ * is created and lineages are randomly picked and moved to\r\n+ * the new population. 
The parameter proba gives the\r\n+ * probability that a lineage from the population number pop\r\n+ * moves instantly to the new population. If proba is 0,\r\n+ * the program emulates the creation of an empty population\r\n+ * (thinking forward in time, this is a population\r\n+ * extinction). In general, forward in time, this is a\r\n+ * population fusion.\r\n+ * \r\n+ * \\param date the date of the event.\r\n+ * \\param pop population index.\r\n+ * \\param proba the probability that lineages move to the\r\n+ * new population.\r\n+ *\r\n+ */\r\n+ PopulationSplit(double date, unsigned int pop, double proba);\r\n+ \r\n+ void apply(ParamSet* paramSet, Controller* controller) const;\r\n+ \r\n+ /// Gets the population index\r\n+ unsigned int population() const;\r\n+ \r\n+ /// Sets the population index\r\n+ void population(unsigned int);\r\n+ \r\n+ /// Gets the probability of instant migration\r\n+ double probability() const;\r\n+\r\n+ /// Sets the probability of instant migration\r\n+ void probability(double);\r\n+ \r\n+ protected:\r\n+ unsigned int _population;\r\n+ double _probability;\r\n+ };\r\n+\r\n+/**********************************************************************/\r\n+\r\n+ /** \\brief Change of the migration rate of all population pairs\r\n+ *\r\n+ * The parameter is the new rate (applied to all population pairs)\r\n+ *\r\n+ * \\ingroup coalesce\r\n+ *\r\n+ */\r\n+ class AllMigrationRateChange : public SingleParamChange {\r\n+ public:\r\n+ AllMigrationRateChange(double date, double value) : SingleParamChange(date, value) {}\r\n+ void apply(ParamSet* paramSet, Controller* controller) const;\r\n+ };\r\n+\r\n+/**********************************************************************/\r\n+ \r\n+ /** \\brief Change of a single migration rate\r\n+ *\r\n+ * \\ingroup coalesce\r\n+ *\r\n+ */\r\n+ class SingleMigrationRateChange : public SingleParamChange {\r\n+ public:\r\n+ /** \\brief Default constructor\r\n+ *\r\n+ * The default date is 0., the default parameter 
value is 0.,\r\n+ * the default source population is 0, the default\r\n+ * destination population 1.\r\n+ *\r\n+ */\r\n+ SingleMigrationRateChange();\r\n+\r\n+ /** \\brief Standard constructor\r\n+ *\r\n+ * \\param date the date of the event.\r\n+ * \\param source index of the source population.\r\n+ * \\param dest index of the destination population.\r\n+ * \\param migr new value of the pairwise migration rate.\r\n+ *\r\n+ */\r\n+ SingleMigrationRateChange(double date, unsigned int source, unsigned int dest, double migr);\r\n+ \r\n+ /// Gets the source population index\r\n+ unsigned source() const;\r\n+ \r\n+ /// Sets the source population index\r\n+ void source(unsigned int);\r\n+\r\n+ /// Gets the dest population index\r\n+ unsigned dest() const;\r\n+ \r\n+ /// Sets the dest population index\r\n+ void dest(unsigned int);\r\n+\r\n+ void apply(ParamSet* paramSet, Controller* controller) const;\r\n+ \r\n+ protected:\r\n+ unsigned int _source;\r\n+ unsigned int _dest;\r\n+ };\r\n+}\r\n+\r\n+#endif\r\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/CharMatrix.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/CharMatrix.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,70 @@ +/* + Copyright 2009 St�phane De Mita, Mathieu Siol + + This file is part of the EggLib library. + + EggLib is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + EggLib is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with EggLib. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef EGGLIB_CHARMATRIX_HPP +#define EGGLIB_CHARMATRIX_HPP + + +namespace egglib { + + /** \brief Interface for classes usable as a square matrix of characters + * + * \ingroup core + * + */ + class CharMatrix { + + public: + + /** \brief Gets number of rows or sequences + * + */ + virtual unsigned int numberOfSequences() const = 0; + + + /** \brief Gets number of columns or sites + * + */ + virtual unsigned int numberOfSites() const = 0; + + + /** \brief Gets the character at a given position + * + * The accessor should be "fast" and does not guarantee to + * perform out-of-bounds checks + * + */ + virtual char character(unsigned int sequence, unsigned int site) const = 0; + + + /** \brief Gets population index + * + */ + virtual unsigned int populationLabel(unsigned int row) const = 0; + + + /** \brief Get site position + * + */ + virtual double sitePosition(unsigned int column) const = 0; + + }; +} + +#endif |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Consensus.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Consensus.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
b'@@ -0,0 +1,421 @@\n+/*\r\n+ Copyright 2008-2009 St\xe9phane De Mita, Mathieu Siol\r\n+\r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+#ifndef EGGLIB_CONSENSUS_HPP\r\n+#define EGGLIB_CONSENSUS_HPP\r\n+\r\n+#include "Align.hpp"\r\n+#include <sstream>\r\n+#include <string>\r\n+#include <vector>\r\n+\r\n+namespace egglib {\r\n+\r\n+ /** \\brief Generates consensus sequences\r\n+ *\r\n+ * \\ingroup polymorphism\r\n+ * \r\n+ *\r\n+ * A consensus is generated when two sequences have the same name, \r\n+ * ignoring everything after the first separator character (by\r\n+ * default, "_"). Hence, the names "foo", "foo_goo" and "foo_third"\r\n+ * will be treated as identical and the root will be "foo". The root\r\n+ * will be used to name the resulting sequence. Note that the\r\n+ * class works only for DNA sequences.\r\n+ *\r\n+ * Symbol convention:\r\n+ * - A: adenosine\r\n+ * - C: cytosine\r\n+ * - G: guanine\r\n+ * - T: thymine\r\n+ * - M: A or C\r\n+ * - R: A or G\r\n+ * - W: A or T (weak)\r\n+ * - S: C or G (strong)\r\n+ * - Y: C or T\r\n+ * - K: G or T\r\n+ * - B: C or G or T(not A)\r\n+ * - D: A or G or T (not C)\r\n+ * - H: A or C or T (not G)\r\n+ * - V: A or C or G (not T)\r\n+ * - N: A or C or G or T\r\n+ * - ?: nonsequenced position\r\n+ * \r\n+ * Other symbols will be treated as ? 
(lowercase are supported).\r\n+ * \r\n+ * Rigorous (alias liberal or strong) mode:\r\n+ * - If two characters are the same, it is retained whatever it is\r\n+ * (A + A = A)\r\n+ * - Otherwise:\r\n+ * - If one is the missing character (?) the other is retained\r\n+ * whatever it is (A + ? = A).\r\n+ * - If characters are consistent, that is one contains\r\n+ * more information, that one is retained (A + M = A).\r\n+ * - If characters are not consistent, the closest \r\n+ * generic symbol is retained (A + C = M).\r\n+ * .\r\n+ * Note that the feedback of inconsistent characters in the\r\n+ * outcome is not garanteed.\r\n+ * In fact, (A + A + G) will result in R (as expected) but (A +\r\n+ * G + A) will result in A, masking the problem.\r\n+ * However, the position will indeed be counted as inconsistent.\r\n+ * \r\n+ * Not rigorous (conservative/weak) mode:\r\n+ * - If two characters are the same, it is retained whatever it\r\n+ * is (A + A = A).\r\n+ * - Otherwise:\r\n+ * - If one is ? the other is retained whatever it is (A + ?\r\n+ * = A).\r\n+ * - Otherwise an inconsistent character (by default, Z) is\r\n+ * retained (A + C = Z).\r\n+ * \r\n+ * Iterative process of consensus:\r\n+ * - Each sequence is taken in turn.\r\n+ * - Each pair involving the focus sequence is processed and a\r\n+ * consensus is generated.\r\n+ * - When all pair have been processsed, the consensus already\r\n+ * generated are themselves iteratively processed until only one\r\n+ * remains.\r\n+ * - Note that at each time the last two are taken first.\r\n+ * \r\n+ * A transparent interface gives access to the data for all steps of\r\n+ * the consensus process, as vectors that covers all pairs (including\r\n+ * intermediate steps of the iterative procedure described above) as\r\n+ * well as singleton sequences. 
For the latter'..b'\r\n+ // Code for disgrement\r\n+ char DISAGREEMENT;\r\n+ \r\n+ public:\r\n+ // This class manages relationships different symbols\r\n+ class CharacterContainer {\r\n+ public:\r\n+ // Default value: @\r\n+ CharacterContainer();\r\n+ \r\n+ // Initiates to a given symbol\r\n+ CharacterContainer(const char&);\r\n+ \r\n+ // Assignment operator\r\n+ CharacterContainer& operator=(const char&);\r\n+ \r\n+ // Sets the symbol\r\n+ void setValue(char);\r\n+ \r\n+ // Set the descendants\r\n+ void setSons(std::vector<CharacterContainer>);\r\n+ \r\n+ // Tests whether the symbol is the same\r\n+ bool is(CharacterContainer);\r\n+ \r\n+ // Tests if the query is contained amongst the sons\r\n+ bool has(CharacterContainer);\r\n+ \r\n+ // Tests if the query is contained amongst the sons\r\n+ bool has(char);\r\n+ \r\n+ /* Tests whether the left character has the left one\r\n+ * Should be called on the N object only.\r\n+ */\r\n+ char lhas(CharacterContainer,CharacterContainer);\r\n+ \r\n+ /* Creates the object with the proper sons\r\n+ * Should be called on the N object only.\r\n+ */\r\n+ CharacterContainer init(char);\r\n+ \r\n+ // The symbol\r\n+ char value;\r\n+ \r\n+ // The descendants\r\n+ std::vector<CharacterContainer> sons;\r\n+ };\r\n+\r\n+ private:\r\n+ // Symbol ?\r\n+ CharacterContainer ccQ;\r\n+ \r\n+ // Symbol A\r\n+ CharacterContainer ccA;\r\n+ \r\n+ // Symbol C\r\n+ CharacterContainer ccC;\r\n+ \r\n+ // Symbol G\r\n+ CharacterContainer ccG;\r\n+ \r\n+ // Symbol T\r\n+ CharacterContainer ccT;\r\n+ \r\n+ // Symbol U\r\n+ CharacterContainer ccU;\r\n+ \r\n+ // Symbol M\r\n+ CharacterContainer ccM;\r\n+ \r\n+ // Symbol R\r\n+ CharacterContainer ccR;\r\n+ \r\n+ // Symbol W\r\n+ CharacterContainer ccW;\r\n+ \r\n+ // Symbol S\r\n+ CharacterContainer ccS;\r\n+ \r\n+ // Symbol Y\r\n+ CharacterContainer ccY;\r\n+ \r\n+ // Symbol K\r\n+ CharacterContainer ccK;\r\n+ \r\n+ // Symbol B\r\n+ CharacterContainer ccB;\r\n+ \r\n+ // Symbol D\r\n+ CharacterContainer 
ccD;\r\n+ \r\n+ // Symbol H\r\n+ CharacterContainer ccH;\r\n+ \r\n+ // Symbol V\r\n+ CharacterContainer ccV;\r\n+ \r\n+ // Symbol N\r\n+ CharacterContainer ccN;\r\n+ \r\n+ // Symbol -\r\n+ CharacterContainer ccGAP;\r\n+ };\r\n+ };\r\n+}\r\n+\r\n+#endif\r\n+\r\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Container.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Container.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
b"@@ -0,0 +1,326 @@\n+/*\r\n+ Copyright 2008-2009 St\xe9phane De Mita, Mathieu Siol\r\n+ \r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+\r\n+#ifndef EGGLIB_CONTAINER_HPP\r\n+#define EGGLIB_CONTAINER_HPP\r\n+\r\n+\r\n+namespace egglib {\r\n+\r\n+ /** \\brief Handles a set of sequence alignment (aligned or not)\r\n+ *\r\n+ * \\ingroup core\r\n+ * \r\n+ * Creation from a file or string stream should be performed using\r\n+ * the class Fasta.\r\n+ *\r\n+ * Sequences are represented by two strings (name and sequence) and\r\n+ * an integer (group) that can be accessed or modified by index.The\r\n+ * order of sequences is guaranteed to be conserved, as if Container\r\n+ * was a list of triplets (name, sequence, group).\r\n+ *\r\n+ * The data matrix is implemented as continuous arrays (char**) and\r\n+ * allows efficient access and modification of data. 
For very large\r\n+ * data matrices you might claim immediately the required memory\r\n+ * using the constructor Container(unsigned int, char**).\r\n+ *\r\n+ */\r\n+ class Container {\r\n+ \r\n+ public:\r\n+ \r\n+ /** \\brief Creates an empty object\r\n+ * \r\n+ */\r\n+ Container();\r\n+ \r\n+ \r\n+ /** \\brief Copy constructor\r\n+ * \r\n+ */\r\n+ Container(const Container& source);\r\n+ \r\n+ \r\n+ /** \\brief Assignment operator\r\n+ * \r\n+ */\r\n+ Container& operator= (const Container& source);\r\n+\r\n+\r\n+ /** \\brief Creates an object from a data matrix\r\n+ * \r\n+ * Allows you to create an object from data stored in a char*\r\n+ * array. The array's size must be passed to the constructor.\r\n+ * Since sequences can have different lengths, you need to\r\n+ * terminate each sequence by a NULL character. This constructor\r\n+ * is dedicated to very performance-critical tasks. For usual\r\n+ * tasks, using the default constructor and subsequently adding\r\n+ * sequences with addSeq should be enough.\r\n+ * \r\n+ * \\param number_of_sequences the number of sequences (the length\r\n+ * of the first dimension of the array).\r\n+ * \r\n+ * \\param cstring_array the pointer to the data matrix.\r\n+ * \r\n+ */\r\n+ Container(unsigned int number_of_sequences, char const* const* const cstring_array);\r\n+\r\n+\r\n+ /** \\brief Destructor\r\n+ * \r\n+ */\r\n+ virtual ~Container();\r\n+\r\n+ \r\n+ /** \\brief Clears all content of the object\r\n+ * \r\n+ */\r\n+ virtual void clear();\r\n+\r\n+\r\n+ /** \\brief Adds a sequence to the object\r\n+ *\r\n+ * \\param name the name of the sequence, as a c-string.\r\n+ * \\param sequence the sequence string, as a c-string.\r\n+ * \\param group the group index of the sequence.\r\n+ *\r\n+ * \\return The new number of sequences.\r\n+ * \r\n+ */\r\n+ virtual unsigned int append(const char* name, const char* sequence, unsigned int group=0);\r\n+ \r\n+ \r\n+ /** \\brief Removes a sequence from the object\r\n+ *\r\n+ * \\param pos 
the index of the seque"..b"r of sequences\r\n+ * \r\n+ */\r\n+ unsigned int ns() const;\r\n+ \r\n+ \r\n+ /** \\brief Gets the length of a given sequence\r\n+ * \r\n+ * \\param pos the index of the sequence.\r\n+ * \r\n+ * \\return The length of that particular sequence.\r\n+ * \r\n+ */\r\n+ virtual unsigned int ls(unsigned int pos) const ;\r\n+ \r\n+ \r\n+ /** \\brief Gets the name of the a given sequence\r\n+ * \r\n+ * \\param pos the index of the sequence.\r\n+ * \r\n+ * \\return The name of that particular sequence.\r\n+ * \r\n+ */\r\n+ virtual const char* name(unsigned int pos) const;\r\n+\r\n+ \r\n+ /** \\brief Gets the name of a given sequence\r\n+ * \r\n+ * \\param pos the index of the sequence.\r\n+ * \r\n+ * \\return The sequence string for that particular sequence.\r\n+ * \r\n+ */\r\n+ virtual const char* sequence(unsigned int pos) const;\r\n+\r\n+\r\n+\r\n+ /** \\brief Gets the group index of a given sequence\r\n+ * \r\n+ * \\param pos the index of the sequence.\r\n+ * \r\n+ * \\return The group index of that particular sequence.\r\n+ * \r\n+ */\r\n+ virtual unsigned int group(unsigned int pos) const;\r\n+ \r\n+ \r\n+ /** \\brief Checks if all lengths are equal\r\n+ * \r\n+ * Returns true if the length of all sequences are equal or\r\n+ * if there is less thant two sequences.\r\n+ * \r\n+ */\r\n+ bool isEqual() const;\r\n+\r\n+\r\n+ /** \\brief Equalizes sequence lengths\r\n+ *\r\n+ * Extends sequences as need to ensure that all sequences\r\n+ * have the same length.\r\n+ *\r\n+ * \\param ch the character to use for padding.\r\n+ * \r\n+ * \\return The final length obtained, which is the length of\r\n+ * the longest sequence before the operation.\r\n+ * \r\n+ */\r\n+ unsigned int equalize(char ch='?');\r\n+\r\n+ \r\n+ /** \\brief Finds a sequence by its name\r\n+ * \r\n+ * Gets the position of the first sequence with the specified\r\n+ * name.\r\n+ * \r\n+ * \\param string a sequence name.\r\n+ * \r\n+ * \\param strict if true, seeks an exact match. 
If false,\r\n+ * compares only until the end of the requested name (for\r\n+ * example: ATCFF will match ATCFF_01 if strict is false).\r\n+ * \r\n+ * \\return The lowest index where the name matches, -1 if no\r\n+ * sequence has such name.\r\n+ * \r\n+ */\r\n+ int find(const char* string, bool strict=true) const;\r\n+\r\n+\r\n+ protected:\r\n+ // The number of sequences\r\n+ unsigned int _ns;\r\n+ \r\n+ // The array of name lengths\r\n+ unsigned int* lnames;\r\n+ \r\n+ // The array of names\r\n+ char** names;\r\n+ \r\n+ // The array of sequences (as c-strings)\r\n+ char** sequences;\r\n+ \r\n+ // The array of groups\r\n+ unsigned int* groups;\r\n+ \r\n+ // Imports an array of c-strings\r\n+ virtual void setFromSource(unsigned int number_of_sequences, const char* const* const cstring_array);\r\n+ \r\n+ // Constructor helper\r\n+ virtual void copyObject(const Container&);\r\n+ \r\n+ // Constructor partial helper\r\n+ virtual void getNamesAndGroups(const Container&);\r\n+ \r\n+ private:\r\n+ \r\n+ // The array of sequence lengths\r\n+ unsigned int* lsequences;\r\n+ \r\n+ // Setup a valid empty object\r\n+ virtual void init();\r\n+ };\r\n+}\r\n+ \r\n+#endif\r\n" |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Controller.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Controller.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,156 @@ +/* + Copyright 2009-2010 St�phane De Mita, Mathieu Siol + + This file is part of the EggLib library. + + EggLib is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + EggLib is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with EggLib. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef EGGLIB_CONTROLLER_HPP +#define EGGLIB_CONTROLLER_HPP + +#include "Current.hpp" +#include "Arg.hpp" +#include "ParamSet.hpp" + +namespace egglib { + + class Random; + + /** \brief Controls a coalescent simulation + * + * \ingroup coalesce + * + * This class generates the gene genealogy, based on the parameters + * stocked in a ParamSet object. + * + */ + class Controller { + + public: + + /** \brief Default constructor + * + * Uses a default ParamSet object that will not allow + * performing simulations. + * + */ + Controller(); + + /** \brief Destructor + * + */ + ~Controller(); + + /** \brief Standard constructor + * + * \param paramSet a ParamSet object containing run + * parameters (it is taken as a reference and stored as this + * so it must not be modified during simulations). + * + * \param random the address of the random number generator. + * + */ + Controller(const ParamSet* paramSet, Random* random); + + /** \brief Reset for a new simulation + * + * Object is reinitiliazed for a new simulation. + * + */ + void reset(); + + /** \brief Increments the coalescent model + * + * \return The number of lineages. 
+ * + */ + unsigned int step(); + + /** \brief Gets the Ancestral Recombination Graph + * + * \return The address of the ARG contained in the object. + * + */ + Arg* getArg(); + + /** \brief Applies a bottleneck to a given population + * + * The bottleneck is applied following Galtier, Depaulis and + * Barton (Genetics, 2000): the general time counter is + * stopped, and coalescence events are performed during a + * time (of normal coalescent process) given by the parameter + * strength. All coalescent events are instantaneous. + * + * \param populationIndex index of the population concerned + * by the event. + * + * \param strength strength of the bottleneck given by a + * number of time units (2N generations times the size of + * the population). + * + */ + void bottleneck(unsigned int populationIndex, double strength); + + /** \brief Migrate a complete population + * + * Takes all the lineages currently present in the population + * source to the population dest. + * + */ + void moveAllLineages(unsigned int source, unsigned int dest); + + /** \brief Migrate a complete population + * + * Takes all the lineages currently present in the population + * source to the population dest. + * + * \param source source population. + * \param dest destination population. + * \param probability the probability that a lineage of + * source migrates to dest. 
+ * + */ + void moveSomeLineages(unsigned int source, unsigned int dest, double probability); + + /// Adds an empty population + void addPopulation(); + + private: + + /// The copy constructor is disabled + Controller(const Controller& source) {} + + /// The assignment operator is disabled + Controller& operator=(const Controller& source) {return *this;} + + void diploids(); + double getMigrationTime(double& migrationParameterDestination); + void getCoalescenceTime(double& destTime, unsigned int& destPopIndex); + double getCoalescenceTimeForPopulation(unsigned int populationIndex); + double getRecombinationTime() const; + void migrate(double migrationParameter); + + const ParamSet* initialParamSet; + ParamSet paramSet; + Current current; + Arg arg; + + Random* random; + + }; + +} + +#endif |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Convert.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Convert.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
b'@@ -0,0 +1,234 @@\n+/*\r\n+ Copyright 2009 St\xc3\xa9phane De Mita, Mathieu Siol\r\n+\r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+\r\n+#ifndef EGGLIB_CONVERT_HPP\r\n+#define EGGLIB_CONVERT_HPP\r\n+\r\n+\r\n+#include "DataMatrix.hpp"\r\n+#include "Align.hpp"\r\n+#include "EggException.hpp"\r\n+#include "Random.hpp"\r\n+#include <string>\r\n+\r\n+#include "config.h"\r\n+\r\n+#ifdef HAVE_LIBBPP_SEQ\r\n+#include <Bpp/Seq/Alphabet.all>\r\n+#include <Bpp/Seq/Sequence.h>\r\n+#include <Bpp/Seq/Container.all>\r\n+#endif\r\n+\r\n+\r\n+\r\n+namespace egglib {\r\n+\r\n+\r\n+ /** \\brief Performs conversion between sequence holder types\r\n+ *\r\n+ * \\ingroup core\r\n+ * \r\n+ * Static methods of this class allows conversion between sequence\r\n+ * holder types implying parametrizable modifications.\r\n+ * \r\n+ */\r\n+ class Convert {\r\n+\r\n+ public:\r\n+ \r\n+ /** \\brief DataMatrix to Align conversion\r\n+ * \r\n+ * By defaut, this method generates an Align instance\r\n+ * containing only the polymorphic sites. The integers of\r\n+ * the DataMatrix will be converted as follow: 0 to A, 1 to\r\n+ * C, 2 to G and 3 to T. This behaviour can be largely\r\n+ * modified using options.\r\n+ * \r\n+ * \\param dataMatrix DataMatrix instance.\r\n+ * \r\n+ * \\param length length of the desired alignment. 
Non-varying\r\n+ * stretches of data will be introduced to reach the\r\n+ * specified length. By default the positions of segregating\r\n+ * sites will be determined from the positions given by the\r\n+ * DataMatrix object. Those positions are expressed in a\r\n+ * continuous range, and will be discretized. Mutations\r\n+ * falling on the same site will be moved of one position\r\n+ * left or right (always preserving the order of mutation\r\n+ * sites). If positions are all zero (the default of the\r\n+ * DataMatrix class) and if length is larger than the number\r\n+ * of segregating sites, then all segregating sites will\r\n+ * cluster on the left-hand side of the alignment.\r\n+ * \r\n+ * \\param random the address to a Random object allowing to \r\n+ * draw random numbers (for randomizing positions and/or\r\n+ * non-varying states). If an address is provided but no\r\n+ * random numbers are required, it is ignored. If no address\r\n+ * if provided and random numbers are required, a Random\r\n+ * instance is built internally.\r\n+ * \r\n+ * \\param randomizePositions if true, the positions specified\r\n+ * in the DataMatrix objects are ignored and the positions of\r\n+ * mutations are drawn randomly along the interval (only if\r\n+ * the specified length is larger than the number of\r\n+ * segregating sites). If randomizePositions and false and\r\n+ * positions are not\r\n+ * \r\n+ * \\param enforceLength specify whether a\r\n+ * EggRuntimeError should be thrown when the number of\r\n+ * polymorphic sites is larger than the specified length. If\r\n+ * false (the default) and in cases where the specified\r\n+ * length is too short to harbor all polymorphic '..b'rt).\r\n+ * \r\n+ * \\param nonVaryingState character to use for conserved\r\n+ * stretches of data. It doesn\'t have to be included in the\r\n+ * mapping. 
If randomizeNonVaryingState is true, this\r\n+ * argument is ignored.\r\n+ * \r\n+ * \\return The resulting Align object.\r\n+ * \r\n+ */\r\n+ static Align align(\r\n+ DataMatrix& dataMatrix,\r\n+ unsigned int length=0,\r\n+ Random* random=NULL,\r\n+ bool randomizePositions=false,\r\n+ bool randomizeNonVaryingStates=false,\r\n+ bool randomizeAlleles=false,\r\n+ bool enforceLength=false,\r\n+ std::string mapping="ACGT",\r\n+ char unknown=\'?\',\r\n+ char nonVaryingState=\'A\'\r\n+ );\r\n+\r\n+\r\n+#ifdef HAVE_LIBBPP_SEQ\r\n+\r\n+ /** \\brief Converts an alignment to the equivalent Bio++ type\r\n+ *\r\n+ * During conversion, name information is lost (arbitrary\r\n+ * names are generated in order toprevent duplicate names).\r\n+ * The object is attached to an alphabet matching the passed\r\n+ * integer. The names are bare rank integers (starting at the\r\n+ * value giving by *offset*).\r\n+ *\r\n+ * \\param align the source alignment object.\r\n+ * \r\n+ * \\param alphabetID an integer indicating which alphabet to\r\n+ * use:\r\n+ * - 1 for DNA\r\n+ * - 2 for RNA\r\n+ * - 3 for proteins\r\n+ * - 4 for standard codon\r\n+ * - 5 for vertebrate mitochondrial codon\r\n+ * - 6 for invertebrate mitochondrial codon\r\n+ * - 7 for echinoderm mitochondrial codon\r\n+ * .\r\n+ * Other values will result in an exception.\r\n+ * \r\n+ * \\param outgroupFlag an integer indicating whether to\r\n+ * include outgroup sequences:\r\n+ * - 0 use all sequences\r\n+ * - 1 use only sequences without 999 label (ingroup)\r\n+ * - 2 use only sequences with 999 label (outgroup)\r\n+ * .\r\n+ * Other values will result in an exception.\r\n+ * \r\n+ * \\param offset enter an integer to shift the names of the\r\n+ * resulting alignment (useful to merge alignment and ensure\r\n+ * that names are not duplicated).\r\n+ * \r\n+ * \\return A Bio++ alignment.\r\n+ * \r\n+ */\r\n+ static bpp::AlignedSequenceContainer egglib2bpp(Align& align, unsigned int alphabetID, unsigned int outgroupFlag, unsigned 
int offset=0);\r\n+\r\n+#endif\r\n+\r\n+\r\n+\r\n+ protected:\r\n+\r\n+ /** \\brief This class cannot be instantiated\r\n+ * \r\n+ */\r\n+ Convert() { }\r\n+\r\n+\r\n+ /** \\brief This class cannot be instantiated\r\n+ * \r\n+ */\r\n+ Convert(const Convert& source) { }\r\n+\r\n+\r\n+ /** \\brief This class cannot be instantiated\r\n+ * \r\n+ */\r\n+ Convert& operator=(const Convert& source) { return *this; }\r\n+\r\n+\r\n+ /** \\brief This class cannot be instantiated\r\n+ * \r\n+ */\r\n+ virtual ~Convert() { }\r\n+\r\n+#ifdef HAVE_LIBBPP_SEQ\r\n+ static bpp::DNA dnaAlphabet;\r\n+ static bpp::RNA rnaAlphabet;\r\n+ static bpp::ProteicAlphabet proteicAlphabet;\r\n+ static bpp::StandardCodonAlphabet standardCodonAlphabet;\r\n+ static bpp::VertebrateMitochondrialCodonAlphabet vertebrateMitochondrialCodonAlphabet;\r\n+ static bpp::InvertebrateMitochondrialCodonAlphabet invertebrateMitochondrialCodonAlphabet;\r\n+ static bpp::EchinodermMitochondrialCodonAlphabet echinodermMitochondrialCodonAlphabet;\r\n+#endif\r\n+\r\n+ };\r\n+}\r\n+\r\n+#endif\r\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Current.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Current.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,128 @@ +/* + Copyright 2009-2010 St�phane De Mita, Mathieu Siol + + This file is part of the EggLib library. + + EggLib is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + EggLib is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with EggLib. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef EGGLIB_CURRENT_HPP +#define EGGLIB_CURRENT_HPP + + +namespace egglib { + + class Population; + class ParamSet; + + /** \brief Represents the current set of populations + * + * \ingroup coalesce + * + */ + class Current { + + public: + + /** \brief Default constructor + * + */ + Current(); + + /** \brief Standard constructor + * + * \param paramSet allows to initiate the correct structure + * of populations. + * + */ + Current(ParamSet* paramSet); + + /** \brief Rebuilds the object + * + * \param paramSet allows to initiate the correct structure + * of populations. 
+ * + */ + void reset(ParamSet* paramSet); + + /** \brief Destructor + * + */ + virtual ~Current(); + + /** \brief Copy constructor + * + */ + Current(const Current&); + + /** \brief Assignment operator + * + */ + Current& operator=(const Current&); + + /** \brief Gets the current number of populations + * + */ + unsigned int numberOfPopulations() const; + + + /** \brief Adds an empty population to the system + * + */ + void addPopulation(); + + + /** \brief Gets the number of lineages contained by a given + * population + * + */ + unsigned int populationNumberOfLineages(unsigned int populationIndex) const; + + + /** \brief Provides access to a given population + * + * The returned pointer can be used to modify the object. + * + */ + Population* population(unsigned int populationIndex); + + + /** \brief Total number of lineages + * + */ + unsigned int totalNumberOfLineages() const; + + + /** \brief Efficient number of lineages + * + * This sums the number of covered segments of each lineage. + * + */ + unsigned int efficientNumberOfLineages() const; + + + private: + + void setPopulationArray(); + void copy(const Current&); + void clear(); + + unsigned int _numberOfPopulations; + unsigned int _numberOfSegments; + Population** populations; + }; + +} + +#endif |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/DataMatrix.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/DataMatrix.hpp Fri Jul 10 04:39:30 2015 -0400 |
[ |
@@ -0,0 +1,219 @@ +/* + Copyright 2009-2010 St�phane De Mita, Mathieu Siol + + This file is part of the EggLib library. + + EggLib is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + EggLib is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with EggLib. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef EGGLIB_DATAMATRIX_HPP +#define EGGLIB_DATAMATRIX_HPP + + +#include "CharMatrix.hpp" + + +namespace egglib { + + /** \brief Data table + * + * \ingroup core + * + * Holds a data matrix representing genotype data from simulations. + * Data are stored as integers, to each site is associated a + * position, and to each sequence is associated a group index (any + * integer labelling, for example, a subpopulation). Supports the + * CharMatrix interface with the condition that allele genotype + * datum is >=0 and <=9. + * + */ + class DataMatrix : public CharMatrix { + + public: + + /** \brief Default constructor + * + * The data table default dimensions are {0,0} + * + */ + DataMatrix(); + + + /** \brief Standard constructor + * + * The data table dimensions must be given. + * Each cell default default is 0, and each site position is 0.. + * + * \param numberOfSequences number of lines of the data table. + * \param numberOfSites number of columns of the data table. 
+ * + */ + DataMatrix(unsigned int numberOfSequences, unsigned int numberOfSites); + + + /** \brief Copy constructor + * + */ + DataMatrix(const DataMatrix&); + + + /** \brief Copy constructor + * + */ + DataMatrix(const CharMatrix&); + + + /** \brief Assignment operator + * + */ + virtual DataMatrix& operator=(const DataMatrix&); + + + /** \brief Assignment operator + * + */ + virtual DataMatrix& operator=(const CharMatrix&); + + + /** \brief Destructor + * + */ + virtual ~DataMatrix(); + + + /** \brief Gets number of sites + * + */ + unsigned int numberOfSites() const; + + + /** \brief Gets number of sequences + * + */ + unsigned int numberOfSequences() const; + + + /** \brief Sets a value of the data table + * + */ + void set(unsigned int sequence, unsigned int site, int value); + + + /** \brief Gets a value from the data table + * + */ + int get(unsigned int sequence, unsigned int site) const; + + + /** \brief Faster and unsecure version of get + * + */ + inline int fget(unsigned int sequence, unsigned int site) const { + return dataMatrix[sequence][site]; + } + + + /** \brief Sets the position of a site + * + */ + void sitePosition(unsigned int site, double value); + + + /** \brief Gets the position of a site + * + */ + double sitePosition(unsigned int site) const; + + + /** \brief Sets the group label of a sequence + * + */ + void populationLabel(unsigned int sequence, unsigned int value); + + + /** \brief Gets the group label of a sequence + * + */ + unsigned int populationLabel(unsigned int sequence) const; + + + /** \brief Removes all information from the object + * + */ + void clear(); + + + /** \brief Resizes the data matrix + * + * \param newNumberOfSequences number of sequences (rows) + * \param newNumberOfSites number of sites (columns) + * + * If new values are larger, data already set is left unchanged. + * New data are set to zero. 
+ * + */ + void resize(unsigned int newNumberOfSequences, unsigned int newNumberOfSites); + + + /** \brief Shifts allele value + * + * \param minimum the minimum allele value. + * + * Shifts all alleles at all sites to ensure that alleles alleles + * are equal to or larger than minimum. The shifting is specific + * to each site. + * + */ + void shift(int minimum); + + /** \brief Gets the character at a given position + * + * An exception is generated if the allele value at this + * position is not >=0 and <=9. Not out-of-bound check is + * performed. + * + */ + char character(unsigned int sequence, unsigned int site) const; + + + + private: + + // Initializes to default values (for empty object) + void init(); + + // Copies from a source object + virtual void copy(const CharMatrix&); + + // Copies from a source object + virtual void copy(const DataMatrix&); + + // Number of lines of the data matrix + unsigned int _numberOfSequences; + + // Number of columns of the data matrix + unsigned int _numberOfSites; + + // Data matrix + int **dataMatrix; + + // Vector of site positions + double *positions; + + // Vector of group indices + unsigned int *groups; + }; +} + +#endif |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Edge.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Edge.hpp Fri Jul 10 04:39:30 2015 -0400 |
[ |
b'@@ -0,0 +1,331 @@\n+/*\r\n+ Copyright 2009-2010 St\xe9phane De Mita, Mathieu Siol\r\n+\r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+#ifndef EGGLIB_EDGE_HPP\r\n+#define EGGLIB_EDGE_HPP\r\n+\r\n+#include <vector>\r\n+#include <climits>\r\n+#include "EggException.hpp"\r\n+\r\n+namespace egglib {\r\n+\r\n+ class Random;\r\n+\r\n+ /** \\brief Edge of the ancestral recombination graph\r\n+ *\r\n+ * \\ingroup coalesce\r\n+ *\r\n+ * Each Edge instance provides access to its 0, 1 or 2 descendants\r\n+ * (the former holds for a terminal node, the middle for the parent\r\n+ * of a recombined node and the latter for the parent of a coalesced\r\n+ * node (most classical node in the coalescent).The Edge also\r\n+ * provides to the edge length. Note that the Edge instance must be\r\n+ * understood as an ARG node and the branch above it (latter in the\r\n+ * coalescence process). Edge instances also keep track of the list\r\n+ * of descendants descending from this node (which may differ along\r\n+ * recombining segment). Edge instances *must* be created through one\r\n+ * of the "default" and "coalescence" constructors or through the \r\n+ * recombination method. 
Edge instances should never be copied but\r\n+ * manipulated by references.\r\n+ * \r\n+ */\r\n+ class Edge {\r\n+ \r\n+ public:\r\n+ \r\n+ /// Destructor\r\n+ virtual ~Edge();\r\n+ \r\n+ /** \\brief Constructor\r\n+ * \r\n+ * \\param numberOfSegments the number of recombining segments\r\n+ * (one for a non-recombining region).\r\n+ * \r\n+ * Use the Pool, instead. Objects are delivered with a\r\n+ * complete coverage.\r\n+ * \r\n+ */ \r\n+ Edge(unsigned int numberOfSegments);\r\n+\r\n+\r\n+ /// Restore object to `factory` state\r\n+ void reset();\r\n+\r\n+\r\n+ /** \\brief Builds for internal node\r\n+ * \r\n+ * \\param date the date of creation of the edge.\r\n+ * \\param son1 first edge descending from this edge.\r\n+ * \\param son2 second edge descending from this edge.\r\n+ * \\param edgesPerSegments counts the current number of\r\n+ * (non-coalesced lineages for each lineages); must have the\r\n+ * appropriate size and will be updated.\r\n+ * \\param MRCA the list where to place the address of segment\r\n+ * MRCA, if it occurs.\r\n+ * \\param totalLength the total length of the tree.\r\n+ * \\param segmentLengths the table of tree lengths per\r\n+ * segment.\r\n+ *\r\n+ * Assumes the current object has the correct number of\r\n+ * segments.\r\n+ * \r\n+ */\r\n+ void coalescence(double date, Edge* son1, Edge* son2,\r\n+ unsigned int* edgesPerSegments, Edge** MRCA,\r\n+ double& totalLength, double* segmentLengths);\r\n+\r\n+\r\n+ /** \\brief Generates a recombination event\r\n+ * \r\n+ * \\param date the date of the event.\r\n+ * \\param dest1 destination for the first resulting edge.\r\n+ * \\param dest2 destination for the second resulting edge.\r\n+ * \\param random pointer to the Random instance used by the \r\n+ * simulator.\r\n+ '..b"numberOfSegments) {\r\n+ if (segbools[i]==true) {\r\n+ totalLength += segments[i]*length;\r\n+ for (j=0; j<segments[i]; j++) {\r\n+ segmentLengths[i+j] += length;\r\n+ }\r\n+ }\r\n+ i+=segments[i];\r\n+ }\r\n+ 
}\r\n+\r\n+\r\n+ };\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+\r\n+ /** \\brief Pool of Edge objects\r\n+ *\r\n+ * \\ingroup coalesce\r\n+ *\r\n+ * Holds a pool of Edge objects that can be recycled to spare the\r\n+ * building burden. A construction time, a number of Edge objects\r\n+ * equals to the predicted number of needed instances should be\r\n+ * requested. The Edge's will be prebuilt immediately and delivered\r\n+ * upon request. After use, the Edge's should be released. It is only\r\n+ * possible to release the last issued Edge instance or all of them\r\n+ * at once.\r\n+ * \r\n+ */\r\n+ class EdgePool {\r\n+\r\n+ public:\r\n+ \r\n+ /// Default constructor (nothing allocated)\r\n+ EdgePool();\r\n+\r\n+\r\n+ /// Destructor\r\n+ virtual ~EdgePool();\r\n+\r\n+\r\n+ /** \\brief Configure pool\r\n+ * \r\n+ * Pre-allocates a given number of Edge objects. The objects\r\n+ * will be immediately available.\r\n+ * \r\n+ * Data previously allocated (by a previous call of this\r\n+ * function or by the deliver() method) will be lost so it\r\n+ * can be required to use clear() before.\r\n+ * \r\n+ * \\param numberOfSegments the number of segments of the\r\n+ * simulation; all Edge instances will use this value.\r\n+ * \r\n+ * \\param numberOfPreAllocated the number of Edge that should\r\n+ * be kept ready for immediate use.\r\n+ * \r\n+ */\r\n+ void set(unsigned int numberOfSegments, unsigned numberOfPreAllocated);\r\n+ \r\n+ \r\n+ /** \\brief Frees internally stored memory\r\n+ * \r\n+ * This invalidate all points that have been delivered\r\n+ * previously. However, any previously set number of segments\r\n+ * (0, by default) is retained.\r\n+ * \r\n+ */\r\n+ void clear();\r\n+\r\n+ \r\n+ /** \\brief Deliver an Edge\r\n+ * \r\n+ * The object must not be freed by the client! 
This object is\r\n+ * allocated on the heap if the cache is not large enough,\r\n+ * only reset if it was previously released, or just delivered\r\n+ * if it is one of the initially allocated instances.\r\n+ * \r\n+ */\r\n+ Edge* deliver();\r\n+ \r\n+ \r\n+ /** \\brief Release an Edge\r\n+ * \r\n+ * Release the last delivered Edge. The instance is only\r\n+ * cached for a potential future use; it is not freed nor\r\n+ * reset immediately. If no Edge's are in use, nothing is\r\n+ * done.\r\n+ * \r\n+ */\r\n+ void releaseLast();\r\n+\r\n+ \r\n+ /** \\brief Release all Edge's\r\n+ * \r\n+ * Release all delivered Edges. The instances are only\r\n+ * cached for a potential future use; they are not freed nor\r\n+ * reset immediately. If no Edge's are in use, nothing is\r\n+ * done.\r\n+ * \r\n+ */\r\n+ void releaseAll();\r\n+\r\n+ private:\r\n+ \r\n+ /// Not available\r\n+ EdgePool(const EdgePool& ep) {}\r\n+ \r\n+ /// Not available\r\n+ EdgePool& operator=(const EdgePool& ep) { return *this; }\r\n+\r\n+ unsigned int numberOfSegments;\r\n+ unsigned int used;\r\n+ unsigned int released;\r\n+ unsigned int ready;\r\n+ Edge** cache;\r\n+ \r\n+ };\r\n+\r\n+}\r\n+\r\n+#endif\r\n" |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/EggException.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/EggException.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,173 @@ +/* + Copyright 2009 Stéphane De Mita, Mathieu Siol + + This file is part of the EggLib library. + + EggLib is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + EggLib is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with EggLib. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef EGGLIB_EGGEXCEPTION_HPP +#define EGGLIB_EGGEXCEPTION_HPP + +#include <string> +#include <exception> + +namespace egglib { + + /** \brief Base exception type for errors occurring in this library + * + * \ingroup core + * + */ + class EggException : public std::exception { + public: + /// Constructor with empty error message + EggException(); + /// Creates the exception + EggException(const char* message); + /// Destructor + ~EggException() throw() {} + /// Gets error message + virtual const char* what() const throw(); + + protected: + std::string message; + + }; + + + /** \brief Exception type for memory errors + * + * \ingroup core + * + */ + class EggMemoryError : public EggException { + public: + /// Creates the exception + EggMemoryError(); + /// Destructor + ~EggMemoryError() throw() {} + }; + + + /** \brief Exception type for argument value errors + * + * \ingroup core + * + */ + class EggArgumentValueError : public EggException { + public: + /// Creates the exception + EggArgumentValueError(const char* m ); + /// Destructor + ~EggArgumentValueError() throw() {} + }; + + + /** \brief Exception type for runtime errors + * + * Runtime error definition is rather large. Includes bugs as well + * as logical errors. 
+ * + * \ingroup core + * + */ + class EggRuntimeError : public EggException { + public: + /// Creates the exception + EggRuntimeError(const char* m ); + /// Destructor + ~EggRuntimeError() throw() {} + }; + + + /** \brief Exception type for file/string formatting errors + * + * \ingroup core + * + */ + class EggFormatError : public EggException { + public: + /// Creates the exception + EggFormatError(const char* fileName, const char* expectedFormat, const char* m); + /// Destructor + ~EggFormatError() throw() {} + /// Gets the file name + std::string fileName() const; + /// Gets the expected format + std::string expectedFormat() const; + /// Formats a longer string + virtual const char* what_more() const; + + protected: + std::string fname; + std::string eformat; + }; + + + /** \brief Exception type for errors while opening a file + * + * \ingroup core + * + */ + class EggOpenFileError : public EggException { + public: + /// Creates the exception + EggOpenFileError(const char* fileName ); + /// Destructor + ~EggOpenFileError() throw() {} + }; + + + /** \brief Exception type for unaligned sequences + * + * \ingroup core + * + */ + class EggUnalignedError : public EggException { + public: + /** \brief Creates the exception + * + */ + EggUnalignedError(); + + /** \brief Destructor + * + */ + ~EggUnalignedError() throw() {} + }; + + /** \brief Exception type for invalid character + * + * \ingroup core + * + */ + class EggInvalidCharacterError : public EggException { + public: + /** \brief Creates the exception + * + */ + EggInvalidCharacterError(char c, unsigned int seqIndex, unsigned int posIndex); + + /** \brief Destructor + * + */ + ~EggInvalidCharacterError() throw() {} + }; + +} + + + +#endif |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/FStatistics.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/FStatistics.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
b'@@ -0,0 +1,288 @@\n+/*\r\n+ Copyright 2009 St\xe9phane De Mita, Mathieu Siol\r\n+\r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+#ifndef EGGLIB_FSTATISTICS_HPP\r\n+#define EGGLIB_FSTATISTICS_HPP\r\n+\r\n+\r\n+\r\n+namespace egglib {\r\n+\r\n+\r\n+ /** \\brief Computes Fis, Fst and Fit from diploid data\r\n+ *\r\n+ * The class requires loading data. Data are loaded by individual\r\n+ * (two genotypes per individual). The analyses are cached: they are\r\n+ * performed upon the first call to statistics accessors. The cache\r\n+ * is emptied whenever a datum is loaded.\r\n+ * \r\n+ * The computations are performed after Weir and Cockerham. The\r\n+ * statistics F, theta and f are generalized for multiple alleles.\r\n+ * To allow computation of multi-locus statistics, variance\r\n+ * components are also available. The three components of the\r\n+ * variance are Vpopulation (between-population), Vindividual\r\n+ * (within-population, between-individual) and Vallele (within-\r\n+ * individual). 
The formulas to compute the F-statistics are as\r\n+ * follows:\r\n+ * - 1-F = Vallele/(Vpopulation+Vindividual+Vallele)\r\n+ * - theta = Vpopulation/(Vpopulation+Vindividual+Vallele)\r\n+ * - 1-f = Vallele/(Vindividual+Vallele).\r\n+ * \r\n+ * \\ingroup polymorphism\r\n+ *\r\n+ */\r\n+ class FStatistics {\r\n+ \r\n+ public:\r\n+ \r\n+ /** \\brief Constructor\r\n+ * \r\n+ */ \r\n+ FStatistics();\r\n+\r\n+ \r\n+ /** \\brief Destructor\r\n+ * \r\n+ */ \r\n+ virtual ~FStatistics();\r\n+\r\n+ \r\n+ /** \\brief Reserve sufficient memory for a given number of\r\n+ * individuals.\r\n+ * \r\n+ * This method makes the load function faster by allocating\r\n+ * all required memory at once.\r\n+ * \r\n+ * \\param numberOfIndividuals a strictly positive integer.\r\n+ * \r\n+ */\r\n+ void reserve(unsigned int numberOfIndividuals);\r\n+\r\n+\r\n+ /** \\brief Loads the data for one individual\r\n+ * \r\n+ * \\param genotype1 an integer giving the first allele.\r\n+ * \\param genotype2 an integer giving the second allele.\r\n+ * \\param populationLabel an integer indication belonging to\r\n+ * a population.\r\n+ * \r\n+ * Genotypes and population labels are not required to be\r\n+ * consecutive (both are labels, not indices). They are\r\n+ * internally mapped to indices (the mapping can be obtained\r\n+ * by accessors populationLabel and allele).\r\n+ * \r\n+ * All genotypes are considered to be valid (no missing data).\r\n+ * If statistics were computed previous to call to this\r\n+ * function, all data will be erase.\r\n+ * \r\n+ */\r\n+ void loadIndividual(unsigned int genotype1,\r\n+ unsigned int genotype2, unsigned int populationLabel);\r\n+\r\n+\r\n+ /** \\brief Label of a population\r\n+ * \r\n+ * The index corresponds to the local mapping of populations\r\n+ * regardless of the ranking of population labels. 
(No out\r\n+ * of bound checking.)\r\n+ * \r\n+ */\r\n+ unsigne'..b'leIndex);\r\n+\r\n+\r\n+ /** \\brief Absolute genotype frequency\r\n+ * \r\n+ * Note that allele AB is considered different to BA (this\r\n+ * means that values can be accessed both sides of the\r\n+ * diagonal.\r\n+ * \r\n+ */\r\n+ unsigned int genotypeFrequencyTotal(unsigned int alleleIndex1, unsigned int alleleIndex2);\r\n+\r\n+\r\n+ /** \\brief Absolute genotype frequency in a population\r\n+ * \r\n+ * Note that allele AB is considered different to BA (this\r\n+ * means that values can be accessed both sides of the\r\n+ * diagonal.\r\n+ * \r\n+ */\r\n+ unsigned int genotypeFrequencyPerPopulation(unsigned int populationIndex, unsigned int alleleIndex1, unsigned int alleleIndex2);\r\n+\r\n+ \r\n+ /** \\brief Sample size of a population\r\n+ * \r\n+ */\r\n+ unsigned int populationFrequency(unsigned int populationIndex);\r\n+\r\n+\r\n+ /** \\brief Weir-Cockerham F-statistic\r\n+ * \r\n+ * Note: equivalent to Fit.\r\n+ * \r\n+ */\r\n+ double F();\r\n+\r\n+\r\n+ /** \\brief Weir-Cockerham theta-statistic\r\n+ * \r\n+ * Note: equivalent to Fst.\r\n+ * \r\n+ */\r\n+ double theta();\r\n+\r\n+\r\n+ /** \\brief Weir-Cockerham f-statistic\r\n+ * \r\n+ * Note: equivalent to Fis.\r\n+ * \r\n+ */\r\n+ double f();\r\n+ \r\n+\r\n+ /** \\brief Between-population component of variance\r\n+ * \r\n+ */\r\n+ double Vpopulation();\r\n+\r\n+\r\n+ /** \\brief Within-population, between-individual component of variance\r\n+ * \r\n+ */\r\n+ double Vindividual();\r\n+ \r\n+ \r\n+ /** \\brief Within-individual component of variance\r\n+ * \r\n+ */\r\n+ double Vallele();\r\n+\r\n+\r\n+ protected:\r\n+ \r\n+ bool d_flag;\r\n+ void d_init();\r\n+ void d_clear();\r\n+ unsigned int d_reserved;\r\n+ unsigned int d_numberOfGenotypes;\r\n+ unsigned int *d_genotypes;\r\n+ unsigned int *d_populationLabels;\r\n+\r\n+ bool s_flag;\r\n+ void s_init();\r\n+ void s_clear();\r\n+ void s_compute();\r\n+ void processPopulations();\r\n+ void 
processAlleles();\r\n+ unsigned int getPopulationIndex(unsigned int) const;\r\n+ unsigned int getAlleleIndex(unsigned int) const;\r\n+ unsigned int s_numberOfAlleles;\r\n+ unsigned int *s_alleleValueMapping;\r\n+ unsigned int s_numberOfPopulations;\r\n+ unsigned int *s_populationLabelMapping;\r\n+ unsigned int *s_populationFrequencies;\r\n+ unsigned int *s_alleleFrequenciesTotal;\r\n+ unsigned int **s_alleleFrequenciesPerPopulation;\r\n+ unsigned int **s_genotypeFrequenciesTotal;\r\n+ unsigned int ***s_genotypeFrequenciesPerPopulation;\r\n+\r\n+ bool w_flag;\r\n+ void w_init();\r\n+ void w_clear();\r\n+ void w_compute();\r\n+ double w_F;\r\n+ double w_T;\r\n+ double w_f;\r\n+ double *w_a;\r\n+ double *w_b;\r\n+ double *w_c;\r\n+ double w_nbar;\r\n+ double w_nc;\r\n+ double *w_pbar;\r\n+ double *w_ssquare;\r\n+ double *w_hbar;\r\n+ double w_sum_a;\r\n+ double w_sum_b;\r\n+ double w_sum_c;\r\n+ double w_sum_abc;\r\n+ double w_sum_bc;\r\n+\r\n+ \r\n+ private:\r\n+ \r\n+ FStatistics(const FStatistics& source) { }\r\n+ \r\n+ FStatistics& operator=(const FStatistics& source) {\r\n+ return *this;\r\n+ }\r\n+\r\n+ };\r\n+}\r\n+\r\n+#endif\r\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Fasta.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Fasta.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
b'@@ -0,0 +1,278 @@\n+/*\r\n+ Copyright 2008-2009 St\xe9phane De Mita, Mathieu Siol\r\n+\r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+#ifndef EGGLIB_FASTA_HPP\r\n+#define EGGLIB_FASTA_HPP\r\n+\r\n+#include <istream>\r\n+#include <iostream>\r\n+#include <string>\r\n+#include "Container.hpp"\r\n+\r\n+namespace egglib {\r\n+\r\n+ /** \\brief Fasta parser/formatted\r\n+ *\r\n+ * \\ingroup core\r\n+ *\r\n+ * Reads a multifasta sequence file from a string, a stream or a file\r\n+ * and returns a Container. See the description of the format below.\r\n+ * Formats a fasta string from a sequence container object and places\r\n+ * it in a string, a stream of a file. All methods are static and the\r\n+ * class cannot be instantiated. The methods parsef and formatf will\r\n+ * open the file for you while the others will read/write directly\r\n+ * in a string.\r\n+ * \r\n+ * Specifications of the fasta format:\r\n+ * \r\n+ * - The number of sequences is not limited.\r\n+ * \r\n+ * - Each sequence is preceded by a header limited to a single\r\n+ * line and starting by a ">" character.\r\n+ * \r\n+ * - The header length is not limited and all characters are\r\n+ * allowed but white spaces and special characters are\r\n+ * discouraged.\r\n+ * \r\n+ * - Group indices are specified by \\@0, \\@1, \\@2... 
strings\r\n+ * appearing at the end of the header string (just before the\r\n+ * carriage return). Note that group labels are ignored by\r\n+ * default.\r\n+ * \r\n+ * - Group indices are ignored unless specifically specified in a\r\n+ * parser\'s options.\r\n+ * \r\n+ * - The sequence itself continues on following lines until the\r\n+ * next ">" character or the end of the file.\r\n+ * \r\n+ * - White spaces, tab and carriage returns are allowed at any\r\n+ * position There is no limitation in length and different\r\n+ * sequences can have different lengths.\r\n+ * \r\n+ * - Although the standard is lower case characters, Fasta\r\n+ * assumes upper case characters and only supports lower case\r\n+ * characters (and converts them to upper case characters).\r\n+ * Information coded by change in case is lost.\r\n+ *\r\n+ */\r\n+ class Fasta {\r\n+\r\n+ public:\r\n+\r\n+ /** \\brief Imports a fasta file\r\n+ *\r\n+ * Imports the content of the file as is. Calls the method\r\n+ * pase(std::istream*, bool) by creating its own istream.\r\n+ *\r\n+ * \\param fname the name of a fasta file.\r\n+ * \r\n+ * \\param importGroupLabels if set to true, scan automatically\r\n+ * for groups. The format is @ followed by an integer, placed\r\n+ * at the end of the header string(sequences without labels\r\n+ * will be treated as \\@0).\r\n+ * \r\n+ * \\return A Container object containing the sequences.\r\n+ * \r\n+ */\r\n+ static Container parsef(const char* fname, bool importGroupLabels=false);\r\n+\r\n+\r\n+ /** \\brief Imports a fasta file\r\n+ *\r\n+ * Imports the content of the file as is. Calls the method\r\n+ * pase(std::istream*, bool) by creating its own istream. 
This\r\n+ * method expects a reference to a Container to which the\r\n+ * sequences will be appended.\r\n+ *\r\n+'..b'ner object containing the sequences.\r\n+ * \r\n+ */\r\n+ static Container parse(std::istream& stream, bool importGroupLabels=false);\r\n+\r\n+\r\n+ /** \\brief Imports a fasta file from an open stream\r\n+ *\r\n+ * Imports the content of the file as is. This\r\n+ * method expects a reference to a Container to which the\r\n+ * sequences will be appended.\r\n+ *\r\n+ * \\param stream an open stream (file or string) containing the\r\n+ * data.\r\n+ * \r\n+ * \\param container a Container instance, empty or not.\r\n+ * \r\n+ * \\param importGroupLabels if set to true, scan automatically\r\n+ * for groups. The format is @ followed by an integer, placed\r\n+ * at the end of the header string(sequences without labels\r\n+ * will be treated as \\@0).\r\n+ * \r\n+ * \\return Nothing: the new sequences are appended to the\r\n+ * Container passed as argument.\r\n+ * \r\n+ */\r\n+ static void parse(std::istream& stream, Container& container, bool importGroupLabels=false);\r\n+ \r\n+ \r\n+ /** \\brief Export sequences as fasta\r\n+ *\r\n+ * \\param fname the name of the file where to place the result.\r\n+ * \r\n+ * \\param container Container object to export.\r\n+ * \r\n+ * \\param exportGroupLabels if set to true, exports group\r\n+ * indices as a \\@x at the end of the sequence name, where x is\r\n+ * the group index. Otherwise, this information is discarded.\r\n+ * \r\n+ * \\param lineLength the number of characters to place on a\r\n+ * single line. 
If zero, no newlines are inserted within\r\n+ * sequences.\r\n+ * \r\n+ */\r\n+ static void formatf(const char* fname, const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50);\r\n+\r\n+\r\n+ /** \\brief Export sequences as fasta\r\n+ *\r\n+ * \\param file an open stream.\r\n+ * \r\n+ * \\param container Container object to export.\r\n+ * \r\n+ * \\param exportGroupLabels if set to true, exports group\r\n+ * indices as a \\@x at the end of the sequence name, where x is\r\n+ * the group index. Otherwise, this information is discarded.\r\n+ * \r\n+ * \\param lineLength the number of characters to place on a\r\n+ * single line. If zero, no newlines are inserted within\r\n+ * sequences.\r\n+ * \r\n+ */\r\n+ static void format(std::ostream& file, const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50);\r\n+\r\n+\r\n+ /** \\brief Export sequences as fasta\r\n+ * \r\n+ * This medod creates internally an ostringstream, calls the\r\n+ * method format(ostream, container, bool) and returns the\r\n+ * resulting string.\r\n+ *\r\n+ * \\param container Container object to export.\r\n+ * \r\n+ * \\param exportGroupLabels if set to true, exports group\r\n+ * indices as a \\@x at the end of the sequence name, where x is\r\n+ * the group index. Otherwise, this information is discarded.\r\n+ * \r\n+ * \\param lineLength the number of characters to place on a\r\n+ * single line. 
If zero, no newlines are inserted within\r\n+ * sequences.\r\n+ * \r\n+ * \\return The formatted string.\r\n+ * \r\n+ */\r\n+ static std::string format(const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50);\r\n+\r\n+ \r\n+ \r\n+ protected:\r\n+ \r\n+ /// This class cannot be instantiated\r\n+ Fasta() { }\r\n+ \r\n+ /// This class cannot be instantiated\r\n+ Fasta(const Fasta& source) { }\r\n+ \r\n+ /// This class cannot be or copied\r\n+ Fasta& operator=(const Fasta& source) { return *this; }\r\n+ \r\n+ /// This class cannot be instantiated\r\n+ virtual ~Fasta() { }\r\n+\r\n+ \r\n+ };\r\n+}\r\n+\r\n+#endif\r\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/HFStatistics.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/HFStatistics.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,227 @@ +/* + Copyright 2010 Stéphane De Mita, Mathieu Siol + + This file is part of the EggLib library. + + EggLib is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + EggLib is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with EggLib. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef EGGLIB_HFSTATISTICS_HPP +#define EGGLIB_HFSTATISTICS_HPP + + + +namespace egglib { + + + /** \brief Computes Fst and Fit from haploid data + * + * The class requires loading data. Data are loaded by haploid + * (one genotype per individual). The analyses are cached: they are + * performed upon the first call to statistics accessors. The cache + * is emptied whenever a datum is loaded. + * + * The computations are performed after Weir and Cockerham. The + * statistic theta is generalized for multiple alleles. To allow + * computation of multi-locus statistics, variance components are + * also available. The two components of the variance are T1 and T2 + * and theta is T1/T2 (from Weir 1996 "Genetic Data Analysis II", + * Sinauer associates, Sunderland MA). + * + * \ingroup polymorphism + * + */ + class HFStatistics { + + public: + + /** \brief Constructor + * + */ + HFStatistics(); + + + /** \brief Destructor + * + */ + virtual ~HFStatistics(); + + + /** \brief Reserve sufficient memory for a given number of + * individuals. + * + * This method makes the load function faster by allocating + * all required memory at once. + * + * \param numberOfIndividuals a strictly positive integer. 
+ * + */ + void reserve(unsigned int numberOfIndividuals); + + + /** \brief Loads the data for one individual + * + * \param genotype an integer giving the allele. + * \param populationLabel an integer indicating belonging to + * a population. + * + * Genotypes and population labels are not required to be + * consecutive (both are labels, not indices). They are + * internally mapped to indices (the mapping can be obtained + * by accessors populationLabel and allele). + * + * All genotypes are considered to be valid (no missing data). + * If statistics were computed previous to call to this + * function, all data will be erased. + * + */ + void loadIndividual(unsigned int genotype, unsigned int populationLabel); + + + /** \brief Label of a population + * + * The index corresponds to the local mapping of populations + * regardless of the ranking of population labels. (No out + * of bound checking.) + * + */ + unsigned int populationLabel(unsigned int populationIndex); + + + /** \brief Value of an allele + * + * The index corresponds to the local mapping of alleles + * regardless of the ranking of allele values. (No out of + * bound checking.) 
+ * + */ + unsigned int alleleValue(unsigned int alleleIndex); + + + /// Allele of a given individual (no checking) + unsigned int allele(unsigned int individualIndex) const; + + /// Population label of a given individual (no checking) + unsigned int individualLabel(unsigned int individualIndex) const; + + + /** \brief Number of alleles + * + */ + unsigned int numberOfAlleles(); + + + /** \brief Number of populations + * + */ + unsigned int numberOfPopulations(); + + + /** \brief Number of loaded genotypes + * + */ + unsigned int numberOfGenotypes() const; + + + /** \brief Absolute total allele frequency + * + */ + unsigned int alleleFrequencyTotal(unsigned int alleleIndex); + + + /** \brief Absolute allele frequency in a population + * + */ + unsigned int alleleFrequencyPerPopulation(unsigned int populationIndex, unsigned int alleleIndex); + + + /** \brief Sample size of a population + * + */ + unsigned int populationFrequency(unsigned int populationIndex); + + + /** \brief Weir-Cockerham theta-statistic + * + * Note: equivalent to Fst. 
+ * + */ + double theta(); + + + /** \brief Between-population component of variance + * + */ + double T1(); + + + /** \brief Total variance + * + */ + double T2(); + + + protected: + + bool d_flag; + void d_init(); + void d_clear(); + unsigned int d_reserved; + unsigned int d_numberOfGenotypes; + unsigned int *d_genotypes; + unsigned int *d_populationLabels; + + bool s_flag; + void s_init(); + void s_clear(); + void s_compute(); + void processPopulations(); + void processAlleles(); + unsigned int getPopulationIndex(unsigned int) const; + unsigned int getAlleleIndex(unsigned int) const; + unsigned int s_numberOfAlleles; + unsigned int *s_alleleValueMapping; + unsigned int s_numberOfPopulations; + unsigned int *s_populationLabelMapping; + unsigned int *s_populationFrequencies; + unsigned int *s_alleleFrequenciesTotal; + unsigned int **s_alleleFrequenciesPerPopulation; + + bool w_flag; + void w_init(); + void w_clear(); + void w_compute(); + double w_T; + double *w_T1; + double *w_T2; + double w_nbar; + double w_nc; + double *w_pbar; + double *w_ssquare; + double w_sum_T1; + double w_sum_T2; + + + private: + + HFStatistics(const HFStatistics& source) { } + + HFStatistics& operator=(const HFStatistics& source) { + return *this; + } + + }; +} + +#endif |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/HaplotypeDiversity.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/HaplotypeDiversity.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,176 @@ +/* + Copyright 2008-2009 Stéphane De Mita, Mathieu Siol + + This file is part of the EggLib library. + + EggLib is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + EggLib is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with EggLib. If not, see <http://www.gnu.org/licenses/>. +*/ + + +#ifndef EGGLIB_HAPLOTYPEDIVERSITY_HPP +#define EGGLIB_HAPLOTYPEDIVERSITY_HPP + +#include "BaseDiversity.hpp" + +namespace egglib { + + + /** \brief Computes diversity based on haplotype analysis + * + * \ingroup polymorphism + * + * This class relies on detection of polymorphic sites, as does + * NucleotideDiversity, with the exception that sites with missing + * data cannot be processed (minimumExploitableData is enforced to + * 1.). + * + * Like NucleotideDiversity, the same object can be used to analyze + * different data sets. Only the call to load() is required before + * accessing the data. + * + * Hst, Gst and Kst are between population differentiation indices. + * They are respectively defined in equations 2, 5-6 and 9 of Hudson + * et al. 1992a (Molecular Biology and Evolution 9:138-151). Also, + * Fst is defined in equation 3 of Hudson et al. 1992b (Genetics + * 132:583-589). Finally, Snn is from Hudson 2000 Genetics. It is + * computed as the average of Xi for all sequences. Where Xi is the + * ratio of nearest neighbours from the same group to the number of + * nearest neighbours. Nearest neighbours are all the sequences with + * the lowest number of differences to the focal sequence. 
NOTE: + * Gst/Hst are quite similar, but Fst and Kst are more different. Snn + * is a different statistic. Gst and Hst are two ways to estimate the + * between-population fraction of haplotypic diversity. + * + */ + class HaplotypeDiversity : public BaseDiversity { + + public: + + /** \brief Constructor + * + */ + HaplotypeDiversity(); + + /** \brief Destructor + * + */ + virtual ~HaplotypeDiversity(); + + /** \brief Identifies polymorphic sites and computes basis + * statistics + * + * \param data an alignment object (subclass of CharMatrix). + * The presence of outgroup or of different populations will + * be detected based on the populationLabel members of the + * passed object. The populationLabel 999 will be interpreted + * as outgroups. If several outgroups are passed, sites where + * the outgroups are not consistent will be treated as "non- + * orientable". + * + * \param allowMultipleMutations if true, sites with more + * than two alleles will not be ignored. The sum of the + * frequencies of all alleles not matching the outgroup will + * be treated as the derived allele frequency (for orientable + * sites). + * + * \param ignoreFrequency removes sites that are polymorphic + * because of an allele at absolute frequency smaller than or + * equal to this value. If ignoreFrequency=0, no sites are + * removed, if ignoreFrequency=1, singleton sites are + * ignored. Such sites are completely removed from the + * analysis (not counted in lseff). Note that if more than + * one mutation is allowed, the site is removed only if all + * the alleles but one are smaller than or equal to this + * value. For example, an alignment column AAAAAAGAAT is + * ignored with an ignoreFrequency of 1, but AAAAAAGGAT is + * conserved (including the third allele T which is a + * singleton). + * + * \param characterMapping a string giving the list of + * characters that should be considered as valid data. 
If a + * space is present in the string, the characters left of the + * space will be treated as valid data and the characters + * right of the space will be treated as missing data, that + * is tolerated but ignored. All characters not in the string + * will cause an EggInvalidCharacterError to be raised. + * + */ + void load(CharMatrix& data, + bool allowMultipleMutations=false, + unsigned int ignoreFrequency=0, + std::string characterMapping=dnaMapping + ); + + /// Number of distinct haplotypes + unsigned int K() const; + + /// Haplotype diversity (unbiased) + double He() const; + + /** \brief Returns the allele number of a given sequence + * + * The passed index must be given ignoring any outgroup + * sequence. + * + */ + unsigned int haplotypeIndex(unsigned int) const; + + /// Population differentiation, based on nucleotides (Hudson 1992a) + double Kst() const; + + /// Population differentiation, based on nucleotides (Hudson 1992b) + double Fst() const; + + /// Population differentiation, based on haplotypes (Nei version) + double Gst() const; + + /// Population differentiation, based on haplotypes (Hudson et al. version) + double Hst() const; + + /// Hudson's Snn (nearest neighbor statistics) + double Snn() const; + + + protected: + + void init(); + void clear(); + + inline unsigned int diff(CharMatrix& data, unsigned int ind1, unsigned int ind2) const; + + bool m_loaded; + unsigned int m_K; + double m_He; + double m_Kst; + double m_Fst; + double m_Gst; + double m_Hst; + double m_Snn; + unsigned int *m_haplotypeIndex; + + + private: + + HaplotypeDiversity(const HaplotypeDiversity& source) { + + } + + HaplotypeDiversity& operator=(const HaplotypeDiversity& source) { + return *this; + } + + }; +} + +#endif |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/LinkageDisequilibrium.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/LinkageDisequilibrium.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
b'@@ -0,0 +1,233 @@\n+/*\r\n+ Copyright 2009 St\xe9phane De Mita, Mathieu Siol\r\n+\r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+#ifndef EGGLIB_LINKAGEDISEQUILIBRUM_HPP\r\n+#define EGGLIB_LINKAGEDISEQUILIBRUM_HPP\r\n+\n+\r\n+#include "BaseDiversity.hpp"\r\n+#include "EggException.hpp"\r\n+\r\n+\r\n+namespace egglib {\r\n+\r\n+ /** \\brief Analyzes linkage disequilibrium per pair of polymorphic sites\r\n+ *\r\n+ * \\ingroup polymorphism\r\n+ * \r\n+ * The class considers an alignment and detects polymorphic sites\r\n+ * using the BaseDiversity functionality (shared with other classes\r\n+ * of the module). Only sites with exactly two alleles are\r\n+ * considered. Statistics of pairwise linkage disequilibrium can\r\n+ * be accessed by pair index (note that out-of-range errors are not\r\n+ * checked). 
Population labels are ignored (but outgroups are\r\n+ * excluded from the analysis).\r\n+ *\r\n+ */\r\n+ class LinkageDisequilibrium : public BaseDiversity {\r\n+\r\n+ public:\n+\n+ /// Default constructor\r\n+ LinkageDisequilibrium();\n+\n+ /// Destructor\r\n+ virtual ~LinkageDisequilibrium();\r\n+\r\n+ /** \\brief Analyzes polymorphic sites of an alignment\r\n+ * \r\n+ * \\param data an alignment object (subclass of CharMatrix).\r\n+ * The presence of outgroup or of different populations will\r\n+ * be detected based on the populationLabel members of the\r\n+ * passed object. The populationLabel 999 will be interpreted\r\n+ * as outgroups. If several outgroups are passed, sites were\r\n+ * the outgroups are not consistent will be treated as "non-\r\n+ * orientable".\r\n+ * \r\n+ * \\param minimumExploitableData site where the non-missing\r\n+ * data (as defined by characterMapping) are at a frequency\r\n+ * larger than this value will be removed from the analysis.\r\n+ * Use 1. to take only \'complete\' sites into account and 0.\r\n+ * to use all sites.\r\n+ * \r\n+ * \\param ignoreFrequency removes sites that are polymorphic\r\n+ * because of an allele at absolute frequency smaller than or\r\n+ * equal to this value. If ignoreFrequency=1, no sites are\r\n+ * removed, if ignoreFrequency=1, singleton sites are\r\n+ * ignored. Such sites are completely removed from the\r\n+ * analysis (not counted in lseff). Note that if more than\r\n+ * one mutation is allowed, the site is removed only if all\r\n+ * the alleles but one are smaller than or equal to this\r\n+ * value. For example, an alignment column AAAAAAGAAT is\r\n+ * ignored with an ignoreFrequency of 1, but AAAAAAGGAT is\r\n+ * conserved (including the third allele T which is a\r\n+ * singleton).\r\n+ * \r\n+ * \\param characterMapping a string giving the list of\r\n+ * characters that should be considered as valid data. 
If a\r\n+ * space is present in the string, the characters left of the\r\n+ * space will be treated as valid data and the characters\r\n+ * right of the space will be treated as missing data, that\r\n+ * is tolerated but ignored. All characters not in the string\r\n+ * will cause an EggInvalidCharacterError to be raised.\r\n+ */\r\n+ void load(CharMatrix& data,\r\n+ double minimumExploitableData=1.,\r\n+ '..b'ir_index);\r\n+\r\n+ /// position of the first site for a given pair\r\n+ unsigned int site1(unsigned int pair_index);\r\n+\r\n+ /// position of the second site for a given pair\r\n+ unsigned int site2(unsigned int pair_index);\r\n+\r\n+ /// correlation coefficient between r2 and distance\r\n+ double correl() const;\r\n+ \r\n+ /** \\brief Computes the minimal number of recombination events\r\n+ * \r\n+ * The computation is performed as described in Hudson, RR and\r\n+ * NL Kaplan. 1985. Statistical properties of the number of\r\n+ * recombination events in the history of a sample of DNA\r\n+ * sequences. Genetics 111: 147-164. The returned parameter is\r\n+ * the minimal number of recombination events, given by the\r\n+ * number of non-overlapping pairs of segregating sites violating\r\n+ * the rule of the four gamete. Only sites with two alleles are\r\n+ * considered. Note that homoplasy (multiple mutations) mimicks\r\n+ * recombination. The result of this function is not stored\r\n+ * in this instance, and re-computed at each call.\r\n+ * \r\n+ * \\param data the same CharMatrix instance as passed to the load\r\n+ * method. 
The instance must not have been modified.\r\n+ * \r\n+ */\r\n+ unsigned int Rmin(CharMatrix& data) const;\r\n+\r\n+\r\n+\r\n+ protected:\r\n+ \r\n+ // adds a pair of polymorphic sites\r\n+ // assume position2>position1,\r\n+ // sites are polymorphic with exactly 2 alleles\r\n+ void add(CharMatrix& data, unsigned int position1, unsigned int position2);\r\n+\r\n+ // Constructor help\r\n+ void init();\r\n+ \r\n+ // Destructor helper\r\n+ void clear();\r\n+ \r\n+ // Resizes arrays\r\n+ void reset();\r\n+ \r\n+ // Small helper\r\n+ inline double min(double a, double b) { return (a>b)?a:b;}\r\n+\r\n+ // Small helper\r\n+ inline double max(double a, double b) { return (a>b)?b:a;}\r\n+\r\n+ // Small helper\r\n+ inline void check(unsigned int pos) { if (pos>=_n) throw EggRuntimeError("tried to access an invalid index"); }\r\n+\r\n+ /* Performs correlation\r\n+ *\r\n+ * This function works independently from the rest of the class.\r\n+ *\r\n+ * \\param n length of data arrays.\r\n+ * \\param x first data vector.\r\n+ * \\param y second data vector.\r\n+ * \\param r variable to receive the correlation coefficient.\r\n+ * \\param a variable to receive the regression slope.\r\n+ */\r\n+ static void _correl(unsigned int n, const int* x, const double* y, double& r, double& a);\r\n+\r\n+ // Distance between pairs\r\n+ int* _d;\r\n+ \r\n+ // D (classical) measure of LD\r\n+ double *_D;\r\n+ \r\n+ // D\'\r\n+ double *_Dp;\r\n+ \r\n+ // r, correlation coefficient\r\n+ double *_r;\r\n+ \r\n+ // square r\r\n+ double *_r2;\r\n+ \r\n+ // Data array (not managed by the instance)\r\n+ unsigned int *_site1;\r\n+\r\n+ // Data array (not managed by the instance)\r\n+ unsigned int *_site2;\r\n+ \r\n+ // Number of pairs\r\n+ unsigned int _n;\r\n+\r\n+ private:\r\n+ \r\n+ /// Copy constructor not available\r\n+ LinkageDisequilibrium(const LinkageDisequilibrium&) { }\r\n+\r\n+ /// Assignment operator not available\r\n+ LinkageDisequilibrium& operator=(const LinkageDisequilibrium&) {\r\n+ return 
*this;\r\n+ }\r\n+\r\n+\r\n+ class Interval {\r\n+ public:\r\n+ Interval(unsigned int, unsigned int);\r\n+ unsigned int a() const;\r\n+ unsigned int b() const;\r\n+ bool good() const;\r\n+ void set_false();\r\n+ private:\r\n+ unsigned int _a;\r\n+ unsigned int _b;\r\n+ unsigned int _good;\r\n+ };\r\n+\r\n+\r\n+ };\r\n+}\r\n+\r\n+#endif\r\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/MicrosatelliteDiversity.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/MicrosatelliteDiversity.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,120 @@ +/* + Copyright 2008-2010 Stéphane De Mita, Mathieu Siol + + This file is part of the EggLib library. + + EggLib is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + EggLib is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with EggLib. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef EGGLIB_MICROSATELLITEDIVERSITY_HPP +#define EGGLIB_MICROSATELLITEDIVERSITY_HPP + +#include "DataMatrix.hpp" +#include <cstdlib> + +namespace egglib { + + /** \brief Analyzes microsatellite data + * + * \ingroup polymorphism + * + * Use the load() method to analyze data. All sites will be analyzed, + * and the accessors return the value of a given statistic for + * a given site. The accessors perform no out-of-bounds checking + * on the site index. + * + */ + class MicrosatelliteDiversity { + + public: + + /** \brief Creates an object + * + */ + MicrosatelliteDiversity(); + + + /** \brief Destroys an object + * + */ + virtual ~MicrosatelliteDiversity(); + + + /** \brief Performs the analysis + * + * \param dataMatrix the object to analyze. + * + * \param missingData the integer identifying missing data. + * + * \param noMissingData if true, no allele will be + * excluded (including the one identified by the argument + * missingData). 
+ * + */ + void load(const DataMatrix& dataMatrix, + int missingData=999, bool noMissingData=false); + + + /// Number of sites (or markers) + unsigned int numberOfSites() const; + + /// Heterozygosity + double He(unsigned int siteIndex) const; + + /// Number of alleles + unsigned int numberOfAlleles(unsigned int siteIndex) const; + + /// Variance of allele size + double sizeVariance(unsigned int siteIndex) const; + + /// IAM-based estimator of theta + double thetaAssumingIAM(unsigned int siteIndex) const; + + /// SMM-based estimator of theta, calculated from He + double thetaAssumingSMMfromHe(unsigned int siteIndex) const; + + /// SMM-based estimator of theta, calculated from the variance of allele size + double thetaAssumingSMMfromSizeVariance(unsigned int siteIndex) const; + + + protected: + + unsigned int v_numberOfSites; // NOTE(review): the v_* members presumably back the same-named accessors above - confirm in the implementation file + double *v_He; + unsigned int *v_numberOfAlleles; + double *v_sizeVariance; + double *v_thetaAssumingIAM; + double *v_thetaAssumingSMMfromHe; + double *v_thetaAssumingSMMfromSizeVariance; + + void init(); + void clear(); + + + private: + + + /// No copy allowed: the copy constructor is private and has an empty body + MicrosatelliteDiversity(const MicrosatelliteDiversity& source) { + } + + /// No assignment allowed: the operator is private and returns *this without copying anything + MicrosatelliteDiversity& operator=(const MicrosatelliteDiversity& source) { + return *this; + } + + }; +} + +#endif |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Ms.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Ms.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
b'@@ -0,0 +1,199 @@\n+/*\r\n+ Copyright 2008,2009,2011 St\xe9phane De Mita and Mathieu Siol\r\n+\r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+#ifndef EGGLIB_GMS_HPP\r\n+#define EGGLIB_GMS_HPP\r\n+\r\n+#include "DataMatrix.hpp"\r\n+#include <string>\r\n+#include <istream>\r\n+\r\n+namespace egglib {\r\n+\r\n+ /** \\brief ms-like sequence format parser\r\n+ * \r\n+ * The class provides parsing (input) and formatting (output)\r\n+ * operations in ms format, that is the format used by Richard\r\n+ * Hudson\'s program ms for outputting genotypes and by the\r\n+ * associated program samplestat for reading them. Both types of\r\n+ * operations are available through static methods using either\r\n+ * a string or a stream (which can be a stream to or from a file\r\n+ * or a string). In either case, types from the STL are used.\r\n+ * Although ms deals only with data coded with 0 and 1, the class Ms\r\n+ * offers the possibility of both importing and exporting data coded\r\n+ * with by integer. All methods have an option named "separated". 
If\r\n+ * this option is true, the parser or formatter introduces a slight\r\n+ * modification of the format: genotypes individual data are\r\n+ * separated by a white space ("1 0 1 1" instead of "1011", allowing\r\n+ * genotype values larger than 9: "1 0 11 1").\r\n+ *\r\n+ * \\ingroup core\r\n+ *\r\n+ */\r\n+ class Ms {\r\n+\r\n+ public:\r\n+ \r\n+ /** \\brief Imports a sequence alignment\r\n+ * \r\n+ * Creates a istringstream from the string and calls the\r\n+ * overloaded method.\r\n+ * \r\n+ * \\param str the string to parse.\r\n+ * \\param ns the expected number of sequences.\r\n+ * \\param separated true if a white space separator is placed\r\n+ * between genotype at each site.\r\n+ *\r\n+ * \\return A sequence alignment as a data matrix.\r\n+ */\r\n+ static DataMatrix get(std::string, unsigned int ns, bool separated=false);\r\n+\r\n+\r\n+ /** \\brief Imports a sequence alignment\r\n+ * \r\n+ * Attemps to generate a DataMatrix object from the stream.\r\n+ * Reads only one simulation and throws a SeqlibFormatError\r\n+ * exception in case of format error.\r\n+ * \r\n+ * Allows any number of white lines before the //, but no other\r\n+ * data. 
Supports \\r at the end of lines (before the \\n).\r\n+ * Accepted symbols are all integers (0-9).\r\n+ *\r\n+ * \\param stream the stream to parse.\r\n+ * \\param ns the expected number of sequences.\r\n+ * \\param separated true if a white space separator is placed\r\n+ * between genotype at each site.\r\n+ * \r\n+ * \\return A sequence alignment as a data matrix.\r\n+ */\r\n+ static DataMatrix get(std::istream& stream, unsigned int ns, bool separated=false);\r\n+\r\n+\r\n+ /** \\brief Exports a sequence alignment\r\n+ * \r\n+ * Internally creates a stringstream, calls the overloaded method\r\n+ * and returns the outcome.\r\n+ *\r\n+ * \\param dataMatrix the alignment object to write.\r\n+ * \\param separated true if a white space separator must be placed\r\n+ * between the genotype at each site.\r\n+ * \r\n+ */\r\n+ static std::string format(DataMatrix& dataMatrix, bool separated=false);\r\n+ '..b'd string to the stream \'on the fly\'. The\r\n+ * formatted string is guaranteed to starts with a // line and\r\n+ * ends with an empty line. The client is expected to take care\r\n+ * of writing any header and add an additional white line between\r\n+ * simulations if needed. The method throws a SeqlibRuntimeError\r\n+ * if the stream is not writable. The data matrix should contain\r\n+ * only data within range 0-9 if separated is false (default) and\r\n+ * any positive (>=0) integer if separated is true. 
Note that\r\n+ * output generated with separated=true is never compatible with\r\n+ * the original ms format, and that output generated with\r\n+ * separator=false is compatible with the original ms format only\r\n+ * if all alleles are 0 or 1 (which is not checked by this\r\n+ * formatted).\r\n+ * \r\n+ * \\param stream the stream (file or string stream) where to\r\n+ * write the output.\r\n+ * \\param dataMatrix the alignment object to write.\r\n+ * \\param separated true if a white space separator must be placed\r\n+ * between the genotype at each site.\r\n+ * \r\n+ */\r\n+ static void format(std::ostream& stream, DataMatrix& dataMatrix, bool separated=false);\r\n+\r\n+\r\n+ /** \\brief Returns the last tMRCA read by any Ms instance\r\n+ * \r\n+ * If a tMRCA value was present in the last simulation read by\r\n+ * any Ms instance, it will be returned by this method. A value\r\n+ * of -1. is returned if no simulation was read, or if the last\r\n+ * simulation didn\'t contain a tMRCA value or if the last\r\n+ * simulation provoked an exception before reaching the tMRCA\r\n+ * line.\r\n+ * \r\n+ */\r\n+ static double tMRCA();\r\n+\r\n+\r\n+ /** \\brief Returns the last "prob" read by any Ms instance\r\n+ * \r\n+ * "prob" is returned by ms when a fixed number of segregating\r\n+ * sites is used in conjunction with a theta value. If a "prob"\r\n+ * value was present in the last simulation read by any Ms\r\n+ * instance, it will be returned by this method. 
A value of -1\r\n+ * is returned if no simulation was read, or if the last\r\n+ * simulation didn\'t contain a "prob" value or if the last\r\n+ * simulation provoked an exception before reaching the "prob"\r\n+ * line.\r\n+ * \r\n+ */\r\n+ static double prob();\r\n+ \r\n+\r\n+ /** \\brief Returns the tree string found in the last simulation read by any Ms instance\r\n+ * \r\n+ * If one or more trees were present in the last simulation read\r\n+ * by any Ms instance, they will be returned as a unique string\r\n+ * by this method. An empty string is returned if no simulation\r\n+ * was read, or if the last simulation, or if the last simulation\r\n+ * didn\'t contain any tree value or if the last simulation\r\n+ * provoked an exception before reaching the tree line.\r\n+ * \r\n+ * Note: the trees are returned as a single line.\r\n+ * \r\n+ */\r\n+ static std::string trees();\r\n+\r\n+ \r\n+ private:\r\n+ // Line parser (the last \\n is extracted and discarded - no error upon EOF)\r\n+ std::string next_line(std::istream& stream);\r\n+ \r\n+ /// tMRCA (-1 if not found in ms output)\r\n+ static double _tMRCA;\r\n+ \r\n+ /// probability (-1 if not found in ms output)\r\n+ static double _prob;\r\n+ \r\n+ /// tree string (maybe contain several trees) (empty string if not found in ms output)\r\n+ static std::string _trees;\r\n+\r\n+ \r\n+ /// No instantiation allowed\r\n+ Ms() { }\r\n+ \r\n+ /// A fortiori no destruction allowed\r\n+ ~Ms() { }\r\n+\r\n+ /// No copy allowed\r\n+ Ms(const Ms&) { }\r\n+\r\n+ /// No copy allowed\r\n+ Ms& operator=(const Ms&) { return *this; }\r\n+ \r\n+ };\r\n+}\r\n+ \r\n+#endif\r\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Mutation.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Mutation.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,63 @@ +/* + Copyright 2009-2010 Stéphane De Mita, Mathieu Siol + + This file is part of the EggLib library. + + EggLib is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + EggLib is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with EggLib. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef EGGLIB_MUTATION_HPP +#define EGGLIB_MUTATION_HPP + +#include <vector> +#include "Edge.hpp" + +namespace egglib { + + /** \brief Simple container for information about a single mutation + * + * \ingroup coalesce + * + */ + class Mutation { + + public: + + /// Default constructor + Mutation(); + + // Age (member disabled: the declaration below is commented out) + //double age; + + /// Mutation index (for finding in Edge) + unsigned int actualSiteIndex; + + /// Position + double position; + + /// Segment index + unsigned int segmentIndex; + + // Pointer to edge (member disabled: the declaration below is commented out) + //const Edge* edge; + + private: + + void init(); // NOTE(review): presumably sets the members to default values; definition not visible here - confirm + + }; + +} + +#endif |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Mutator.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Mutator.hpp Fri Jul 10 04:39:30 2015 -0400 |
[ |
b'@@ -0,0 +1,397 @@\n+/*\r\n+ Copyright 2009, 2010, 2012 St\xe9phane De Mita, Mathieu Siol\r\n+\r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+#ifndef EGGLIB_MUTATOR_HPP\r\n+#define EGGLIB_MUTATOR_HPP\r\n+\r\n+\r\n+#include "DataMatrix.hpp"\r\n+#include "Random.hpp"\r\n+#include "Arg.hpp"\r\n+#include "Mutation.hpp"\r\n+\r\n+\r\n+namespace egglib {\r\n+ \r\n+\r\n+ /** \\brief Implements mutation models\r\n+ *\r\n+ * \\ingroup coalesce\r\n+ * \r\n+ * Works with a previously built Ancestral Reconbination Graph. The\r\n+ * user must sets options using the setter-based interface. After\r\n+ * that he or she can call the method mute() that will generates\r\n+ * a DataMatrix object.\r\n+ * \r\n+ * Genotype data are represented by integer numbers. Regardless of\r\n+ * the mutation model, the ancestral state is always 0. 
The user can\r\n+ * set the rate of mutation (or, alternatively, fix the number of\r\n+ * mutations that occurred - which is the number of segregating sites\r\n+ * only with an infinite site model).\r\n+ * \r\n+ * Other options fall into two separate groups: the positions of the\r\n+ * mutated sites and the process of mutation (how new alleles are\r\n+ * generated).\r\n+ * \r\n+ * Concerning allele generation, several mutation models are available\r\n+ * (coded by single letters):\r\n+ * - F: fixed number of alleles. Among other markers, this model is\r\n+ * appropriate for simulating nucleotides. The user is able\r\n+ * to choose the number of alleles (where 2 is the standard\r\n+ * for an infinite site model and 4 for a finite site model).\r\n+ * Mutator allows assigning independent weights between all\r\n+ * different transition types and can draw randomly the\r\n+ * ancestral states, providing a way to emulate evolution of\r\n+ * nucleotides with multiple mutations at the same site and\r\n+ * reversion.\r\n+ * - I: infinite number of alleles. At a given site, each mutation\r\n+ * raises a new allele. The value of the alleles is therefore\r\n+ * irrelevant (it only denotes its order of appearance). This\r\n+ * model does not permit homoplasy.\r\n+ * - S: stepwise mutation model. In this model the value of the\r\n+ * alleles are interpreted as a size (typically for simulating\r\n+ * a microsatellite marker). Each mutation either increases\r\n+ * or decreases the allele size by an increment of one.\r\n+ * - T: two-phase mutation model. This model is a generalization\r\n+ * of the stepwise mutation model (S). For a mutation, the\r\n+ * increment (either increase or decrease) is 1 with the\r\n+ * probability given by the parameter (1-TPMproba). 
With\r\n+ * probability TPMproba, the increment is drawn from a\r\n+ * geometric distribution of parameter given by the other\r\n+ * parameter (TPMparam).\r\n+ * \r\n+ * By default, the program will assume an infinite site model (ISM).\r\n+ * Each mutation will occur to a new position drawn from the interval\r\n+ * [0,1]. It is possible to set any mutation model with an ISM \r\n+ * (including microsatellite-like models I, S and T). Alternatively,\r\n+ * the user can specify a finite number of sites available for\r\n+ * mutation. For a microsatellite mark'..b'del). It gives the parameter\r\n+ * of the geometric distribution which is used to generate\r\n+ * the mutation step (if it is not one).\r\n+ * \r\n+ * The value must be >=0. and <=1. \r\n+ * \r\n+ */\r\n+ void TPMparam(double value);\r\n+\r\n+\r\n+ /** \\brief Gets the number of mutable sites\r\n+ * \r\n+ * A value a zero must be interpreted as the infinite site\r\n+ * model. Note that after all calls all data from the tables\r\n+ * sitePositions and siteWeights will be reset.\r\n+ * \r\n+ */\r\n+ unsigned int numberOfSites() const;\r\n+ \r\n+ \r\n+ /** \\brief Sets the number of mutable sites\r\n+ * \r\n+ * The value of zero is accepted and imposed the infinite\r\n+ * site model.\r\n+ * \r\n+ */\r\n+ void numberOfSites(unsigned int);\r\n+ \r\n+ \r\n+ /** \\brief Gets the position of a given site\r\n+ * \r\n+ */\r\n+ double sitePosition(unsigned int siteIndex) const;\r\n+\r\n+ \r\n+ /** \\brief Set the position of a given site\r\n+ * \r\n+ * The position must be >=0 and <=1\r\n+ * \r\n+ */\r\n+ void sitePosition(unsigned int siteIndex, double position);\r\n+\r\n+\r\n+ /** \\brief Gets the mutation weight of a given site\r\n+ * \r\n+ */\r\n+ double siteWeight(unsigned int siteIndex) const;\r\n+\r\n+ \r\n+ /** \\brief Set the site weight of a given site\r\n+ * \r\n+ * The weight must be strictly positive.\r\n+ * \r\n+ */\r\n+ void siteWeight(unsigned int siteIndex, double weight);\r\n+\r\n+\r\n+ /** \\brief 
Performs mutation\r\n+ * \r\n+ * \\param arg Ancestral recombination graph instance. If the\r\n+ * ARG is partially built or not a all, or improperly so,\r\n+ * the behaviour of this method is not defined.\r\n+ * \r\n+ * \\param random The address of a Random instance to be\r\n+ * used for generating random numbers.\r\n+ * \r\n+ * \\return A DataMatrix instance containing simulated data.\r\n+ * \r\n+ */\r\n+ DataMatrix mute(Arg* arg, Random* random);\r\n+\r\n+\r\n+ /** \\brief Gets the last number of mutations\r\n+ *\r\n+ * Returns the number of mutations of the last call of mute( ).\r\n+ * By default, this method returns 0.\r\n+ *\r\n+ */\r\n+ unsigned int numberOfMutations() const; \r\n+\r\n+\r\n+ private:\r\n+ \r\n+ void clear();\r\n+ void init();\r\n+ void copy(const Mutator&);\r\n+\r\n+ //int nextAllele(int allele, Random* random);\r\n+ int TPMstep(double inTPMproba, Random* random);\r\n+ void apply_mutation(unsigned int matrixIndex,\r\n+ unsigned int actualSite, DataMatrix& data,\r\n+ const Edge* edge, int allele,\r\n+ unsigned int segment, Random* random);\r\n+\r\n+ \r\n+ char _model;\r\n+ double _mutationRate;\r\n+ unsigned int _fixedNumberOfMutations;\r\n+ unsigned int _numberOfAlleles;\r\n+ double** _transitionWeights;\r\n+ bool _randomAncestralAllele;\r\n+ unsigned int _numberOfSites;\r\n+ double* _sitePositions;\r\n+ double* _siteWeights;\r\n+ double _TPMproba;\r\n+ double _TPMparam;\r\n+ int maxAllele;\r\n+ unsigned int _numberOfMutations;\r\n+ std::vector<Mutation> _cache_mutations;\r\n+ unsigned int _cache_mutations_reserved;\r\n+\r\n+ };\r\n+\r\n+\r\n+ bool compare(Mutation mutation1, Mutation mutation2); // returns True if mutation1 is older\r\n+\r\n+}\r\n+\r\n+\r\n+\r\n+\r\n+#endif\r\n+\r\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/NucleotideDiversity.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/NucleotideDiversity.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
b'@@ -0,0 +1,377 @@\n+/*\r\n+ Copyright 2008-2009 St\xc3\xa9phane De Mita, Mathieu Siol\r\n+\r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+\r\n+#ifndef EGGLIB_NUCLEOTIDEDIVERSITY_HPP\r\n+#define EGGLIB_NUCLEOTIDEDIVERSITY_HPP\r\n+\r\n+\r\n+#include "BaseDiversity.hpp"\r\n+#include <string>\r\n+#include <vector>\r\n+\r\n+\r\n+\r\n+namespace egglib {\r\n+\r\n+\r\n+ /** \\brief Performs analyzes of population genetics\r\n+ *\r\n+ * \\ingroup polymorphism\r\n+ * \r\n+ * This class computes several summary statistics based on\r\n+ * nucleotide analysis. Note that it is possible to use the same\r\n+ * object to analyze different data set. Calling the load() method\r\n+ * erases all data preivously computed (if any). Calling the load()\r\n+ * method is absolutely required to compute any statistics. Some\r\n+ * statistics are not computed by default, but are if the\r\n+ * corresponding accessor is used (only load() is required).\r\n+ * \r\n+ * Note that "unsecure" accessors don\'t perform out-of-bound checks.\r\n+ * \r\n+ * S is the number of varying sites (only in sites that were not\r\n+ * rejected).\r\n+ * \r\n+ * eta is the minimum number of mutations, that is the sum of the\r\n+ * number of alleles minus 1 for each varying site. eta = S if all\r\n+ * sites have no variant or 2 alleles. 
eta is computed independently\r\n+ * of the option multiple and IS NOT computed over lseff sites.\r\n+ *\r\n+ * Pi is the average number of pairwise differences between sequences\r\n+ * (expressed here per site) or (as computed here) the mean per site\r\n+ * (unbiased) heterozygosity. Pi is zero if no polymorphic sites.\r\n+ *\r\n+ * D is the Tajima\'s test of neutrality\r\n+ * Ref. Tajima F.: Statistical method for testing the neutral\r\n+ * mutation hypothesis by DNA polymorphism. Genetics 1989, 123:585-595.\r\n+ * It is arbitrary set to 0 if no polymorphic sites.\r\n+ *\r\n+ * tW: thetaW: estimator of theta based on polymorphic sites (ref.\r\n+ * e.g. Watterson 1975 Theor. Pop. Biol.).\r\n+ * Both D and thetaW are computed assuming that rounded nseff samples\r\n+ * have been sampled.\r\n+ * The variance of D is computed using rounded nseff instead of ns.\r\n+ *\r\n+ * H is the Fay and Wu\'s test of neutrality.\r\n+ * Z is the standardized version and E a similar test.\r\n+ * Ref. Fay J. C., Wu C.-I.: Hitchhiking under positive Darwinian\r\n+ * selection. Genetics 2000, 155:1405-1413. and Zeng K., Fu Y. X.,\r\n+ * Shi S., Wu C.-I.: Statistical tests for detecting positive\r\n+ * selection by utilizing high-frequency variants. Genetics 2006,\r\n+ * 174:1431-9. Both are arbitrary set to 0 if no polymorphic or\r\n+ * orientable sites.\r\n+ *\r\n+ * tH and tL: theta H: estimators of theta based on derived\r\n+ * polymorphic sites (ref in Fay and Wu and Zeng al.). 
The variance\r\n+ * of H/Z are computed assuming that rounded nseff samples have\r\n+ * been sampled.\r\n+ * \r\n+ */\r\n+ class NucleotideDiversity : public BaseDiversity {\r\n+\r\n+ public:\r\n+\r\n+ /** \\brief Builds an object\r\n+ * \r\n+ */\r\n+ NucleotideDiversity();\r\n+\r\n+\r\n+ /** \\brief Destroys an object\r\n+ * \r\n+ */\r\n+ virtual ~NucleotideDiversity();\r\n+\r\n+\r\n+ /** \\brief Identifies polymorphic sites and computes basis\r\n+ '..b" * - 1: A&G A G specific 1 + fixed 2-3\r\n+ * - 2: A A&G A specific 2\r\n+ * - 3: A A&G G specific 2 + fixed 1-3\r\n+ * - 4: A A A&G specific 3\r\n+ * - 5: A G A&G specific 3 + fixed 1-2\r\n+ * - 6: A&G A&G A shared 1-2\r\n+ * - 7: A&G A A&G shared 1-3\r\n+ * - 8: A A&G A&G shared 2-3\r\n+ * - 9: A&G A&G A&G shared 1-2-3\r\n+ * - 10: A G G fixed 1\r\n+ * - 11: A G A fixed 2\r\n+ * - 12: A A G fixed 3\r\n+ *\r\n+ * \\param index must be an index from 0 to 12.\r\n+ * \r\n+ */\r\n+ unsigned int triConfiguration(unsigned int index);\r\n+\r\n+\r\n+ /// Builds and returns the vector of positions of all polymorphic sites\r\n+ std::vector<unsigned int> polymorphic_positions() const;\r\n+\r\n+\r\n+ /** \\brief Builds and returns the vector of positions of all singleton sites\r\n+ * \r\n+ * A site singleton when it is polymorphic according to\r\n+ * parameter of the diversity analysis, when it has exactly two\r\n+ * alleles and one of them is at absolute frequency 1 (one\r\n+ * copy) disregarding the outgroup.\r\n+ * \r\n+ */\r\n+ std::vector<unsigned int> singleton_positions() const;\r\n+\r\n+\r\n+ protected:\r\n+\r\n+ /** \\brief This class cannot be copied\r\n+ * \r\n+ */\r\n+ NucleotideDiversity(const NucleotideDiversity& source) { }\r\n+\r\n+\r\n+ /** \\brief This class cannot be copied\r\n+ * \r\n+ */\r\n+ NucleotideDiversity& operator=(const NucleotideDiversity& source) { return *this; }\r\n+\r\n+\r\n+ void init(); // initializes values\r\n+ void clear(); // free memory but doesn't initializes\r\n+ \r\n+ // diversity 
(without outgroup)\r\n+ void diversity();\r\n+ \r\n+ // diversity with outgroup\r\n+ void outgroupDiversity();\r\n+ \r\n+ // site patterns\r\n+ void differentiation();\r\n+ \r\n+ // triconfigurations\r\n+ void triConfigurations();\r\n+ \r\n+\r\n+ // holders for statistics, with booleans flagging groups of stats\r\n+ \r\n+ bool b_analysisSites;\r\n+ \r\n+ bool b_diversity;\r\n+ \r\n+ double v_Pi; // nucleotide diversity\r\n+ double v_thetaW; // theta (Watterson estimator)\r\n+ double v_average_Pi; // average diversity across populations\r\n+ double *v_pop_Pi; // diversity per population\r\n+ double v_D; // Tajima's D\r\n+ \r\n+ bool b_outgroupDiversity;\r\n+ \r\n+ double v_thetaH; // theta (Fay and Wu estimator)\r\n+ double v_thetaL; // theta (Zeng estimator)\r\n+ double v_H; // Fay and Wu's H\r\n+ double v_Z; // normalized Fay and Wu's H\r\n+ double v_E; // Zeng et al.'s E\r\n+ \r\n+ bool b_differentiation;\r\n+ \r\n+ unsigned int *v_pairwiseFixedDifferences;\r\n+ unsigned int *v_pairwiseCommonAlleles;\r\n+ unsigned int *v_pairwiseSharedAlleles;\r\n+ unsigned int *v_popPolymorphic;\r\n+ unsigned int *v_popSpecific;\r\n+ unsigned int *v_popSpecificDerived;\r\n+ unsigned int v_countFixedDifferences;\r\n+ unsigned int v_countCommonAlleles;\r\n+ unsigned int v_countSharedAlleles;\r\n+ unsigned int v_countSpecificAlleles;\r\n+ unsigned int v_countSpecificDerivedAlleles;\r\n+ \r\n+ \r\n+ bool b_triConfigurations;\r\n+ \r\n+ unsigned int *v_triConfigurations;\r\n+\r\n+ };\r\n+}\r\n+\r\n+#endif\r\n" |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/ParamSet.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/ParamSet.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
b'@@ -0,0 +1,279 @@\n+/*\r\n+ Copyright 2009-2010 St\xe9phane De Mita, Mathieu Siol\r\n+\r\n+ This file is part of the EggLib library.\r\n+\r\n+ EggLib is free software: you can redistribute it and/or modify\r\n+ it under the terms of the GNU General Public License as published by\r\n+ the Free Software Foundation, either version 3 of the License, or\r\n+ (at your option) any later version.\r\n+\r\n+ EggLib is distributed in the hope that it will be useful,\r\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n+ GNU General Public License for more details.\r\n+\r\n+ You should have received a copy of the GNU General Public License\r\n+ along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n+*/\r\n+\r\n+#ifndef EGGLIB_PARAMSET_HPP\r\n+#define EGGLIB_PARAMSET_HPP\r\n+\r\n+\r\n+#include "DataMatrix.hpp"\r\n+\r\n+\r\n+namespace egglib {\r\n+\r\n+ class Change;\r\n+ class Controller;\r\n+\r\n+\r\n+ /** \\brief Set of parameters\r\n+ *\r\n+ * \\ingroup coalesce\r\n+ *\r\n+ */\r\n+ class ParamSet {\r\n+\r\n+ public:\r\n+ \r\n+ /** \\brief Default constructor\r\n+ *\r\n+ * Initializes all parameters to reasonnable values (except\r\n+ * that the sample size is null: 1 population, 0 samples,\r\n+ * selfing rate of 0, recombination rate of 0, growth rate of\r\n+ * 0, population size of 1 and no changes.\r\n+ *\r\n+ */\r\n+ ParamSet();\r\n+\r\n+ /** \\brief Destructor\r\n+ * \r\n+ */\r\n+ ~ParamSet();\r\n+ \r\n+ /** \\brief Copy constructor\r\n+ * \r\n+ */\r\n+ ParamSet(const ParamSet&);\r\n+ \r\n+ /** \\brief Assignment operator\r\n+ * \r\n+ */\r\n+ ParamSet& operator=(const ParamSet&);\r\n+\r\n+ /** \\brief Restores default value of all parameters\r\n+ * \r\n+ */\r\n+ void reset();\r\n+\r\n+ /** \\brief Gets the number of populations\r\n+ * \r\n+ */\r\n+ unsigned int numberOfPopulations() const;\r\n+ \r\n+ /** \\brief Gets a pairwise migration rate\r\n+ * \r\n+ * It is allowed to 
access a diagonal value. Diagonal\r\n+ * values contain the sum of values of the corresponding\r\n+ * line (diagonal cell excepted, of course).\r\n+ * \r\n+ */\r\n+ double pairwiseMigrationRate(unsigned int source, unsigned int dest) const;\r\n+ \r\n+ /** \\brief Sets a pairwise migration rate\r\n+ * \r\n+ * It is not allowed to set a value on the diagonal (this\r\n+ * would raise an exception). The method takes care of\r\n+ * modifying the diagonal accordingly (still this is not\r\n+ * relevant for the client);\r\n+ * \r\n+ */\r\n+ void pairwiseMigrationRate(unsigned int source, unsigned int dest, double value);\r\n+ \r\n+ /** \\brief Sets the migration rate for all matrix\r\n+ * \r\n+ */\r\n+ void migrationRate(double value);\r\n+ \r\n+ /** \\brief Gets a population size\r\n+ * \r\n+ */\r\n+ double populationSize(unsigned int populationIndex) const;\r\n+ \r\n+ /** \\brief Sets a population size\r\n+ * \r\n+ * The size must be strictly positive.\r\n+ * \r\n+ */\r\n+ void populationSize(unsigned int populationIndex, double value);\r\n+ \r\n+ /** \\brief Gets a growth rate\r\n+ * \r\n+ */\r\n+ double growthRate(unsigned int populationIndex) const;\r\n+ \r\n+ /** \\brief Sets a growth rate\r\n+ * \r\n+ */\r\n+ void growthRate(unsigned int populationIndex, double value);\r\n+ \r\n+ /** \\brief Gets the recombination rate\r\n+'..b'e is planned.\r\n+ * \r\n+ */\r\n+ double nextChangeDate() const;\r\n+ \r\n+ /** \\brief Applies the next change event\r\n+ * \r\n+ * \\param controller the Change event might need to have\r\n+ * access to simulation controller (to trigger coalescent\r\n+ * events, for example).\r\n+ * \r\n+ */\r\n+ void nextChangeDo(Controller* controller);\r\n+ \r\n+ /** \\brief Gets the number of single sample from a population\r\n+ * \r\n+ */\r\n+ unsigned int singles(unsigned int populationIndex) const;\r\n+\r\n+ /** \\brief Sets the number of single sample from a population\r\n+ * \r\n+ */\r\n+ void singles(unsigned int populationIndex, unsigned int 
value);\r\n+\r\n+ /** \\brief Gets the number of double sample from a population\r\n+ * \r\n+ */\r\n+ unsigned int doubles(unsigned int populationIndex) const;\r\n+\r\n+ /** \\brief Sets the number of double sample from a population\r\n+ * \r\n+ */\r\n+ void doubles(unsigned int populationIndex, unsigned int value);\r\n+ \r\n+ /** \\brief Computes the total number of samples\r\n+ * \r\n+ */\r\n+ unsigned int numberOfSamples() const;\r\n+ \r\n+ /** \\brief Gives the date of the last size change\r\n+ * \r\n+ * \\param populationIndex the index of the population.\r\n+ * \\return The date where the last change occurred, or 0. if\r\n+ * no change occurred during the simulation.\r\n+ *\r\n+ */\r\n+ double dateOfLastChange(unsigned int populationIndex) const;\r\n+\r\n+\r\n+ /** \\brief Sets the date of the last size change\r\n+ * \r\n+ * \\param populationIndex the index of the population.\r\n+ * \\param date the date where the last change occurred, or 0.\r\n+ * if no change occurred during the simulation.\r\n+ *\r\n+ */\r\n+ void dateOfLastChange(unsigned int populationIndex, double date) const;\r\n+\r\n+ \r\n+ /** \\brief Set groups labels\r\n+ * \r\n+ * Sets the group labels of the DataMatrix, according to the\r\n+ * current state of population structure, and assuming that\r\n+ * the DataMatrix was generated by the class Arg.\r\n+ * \r\n+ * \\param dataMatrix the DataMatrix object to modify. The\r\n+ * number of sequences must match the total number of samples\r\n+ * defined by the ParamSet object this method is called on.\r\n+ * \r\n+ * \\param labelIndividuals by default, labels the different\r\n+ * samples depending on the population they come from (0\r\n+ * being the label of the first population). 
If this flag is\r\n+ * set to true, then the samples are labelled depending on\r\n+ * the individual they come from, regardless of populations.\r\n+ * In that case there can be only one or two genes for a\r\n+ * given group label.\r\n+ * \r\n+ */\r\n+ void setGroups(DataMatrix& dataMatrix, bool labelIndividuals=false);\r\n+\r\n+ private:\r\n+\r\n+ void clear();\r\n+ void init();\r\n+ void copy(const ParamSet&);\r\n+ \r\n+ double _selfingRate;\r\n+ double _recombinationRate;\r\n+ unsigned int _numberOfSegments;\r\n+ unsigned int _numberOfPopulations;\r\n+ unsigned int* _singles;\r\n+ unsigned int* _doubles;\r\n+ double* _growthRates;\r\n+ double* _populationSize;\r\n+ double* _dateOfLastChange;\r\n+ double** migrationMatrix;\r\n+ unsigned int _numberOfChanges;\r\n+ unsigned int nextChangeIndex;\r\n+ Change const** changes;\r\n+ };\r\n+\r\n+}\r\n+\r\n+#endif\r\n' |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Population.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Population.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
/*
    Copyright 2009-2010 Stéphane De Mita, Mathieu Siol

    This file is part of the EggLib library.

    EggLib is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    EggLib is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with EggLib.  If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef EGGLIB_POPULATION_HPP
#define EGGLIB_POPULATION_HPP


#include "Edge.hpp"

namespace egglib {

    class Random;

   /** \brief Handles a single population
    *
    * \ingroup coalesce
    *
    * A Population stores lineages as addresses of Edge instances.
    * Lineages are registered with set() or push() and removed with
    * extractRandomly() or extractByIndex().
    *
    */
    class Population {

        public:

           /** \brief Default constructor
            *
            * Generates an empty population.
            *
            */
            Population();

           /** \brief Copy constructor
            *
            */
            Population(const Population& source);

           /** \brief Assignment operator
            *
            */
            Population& operator=(const Population& source);

           /** \brief Destructor
            *
            * The object only cleans Edge objects currently stored in it.
            *
            */
            ~Population();

           /** \brief Standard constructor
            *
            * The Edge instances will be handled by address and they
            * MUST be passed using the method set().
            *
            * \param numberOfSegments number of recombining segments.
            *
            * \param numberOfLineages the number of lineages contained
            * in this population.
            *
            * \param firstIndex the absolute index (or ID) of the first
            * lineage (the others will have consecutive, incremented
            * IDs).
            *
            */
            Population(unsigned int numberOfSegments,
                unsigned int numberOfLineages, unsigned firstIndex);

           /** \brief Gets the number of lineages
            *
            */
            unsigned int numberOfLineages() const;

           /** \brief Gets the efficient number of lineages
            *
            * The number of lineages is multiplied by the number of
            * covered segments of each lineage.
            *
            */
            unsigned int efficientNumberOfLineages() const;

           /** \brief Sets the Edge of a lineage
            *
            * \param index the index of the lineage within the
            * population.
            * \param edge the address of the Edge instance representing
            * the lineage.
            *
            */
            void set(unsigned int index, Edge* edge);

           /** \brief Removes and returns a random lineage.
            *
            * \param random pointer to simulator's random generator
            * instance.
            *
            */
            Edge* extractRandomly(Random* random);

           /** \brief Removes and returns a given lineage.
            *
            * \param index the relative index of the lineage.
            *
            */
            Edge* extractByIndex(unsigned int index);

           /** \brief Appends a lineage to the object
            *
            * \param edge the address of the Edge instance to append.
            *
            */
            void push(Edge* edge);

           /** \brief Gets coverage
            *
            * NOTE(review): presumably the coverage of the lineage found
            * at edgeIndex; not stated in this header, confirm against
            * the implementation.
            *
            */
            unsigned int coverage(unsigned int edgeIndex) const;


        private:

            // Helpers for the copy constructor, assignment operator and
            // destructor. NOTE(review): exact roles are not visible from
            // this header; confirm against the implementation.
            void copy(const Population& source);
            void clear();
            Edge* pick(unsigned int index);
            void init();

            // Backing fields of numberOfLineages() and
            // efficientNumberOfLineages() (presumably; verify).
            unsigned int _numberOfLineages;
            unsigned int _efficientNumberOfLineages;

            // Storage for the addresses of the Edge instances.
            Edge** lineages;
    };

}

#endif
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Random.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Random.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
/*
    Copyright 2008,2009,2012 Stéphane De Mita, Mathieu Siol
    Adapted from MStrat, developed by Charles-Edouard Coste,
    Thomas M. Bataillon, Mathieu Cotisson, Guy Decoux, Chistophe Rozale,
    Daniel J. Schoen and Jacques L. David.

    This file is part of the EggLib library.

    EggLib is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    EggLib is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with EggLib.  If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef EGGLIB_RANDOM_HPP
#define EGGLIB_RANDOM_HPP

namespace egglib {

   /** \brief Pseudo-random number generator
    *
    * \ingroup core
    *
    * Random is a pseudo-random number generator, adapted from a part of MStrat,
    * developed by Charles-Edouard Coste, Thomas M. Bataillon, Mathieu Cotisson,
    * Guy Decoux, Chistophe Rozale, Daniel J. Schoen and Jacques L. David.
    *
    * It uses two different seeds. By default, they are initialized to available
    * arbitrary values. However, a given sequence can be repeated by passing the
    * same two seeds.
    *
    */
    class Random {
        public:
           /** \brief Initializes using default seeds
            *
            * Uses the current system time and the memory address of the
            * object as an attempt to generate unique sequences.
            */
            Random();

           /** \brief Initializes using given seeds
            *
            * This constructor can be used to reproduce a given sequence.
            */
            Random(double seed1, double seed2);

           /** \brief Draws a number from an exponential distribution
            *
            * \param expectation the distribution mean (also 1/lambda
            * where lambda is the rate parameter).
            *
            */
            double erand(double expectation);

           /** \brief Draws an integer from a uniform distribution bound by 0 and max
            *
            * \param max the upper bound; max itself is not included.
            *
            */
            unsigned int irand(unsigned int max);

           /** \brief Draws an integer from a Poisson distribution with parameter p
            *
            * The Poisson transformation algorithm was taken from (in French)
            * http://www.u-picardie.fr/~cochard/IEM/demos/C107/C107_3.htm.
            */
            unsigned int prand(double p);

           /** \brief Draws a number from a normal distribution of expectation 0 and variance 1
            *
            * The algorithm used is the polar form of the Box-Muller
            * algorithm. \todo use the Ziggurat algorithm for the
            * nrand() method of Random.
            *
            */
            double nrand();

           /** \brief Draws a number from a geometric law
            *
            * The (unnamed) argument is the parameter of the law.
            *
            */
            unsigned int grand(double);

           /** \brief Draws a number from a uniform distribution between 0 and 1
            *
            */
            double uniform();

           /** \brief Gets the current value of the first seed
            *
            */
            double seed1() const;

           /** \brief Gets the current value of the second seed
            *
            */
            double seed2() const;

           /** \brief Sets the current value of the first seed
            *
            */
            void seed1(double);

           /** \brief Sets the current value of the second seed
            *
            */
            void seed2(double);

        private:
            // First seed
            double _seed1;

            // Second seed
            double _seed2;

            /* Since the normal random generator draws two numbers at
             * a time, one is cached and returned at any subsequent call.
             * NOTE(review): b_ncached presumably flags that v_ncached
             * holds such a cached draw; confirm against the
             * implementation.
             */
            bool b_ncached;
            double v_ncached;

    };
}

#endif
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/SitePolymorphism.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/SitePolymorphism.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
/*
    Copyright 2008-2009 Stéphane De Mita, Mathieu Siol

    This file is part of the EggLib library.

    EggLib is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    EggLib is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with EggLib.  If not, see <http://www.gnu.org/licenses/>.
*/


#ifndef EGGLIB_SITEPOLYMORPHISM_HPP
#define EGGLIB_SITEPOLYMORPHISM_HPP



namespace egglib {


    /** \brief Implements diversity analysis at the site level
     *
     * \ingroup polymorphism
     *
     * Data are loaded along with a population index. It is necessary to
     * set the number of populations prior to use.
     *
     * Outgroup sequences must be loaded separately. There can be any
     * number of outgroups, but they must all be consistent, otherwise
     * the site will be considered as not orientable.
     *
     */
    class SitePolymorphism {

        public:

            /** \brief Builds an object
             *
             */
            SitePolymorphism();


            /** \brief Builds an object
             *
             * \param npop number of populations
             *
             */
            SitePolymorphism(unsigned int npop);


            /** \brief Destroys an object
             *
             */
            virtual ~SitePolymorphism();


            /** \brief Copy constructor
             *
             */
            SitePolymorphism(const SitePolymorphism& source);


            /** \brief Assignment operator
             *
             */
            SitePolymorphism& operator=(const SitePolymorphism& source);


            /** \brief Sets the number of populations
             *
             * NOTE THAT all previous data is lost.
             *
             */
            void numberOfPopulations(unsigned int npop);


            /** \brief Adds a character
             *
             * \param populationIndex the index of the population from
             * which this character is sampled (do not use "population
             * label").
             *
             * \param character the character value (it is assumed that
             * it represents a valid character).
             *
             */
            void load(unsigned int populationIndex, char character);


            /** \brief Loads outgroup state
             *
             * There can be any number of outgroup states. Only
             * characters that are considered as valid (whatever the list
             * is) should be loaded.
             *
             */
            void outgroup(char state);


            /** \brief Number of different alleles
             *
             */
            unsigned int numberOfAlleles() const;


            /** \brief Gets an allele (unchecked)
             *
             * Assumes that the index provided lies in the valid range.
             *
             */
            char allele(unsigned int index) const;


            /** \brief Gets a frequency (unchecked)
             *
             * The sum of the frequencies of the allele over populations
             * is computed. No out-of-bounds check is performed.
             *
             */
            unsigned int alleleFrequency(unsigned int alleleIndex) const;


            /** \brief Gets the frequency of an allele in one pop (unchecked)
             *
             * The frequency of the allele in the given population is
             * returned. No out-of-bounds check is performed.
             *
             */
            unsigned int alleleFrequency(unsigned int popIndex, unsigned int alleleIndex) const;


            /** \brief Sums the frequency of derived allele(s)
             *
             * This method assumes that the site is orientable. It will
             * use as outgroup the first outgroup character entered,
             * assuming at least one was entered and that all (if more
             * than one) were identical.
             *
             */
            unsigned int derivedAlleleFrequency() const;


            /** \brief Number of sequences that were analyzed
             *
             */
            unsigned int ns() const;


            /** \brief Gets the number of analyzed sequences for a population
             *
             * No out-of-bounds check is performed.
             *
             */
            unsigned int ns(unsigned int popIndex) const;


            /** \brief Checks if the site can be oriented
             *
             * Returns true if at least one outgroup datum has been
             * loaded, if all outgroup data are identical (regardless of
             * their value) and if the outgroup allele is one of the
             * alleles in the sample.
             *
             */
            bool isOrientable() const;

            // NOTE(review): the five predicates below are undocumented in
            // this header. Their semantics can only be guessed from their
            // names and must be confirmed against the implementation
            // before being relied upon.
            bool isPolymorphic(unsigned int popIndex) const;
            bool hasSpecificAllele(unsigned int popIndex, bool restrictToDerived) const;
            bool haveFixedDifference(unsigned int pop1, unsigned int pop2) const;
            bool haveCommonAllele(unsigned int pop1, unsigned int pop2) const;
            bool haveSharedAllele(unsigned int pop1, unsigned int pop2) const;




        protected:

            // helpers
            void init();
            void clear();
            void copy(const SitePolymorphism& site);


            // data
            // NOTE(review): the mapping between these fields and the
            // public accessors (e.g. m_ns / m_pop_ns for ns()) is implied
            // by naming only; verify against the implementation.
            unsigned int m_numberOfPopulations;
            unsigned int m_numberOfStates;
            char * m_states;
            unsigned int ** m_frequencies;
            unsigned int m_numberOfOutgroups;
            char * m_outgroups;
            unsigned int m_ns;
            unsigned int * m_pop_ns;

            bool m_cache_orientable;

    };
}

#endif
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/Staden.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/Staden.hpp Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -0,0 +1,140 @@ +/* + Copyright 2008-2009 St�phane De Mita, Mathieu Siol + + This file is part of EggLib. + + EggLib is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + EggLib is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with EggLib. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef EGGLIB_STADEN_HPP +#define EGGLIB_STADEN_HPP + +#include <string> +#include <istream> +#include "Align.hpp" + +namespace egglib { + + /** \brief Parser of Staden output format + * + * \ingroup core + * + * The parser is available as a static method. It takes either a + * stream or a string containing data formatted by the program GAP4 + * of the Staden package (command 'dump contig to file'). + * + */ + class Staden { + + public: + + + /** \brief Parses a string + * + * \param string a string containing an alignment formatted + * by the program GAP4 of the Staden package. + * + * \param deleteConsensus if true, the sequence named + * "CONSENSUS" is deleted from the file (if it is present). + * + * \return An Align instance containing the data found in + * the Staden while, after recoding the character following + * the standard codes. + * + * This method opens a stream to the string and calls the + * overloaded method. + * + * The character replacement rules assume Staden default + * convention, as follows: + * - "-" codes for an unknown base and is replaced by "N". + * - "*" codes for an alignment gap and is replaced by "-". + * - A white space represents missing data and is replaced + * by "?". 
+ * + */ + static Align parse(const std::string& string, bool deleteConsensus=true); + + + /** \brief Parses an open stream + * + * \param stream the open containing an alignment formatted + * by the program GAP4 of the Staden package. + * + * \param deleteConsensus if true, the sequence named + * "CONSENSUS" is deleted from the file (if it is present). + * + * \return An Align instance containing the data found in + * the Staden while, after recoding the character following + * the standard codes. + * + * The character replacement rules assume Staden default + * convention, as follows: + * - "-" codes for an unknown base and is replaced by "N". + * - "*" codes for an alignment gap and is replaced by "-". + * - A white space represents missing data and is replaced + * by "?". + * + */ + static Align parse(std::istream& stream, bool deleteConsensus=true); + + + private: + + /// Not allowed to instantiate this class + Staden() { } + + /// Not allowed to instantiate this class + Staden(const Staden& source) { } + + /// Not allowed to instantiate this class + ~Staden() { } + + + /* Gets the start position of sequences + * + * The functions gives total number of characters before the start of sequences + * and reads through until the next backspace (ignores the first line). 
+ */ + static void getShift(); + + // Translates according to the Staden format + static char transforme(char); + + // Imports one sequence + static bool readOneSequence(); + + // Imports and concatenates one sequence + static bool readAppendOneSequence(); + + // Replaces dots by the matching character from CONSENSUS + static void undot(bool delete_consensus=true); + + // The number of characters before the start of sequences + static int shift; + + // The dynamically filled container (will result in an aligment) + static Container container; + + // The current position + static int currpos; + + // The reading stream + static std::istream* stream; + + // Stores unique 8 characters discriminating readings + static std::vector<std::string> ID; + }; +} + +#endif |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/include/egglib-cpp/config.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/egglib/egglib-2.1.5/include/egglib-cpp/config.h Fri Jul 10 04:39:30 2015 -0400 |
b |
/* config.h.  Generated from config.h.in by configure.  */
/* config.h.in.  Generated from configure.ac by autoheader.  */

/* This header records the result of one particular configure run for the
   EggLib C++ library 2.1.5 (see PACKAGE_STRING below). Each feature macro
   is either defined to 1 or left as a commented-out #undef, exactly as
   configure emitted it. */

/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1

/* Define to 1 if you have the `bpp-core' library (-lbpp-core). */
/* #undef HAVE_LIBBPP_CORE */

/* Define to 1 if you have the `bpp-seq' library (-lbpp-seq). */
/* #undef HAVE_LIBBPP_SEQ */

/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1

/* Define to 1 if you have the `sqrt' function. */
/* #undef HAVE_SQRT */

/* Define to 1 if stdbool.h conforms to C99. */
#define HAVE_STDBOOL_H 1

/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1

/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1

/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1

/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1

/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1

/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1

/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1

/* Define to 1 if the system has the type `_Bool'. */
#define HAVE__BOOL 1

/* Name of package */
#define PACKAGE "egglib-cpp"

/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "demita@gmail.com"

/* Define to the full name of this package. */
#define PACKAGE_NAME "EggLib C++ library"

/* Define to the full name and version of this package. */
#define PACKAGE_STRING "EggLib C++ library 2.1.5"

/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "egglib-cpp"

/* Define to the home page for this package. */
#define PACKAGE_URL ""

/* Define to the version of this package. */
#define PACKAGE_VERSION "2.1.5"

/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1

/* Version number of package */
#define VERSION "2.1.5"

/* Define to `__inline__' or `__inline' if that's what the C compiler
   calls it, or to nothing if 'inline' is not supported under any name.  */
#ifndef __cplusplus
/* #undef inline */
#endif

/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 egglib/egglib-2.1.5/lib/libegglib-cpp.a |
b |
Binary file egglib/egglib-2.1.5/lib/libegglib-cpp.a has changed |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 hapmap2mlmm/HapmapToMLMMFiles.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hapmap2mlmm/HapmapToMLMMFiles.pl Fri Jul 10 04:39:30 2015 -0400 |
[ |
#!/usr/bin/perl

# HapmapToMLMMFiles.pl
#
# Converts a HapMap genotype file into the two input files used by MLMM:
#
#   * a map file with one "SNP Chr Pos" row per marker, and
#   * a genotype matrix with one row per individual and one column per
#     marker.
#
# Each genotype call is scored against the marker's "alleles" column
# (e.g. "A/T"): the first allele counts 0, the second counts 1, and the
# score of a call is the sum over its two alleles (0, 1 or 2). Calls that
# cannot be scored are written as "NA".
#
# The matrix is first written marker-by-row to a temporary file, then
# transposed by the external "transpose.awk" script located under --path.
#
# NOTE(review): the single-space field separators below are reproduced as
# they appear in the reviewed copy of this script; confirm against the
# repository that they were not tabs before relying on the map file layout.

use strict;
use warnings;
use Getopt::Long;
use File::Temp qw(tempfile);

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
    -h, --hapmap         <Hapmap input file>
    -m, --map            <Map output file>
    -g, --geno           <Genotype output file>
    -p, --path           <Path for transpose executable>
~;
$usage .= "\n";

my ($hapmap, $map, $geno, $path);

# An unknown or malformed option is a usage error, not something to ignore.
GetOptions(
    "geno=s"   => \$geno,
    "map=s"    => \$map,
    "hapmap=s" => \$hapmap,
    "path=s"   => \$path,
) or die $usage;

die $usage
    if ( !$geno || !$map || !$hapmap || !$path );

my $TRANSPOSE_EXE = "$path/transpose.awk";

# 0-based index of the first per-individual column of a HapMap line; the
# eleven columns before it hold marker metadata.
my $FIRST_SAMPLE_COLUMN = 11;

# Scores one genotype call.
#
#   $genotype  the call as found in the HapMap file (e.g. "AT", "NN")
#   $score_of  hash ref mapping each allele of the marker to 0 or 1
#
# Returns the sum of the scores of the first two characters of the call,
# or "NA" when the call is "NN", is shorter than two characters, or uses a
# character that is not one of the marker's declared alleles. (Without
# this check an undeclared allele would silently count as 0.)
sub genotype_score {
    my ($genotype, $score_of) = @_;

    return "NA" if $genotype eq "NN";

    my @alleles = split(//, $genotype);
    return "NA" if @alleles < 2;
    return "NA" if !exists $score_of->{ $alleles[0] };
    return "NA" if !exists $score_of->{ $alleles[1] };

    return $score_of->{ $alleles[0] } + $score_of->{ $alleles[1] };
}

# ---------------------------------------------------------------------------
# Pass 1: read the HapMap file, write the marker-major matrix to a temporary
# file and remember the map information of every marker.
# ---------------------------------------------------------------------------

# A uniquely named temporary file avoids clashes between concurrent runs and
# is removed automatically when the script exits, even on error.
my ($marker_major_fh, $marker_major_file)
    = tempfile("geno_transposed_XXXXXX", TMPDIR => 1, UNLINK => 1);

open(my $hapmap_fh, '<', $hapmap)
    or die "Cannot open hapmap file '$hapmap': $!\n";

my @map_rows;        # one [snp, chrom, pos] triple per marker, in file order
my $num_line = 0;    # number of non-blank lines seen so far

while (my $line = <$hapmap_fh>) {
    $line =~ s/[\r\n]//g;
    next if $line eq "";    # a blank line is neither a header nor a marker
    $num_line++;

    my @infos   = split(/\t/, $line);
    my @samples = @infos[ $FIRST_SAMPLE_COLUMN .. $#infos ];

    # The first line is the header: keep only the individual names.
    if ($num_line == 1) {
        print {$marker_major_fh} join(" ", "Ind_id", @samples), "\n";
        next;
    }

    die "Line $. of '$hapmap' has fewer than 4 columns\n"
        if @infos < 4;

    my ($snp, $variation, $chrom, $pos) = @infos[ 0 .. 3 ];

    # The first listed allele scores 0, the second scores 1.
    my %score_of;
    if ($variation =~ /(\w)\/(\w)/) {
        $score_of{$1} = 0;
        $score_of{$2} = 1;
    }

    push(@map_rows, [ $snp, $chrom, $pos ]);

    my @scores = map { genotype_score($_, \%score_of) } @samples;
    print {$marker_major_fh} join(" ", $snp, @scores), "\n";
}
close($hapmap_fh);
close($marker_major_fh)
    or die "Cannot write temporary file '$marker_major_file': $!\n";

# ---------------------------------------------------------------------------
# Map file.
# ---------------------------------------------------------------------------

open(my $map_fh, '>', $map)
    or die "Cannot open map file '$map' for writing: $!\n";
print {$map_fh} "SNP Chr Pos\n";
foreach my $row (@map_rows) {
    print {$map_fh} join(" ", @{$row}), "\n";
}
close($map_fh)
    or die "Cannot write map file '$map': $!\n";

# ---------------------------------------------------------------------------
# Pass 2: transpose the matrix and write it tab-separated.
# ---------------------------------------------------------------------------

# The list form of open runs the transposer without a shell, so paths that
# contain spaces or shell metacharacters are passed through unchanged.
open(my $transposed_fh, '-|', $TRANSPOSE_EXE, $marker_major_file)
    or die "Cannot run '$TRANSPOSE_EXE': $!\n";

open(my $geno_fh, '>', $geno)
    or die "Cannot open genotype file '$geno' for writing: $!\n";
while (my $line = <$transposed_fh>) {
    $line =~ s/ /\t/g;
    print {$geno_fh} $line;
}

# Closing a pipe waits for the child; a false result means it failed.
close($transposed_fh)
    or die "'$TRANSPOSE_EXE' failed"
         . ($! ? ": $!" : " with exit status " . ($? >> 8)) . "\n";
close($geno_fh)
    or die "Cannot write genotype file '$geno': $!\n";
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 hapmap2mlmm/HapmapToMLMMFiles.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hapmap2mlmm/HapmapToMLMMFiles.sh Fri Jul 10 04:39:30 2015 -0400 |
b |
#!/bin/bash
# Wrapper around HapmapToMLMMFiles.pl.
#
# Usage: HapmapToMLMMFiles.sh <hapmap input> <map output> <geno output>
#
# Runs the Perl converter that sits next to this script, passing it the
# three file names and this script's own directory as the location of the
# transpose executable. The exit status is that of the Perl script.

if [ "$#" -lt 3 ]; then
    echo "Usage: $0 <hapmap input> <map output> <geno output>" >&2
    exit 1
fi

hapmap=$1
map=$2
geno=$3

# Quoted throughout so that paths containing spaces or glob characters
# reach the Perl script as single, unmodified arguments.
directory=$(dirname -- "$0")

perl "$directory/HapmapToMLMMFiles.pl" -h "$hapmap" -g "$geno" -m "$map" -p "$directory"
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 hapmap2mlmm/HapmapToMLMMFiles.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hapmap2mlmm/HapmapToMLMMFiles.xml Fri Jul 10 04:39:30 2015 -0400 |
[ |
@@ -0,0 +1,158 @@ +<tool id="hapmap_to_mlmm_files" name="HapmapToMLMMFiles" version="1.1"> + <description>converts a hapmap file into MLMM input files</description> + <command interpreter="bash">./HapmapToMLMMFiles.sh $input $snp_info $genot + </command> + <inputs> + <param format="txt" name="input" type="data" label="Allelic file in Hapmap format" help="Allelic file in Hapmap format"/> + </inputs> + <outputs> + <data format="txt" name="snp_info" label="SNP Info file"/> + <data format="txt" name="genot" label="Genotyping file for MLMM"/> + </outputs> + <!-- [STRONGLY RECOMMANDED] Exit code rules --> + <stdio> + <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> + <exit_code range="1:" level="fatal" /> + </stdio> + + <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> + <tests> + <!-- [HELP] Test files have to be in the ~/test-data directory --> + <!-- [HELP] Multiple tests can be defined with different parameters --> +<!-- + <test> + </test> +--> + </tests> + <help> + + + + +.. class:: infomark + +**Authors** Dereeper Alexis (alexis.dereeper@ird.fr), IRD, South Green platform + + | **Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). + +.. class:: infomark + +**Galaxy integration** South Green. + +--------------------------------------------------- + + +================= +HapmapToMLMMFiles +================= + +----------- +Description +----------- + + | HapmapToMLMMFiles converts a hapmap file into input files compatible with the MLMM software. 
+ + +----------------- +Workflow position +----------------- + +**Upstream tool** + +=============== ====================== =========== +Name output file(s) format +=============== ====================== =========== +VCF to Hapmap Fasta alignment fasta +=============== ====================== =========== + + +**Downstream tool** + +=========== ========================== ======= +Name input file(s) format +=========== ========================== ======= +MLMM +=========== ========================== ======= + + + +---------- +Input file +---------- + +Hapmap file + Allelic file in Hapmap format + + + +------------ +Output files +------------ + +SNP Info file + +Genotyping file for MLMM + + + +--------------------------------------------------- + +--------------- +Working example +--------------- + +Input files +=========== + +Hapmap file +----------- + +:: + + rs# alleles chrom pos strand assembly# center protLSID assayLSID panel QCcode Ind1 Ind2 + SNP1 A/T 1 3102 + assembly NA NA NA speciesname NA AA AA AA + SNP2 A/T 1 4648 + assembly NA NA NA speciesname NA AA AA AA + + +Output files +============ + +SNP Info file +------------- + +:: + + SNP Chr Pos + SNP1 1 3102 + SNP2 1 4648 + SNP3 1 7601 + + +Genotyping file for MLMM +------------------------ + +:: + + Ind_id SNP1 SNP2 SNP3 SNP4 SNP5 SNP6 SNP7 SNP8 SNP9 SNP10 SNP11 SNP12 SNP13 SNP14 + Ind1 0 0 0 0 0 0 2 0 2 0 0 0 2 0 + Ind2 0 0 0 0 0 2 2 0 0 0 0 0 0 0 + + + </help> + <citations> + <!-- [HELP] As DOI or BibTex entry --> + <citation type="bibtex">@article{Dereeper03062015, +author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, +title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, +year = {2015}, +doi = {10.1093/nar/gkv351}, +abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single 
nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, +URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, +eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, +journal = {Nucleic Acids Research} +} + + </citation> + + </citations> +</tool> |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 hapmap2mlmm/transpose.awk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hapmap2mlmm/transpose.awk Fri Jul 10 04:39:30 2015 -0400 |
[ |
#!/usr/bin/gawk -f

# Transposes a table read with awk's default field splitting: input column x
# becomes output row x. Cells of an output row are joined by a single space.
# A position that a shorter input record did not fill is emitted as an empty
# cell, so every output row keeps one slot per input record.

BEGIN {
    col_count = 0;
    row_count = 0;
}

# Store each field of the current record under its (column, row) position
# and remember the widest record seen so far.
{
    row_count += 1;
    if (NF > col_count)
        col_count = NF;
    for (col = 1; col <= NF; col++)
        cell[col, row_count] = $col;
}

# Emit the stored cells column by column.
END {
    for (col = 1; col <= col_count; col++) {
        for (row = 1; row <= row_count; row++) {
            if (row > 1)
                printf " ";
            if ((col, row) in cell)
                printf "%s", cell[col, row];
        }
        printf "\n";
    }
}
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 ped2bed/ped2bed.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ped2bed/ped2bed.sh Fri Jul 10 04:39:30 2015 -0400 |
b |
#!/bin/bash
# Converts a PLINK PED/MAP pair into binary BED/FAM/BIM files.
#
# Usage: ped2bed.sh <ped input> <map input> <bed output> <fam output> <bim output> <log output>
#
# plink's stdout and stderr are appended to the log output. The script exits
# non-zero as soon as a step fails.
ped=$1
map=$2
bed=$3
fam=$4
bim=$5
logs=$6

# plink --file needs both inputs to share one prefix, so they are copied into
# a per-process scratch directory under fixed names.
workdir="tmpdir$$"
mkdir "$workdir" || exit 1

# Remove the scratch directory however the script ends.
trap 'rm -rf "$workdir"' EXIT

cp "$ped" "$workdir/input.ped" || exit 1
cp "$map" "$workdir/input.map" || exit 1

plink --file "$workdir/input" --out "$workdir/out" --make-bed --noweb >>"$logs" 2>&1 || exit $?

# Chain the moves so a missing result file is reported through the exit status.
mv "$workdir/out.bed" "$bed" &&
mv "$workdir/out.fam" "$fam" &&
mv "$workdir/out.bim" "$bim"
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 ped2bed/ped2bed.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ped2bed/ped2bed.xml Fri Jul 10 04:39:30 2015 -0400 |
[ |
@@ -0,0 +1,196 @@ +<tool id="ped2bed" name="plink: ped2bed" version="1.24"> + <description>Convert ped to bed</description> + <requirements> + <requirement type="package" version="1.07">plink</requirement> + </requirements> + <command interpreter="bash">./ped2bed.sh $ped $map $bed $fam $bim $logs + </command> + <inputs> + <param format="txt" name="ped" type="data" label="Allelic file in PED format" help="Allelic file in PED format"/> + <param format="txt" name="map" type="data" label="Map file" help="Map file"/> + </inputs> + <outputs> + <data format="txt" name="bed" label="Bed file"/> + <data format="txt" name="fam" label="Fam file"/> + <data format="txt" name="bim" label="Bim file"/> + <data format="txt" name="logs" label="All Logs"/> + </outputs> + <!-- [STRONGLY RECOMMANDED] Exit code rules --> + <stdio> + <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> + <exit_code range="1:" level="fatal" /> + </stdio> + + <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> + <tests> + <!-- [HELP] Test files have to be in the ~/test-data directory --> + <!-- [HELP] Multiple tests can be defined with different parameters --> +<!-- + <test> + </test> +--> + </tests> + <help> + + +.. class:: infomark + +**Authors** plink_ + +.. _plink: http://pngu.mgh.harvard.edu/purcell/plink/ + + | "PLINK: a toolset for whole-genome association and population-based linkage analysis.", **Purcell S, Neale B, Todd-Brown K, Thomas L, Ferreira MAR, Bender D, Maller J, Sklar P, de Bakker PIW, Daly MJ, Sham PC.**, American Journal of Human Genetics, 81, 2007. + +.. class:: infomark + +**Galaxy integration** South Green. + +.. class:: infomark + +**Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). 
+ + +--------------------------------------------------- + + + +======= +Ped2Bed +======= + +----------- +Description +----------- + + | PLINK is a free, open-source whole genome association analysis toolset, designed to perform a range of basic, large-scale analyses in a computationally efficient manner. + | For further information, please visit the plink website_. + +.. _website: http://pngu.mgh.harvard.edu/purcell/plink/ + + +----------------- +Workflow position +----------------- + +**Upstream tool** + +=============== ========================== =========== +Name output file(s) format +=============== ========================== =========== +VCFtools filter PED and map files ped and map +=============== ========================== =========== + + +**Downstream tool** + +=========== ========================== ======= +Name input file(s) format +=========== ========================== ======= +Admixture Bed, fam and bim file txt +=========== ========================== ======= + + +---------- +Input file +---------- + +PED file + Allelic file in PED format + +MAP file + + + +------------ +Output files +------------ + +Bed file + +Fam file + +Bim file + +All logs + Log file + + +------------ +Dependencies +------------ +plink + version 1.07 + +--------------------------------------------------- + +--------------- +Working example +--------------- + +Input files +=========== + +PED file +-------- + +:: + + AZUCENA AZUCENA 0 0 0 0 G G A A C C T T T + BULUPANDAK BULUPANDAK 0 0 0 0 G G A A A A T + +MAP file +-------- + +:: + + 0 Chr1:4299 0 4299 + 0 Chr1:26710 0 26710 + 0 Chr1:56184 0 56184 + 0 Chr1:93272 0 93272 + + +Output files +============ + +Bed file +-------- + +:: + + binary file + +Fam file +-------- + +:: + + AZUCENA AZUCENA 0 0 0 -9 + BULUPANDAK BULUPANDAK 0 0 0 -9 + +Bim file +-------- + +:: + + 0 Chr1:4299 0 4299 A G + 0 Chr8:18058 0 18058 C T + + + </help> + <citations> + <!-- [HELP] As DOI or BibTex entry --> + <citation 
type="bibtex">@article{Dereeper03062015, +author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, +title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, +year = {2015}, +doi = {10.1093/nar/gkv351}, +abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, +URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, +eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, +journal = {Nucleic Acids Research} +} + + </citation> + + </citations> +</tool> |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 snpEff/SnpEff.pl --- a/snpEff/SnpEff.pl Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,82 +0,0 @@ -#!/usr/bin/perl - -use strict; -use Getopt::Long; - -my $usage = qq~Usage:$0 <args> [<opts>] -where <args> are: - -i, --input <input VCF> - -o, --output <output> - -g, --gff <GFF annotation> - -f, --fasta <Fasta of chromosomes> - -h, --html <HTML output> -~; -$usage .= "\n"; - -my ($input,$output,$gff,$fasta,$html); - - -GetOptions( - "input=s" => \$input, - "output=s" => \$output, - "gff=s" => \$gff, - "fasta=s" => \$fasta, - "html=s" => \$html -); - - -die $usage - if ( !$input || !$output || !$fasta || !$gff || !$html); - - -if (!-e $gff){ - die "Error: GFF input does not exist\n" -} -if (!-e $fasta){ - die "Error: Fasta input does not exist\n" -} - -#my $SNPEFF_PATH = "/usr/local/bioinfo/galaxy/galaxy_dist/tools/SNiPlay/SnpEff/snpEff"; -my $SNPEFF_PATH = $ENV{SNPEFF_JAR_PATH}; - - -my $session = $$; -mkdir($session); -mkdir("$session/data"); -mkdir("$session/data/genomes"); -mkdir("$session/data/myspecies"); - -system("cp -rf $fasta $session/data/genomes/myspecies.fa"); -system("cp -rf $gff $session/data/myspecies/genes.gff"); - -open(my $C,"$SNPEFF_PATH/snpEff.config"); -open(my $C2,">$session/snpEff.config"); -while(<$C>) -{ - if (/data_dir/) - { - print $C2 "data_dir = ./data\n"; - } - elsif (/^genomes/) - { - print $C2 "genomes : \\n"; - print $C2 "myspecies, myspecies \\n"; - } - else - { - print $C2 $_; - } -} -print $C2 "myspecies.genome : myspecies\n"; -close($C); -close($C2); - - -my $build_cmd = "java -jar $SNPEFF_PATH/snpEff.jar build -c $session/snpEff.config -gff3 myspecies"; -system($build_cmd); - -my $eff_cmd = "java -jar $SNPEFF_PATH/snpEff.jar eff -c $session/snpEff.config -o vcf -no-downstream -no-upstream myspecies -s $html $input >$output"; -system($eff_cmd); - - -system("rm -rf $session"); |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 snpEff/snpEff-pipe.sh --- a/snpEff/snpEff-pipe.sh Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,14 +0,0 @@ -#!/bin/bash -vcf=$1 -genome=$2 -gff=$3 -output=$4 -html=$5 -log=$6 - -directory=`dirname $0` - -/usr/bin/perl $directory/SnpEff.pl -i $vcf -f $genome -g $gff -o $output -h $html >>$log 2>&1 - - - |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 snpEff/snpEff.xml --- a/snpEff/snpEff.xml Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,63 +0,0 @@ -<tool id="snpEff" name="SnpEff" version="4.0"> - <description>predicts SNP effect from a genomic VCF file</description> - <requirements> - <requirement type="package" version="4.0">snpEff</requirement> - </requirements> - - <!-- - You will need to change the path to wherever your installation is. - You can change the amount of memory used by snpEff, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory) - --> - <command interpreter="bash">./snpEff-pipe.sh $vcf $genome $gff $output $statsFile $log</command> - <inputs> - <param format="vcf" name="vcf" type="data" label="VCF input file" help="Positions must be genomic positions"/> - <param format="fasta" name="genome" type="data" label="Reference genome in Fasta" help=""/> - <param format="gff3" name="gff" type="data" label="GFF annotation of the genome" help=""/> - - </inputs> - <outputs> - <data format="vcf" name="output" label="Annotated VCF" /> - <data format="html" name="statsFile" label="HTML statistics output"/> - <data format="txt" name="log" label="Log file"/> - </outputs> - - <help> - - - - - -.. class:: infomark - -**Program encapsulated in Galaxy by Southgreen** - -.. class:: infomark - -**SnpEff version 4.0** - ------ - -============== - Please cite: -============== - -"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", **Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM.**, Fly (Austin). 2012 Apr-Jun;6(2):80-92. - ------ - -=========== - Overview: -=========== - -Genetic variant annotation and effect prediction toolbox. It annotates and predicts the effects of variants on genes (such as amino acid changes). - ------ - -For further informations, please visite the SnpEff_ website. - - -.. _SnpEff: http://snpeff.sourceforge.net/ - </help> - -</tool> - |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 tassel/tassel.sh --- a/tassel/tassel.sh Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,41 +0,0 @@ -#!/bin/bash - -analyseType=$1; -out1=$2; -out2=$3; -out3=$4; -log1=$5; -galaxyOutDir=$6; - - -mkdir $galaxyOutDir - -# Suppression des 6 premiers arguments de la liste des arguments $@ -shift; shift; shift; shift; shift; shift; - -if [[ $analyseType == glm ]] -then - run_pipeline.pl $* >> $log1 2>&1 - mv "$galaxyOutDir/TASSELGLM1.txt" $out1 - mv "$galaxyOutDir/TASSELGLM2.txt" $out2 -fi - -if [[ $analyseType == mlm ]] -then - run_pipeline.pl $* >> $log1 2>&1 - mv "$galaxyOutDir/TASSELMLM1.txt" $out1 - mv "$galaxyOutDir/TASSELMLM2.txt" $out2 - mv "$galaxyOutDir/TASSELMLM3.txt" $out3 -fi - -if [[ $analyseType == ld ]] -then - run_pipeline.pl $* >> $log1 2>&1 -fi - - -if [[ $analyseType == ck ]] -then - run_pipeline.pl $* >> $log1 2>&1 - mv "$galaxyOutDir/kinship.txt" $out1 -fi |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 tassel/tassel.xml --- a/tassel/tassel.xml Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,153 +0,0 @@ -<tool id="Tassel" name="Tassel" version="5.0"> - <description> Software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium. </description> - <requirements> - <requirement type="package" version="5.0">tassel</requirement> - </requirements> - <command interpreter="bash">./tassel.sh $analysis_opts.fonction_selector $output1 $output2 $output3 $log tmpdir$$/ -#if $analysis_opts.fonction_selector == "mlm": - -fork1 -h $hapmap -filterAlign -filterAlignMinFreq $filterAlignMinFreq - -fork2 -r $analysis_opts.trait - #if $analysis_opts.add_structure_file.additional_file_selector == "yes": - -fork3 -q $analysis_opts.add_structure_file.structure -excludeLastTrait - #end if - -fork4 -k $analysis_opts.kinship - -combineA -input1 -input2 - #if $analysis_opts.add_structure_file.additional_file_selector == "yes": - -input3 - #end if - -intersect -combineB -inputA -input4 -mlm -mlmVarCompEst $analysis_opts.mlmVarCompEst -mlmCompressionLevel $analysis_opts.mlmCompressionLevel -export tmpdir$$/TASSELMLM -runfork1 -runfork2 - #if $analysis_opts.add_structure_file.additional_file_selector == "yes": - -runfork3 - #end if - -runfork4 - -#else if $analysis_opts.fonction_selector == "glm": - -fork1 -h $hapmap -filterAlign -filterAlignMinFreq $filterAlignMinFreq - -fork2 -r $analysis_opts.trait - #if $analysis_opts.add_file.additional_file_selector == "yes": - -fork3 -q $analysis_opts.add_file.structure -excludeLastTrait - #end if - -combineA -input1 -input2 - #if $analysis_opts.add_file.additional_file_selector == "yes": - -input3 - #end if - -intersect -glm -export tmpdir$$/TASSELGLM -runfork1 -runfork2 - #if $analysis_opts.add_file.additional_file_selector == "yes": - -runfork3 - #end if -#else if $analysis_opts.fonction_selector == "ld": - -fork1 -h $hapmap - -ld -ldType All -ldd png -ldplotsize 3000 -o $output1 -runfork1 -#else if $analysis_opts.fonction_selector == "ck": - -fork1 -h $hapmap - -ck -export tmpdir$$/kinship 
-runfork1 -#end if - </command> - <inputs> - <param format="txt" name="hapmap" type="data" label="HapMap file"/> - <conditional name="analysis_opts"> - <param name="fonction_selector" type="select" label="Type of analysis"> - <option value="mlm" selected="True">MLM</option> - <option value="glm">GLM</option> - <option value="ld">Linkage Disequilibrium</option> - <option value="ck">Kinship</option> - </param> - <when value="glm"> - <param format="txt" name="trait" type="data" label="Trait file"/> - <conditional name="add_file"> - <param name="additional_file_selector" type="select" label="Add structure file"> - <option value="no" selected="True">no</option> - <option value="yes">yes</option> - </param> - <when value="yes"> - <param format="txt" name="structure" type="data" label="Structure file"/> - </when> - </conditional> - </when> - <when value="mlm"> - <param format="txt" name="trait" type="data" label="Trait file"/> - <param format="txt" name="kinship" type="data" label="Kinship file"/> - <conditional name="add_structure_file"> - <param name="additional_file_selector" type="select" label="Add structure file"> - <option value="no" selected="True">no</option> - <option value="yes">yes</option> - </param> - <when value="yes"> - <param format="txt" name="structure" type="data" label="Structure file"/> - </when> - </conditional> - <param name="mlmVarCompEst" type="select" label="Variance Component Estimation"> - <option value="P3D" selected="True">P3D</option> - <option value="EachMarker">EachMarker</option> - </param> - <param name="mlmCompressionLevel" type="select" label="Compression Level"> - <option value="Optimum" selected="True">Optimum</option> - <option value="Custom">Custom</option> - <option value="None">None</option> - </param> - </when> - <when value="ld"></when> - <when value="ck"></when> - </conditional> - <param type="text" name="filterAlignMinFreq" label="Filter minimal frequency allele" value="0.05"/> - </inputs> - <outputs> - <data format="txt" 
name="output1" label="Tassel output"> - <change_format> - <when input="analysis_opts['fonction_selector']" value="ld" format="png"/> - </change_format> - </data> - - <data format="txt" name="output2" label="Allele effects"> - <filter>analysis_opts['fonction_selector'] == "glm" or analysis_opts['fonction_selector'] == "mlm"</filter> - </data> - - <data format="txt" name="output3" label="Compression file"> - <filter>analysis_opts['fonction_selector'] == "mlm"</filter> - </data> - - <data format="txt" name="log" label="Log file"/> - </outputs> - <help> - -.. class:: infomark - -**Program encapsulated in Galaxy by Southgreen** - -.. class:: infomark - -**Tassel** - ------ - -========== - Authors: -========== - -**Terry Casstevens** - ------ - -========== - Overview -========== - -Software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium. - ------ - -For further informations, please visite the website of TASSEL_. - - -.. _TASSEL: http://www.maizegenetics.net/tassel/ - - </help> -<!-- -<tests> - <test> - <param name="input" value="genotyping_file.inp" /> - <output name="output" file="phase_output" /> - </test> -</tests> ---> -</tool> |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 tassel/tool-data/tool_dependencies.xml --- a/tassel/tool-data/tool_dependencies.xml Mon Mar 23 05:57:27 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,8 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="tassel" version="5.0"> - <repository changeset_revision="097d4c366e0d" name="package_tassel_5_0" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" /> - </package> -</tool_dependency> - - |
b |
diff -r 3e19d0dfcf3e -r 420b57c3c185 tool_dependencies.xml --- a/tool_dependencies.xml Mon Mar 23 05:57:27 2015 -0400 +++ b/tool_dependencies.xml Fri Jul 10 04:39:30 2015 -0400 |
b |
@@ -1,20 +1,6 @@ <?xml version="1.0"?> <tool_dependency> <package name="plink" version="1.07"> - <repository changeset_revision="65400c333b88" name="package_plink_1_07" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" /> - </package> - <package name="admixture" version="1.23"> - <repository changeset_revision="61e04b2aa621" name="package_admixture_1_23" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" /> - </package> - <package name="tassel" version="5.0"> - <repository changeset_revision="097d4c366e0d" name="package_tassel_5_0" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" /> - </package> - <package name="vcftools" version="0.1.12b"> - <repository changeset_revision="a655cb1dfc58" name="package_vcftools_0_1_12b" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu/" /> - </package> - <package name="snpEff" version="4.0"> - <repository name="package_snpeff_4_0" owner="iuc" changeset_revision="6bc55957927b" toolshed="http://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="65400c333b88" name="package_plink_1_07" owner="dereeper" toolshed="http://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency> - - |