Previous changeset 1:420b57c3c185 (2015-07-10) Next changeset 3:345f88a8f483 (2015-07-10) |
Commit message:
Uploaded |
added:
Rooting/rooting.xml~ check_gwas_inputs/CheckGWASInputs.xml~ |
removed:
MDSplot/MDSbasedOnIBSmatrix.pl MDSplot/MDSbasedOnIBSmatrix.pl.org MDSplot/mdsplot.sh MDSplot/mdsplot.xml MDSplot/test-data/analyse.ibs_matrix.txt MDSplot/test-data/analyse.log MDSplot/test-data/analyse.mds_plot.txt MDSplot/test-data/input.map MDSplot/test-data/input.ped PedToFasta/Ped2Fasta.pl PedToFasta/pedToFasta.xml PedToFasta/test-data/result.fa PedToFasta/test-data/sample.ped Rooting/Rooting.pl Rooting/Rootings_54.jar Rooting/rooting.xml Rooting/test-data/newick Rooting/test-data/out_tree Rooting/test-data/out_tree.log SNP_density/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl SNP_density/calculateSlidingWindowsSNPdensitiesFromHapMap_wrapper.xml SNP_density/calculateSlidingWindowsSNPdensitiesFromHapmap.sh SNP_density/test-data/hapmap SNP_density/test-data/result.txt SNP_density/test-data/result_bysample.txt VCF2Hapmap/VCF2FastaAndHapmap.pl VCF2Hapmap/vcf2FastaAndHapmap.sh VCF2Hapmap/vcf2FastaAndHapmap.xml check_gwas_inputs/CheckGWASInputs.pl check_gwas_inputs/CheckGWASInputs.sh check_gwas_inputs/CheckGWASInputs.xml egglib/CalculateDiversityIndexes.pl egglib/CalculateDiversityIndexes.sh egglib/CalculateDiversityIndexes.xml egglib/egglib-2.1.5/bin/eggstats egglib/egglib-2.1.5/include/egglib-cpp/Align.hpp egglib/egglib-2.1.5/include/egglib-cpp/Arg.hpp egglib/egglib-2.1.5/include/egglib-cpp/BaseDiversity.hpp egglib/egglib-2.1.5/include/egglib-cpp/ChangeTypes.hpp egglib/egglib-2.1.5/include/egglib-cpp/CharMatrix.hpp egglib/egglib-2.1.5/include/egglib-cpp/Consensus.hpp egglib/egglib-2.1.5/include/egglib-cpp/Container.hpp egglib/egglib-2.1.5/include/egglib-cpp/Controller.hpp egglib/egglib-2.1.5/include/egglib-cpp/Convert.hpp egglib/egglib-2.1.5/include/egglib-cpp/Current.hpp egglib/egglib-2.1.5/include/egglib-cpp/DataMatrix.hpp egglib/egglib-2.1.5/include/egglib-cpp/Edge.hpp egglib/egglib-2.1.5/include/egglib-cpp/EggException.hpp egglib/egglib-2.1.5/include/egglib-cpp/FStatistics.hpp egglib/egglib-2.1.5/include/egglib-cpp/Fasta.hpp 
egglib/egglib-2.1.5/include/egglib-cpp/HFStatistics.hpp egglib/egglib-2.1.5/include/egglib-cpp/HaplotypeDiversity.hpp egglib/egglib-2.1.5/include/egglib-cpp/LinkageDisequilibrium.hpp egglib/egglib-2.1.5/include/egglib-cpp/MicrosatelliteDiversity.hpp egglib/egglib-2.1.5/include/egglib-cpp/Ms.hpp egglib/egglib-2.1.5/include/egglib-cpp/Mutation.hpp egglib/egglib-2.1.5/include/egglib-cpp/Mutator.hpp egglib/egglib-2.1.5/include/egglib-cpp/NucleotideDiversity.hpp egglib/egglib-2.1.5/include/egglib-cpp/ParamSet.hpp egglib/egglib-2.1.5/include/egglib-cpp/Population.hpp egglib/egglib-2.1.5/include/egglib-cpp/Random.hpp egglib/egglib-2.1.5/include/egglib-cpp/SitePolymorphism.hpp egglib/egglib-2.1.5/include/egglib-cpp/Staden.hpp egglib/egglib-2.1.5/include/egglib-cpp/config.h egglib/egglib-2.1.5/lib/libegglib-cpp.a hapmap2mlmm/HapmapToMLMMFiles.pl hapmap2mlmm/HapmapToMLMMFiles.sh hapmap2mlmm/HapmapToMLMMFiles.xml hapmap2mlmm/transpose.awk ped2bed/ped2bed.sh ped2bed/ped2bed.xml tool_dependencies.xml |
b |
diff -r 420b57c3c185 -r feb40a9a8eae MDSplot/MDSbasedOnIBSmatrix.pl --- a/MDSplot/MDSbasedOnIBSmatrix.pl Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,110 +0,0 @@ -#!/usr/bin/perl - -use strict; -use Switch; -use Getopt::Long; -use Bio::SeqIO; - -my $PLINK_EXE= "plink"; - -my $usage = qq~Usage:$0 <args> [<opts>] -where <args> are: - -i, --in <input> - -o, --out <output> -~; -$usage .= "\n"; - -my ($in,$out); - - -GetOptions( - "in=s" => \$in, - "out=s" => \$out -); - -die $usage - if ( !$in || !$out); - - -my $plink_command = $PLINK_EXE . " --file $in --noweb --cluster --matrix --mds-plot 2 --out $out >>$in.plink.log 2>&1"; -system($plink_command); - -my $awk_cmd = "awk \{\'print \$1\'\} $in.ped"; -my $inds = `$awk_cmd`; -my @individuals = split("\n",$inds); - -my %populations; -if (-e "$in.individual_info.txt") -{ - open(my $I,"$in.individual_info.txt"); - while(<$I>) - { - my $line = $_; - $line =~s/\n//g; - $line =~s/\r//g; - my ($ind,$pop) = split(/;/,$line); - $populations{$ind} = $pop; - } - close($I); -} - -open(my $OUT,">$out.mds_plot.txt"); -my $go = 0; -open(my $O,"$out.mds"); -while(<$O>) -{ - if ($go) - { - my $line = $_; - $line =~s/\n//g; - $line =~s/\r//g; - my @i = split(/\s+/,$line); - if ($line =~/^ /) - { - my $ind = $i[1]; - my $pop = "Pop1"; - if ($populations{$ind}) - { - $pop = $populations{$ind}; - } - print $OUT "$pop $ind ".$i[4]." ".$i[5]."\n"; - } - if ($line =~/^\w/) - { - my $ind = $i[0]; - my $pop = "Pop1"; - if ($populations{$ind}) - { - $pop = $populations{$ind}; - } - print $OUT "$pop $ind ".$i[3]." ".$i[4]."\n"; - } - - } - if (/C1/){$go = 1;} -} -close($O); -close($OUT); - - -my $j = 0; -open(my $IBS,">$out.ibs_matrix.txt"); -print $IBS "Individuals " . join("\t",@individuals)."\n"; -open(my $O2,"$out.mibs"); -while(<$O2>) -{ - my $line = $_; - $line =~s/\n//g; - $line =~s/\r//g; - my @i = split(/\s+/,$line); - print $IBS $individuals[$j]. " ". join("\t",@i)."\n"; - $j++; -} -close($O2); -close($IBS); - - - - - - |
b |
diff -r 420b57c3c185 -r feb40a9a8eae MDSplot/MDSbasedOnIBSmatrix.pl.org --- a/MDSplot/MDSbasedOnIBSmatrix.pl.org Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,77 +0,0 @@ -#!/usr/bin/perl - -use strict; -use Switch; -use Getopt::Long; -use Bio::SeqIO; - -my $PLINK_EXE= "/apps/www/sniplay.cirad.fr/tools/plink/plink-1.07-x86_64/plink"; - -my $usage = qq~Usage:$0 <args> [<opts>] -where <args> are: - -i, --in <input> - -o, --out <output> -~; -$usage .= "\n"; - -my ($in,$out); - - -GetOptions( - "in=s" => \$in, - "out=s" => \$out -); - -die $usage - if ( !$in || !$out); - - -my $plink_command = $PLINK_EXE . " --file $in --noweb --cluster --matrix --mds-plot 2 --out $out >>$in.plink.log 2>&1"; -system($plink_command); - -my $awk_cmd = "awk \{\'print \$1\'\} $in.ped"; -my $inds = `$awk_cmd`; -my @individuals = split("\n",$inds); - - -open(my $OUT,">$out.mds_plot.txt"); -my $go = 0; -open(my $O,"$out.mds"); -while(<$O>) -{ - if ($go) - { - my $line = $_; - $line =~s/\n//g; - $line =~s/\r//g; - my @i = split(/\s+/,$line); - my $ind = $i[1]; - print $OUT "$ind ".$i[4]." ".$i[5]."\n"; - } - if (/C1/){$go = 1;} -} -close($O); -close($OUT); - - -my $j = 0; -open(my $IBS,">$out.ibs_matrix.txt"); -print $IBS "Individuals " . join("\t",@individuals)."\n"; -open(my $O2,"$out.mibs"); -while(<$O2>) -{ - my $line = $_; - $line =~s/\n//g; - $line =~s/\r//g; - my @i = split(/\s+/,$line); - print $IBS $individuals[$j]. " ". join("\t",@i)."\n"; - $j++; -} -close($O2); -close($IBS); - - - - - - |
b |
diff -r 420b57c3c185 -r feb40a9a8eae MDSplot/mdsplot.sh --- a/MDSplot/mdsplot.sh Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,23 +0,0 @@ -#!/bin/bash - -tool_path=$(dirname $0) -ped=$1 -map=$2 -fileout_label=$3 -fileout_matrix=$4 -fileout_plot=$5 -fileout_log=$6 - -rsync -a $ped input.ped -rsync -a $map input.map - -perl $tool_path/MDSbasedOnIBSmatrix.pl --in input --out $fileout_label - -rm -f input.ped input.map - -cp $fileout_label.ibs_matrix.txt $fileout_matrix -cp $fileout_label.mds_plot.txt $fileout_plot -cp input.plink.log $fileout_log - - -rm -f $fileout_label.ibs_matrix.txt $fileout_label.mds_plot.txt input.plink.log |
b |
diff -r 420b57c3c185 -r feb40a9a8eae MDSplot/mdsplot.xml --- a/MDSplot/mdsplot.xml Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,232 +0,0 @@ -<tool id="sniplay_mdsplot" name="MDS plot" version="1.1.1"> - - <!-- [REQUIRED] Tool description displayed after the tool name --> - <description> IBS matrix / multi-dimensional scaling</description> - - <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> - <requirements> - <requirement type="binary">perl</requirement> - <requirement type="package" version="1.07">plink</requirement> - </requirements> - - <!-- [OPTIONAL] Command to be executed to get the tool's version string --> - <version_command> -<!-- - tool_binary -v ---> - </version_command> - - <!-- [REQUIRED] The command to execute --> - <command interpreter="bash"> - mdsplot.sh $fileped $filemap $fileout_label $fileout_matrix $fileout_plot $fileout_log - </command> - - <!-- [REQUIRED] Input files and tool parameters --> - <inputs> - <param name="fileped" type="data" format="txt" optional="false" label="PED input" /> - <param name="filemap" type="data" format="txt" optional="false" label="MAP input" help="4 columns tabular file: chromosome, snp id, genetic distance, bp position"/> - <param name="fileout_label" type="text" value="analyse" label="Output name" help="Output name for tabular files" /> - </inputs> - - <!-- [REQUIRED] Output files --> - <outputs> - <data name="fileout_matrix" type="data" format="tabular" label="${fileout_label}.ibs_matrix.txt" /> - <data name="fileout_plot" type="data" format="tabular" label="${fileout_label}.mds_plot.txt" /> - <data name="fileout_log" type="data" format="txt" label="${fileout_label}.log" /> - </outputs> - - <!-- [STRONGLY RECOMMANDED] Exit code rules --> - <stdio> - <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> - <exit_code range="1:" level="fatal" /> - </stdio> - - <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> - <tests> - <!-- [HELP] Test files have to be in the ~/test-data directory --> - - <test> - <param name="fileped" 
value="input.ped" /> - <param name="filemap" value="input.map" /> - <output name="fileout_matrix" file="output.ibs_matrix.txt" /> - <output name="fileout_plot" file="output.mds_plot.txt" /> - <output name="fileout_log" file="output.log" /> - </test> - - <!-- [HELP] Multiple tests can be defined with different parameters --> -<!-- - <test> - </test> ---> - </tests> - - <!-- [OPTIONAL] Help displayed in Galaxy --> - <help> - - -.. class:: infomark - -**Authors** plink_ - -.. _plink: http://pngu.mgh.harvard.edu/purcell/plink/ - - | "PLINK: a toolset for whole-genome association and population-based linkage analysis.", **Purcell S, Neale B, Todd-Brown K, Thomas L, Ferreira MAR, Bender D, Maller J, Sklar P, de Bakker PIW, Daly MJ, Sham PC.**, American Journal of Human Genetics, 81, 2007. - - -.. class:: infomark - -**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. - -.. class:: infomark - -**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr - -.. class:: infomark - -**Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). - ---------------------------------------------------- - - - -======== -MDS plot -======== - ------------ -Description ------------ - - Compute an IBS matrix and a multi-dimensional scaling. 
- - ------------------ -Workflow position ------------------ - -**Upstream tool** - -=============== ========================== =============== -Name output file(s) format -=============== ========================== =============== -VCFtools Filter PED and MAP file tabular and MAP -=============== ========================== =============== - - - ----------- -Input file ----------- - -PED file - -MAP file - 4 columns tabular file: chromosome, snp id, genetic distance, bp position - - ----------- -Parameters ----------- - -Output name - Output base name for the ouput files - - ------------- -Output files ------------- - -Output_name.ibs_matrix.txt - Tabular file with IBS matrix - -Output_name.mds_plot.txt - File to construct mds plot - -Output_name.log - Log file - ------------- -Dependencies ------------- -plink - version 1.07 - ---------------------------------------------------- - ---------------- -Working example ---------------- - -Input files -=========== - -PED file ------------ - -:: - - IRAT112 1 0 0 1 1 1 1 4 4 ... - IAC25 1 0 0 1 1 1 1 4 4 ... - CIRAD409 1 0 0 1 1 3 3 1 1 ... - - -MAP file ------------ - -:: - - Chr1 Chr1:4299 0 4299 - Chr1 Chr1:26710 0 26710 - Chr1 Chr1:56184 0 56184 - Chr1 Chr1:93272 0 93272 - - - -Parameters -========== - -Output name -> densities - - -Output files -============ - -densities.ibs_matrix.txt ------------------------- - -:: - - Individuals IRAT112 IAC25 IAC165 KARASUKARASURANKASU DOURADOPRECOCE ... - IRAT112 1 0.93691 0.937407 0.734724 0.943368 ... - IAC25 0.93691 1 0.958768 0.723299 0.965723 ... 
- - -densities.mds_plot.txt ----------------------- - -:: - - IRAT112 -0.0969382 0.0376036 - IAC25 -0.0918126 0.0501177 - - - - </help> - <citations> - <!-- [HELP] As DOI or BibTex entry --> - <citation type="bibtex">@article{Dereeper03062015, -author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, -title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, -year = {2015}, -doi = {10.1093/nar/gkv351}, -abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. 
SNiPlay is accessible at http://sniplay.southgreen.fr.}, -URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, -eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, -journal = {Nucleic Acids Research} -} - - </citation> - - </citations> - -</tool> |
b |
diff -r 420b57c3c185 -r feb40a9a8eae MDSplot/test-data/analyse.ibs_matrix.txt --- a/MDSplot/test-data/analyse.ibs_matrix.txt Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,94 +0,0 @@\n-Individuals\tIRAT112\tIAC25\tIAC165\tKARASUKARASURANKASU\tDOURADOPRECOCE\tCUIABANA\tBICOBRANCO\tCAAWA/FORTUNA6\tCANELADEFERRO\tCIRAD358\tCOLOMBIA1\tCIRAD409\tBAGANANASALAO\tBAKUNGH\tFOHISOMOTRA\tKAKANI2\tGUARANI\tDOURADOAGULHA\tIRAT13\tDAWASANRED\tGRAZI\tIRAT144\tIAC47\tMOROBEREKAN\tIRAT362\tGIZA171\tIDSA77\tIRAT216\tIRAT177\tCHAPHUMA\tCIRAD392\tARROZCEBADA\tINDANE\tDINORADO\t63-104\tIR63380-16\tCANAROXA\tCICIHBETON\tIR68704-145-1-1-B\tHAWMOM\tIRAT364\tIRAT212\tKHAODAM\t62667\tIRAT234\tNIPPONBARE_D\tJUMULA2\tCUBA65\tBABER\tCIRAD488\tBENGALYVAKARINA\tESPERANZA\tBULUPANDAK\tARIAS\tGOMPA2\tCT13582-15-5-M\tIRAT335\tM202\tIR65907-188-1-B\tIR71525-19-1-1\tCNA-7_BO_1_1_33-13-6-1\tIRAT257\tIRAT109\tKANIRANGA\tIR66421-096-2-1-1\tGOGO\tGEMJYAJYANAM\tDANGREY\tBINULAWAN\tCAIAPO\tIGUAPECATETO\tIRAT170\tIRAT380\tARAGUAIA\tGOGOLEMPAK\tJAOHAW\tCHALOYOE\tGANIGI\tGUNDILKUNING\tIRAT2\tIRAT366\tIRAT104\tDAVAO\tHD1-4\tCURINCA\tDAM\tCIRAD403\tJUMALI\tJIMBRUKJOLOWORO\tGOGOLEMPUK\tCIRAD394\tIR63372-08\tIR60080-46A\n-IRAT112\t1\t0.93691\t0.937407\t0.734724\t0.943368\t0.819672\t0.903626\t0.850969\t0.842524\t0.802285\t0.717834\t0.836562\t0.804272\t0.832588\t0.81073\t0.614009\t0.946846\t0.943368\t0.944362\t0.688525\t0.807253\t0.712866\t0.929955\t0.782414\t0.902136\t0.692499\t0.817188\t0.796821\t0.943865\t0.697963\t0.790859\t0.787879\t0.704918\t0.833582\t0.917039\t0.769001\t0.938897\t0.682563\t0.814704\t0.707402\t0.859911\t0.787879\t0.709389\t0.945355\t0.883259\t0.703924\t0.697963\t0.843517\t0.725286\t0.725782\t0.84302\t0.876304\t0.69995\t0.74466\t0.651267\t0.836066\t0.958271\t0.651764\t0.755092\t0.859911\t0.861898\t0.905614\t0.892201\t0.739692\t0.828117\t0.80924\t0.650273\t0.644312\t0.860407\t0.853949\t0.922504\t0.944858\t0.81073\t0.908097\t0.756085\t0.674118\t0.744163\t0.785892\t0.698957\t0.944858\t0.80775\t0.857427\t0.838053\t0.931446\t0.831098\t0.721311\t0.798808\t0.592151\t0.739692\t0.757079\t0.767014\t0.781918\t0.899155\n-IAC25\t0.93691\t1\t0.958768\t0.723299\t0.96572
3\t0.825137\t0.93691\t0.836562\t0.850969\t0.772976\t0.713363\t0.807253\t0.791853\t0.819175\t0.814208\t0.602583\t0.934426\t0.966716\t0.904123\t0.676105\t0.817685\t0.706408\t0.948336\t0.811724\t0.892697\t0.693989\t0.819672\t0.784401\t0.902633\t0.677596\t0.806259\t0.812221\t0.717337\t0.813214\t0.876801\t0.757576\t0.923497\t0.706905\t0.806259\t0.69995\t0.833582\t0.777447\t0.693989\t0.904123\t0.896672\t0.703428\t0.694486\t0.813214\t0.728763\t0.747144\t0.82464\t0.864878\t0.710383\t0.758073\t0.643815\t0.834575\t0.925981\t0.674118\t0.745653\t0.839543\t0.845504\t0.900149\t0.851962\t0.764034\t0.817685\t0.814704\t0.655738\t0.644809\t0.819175\t0.834575\t0.90313\t0.90462\t0.838053\t0.928465\t0.751615\t0.666667\t0.733731\t0.800298\t0.707402\t0.902633\t0.805266\t0.846995\t0.822653\t0.955787\t0.809737\t0.700944\t0.788376\t0.591654\t0.750124\t0.751615\t0.790363\t0.769498\t0.879781\n-IAC165\t0.937407\t0.958768\t1\t0.722802\t0.929459\t0.839543\t0.932439\t0.862891\t0.85544\t0.786388\t0.739692\t0.817685\t0.790363\t0.817685\t0.822653\t0.596125\t0.968703\t0.929459\t0.920517\t0.677596\t0.832091\t0.704918\t0.9846\t0.822156\t0.92002\t0.688525\t0.834078\t0.794834\t0.921013\t0.685047\t0.805763\t0.823646\t0.717834\t0.826627\t0.895181\t0.769995\t0.945852\t0.688525\t0.825633\t0.694486\t0.846001\t0.77993\t0.697466\t0.920517\t0.90611\t0.694983\t0.689021\t0.809737\t0.723299\t0.745653\t0.810233\t0.87233\t0.692002\t0.76155\t0.636364\t0.849975\t0.91853\t0.66766\t0.755092\t0.866865\t0.855936\t0.916542\t0.868356\t0.746647\t0.822156\t0.813214\t0.651267\t0.632389\t0.82464\t0.844014\t0.927471\t0.921013\t0.832588\t0.963736\t0.774963\t0.661202\t0.743169\t0.796821\t0.702931\t0.921013\t0.800795\t0.864382\t0.80924\t0.98609\t0.821162\t0.709389\t0.797814\t0.588177\t0.735718\t0.774963\t0.783905\t0.780924\t0.901143\n-KARASUKARASURANKASU\t0.734724\t0.723299\t0.722802\t1\t0.73075\t0.748137\t0.745653\t0.766518\t0.747144\t0.796324\t0.673125\t0.738202\t0.774466\t0.753105\t0.780924\t0.630899\t0.731247\t0.73075\t0.769498\t
0.802782\t0.769498\t0.677099\t0.73224\t0.749627\t0.754098\t0.817685\t0.715847\t0.721311\t0.769001\t0.837059\t0.777943\t0.775956\t0.827124\t0.740189\t0.778937\t0.707402\t0.743169\t0.675609\t0.704421\t0.80775\t0.780427\t0.748137\t0.879285\t0.770492\t0.759066\t0.828117\t0.731744\t0.743169\t0.761053\t0.669151\t0.764531\t0.71237\t0.693989\t0.737705\t0.668157\t0.735718\t0.723795\t0.781918\t0.728266\t0.764531\t0.750621\t0.741679\t0.731247\t0.689'..b'\t0.74764\t0.723795\t0.61997\t0.77546\t0.692002\t0.730253\t0.850969\t0.703428\t0.672628\t0.654247\t0.640835\t0.722305\t0.694983\t0.726279\t0.74615\t0.727769\t0.724292\t0.71535\t0.653751\t0.740189\t0.724292\t0.656731\t0.687531\t0.811227\t0.726279\t0.688525\t0.723795\t0.800795\t0.680079\t0.663189\t0.748137\t0.658718\t0.663189\t0.791356\t0.721311\t0.737705\t0.759563\t0.631396\t0.682067\t0.741679\t0.647789\t0.671634\t0.77844\t0.689021\t0.774466\t0.682563\t0.865872\t0.707899\t0.814208\t0.616493\t0.60457\t0.774963\t0.726776\t0.736711\t0.724292\t0.733234\t0.709389\t0.77993\t0.691008\t0.679583\t0.775956\t0.727769\t0.723299\t0.796821\t0.777943\t0.763537\t0.734724\t0.651267\t0.678589\t0.643815\t0.600099\t1\t0.77993\t0.680576\t0.641828\t0.739195\n-GOGOLEMPUK\t0.757079\t0.751615\t0.774963\t0.710383\t0.756085\t0.791356\t0.783905\t0.780924\t0.771485\t0.801788\t0.63686\t0.69995\t0.750124\t0.79533\t0.835072\t0.597615\t0.789369\t0.756085\t0.782911\t0.727769\t0.801788\t0.685544\t0.787382\t0.852459\t0.776453\t0.693989\t0.669647\t0.673125\t0.782414\t0.71535\t0.717834\t0.79533\t0.730253\t0.754595\t0.77546\t0.685047\t0.790363\t0.748634\t0.706905\t0.729757\t0.795827\t0.765524\t0.725782\t0.783905\t0.815201\t0.697466\t0.689518\t0.729757\t0.73075\t0.693492\t0.795827\t0.715847\t0.777943\t0.738202\t0.645802\t0.72926\t0.762047\t0.643318\t0.674118\t0.813711\t0.751118\t0.782911\t0.733731\t0.840537\t0.772976\t0.789866\t0.73224\t0.718331\t0.780427\t0.743169\t0.761053\t0.783408\t0.722802\t0.749627\t0.998013\t0.734228\t0.721808\t0.838053\t0.785892\t0.783408\t0.8
08246\t0.853949\t0.759066\t0.787879\t0.660705\t0.728763\t0.675112\t0.577745\t0.77993\t1\t0.677099\t0.675112\t0.791356\n-CIRAD394\t0.767014\t0.790363\t0.783905\t0.761053\t0.778937\t0.725782\t0.783905\t0.742176\t0.750621\t0.693492\t0.725286\t0.755589\t0.71535\t0.742673\t0.705912\t0.677099\t0.764531\t0.778937\t0.803775\t0.700944\t0.727273\t0.718331\t0.781421\t0.717337\t0.811227\t0.834078\t0.755092\t0.759563\t0.803279\t0.692499\t0.865872\t0.736711\t0.685544\t0.76155\t0.813214\t0.787382\t0.784401\t0.674118\t0.73075\t0.680079\t0.723299\t0.794337\t0.706905\t0.804769\t0.793343\t0.838549\t0.783905\t0.729757\t0.722802\t0.748137\t0.752111\t0.770492\t0.676602\t0.714357\t0.733234\t0.783905\t0.742176\t0.79235\t0.745653\t0.735221\t0.813711\t0.766021\t0.764531\t0.699454\t0.790859\t0.761053\t0.690512\t0.685544\t0.738698\t0.789866\t0.76304\t0.804272\t0.824143\t0.758569\t0.676105\t0.69846\t0.723795\t0.693989\t0.680576\t0.803279\t0.655241\t0.752608\t0.728266\t0.781918\t0.745156\t0.702931\t0.763537\t0.645306\t0.680576\t0.677099\t1\t0.773472\t0.781421\n-IR63372-08\t0.781918\t0.769498\t0.780924\t0.735221\t0.76006\t0.712866\t0.778937\t0.796821\t0.74764\t0.697466\t0.868356\t0.944362\t0.747144\t0.729757\t0.717834\t0.605564\t0.781421\t0.761053\t0.834575\t0.707899\t0.728266\t0.642822\t0.77844\t0.702434\t0.835072\t0.700944\t0.945852\t0.981123\t0.835072\t0.700447\t0.763537\t0.741679\t0.710383\t0.74466\t0.836066\t0.93542\t0.798311\t0.619473\t0.846001\t0.698957\t0.757079\t0.721808\t0.720815\t0.835569\t0.773472\t0.71237\t0.683557\t0.742673\t0.707899\t0.690512\t0.719324\t0.769498\t0.61699\t0.697466\t0.634873\t0.85544\t0.759066\t0.690015\t0.963239\t0.767014\t0.849478\t0.757079\t0.798311\t0.660705\t0.837556\t0.750124\t0.663686\t0.672628\t0.745653\t0.764034\t0.753105\t0.835072\t0.760556\t0.79235\t0.674118\t0.687531\t0.711873\t0.691008\t0.627919\t0.835072\t0.670144\t0.746647\t0.72926\t0.77993\t0.938897\t0.709886\t0.981123\t0.592648\t0.641828\t0.675112\t0.773472\t1\t0.812221\n-IR60080-46A\t0.899155\t0.87
9781\t0.901143\t0.753105\t0.867362\t0.807253\t0.869349\t0.878291\t0.845007\t0.808743\t0.738202\t0.861898\t0.787879\t0.817188\t0.84004\t0.617486\t0.924491\t0.868356\t0.946846\t0.702931\t0.799801\t0.769001\t0.898659\t0.818679\t0.934426\t0.704918\t0.801788\t0.82613\t0.946349\t0.722305\t0.799305\t0.812221\t0.714357\t0.837059\t0.919523\t0.797317\t0.925484\t0.702931\t0.846001\t0.725782\t0.861401\t0.84302\t0.742673\t0.947839\t0.865872\t0.711376\t0.719324\t0.835072\t0.725782\t0.741182\t0.80775\t0.858917\t0.721311\t0.762047\t0.672628\t0.882265\t0.880278\t0.680079\t0.806259\t0.942871\t0.891207\t0.923\t0.903626\t0.743169\t0.832588\t0.813711\t0.674615\t0.66468\t0.842027\t0.871336\t0.87233\t0.947342\t0.812221\t0.871833\t0.790363\t0.718331\t0.743666\t0.798311\t0.722305\t0.947342\t0.79533\t0.910581\t0.837556\t0.901143\t0.785892\t0.726776\t0.828117\t0.603577\t0.739195\t0.791356\t0.781421\t0.812221\t1\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae MDSplot/test-data/analyse.log --- a/MDSplot/test-data/analyse.log Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,56 +0,0 @@ - -@----------------------------------------------------------@ -| PLINK! | v1.07 | 10/Aug/2009 | -|----------------------------------------------------------| -| (C) 2009 Shaun Purcell, GNU General Public License, v2 | -|----------------------------------------------------------| -| For documentation, citation & bug-report instructions: | -| http://pngu.mgh.harvard.edu/purcell/plink/ | -@----------------------------------------------------------@ - -Skipping web check... [ --noweb ] -Writing this text to log file [ analyse.log ] -Analysis started: Tue Jan 20 09:30:56 2015 - -Options in effect: - --file input - --noweb - --cluster - --matrix - --mds-plot 2 - --out analyse - -2013 (of 2013) markers to be included from [ input.map ] -93 individuals read from [ input.ped ] -93 individuals with nonmissing phenotypes -Assuming a disease phenotype (1=unaff, 2=aff, 0=miss) -Missing phenotype value is also -9 -0 cases, 93 controls and 0 missing -93 males, 0 females, and 0 of unspecified sex -Before frequency and genotyping pruning, there are 2013 SNPs -Converting data to SNP-major format -93 founders and 0 non-founders found -Total genotyping rate in remaining individuals is 1 -0 SNPs failed missingness test ( GENO > 1 ) -0 SNPs failed frequency test ( MAF < 0 ) -Converting data to Individual-major format -After frequency and genotyping pruning, there are 2013 SNPs -After filtering, 0 cases, 93 controls and 0 missing -After filtering, 93 males, 0 females, and 0 of unspecified sex - - **Warning** this analysis typically requires whole-genome level data - to give accurate results - -Clustering individuals based on genome-wide IBS -Merge distance p-value constraint = 0 -IBS(g) calculation: 0 of 4278 IBS(g) calculation: 100 of 4278 IBS(g) calculation: 200 of 4278 IBS(g) calculation: 300 of 4278 IBS(g) calculation: 400 of 4278 IBS(g) calculation: 500 of 4278 IBS(g) calculation: 600 of 4278 IBS(g) calculation: 700 of 4278 IBS(g) calculation: 800 of 4278 IBS(g) 
calculation: 900 of 4278 IBS(g) calculation: 1000 of 4278 IBS(g) calculation: 1100 of 4278 IBS(g) calculation: 1200 of 4278 IBS(g) calculation: 1300 of 4278 IBS(g) calculation: 1400 of 4278 IBS(g) calculation: 1500 of 4278 IBS(g) calculation: 1600 of 4278 IBS(g) calculation: 1700 of 4278 IBS(g) calculation: 1800 of 4278 IBS(g) calculation: 1900 of 4278 IBS(g) calculation: 2000 of 4278 IBS(g) calculation: 2100 of 4278 IBS(g) calculation: 2200 of 4278 IBS(g) calculation: 2300 of 4278 IBS(g) calculation: 2400 of 4278 IBS(g) calculation: 2500 of 4278 IBS(g) calculation: 2600 of 4278 IBS(g) calculation: 2700 of 4278 IBS(g) calculation: 2800 of 4278 IBS(g) calculation: 2900 of 4278 IBS(g) calculation: 3000 of 4278 IBS(g) calculation: 3100 of 4278 IBS(g) calculation: 3200 of 4278 IBS(g) calculation: 3300 of 4278 IBS(g) calculation: 3400 of 4278 IBS(g) calculation: 3500 of 4278 IBS(g) calculation: 3600 of 4278 IBS(g) calculation: 3700 of 4278 IBS(g) calculation: 3800 of 4278 IBS(g) calculation: 3900 of 4278 IBS(g) calculation: 4000 of 4278 IBS(g) calculation: 4100 of 4278 IBS(g) calculation: 4200 of 4278 Writing IBS similarity matrix to [ analyse.mibs ] -Of these, 4278 are pairable based on constraints -Writing cluster progress to [ analyse.cluster0 ] -Writing cluster solution (1) [ analyse.cluster1 ] -Writing cluster solution (2) [ analyse.cluster2 ] -Writing cluster solution (3) [ analyse.cluster3 ] -Writing MDS solution to [ analyse.mds ] -MDS plot of individuals (not clusters) - -Analysis finished: Tue Jan 20 09:30:57 2015 - |
b |
diff -r 420b57c3c185 -r feb40a9a8eae MDSplot/test-data/analyse.mds_plot.txt --- a/MDSplot/test-data/analyse.mds_plot.txt Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,93 +0,0 @@ -IRAT112 -0.0969382 0.0376036 -IAC25 -0.0918126 0.0501177 -IAC165 -0.104815 0.0453876 -KARASUKARASURANKASU 0.107782 -0.0683546 -DOURADOPRECOCE -0.0831697 0.0516412 -CUIABANA 0.0327877 0.0497752 -BICOBRANCO -0.0554177 0.0501857 -CAAWA/FORTUNA6 -0.052355 0.0170584 -CANELADEFERRO -0.0142283 0.0598394 -CIRAD358 0.0497841 0.0645487 -COLOMBIA1 -0.110347 -0.172742 -CIRAD409 -0.113614 -0.105898 -BAGANANASALAO 0.0247582 0.0186246 -BAKUNGH 0.0199022 0.0613692 -FOHISOMOTRA 0.0344309 0.0697592 -KAKANI2 0.147713 -0.0780455 -GUARANI -0.0892761 0.0387007 -DOURADOAGULHA -0.0831373 0.0514523 -IRAT13 -0.0757215 0.00312413 -DAWASANRED 0.131309 -0.102364 -GRAZI 0.0177091 0.0870472 -IRAT144 0.0146443 0.0067734 -IAC47 -0.0968934 0.0477667 -MOROBEREKAN 0.0395544 0.0952695 -IRAT362 -0.0921075 -0.00860852 -GIZA171 0.14498 -0.0932001 -IDSA77 -0.122827 -0.120219 -IRAT216 -0.120205 -0.136803 -IRAT177 -0.0762395 0.00230332 -CHAPHUMA 0.149543 -0.0680637 -CIRAD392 0.0187265 -0.047646 -ARROZCEBADA 0.0110416 0.0787137 -INDANE 0.108691 -0.0338065 -DINORADO -0.00819237 0.0386578 -63-104 -0.053974 -0.00204494 -IR63380-16 -0.0968028 -0.0931822 -CANAROXA -0.0756689 0.0253328 -CICIHBETON 0.108857 0.142633 -IR68704-145-1-1-B -0.103292 -0.071382 -HAWMOM 0.129651 -0.0473323 -IRAT364 -0.0118523 0.0472973 -IRAT212 0.0231616 -0.00146178 -KHAODAM 0.123936 -0.0654516 -62667 -0.0752552 0.00278556 -IRAT234 -0.0346607 0.0551203 -NIPPONBARE_D 0.133199 -0.0963396 -JUMULA2 0.104558 -0.0524994 -CUBA65 -0.0124835 0.013679 -BABER 0.090299 -0.0460696 -CIRAD488 -0.0201481 -0.0262103 -BENGALYVAKARINA 0.028631 0.0637662 -ESPERANZA -0.108888 -0.00812147 -BULUPANDAK 0.119762 0.143912 -ARIAS 0.0327948 0.0827878 -GOMPA2 0.133341 -0.0543414 -CT13582-15-5-M -0.0922561 -0.0543988 -IRAT335 -0.0886849 0.0522465 -M202 0.13463 -0.106462 -IR65907-188-1-B -0.0755893 -0.136821 -IR71525-19-1-1 -0.0353444 0.0478985 -CNA-7_BO_1_1_33-13-6-1 -0.0823478 -0.0435581 -IRAT257 -0.0606191 0.0564989 -IRAT109 -0.1006 -0.00959445 
-KANIRANGA 0.0705459 0.129888 -IR66421-096-2-1-1 -0.00859728 -0.0570793 -GOGO 0.0232414 0.0351889 -GEMJYAJYANAM 0.170032 -0.0857315 -DANGREY 0.175792 -0.113683 -BINULAWAN -0.0208225 0.0611919 -CAIAPO -0.0149329 -0.0122987 -IGUAPECATETO -0.0740464 0.0552386 -IRAT170 -0.0756928 0.00297643 -IRAT380 -0.0210318 -0.00092536 -ARAGUAIA -0.123443 0.0273298 -GOGOLEMPAK 0.0598291 0.107462 -JAOHAW 0.17563 -0.0587865 -CHALOYOE 0.0904761 -0.0565325 -GANIGI 0.0526286 0.0960475 -GUNDILKUNING 0.116788 0.130234 -IRAT2 -0.0762849 0.00262428 -IRAT366 0.0273652 0.13551 -IRAT104 -0.00310702 0.0636479 -DAVAO -0.00538403 0.0564886 -HD1-4 -0.0960109 0.0459137 -CURINCA -0.128876 -0.121141 -DAM 0.129029 -0.0660183 -CIRAD403 -0.117849 -0.134651 -JUMALI 0.123992 -0.0843623 -JIMBRUKJOLOWORO 0.0549906 0.135017 -GOGOLEMPUK 0.0597267 0.10733 -CIRAD394 0.0040454 -0.0885135 -IR63372-08 -0.0951857 -0.140804 -IR60080-46A -0.0732581 0.0197832 |
b |
diff -r 420b57c3c185 -r feb40a9a8eae MDSplot/test-data/input.map --- a/MDSplot/test-data/input.map Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,2013 +0,0 @@\n-Chr1 Chr1:4299 0 4299\n-Chr1 Chr1:26710 0 26710\n-Chr1 Chr1:56184 0 56184\n-Chr1 Chr1:93272 0 93272\n-Chr1 Chr1:93274 0 93274\n-Chr1 Chr1:146252 0 146252\n-Chr1 Chr1:171433 0 171433\n-Chr1 Chr1:186286 0 186286\n-Chr1 Chr1:198907 0 198907\n-Chr1 Chr1:205550 0 205550\n-Chr1 Chr1:206108 0 206108\n-Chr1 Chr1:214680 0 214680\n-Chr1 Chr1:214775 0 214775\n-Chr1 Chr1:215711 0 215711\n-Chr1 Chr1:217307 0 217307\n-Chr1 Chr1:238821 0 238821\n-Chr1 Chr1:240448 0 240448\n-Chr1 Chr1:240579 0 240579\n-Chr1 Chr1:300528 0 300528\n-Chr1 Chr1:312532 0 312532\n-Chr1 Chr1:337330 0 337330\n-Chr1 Chr1:351841 0 351841\n-Chr1 Chr1:353617 0 353617\n-Chr1 Chr1:405193 0 405193\n-Chr1 Chr1:405217 0 405217\n-Chr1 Chr1:409167 0 409167\n-Chr1 Chr1:415515 0 415515\n-Chr1 Chr1:424861 0 424861\n-Chr1 Chr1:425116 0 425116\n-Chr1 Chr1:429660 0 429660\n-Chr1 Chr1:439393 0 439393\n-Chr1 Chr1:456979 0 456979\n-Chr1 Chr1:479030 0 479030\n-Chr1 Chr1:479033 0 479033\n-Chr1 Chr1:525146 0 525146\n-Chr1 Chr1:535685 0 535685\n-Chr1 Chr1:536093 0 536093\n-Chr1 Chr1:572628 0 572628\n-Chr1 Chr1:577906 0 577906\n-Chr1 Chr1:630114 0 630114\n-Chr1 Chr1:653644 0 653644\n-Chr1 Chr1:660019 0 660019\n-Chr1 Chr1:660070 0 660070\n-Chr1 Chr1:715091 0 715091\n-Chr1 Chr1:715130 0 715130\n-Chr1 Chr1:725844 0 725844\n-Chr1 Chr1:735420 0 735420\n-Chr1 Chr1:735757 0 735757\n-Chr1 Chr1:735810 0 735810\n-Chr1 Chr1:746855 0 746855\n-Chr1 Chr1:753267 0 753267\n-Chr1 Chr1:754061 0 754061\n-Chr1 Chr1:769014 0 769014\n-Chr1 Chr1:782878 0 782878\n-Chr1 Chr1:787761 0 787761\n-Chr1 Chr1:789186 0 789186\n-Chr1 Chr1:792359 0 792359\n-Chr1 Chr1:805716 0 805716\n-Chr1 Chr1:841793 0 841793\n-Chr1 Chr1:845425 0 845425\n-Chr1 Chr1:847587 0 847587\n-Chr1 Chr1:853188 0 853188\n-Chr1 Chr1:853218 0 853218\n-Chr1 Chr1:854818 0 854818\n-Chr1 Chr1:855663 0 855663\n-Chr1 Chr1:859613 0 859613\n-Chr1 Chr1:859635 0 859635\n-Chr1 Chr1:917609 0 917609\n-Chr1 Chr1:917933 0 917933\n-Chr1 Chr1:925316 0 925316\n-Chr1 Chr1:930060 0 
930060\n-Chr1 Chr1:930069 0 930069\n-Chr1 Chr1:937007 0 937007\n-Chr1 Chr1:937014 0 937014\n-Chr1 Chr1:938444 0 938444\n-Chr1 Chr1:938619 0 938619\n-Chr1 Chr1:961472 0 961472\n-Chr1 Chr1:961537 0 961537\n-Chr1 Chr1:963122 0 963122\n-Chr1 Chr1:963313 0 963313\n-Chr1 Chr1:973080 0 973080\n-Chr1 Chr1:986764 0 986764\n-Chr1 Chr1:1004926 0 1004926\n-Chr1 Chr1:1009979 0 1009979\n-Chr1 Chr1:1011056 0 1011056\n-Chr1 Chr1:1011714 0 1011714\n-Chr1 Chr1:1012017 0 1012017\n-Chr1 Chr1:1012026 0 1012026\n-Chr1 Chr1:1012111 0 1012111\n-Chr1 Chr1:1026043 0 1026043\n-Chr1 Chr1:1083377 0 1083377\n-Chr1 Chr1:1095742 0 1095742\n-Chr1 Chr1:1124378 0 1124378\n-Chr1 Chr1:1125022 0 1125022\n-Chr1 Chr1:1167717 0 1167717\n-Chr1 Chr1:1172746 0 1172746\n-Chr1 Chr1:1173064 0 1173064\n-Chr1 Chr1:1180585 0 1180585\n-Chr1 Chr1:1182388 0 1182388\n-Chr1 Chr1:1184650 0 1184650\n-Chr1 Chr1:1188838 0 1188838\n-Chr1 Chr1:1197390 0 1197390\n-Chr1 Chr1:1197550 0 1197550\n-Chr1 Chr1:1197663 0 1197663\n-Chr1 Chr1:1208561 0 1208561\n-Chr1 Chr1:1229816 0 1229816\n-Chr1 Chr1:1250047 0 1250047\n-Chr1 Chr1:1286631 0 1286631\n-Chr1 Chr1:1303497 0 1303497\n-Chr1 Chr1:1306055 0 1306055\n-Chr1 Chr1:1306058 0 1306058\n-Chr1 Chr1:1306085 0 1306085\n-Chr1 Chr1:1318689 0 1318689\n-Chr1 Chr1:1358972 0 1358972\n-Chr1 Chr1:1359766 0 1359766\n-Chr1 Chr1:1359769 0 1359769\n-Chr1 Chr1:1365854 0 1365854\n-Chr1 Chr1:1404921 0 1404921\n-Chr1 Chr1:1407386 0 1407386\n-Chr1 Chr1:1443131 0 1443131\n-Chr1 Chr1:1446645 0 1446645\n-Chr1 Chr1:1486728 0 1486728\n-Chr1 Chr1:1492913 0 1492913\n-Chr1 Chr1:1496524 0 1496524\n-Chr1 Chr1:1509728 0 1509728\n-Chr1 Chr1:1515087 0 1515087\n-Chr1 Chr1:1546579 0 1546579\n-Chr1 Chr1:1551208 0 1551208\n-Chr1 Chr1:1563026 0 1563026\n-Chr1 Chr1:1563029 0 1563029\n-Chr1 Chr1:1565548 0 1565548\n-Chr1 Chr1:1598133 0 1598133\n-Chr1 Chr1:1627278 0 1627278\n-Chr1 Chr1:1627327 0 1627327\n-Chr1 Chr1:1632085 0 1632085\n-Chr1 Chr1:1655772 0 1655772\n-Chr1 Chr1:1655775 0 1655775\n-Chr1 Chr1:1671697 0 
1671697\n-Chr1 Chr1:1685316 0 1685316\n-Chr1 Chr1:1687456 0 1687456\n-Chr1 Chr1:1703238 0 1703238\n-Chr1 Chr1:1703264 0 1703264\n-Chr1 Chr1:1722207 0 1722207\n-Chr1 Chr'..b'-Chr1 Chr1:41945638 0 41945638\n-Chr1 Chr1:41954934 0 41954934\n-Chr1 Chr1:41958052 0 41958052\n-Chr1 Chr1:41961218 0 41961218\n-Chr1 Chr1:41961221 0 41961221\n-Chr1 Chr1:41965592 0 41965592\n-Chr1 Chr1:41965659 0 41965659\n-Chr1 Chr1:41968928 0 41968928\n-Chr1 Chr1:41968936 0 41968936\n-Chr1 Chr1:41970211 0 41970211\n-Chr1 Chr1:41998459 0 41998459\n-Chr1 Chr1:42001355 0 42001355\n-Chr1 Chr1:42047135 0 42047135\n-Chr1 Chr1:42047157 0 42047157\n-Chr1 Chr1:42061862 0 42061862\n-Chr1 Chr1:42063149 0 42063149\n-Chr1 Chr1:42081350 0 42081350\n-Chr1 Chr1:42081405 0 42081405\n-Chr1 Chr1:42088625 0 42088625\n-Chr1 Chr1:42089983 0 42089983\n-Chr1 Chr1:42097607 0 42097607\n-Chr1 Chr1:42106144 0 42106144\n-Chr1 Chr1:42138453 0 42138453\n-Chr1 Chr1:42150639 0 42150639\n-Chr1 Chr1:42155417 0 42155417\n-Chr1 Chr1:42156013 0 42156013\n-Chr1 Chr1:42164107 0 42164107\n-Chr1 Chr1:42165734 0 42165734\n-Chr1 Chr1:42169797 0 42169797\n-Chr1 Chr1:42169819 0 42169819\n-Chr1 Chr1:42187340 0 42187340\n-Chr1 Chr1:42192288 0 42192288\n-Chr1 Chr1:42196386 0 42196386\n-Chr1 Chr1:42227135 0 42227135\n-Chr1 Chr1:42229251 0 42229251\n-Chr1 Chr1:42231379 0 42231379\n-Chr1 Chr1:42242069 0 42242069\n-Chr1 Chr1:42244104 0 42244104\n-Chr1 Chr1:42276315 0 42276315\n-Chr1 Chr1:42301791 0 42301791\n-Chr1 Chr1:42302347 0 42302347\n-Chr1 Chr1:42324275 0 42324275\n-Chr1 Chr1:42332621 0 42332621\n-Chr1 Chr1:42352040 0 42352040\n-Chr1 Chr1:42352085 0 42352085\n-Chr1 Chr1:42352093 0 42352093\n-Chr1 Chr1:42352397 0 42352397\n-Chr1 Chr1:42354188 0 42354188\n-Chr1 Chr1:42354191 0 42354191\n-Chr1 Chr1:42357534 0 42357534\n-Chr1 Chr1:42357591 0 42357591\n-Chr1 Chr1:42367404 0 42367404\n-Chr1 Chr1:42367442 0 42367442\n-Chr1 Chr1:42419046 0 42419046\n-Chr1 Chr1:42419769 0 42419769\n-Chr1 Chr1:42421769 0 42421769\n-Chr1 Chr1:42423110 0 
42423110\n-Chr1 Chr1:42427556 0 42427556\n-Chr1 Chr1:42460597 0 42460597\n-Chr1 Chr1:42460615 0 42460615\n-Chr1 Chr1:42469025 0 42469025\n-Chr1 Chr1:42471750 0 42471750\n-Chr1 Chr1:42479925 0 42479925\n-Chr1 Chr1:42487487 0 42487487\n-Chr1 Chr1:42487514 0 42487514\n-Chr1 Chr1:42548763 0 42548763\n-Chr1 Chr1:42583428 0 42583428\n-Chr1 Chr1:42586452 0 42586452\n-Chr1 Chr1:42594255 0 42594255\n-Chr1 Chr1:42596478 0 42596478\n-Chr1 Chr1:42602822 0 42602822\n-Chr1 Chr1:42602885 0 42602885\n-Chr1 Chr1:42610308 0 42610308\n-Chr1 Chr1:42612885 0 42612885\n-Chr1 Chr1:42616231 0 42616231\n-Chr1 Chr1:42620187 0 42620187\n-Chr1 Chr1:42620190 0 42620190\n-Chr1 Chr1:42620878 0 42620878\n-Chr1 Chr1:42620881 0 42620881\n-Chr1 Chr1:42631741 0 42631741\n-Chr1 Chr1:42634538 0 42634538\n-Chr1 Chr1:42664013 0 42664013\n-Chr1 Chr1:42665624 0 42665624\n-Chr1 Chr1:42666354 0 42666354\n-Chr1 Chr1:42669650 0 42669650\n-Chr1 Chr1:42669653 0 42669653\n-Chr1 Chr1:42673304 0 42673304\n-Chr1 Chr1:42674707 0 42674707\n-Chr1 Chr1:42682058 0 42682058\n-Chr1 Chr1:42682061 0 42682061\n-Chr1 Chr1:42690508 0 42690508\n-Chr1 Chr1:42706257 0 42706257\n-Chr1 Chr1:42711112 0 42711112\n-Chr1 Chr1:42762398 0 42762398\n-Chr1 Chr1:42763941 0 42763941\n-Chr1 Chr1:42763944 0 42763944\n-Chr1 Chr1:42766241 0 42766241\n-Chr1 Chr1:42777810 0 42777810\n-Chr1 Chr1:42780446 0 42780446\n-Chr1 Chr1:42825046 0 42825046\n-Chr1 Chr1:42826857 0 42826857\n-Chr1 Chr1:42880271 0 42880271\n-Chr1 Chr1:42916070 0 42916070\n-Chr1 Chr1:42916090 0 42916090\n-Chr1 Chr1:42920553 0 42920553\n-Chr1 Chr1:42928342 0 42928342\n-Chr1 Chr1:42959997 0 42959997\n-Chr1 Chr1:42968423 0 42968423\n-Chr1 Chr1:43046967 0 43046967\n-Chr1 Chr1:43048104 0 43048104\n-Chr1 Chr1:43065469 0 43065469\n-Chr1 Chr1:43068624 0 43068624\n-Chr1 Chr1:43072051 0 43072051\n-Chr1 Chr1:43073361 0 43073361\n-Chr1 Chr1:43079457 0 43079457\n-Chr1 Chr1:43079480 0 43079480\n-Chr1 Chr1:43093204 0 43093204\n-Chr1 Chr1:43100601 0 43100601\n-Chr1 Chr1:43123958 0 43123958\n-Chr1 
Chr1:43132577 0 43132577\n-Chr1 Chr1:43141118 0 43141118\n-Chr1 Chr1:43141179 0 43141179\n-Chr1 Chr1:43154143 0 43154143\n-Chr1 Chr1:43158899 0 43158899\n-Chr1 Chr1:43179527 0 43179527\n-Chr1 Chr1:43214669 0 43214669\n-Chr1 Chr1:43229591 0 43229591\n-Chr1 Chr1:43249859 0 43249859\n-Chr1 Chr1:43269458 0 43269458\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae MDSplot/test-data/input.ped --- a/MDSplot/test-data/input.ped Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,93 +0,0 @@\n-IRAT112\t1\t0\t0\t1\t1\t1 1\t4 4\t2 2\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t3 3\t4 4\t2 2\t1 1\t3 3\t1 1\t1 1\t4 4\t1 1\t1 1\t2 2\t1 1\t4 4\t1 1\t1 1\t4 4\t2 2\t4 4\t1 1\t1 1\t4 4\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t2 2\t1 1\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t3 3\t4 4\t4 4\t2 2\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t2 2\t4 4\t2 2\t1 1\t1 1\t3 3\t3 3\t4 4\t4 4\t3 3\t3 3\t2 2\t1 1\t2 2\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t4 4\t3 3\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t3 3\t4 4\t4 4\t3 3\t4 4\t1 1\t3 3\t3 3\t3 3\t1 1\t2 2\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t3 3\t2 2\t4 4\t4 4\t1 1\t3 3\t2 2\t3 3\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t2 2\t4 4\t4 4\t1 1\t4 4\t2 2\t3 3\t3 3\t1 1\t3 3\t4 4\t1 1\t3 3\t4 4\t1 1\t4 4\t2 2\t2 2\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t2 2\t2 2\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t3 3\t3 3\t1 1\t4 4\t1 1\t2 2\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t3 3\t4 4\t2 2\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t2 2\t1 1\t2 2\t4 4\t2 2\t3 3\t4 4\t1 1\t4 4\t2 2\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t3 3\t4 4\t4 4\t1 1\t2 2\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t4 4\t3 3\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t2 2\t4 4\t1 1\t3 3\t2 2\t2 2\t3 3\t3 3\t1 1\t2 2\t1 1\t3 3\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t2 2\t2 2\t1 1\t4 4\t4 4\t1 1\t2 2\t4 4\t1 1\t4 4\t1 1\t2 2\t1 1\t2 2\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t3 3\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t3 3\t1 1\t4 4\t3 3\t2 2\t1 1\t4 4\t4 4\t3 3\t1 
1\t2 2\t2 2\t1 1\t3 3\t4 4\t1 1\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t2 2\t4 4\t4 4\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t2 2\t4 4\t2 2\t2 2\t3 3\t4 4\t3 3\t2 2\t1 1\t1 1\t2 2\t1 1\t4 4\t3 3\t2 2\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t2 2\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t3 3\t1 1\t2 2\t1 1\t1 1\t2 2\t1 1\t1 1\t2 2\t1 1\t2 2\t3 3\t4 4\t1 1\t2 2\t1 1\t2 2\t4 4\t4 4\t1 1\t1 1\t4 4\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t3 3\t4 4\t3 3\t4 4\t3 3\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t3 3\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t3 3\t4 4\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t2 2\t4 4\t2 2\t1 1\t3 3\t1 1\t4 4\t1 1\t2 2\t1 1\t3 3\t2 2\t4 4\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t4 4\t1 1\t3 3\t4 4\t4 4\t4 4\t3 3\t4 4\t4 4\t1 1\t2 2\t1 1\t4 4\t4 4\t2 2\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t3 3\t2 2\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t3 3\t2 2\t1 1\t3 3\t4 4\t2 2\t2 2\t4 4\t2 2\t2 2\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t3 3\t4 4\t1 1\t1 1\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3\t1 1\t4 4\t3 3\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t2 2\t1 1\t4 4\t3 3\t4 4\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t3 3\t1 1\t4 4\t1 1\t2 2\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t2 2\t4 4\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t2 2\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t3 3\t1 1\t1 1\t1 1\t4 4\t1 1\t3 3\t2 2\t2 2\t2 2\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t3 3\t4 4\t3 3\t2 2\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t3 3\t4 4\t4 4\t3 3\t4 4\t3 3\t3 3\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t1 1\t4 4\t3 3\t3 3\t3 3\t4 4\t3 3\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t3 3\t1 1\t3 3\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 
4\t4 4\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t3 3\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t1 1\t3 3\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t2 2\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t3 3\t1 1\t1 1\t4 4\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t2 2\t1 1\t4 4\t4 4\t4 4\t3 3\t1 1\t2 2\t1 1\t1 1\t4 4\t4 4\t2 2\t1 1\t1 1\t2 2\t4 4\t1 1\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3\t3 3\t2 2\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t2 2\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t2 2\t4 4\t1 1\t3 3\t3 3\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t2 2\t3 3\t1 1\t3 3\t1 1\t4 4\t4 4\t2 2\t4 4\t4 4\t1 1\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3'..b' 1\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t3 3\t1 1\t3 3\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3\t2 2\t4 4\t1 1\t4 4\t1 1\t4 4\t2 2\t3 3\t4 4\t1 1\t4 4\t1 1\t4 4\t2 2\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t4 4\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t3 3\t2 2\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t2 2\t1 1\t4 4\t1 1\t2 2\t4 4\t1 1\t3 3\t1 1\t1 1\t3 3\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t3 3\t3 3\t2 2\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t4 4\t4 4\t1 1\t4 4\t4 4\t3 3\t1 1\t3 3\t4 4\t3 3\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t2 2\t4 4\t2 2\t2 2\t1 1\t4 4\t4 4\t2 2\t4 4\t4 4\t4 4\t3 3\t1 1\t3 3\t3 3\t3 3\t4 4\t4 4\t2 2\t1 1\t2 2\t4 4\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t1 1\t2 2\t1 1\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t2 2\t3 3\t4 4\t1 1\t3 3\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t2 2\t1 1\t3 3\t3 3\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t2 2\t4 4\t2 
2\t4 4\t1 1\t2 2\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t3 3\t1 1\t4 4\t4 4\t3 3\t3 3\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t3 3\t3 3\t2 2\t1 1\t4 4\t3 3\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t2 2\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t3 3\t4 4\t4 4\t1 1\t3 3\t2 2\t2 2\t1 1\t1 1\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t2 2\t1 1\t3 3\t1 1\t1 1\t4 4\t4 4\t2 2\t4 4\t3 3\t3 3\t1 1\t2 2\t2 2\t4 4\t2 2\t3 3\t1 1\t1 1\t3 3\t3 3\t1 1\t4 4\t1 1\t1 1\t3 3\t4 4\t2 2\t4 4\t4 4\t3 3\t2 2\t3 3\t3 3\t4 4\t4 4\t2 2\t2 2\t4 4\t2 2\t4 4\t4 4\t4 4\t2 2\t4 4\t4 4\t1 1\t4 4\t4 4\t3 3\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t3 3\t1 1\t1 1\t3 3\t1 1\t4 4\t3 3\t3 3\t1 1\t4 4\t1 1\t3 3\t3 3\t3 3\t1 1\t4 4\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t3 3\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t1 1\t3 3\t4 4\t1 1\t1 1\t2 2\t4 4\t3 3\t2 2\t3 3\t1 1\t1 1\t4 4\t4 4\t4 4\t3 3\t2 2\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t2 2\t1 1\t1 1\t4 4\t1 1\t3 3\t1 1\t3 3\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t3 3\t1 1\t3 3\t3 3\t1 1\t3 3\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t4 4\t2 2\t4 4\t2 2\t4 4\t2 2\t1 1\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t4 4\t4 4\t2 2\t1 1\t2 2\t4 4\t1 1\t1 1\t1 1\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t2 2\t3 3\t4 4\t1 1\t4 4\t3 3\t3 3\t3 3\t1 1\t4 4\t3 3\t3 3\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t2 2\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t2 2\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t2 2\t4 4\t4 4\t3 3\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t2 2\t1 1\t4 4\t4 4\t4 4\t1 1\t2 2\t4 4\t1 1\t1 1\t3 3\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t2 2\t4 4\t4 4\t3 3\t1 1\t1 1\t4 4\t2 2\t1 1\t3 3\t1 1\t1 1\t4 4\t4 4\t3 3\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t2 2\t3 3\t4 4\t2 2\t4 4\t4 4\t2 2\t1 1\t2 
2\t1 1\t2 2\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t3 3\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t3 3\t4 4\t2 2\t1 1\t2 2\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t3 3\t1 1\t1 1\t4 4\t4 4\t1 1\t2 2\t4 4\t1 1\t1 1\t2 2\t1 1\t4 4\t4 4\t4 4\t4 4\t3 3\t3 3\t4 4\t4 4\t2 2\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t3 3\t4 4\t3 3\t4 4\t4 4\t1 1\t3 3\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t4 4\t3 3\t2 2\t4 4\t4 4\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t3 3\t1 1\t4 4\t1 1\t2 2\t3 3\t2 2\t4 4\t1 1\t1 1\t4 4\t3 3\t2 2\t4 4\t2 2\t1 1\t3 3\t4 4\t1 1\t1 1\t1 1\t3 3\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t3 3\t4 4\t2 2\t4 4\t1 1\t1 1\t4 4\t2 2\t4 4\t4 4\t4 4\t3 3\t1 1\t1 1\t3 3\t1 1\t2 2\t1 1\t3 3\t2 2\t1 1\t2 2\t1 1\t1 1\t1 1\t2 2\t1 1\t4 4\t3 3\t1 1\t3 3\t1 1\t1 1\t3 3\t4 4\t1 1\t3 3\t4 4\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t2 2\t3 3\t3 3\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t2 2\t1 1\t4 4\t2 2\t2 2\t4 4\t4 4\t1 1\t2 2\t4 4\t3 3\t4 4\t4 4\t4 4\t1 1\t3 3\t4 4\t2 2\t1 1\t4 4\t3 3\t1 1\t4 4\t2 2\t1 1\t4 4\t3 3\t1 1\t1 1\t4 4\t2 2\t4 4\t4 4\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t1 1\t1 1\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t3 3\t1 1\t1 1\t4 4\t3 3\t1 1\t4 4\t2 2\t4 4\t1 1\t2 2\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t2 2\t4 4\t3 3\t1 1\t2 2\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t1 1\t3 3\t1 1\t2 2\t3 3\t2 2\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t3 3\t4 4\t2 2\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t4 4\t4 4\t4 4\t4 4\t2 2\t1 1\t3 3\t4 4\t4 4\t4 4\t1 1\t1 1\t3 3\t1 1\t3 3\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t3 3\t3 3\t1 1\t1 1\t2 2\t4 4\t1 1\t1 1\t2 2\t4 4\t3 3\t2 2\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 
1\t4 4\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae PedToFasta/Ped2Fasta.pl --- a/PedToFasta/Ped2Fasta.pl Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
#!/usr/bin/perl

# Ped2Fasta.pl - convert a tab-delimited PED file into a FASTA file.
#
# Every input line describes one individual. The first column is used as the
# sequence name, columns 2-6 are ignored, and the remaining columns are read
# two by two (one allele pair per marker). Each pair is collapsed into a
# single IUPAC letter, so one individual becomes one FASTA record.
#
# Usage: Ped2Fasta.pl --in <PED input> --out <Fasta output>

use strict;
use warnings;
use Getopt::Long;

my $usage = qq~Usage:$0 <args> [<opts>]

where <args> are:

    -i, --in           <PED input>
    -o, --out          <Fasta output>
~;
$usage .= "\n";

my ($input, $out);

# Bail out with the usage text on unknown or malformed options as well.
GetOptions(
    "in=s"  => \$input,
    "out=s" => \$out,
) or die $usage;

die $usage
  if ( !$input || !$out );

# Two-allele genotype => one-letter code. '00' is the missing genotype.
my %IUPAC = (
    '00' => "?",
    'AA' => "A",
    'CC' => "C",
    'GG' => "G",
    'TT' => "T",
    'AG' => "R",
    'GA' => "R",
    'CT' => "Y",
    'TC' => "Y",
    'TG' => "K",
    'GT' => "K",
    'CG' => "S",
    'GC' => "S",
    'AT' => "W",
    'TA' => "W",
    'AC' => "M",
    'CA' => "M",
);

# Letter written for any genotype that is not in the table, so that every
# record keeps exactly one character per marker and columns stay aligned.
my $missing_letter = $IUPAC{'00'};

# Column index (0-based) of the first allele column.
my $first_allele_col = 6;

open(my $out_fh, '>', $out)   or die "Cannot write to '$out': $!\n";
open(my $in_fh,  '<', $input) or die "Cannot read '$input': $!\n";

my $unknown_count = 0;

while (my $line = <$in_fh>)
{
    # Strip Unix and DOS line endings BEFORE splitting; otherwise the last
    # allele keeps its newline and the final marker never matches the table.
    $line =~ s/[\r\n]//g;

    # A blank (or trailing empty) line is not an individual.
    next unless $line =~ /\S/;

    my @infos = split(/\t/, $line);
    my $ind   = $infos[0];
    print {$out_fh} ">$ind\n";

    for (my $i = $first_allele_col; $i <= $#infos; $i += 2)
    {
        # An odd number of allele columns leaves the last allele unpaired;
        # treat the absent partner as empty so the lookup simply fails.
        my $second = defined $infos[$i + 1] ? $infos[$i + 1] : '';
        my $letter = $IUPAC{ $infos[$i] . $second };

        if (!defined $letter)
        {
            $letter = $missing_letter;
            $unknown_count++;
        }
        print {$out_fh} $letter;
    }
    print {$out_fh} "\n";
}

close($in_fh);

# Buffered write errors (e.g. disk full) only surface when the handle closes.
close($out_fh) or die "Error while writing '$out': $!\n";

warn "Warning: $unknown_count genotype(s) not found in the IUPAC table were written as '$missing_letter'\n"
  if $unknown_count;
b |
diff -r 420b57c3c185 -r feb40a9a8eae PedToFasta/pedToFasta.xml --- a/PedToFasta/pedToFasta.xml Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,171 +0,0 @@ -<tool id="sniplay_pedToFasta" name="Ped2Fasta" version="1.0.0"> - - <!-- [REQUIRED] Tool description displayed after the tool name --> - <description> Convert PED file to Fasta File </description> - - <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> - <requirements> - <requirement type="binary">perl</requirement> - </requirements> - - <!-- [OPTIONAL] Command to be executed to get the tool's version string --> - <version_command> -<!-- - tool_binary -v ---> - </version_command> - - <!-- [REQUIRED] The command to execute --> - <command interpreter="perl"> - Ped2Fasta.pl --in $filein --out $fileout - </command> - - <!-- [REQUIRED] Input files and tool parameters --> - <inputs> - <param name="filein" type="data" format="txt" optional="false" label="PED input" /> - <param name="fileout_label" type="text" value="filtered" optional="false" label="Output file name"/> - </inputs> - - <!-- [REQUIRED] Output files --> - <outputs> - <data name="fileout" type="data" format="fasta" label="${fileout_label}.fa" /> - </outputs> - - <!-- [STRONGLY RECOMMANDED] Exit code rules --> - <stdio> - <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> - <exit_code range="1:" level="fatal" /> - </stdio> - - <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> - <tests> - <!-- [HELP] Test files have to be in the ~/test-data directory --> - <test> - <param name="filein" value="sample.ped" /> - <output name="fileout" file="result.fa" /> - </test> - </tests> - - <!-- [OPTIONAL] Help displayed in Galaxy --> - <help> - -.. class:: infomark - -**Authors** Dereeper Alexis (alexis.dereeper@ird.fr), IRD, South Green platform - - | **Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). - -.. 
class:: infomark - -**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. - -.. class:: infomark - -**Support** For any questions, please send an e-mail to support.abims@sb-roscoff.fr - ---------------------------------------------------- - -========= -Ped2Fasta -========= - ------------ -Description ------------ - - Convert PED file to Fasta File - ------------------ -Workflow position ------------------ - -**Upstream tool** - -=============== ========================== ======= -Name output file(s) format -=============== ========================== ======= -VCFtools Filter VCF file VCF -=============== ========================== ======= - - -**Downstream tool** - -=========== ========================== ======= -Name input file(s) format -=========== ========================== ======= -Readseq Fasta alignment fasta -=========== ========================== ======= - - ----------- -Input file ----------- - -PED file - PED file usually from VCF tools - ----------- -Parameters ----------- - -Output file name - Prefix for the output fasta file - ------------- -Output files ------------- - -Fasta file - PED file conversion - ---------------------------------------------------- - ---------------- -Working example ---------------- - -Input files -=========== - -PED file ---------- - -:: - - CATB1 CATB1 0 0 0 0 C T T A C T A T A T A G G A - -Parameters -========== - -Output name -> pedFile - -Output files -============ - -pedFile.fa ----------- - -:: - - YWYWWRRSYYMKRRKMYRKSRKYRYRYKRKRSKKSYRWYSYRRYRRRWYWWYYWRRYRSRWSSRMYRRKSWMSKWRRYYWMYKYWRSYRWRYMWYYYMKYKYWRYRYRY - - - </help> - - <citations> - <!-- [HELP] As DOI or BibTex entry --> - <citation type="bibtex">@article{Dereeper03062015, -author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, -title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic 
variations}, -year = {2015}, -doi = {10.1093/nar/gkv351}, -abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, -URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, -eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, -journal = {Nucleic Acids Research} -} - - </citation> - - </citations> -</tool> |
b |
diff -r 420b57c3c185 -r feb40a9a8eae PedToFasta/test-data/result.fa --- a/PedToFasta/test-data/result.fa Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->CATB1 -YWYWWRRSYYMKRRKMYRKSRKYRYRYKRKRSKKSYRWYSYRRYRRRWYWWYYWRRYRSRWSSRMYRRKSWMSKWRRYYWMYKYWRSYRWRYMWYYYMKYKYWRYRYRYYKKYWRMMWKYYSYYWRYSKRWRSRRRYYWYYYKYYKYRMKKRYYRKYKKWWKRYWRMYRSWYWYYWRRRRRYRSRKRWKWRKKYYYKMKYYYSWSSYRRKRMSKMKRRKRWRRRRYWRKKYWRKRYWWWKMRRRYRYYWWRYYRKKWYYRRWYYRKYSRRYWRYWSWYYWWKYYKYWYRMRYRYYRRWYYYSRYRRKRRYMMSSRRYYYSRMWRRRRSWRYYMYYRYWYRRSSYYMYRKKRKYWKRRYYRYRMMWYWRYSSWYYYRYWWWWWRYRRYRYWWMMYRYYYRSYYRRRRRYYYRYYRRKKMYMRRRRSYKYRRRYWYRRYWRYSYYRYRRYSRMMKRKYYYMMRWYKWYMRYRSWRYRYMYMMRRYSRYRRRSRRSRRMKRKYYYRRYMYYRYRRKYRRRRRYYRYYWYYSRRMKRYYYSYRRYYWRMYSRYYYMMRRYYRWKMWWWYRRWYYYSWRRYRYRYRRRYRYRYRYYYYRRRWMYKMWRRYYYYRMYSSRRRWYRWRRRWYRWYRKWYYMKRYRMYRYMRRYRYYYYYRMRRWYYRSYYSYRYYYYSWKYMRYYYYYYYRRKWYYMYRYMWYYYSYYMWYMWSYWWWYRYYYSYRYMMRRRYKYRWRYYRRRKYYYKRYYRYYMYRMRYYYKWYWWYYKYRYWWRYRMSSYYYYYKYRYRRYYRYYRYRKRYYKYYYWYRYRRYYYRMYYRRRYWRRKRYYYYRRRYRYYKYYRRRRWYKWRRRMYWRYRRSWSSYYYYRYYYYYSRYYMRRYSRWMYRYYYRRYRYWYYYMWWRRYMRYYYRRMYRRSKRRYYSRYKRRRMRWYYMWYYMRWYYWWYYYKSYKYYYYYYYWYYSRWSYYYYRRYSRMYYYRRYSWYYYYWRYYSRYYKRRYYYMWYYYRWWYKYMYRRRMYYSRKYMMYYRRRWRYRRRMYKYRKRYRYRSYYRYSRMYWWWWWWYKYRSYYWKMRWWYWSMMSMRKYRSWWMKWYYYWKRRKRWRWYWWWYSYRSYYYYYRRWMMRWYRYYWYKRMWRYRYYRRWWRYMYYYRYYYMRYYYRRYRWRYRYWWKWMRWRYYRRRYWSYWRSRYWYSRYKRRYRRKRYRYWSWWYRWRRRKYKYRYYWYKKRYYYRRYYYYYRKRYKYMYYRWYKRWMYRRKYRRRYRYRWYSYRYKYMSYYRWRMWKSRSYYRWSRRRRYSMSRRWKRRRYYYRRYWWYYRRYMYMRRYWWRYYYRMRKKYKWMYYRMYYKMWMYRWRKYRSRMKSYRSYYRWMMSSKRYMYKYRWRRRRRYYSRKYRYYYYRYRRKRYYYWYYWYYWRYRKMRKRRYRYWWMMRRRYYYYWSWSRMWWYRWYSYYWRYYRRYRRYYRRRWYSSRYRYYSRRKWYSMYSSKYYRMRRRYWMRYRKRRRRKYYWSYRYYYYRRMRRYWRRRRYSYRYYRRWSRMSRYSRWYRSRYRKKWYRYYWRWRRYMKRKRKKRKYMMKYRMRRRYRRYYRMWWRRRYYWWYRKRYYYRYYYYYRYYWRRYYMYSSWRRKYRRYYRRKYSMWMWRYRMRRYRMYRRWRWRKRYSKYSSRYSRYYMRRRRWKYRSRRKKYRRMMYRMRWRRRKRWRRYYKWYRKRRRMKRRSYYWSRKRRRMYWWWWKRYRRYYMWMYRRYRRMYYSWYMMYRYSKWWRYWYRWYWWYRRYRSSMYKMSYRSRRYYYYMKWWMKSRMRYRSRYMRRRYWRYMKYRYSWWMSKYYSWYRYRRRMYYWSRWWYYRYMSSYYYRYYWYYMRYSMWRYRYYRKRRRRMRYRRWYSYYWYYMWYRYYRWRYYYSRRYRRRMMMRSYKRWWKRYYRRKYRYWRRRYWRRRYMYYYWYWYYWRRRYRYRSRWMRRRWYYYKWYRRY
SRKMRRRRSKYMWKRYMKKRSMKYYRRRRKRYRKYRYMYYYYRRRMKKMRWYYKWYMWYSWWRYRKYKWWRKYRRYMYRWRYWWRYWMRRRRRRRKRSYMKYRRKRKWRYYWKSSYYWYMSYSYRWRRRMRWMRWSWYSRYYYWWRRYRSWWSKSRYYYYYYMYRYSRWSWYYYMYWYRRYWRYYYWSYRYYRKWRYYRYRWKYMSYRRRKRRWYYWKSKSYMSRRYYYYYYYYRYRWSWMYWRKYMRWYRKYRRMMYYRMYMWYKWYYRWRRRYRYYRYYYRYSYYWWYYYRYRRRSRRYRMYMKYKYRYSYKYWRWRRYSYRWKRYYWRYYYYKYMWYMWRYMWSRKRWYRMRYYRYKRSRRRWRMRRKYRYRYRYRRYRKYYRRYRRMSKRKRWKRWMYMYKRWRSRYYYKRWYRRRKRYRRYWWKYYYRMRYRRMRWSYSYWWRYMWYKRKSYRMYWKSKSRWYYYRRRSRYRYKRYWWWRMWYYSYRRYKYYYSWYKRMYRKRRRYYWSKYSKYYMYYYYKRKYYYSSRRRYRSWYRYRKMSYRRWMRSYKRYKKWYYYYMKRYWYYRYRRMRSMMYSMKYYYSRKYRYMYSRKYKYYYKRKYWYKYYYRYRRKYWRRRYYSRYRWWRRRWYMRWRRRRRYSRWKYRMWYYYRYRYRRRRRYRRRYYYYWYYRRYYYRMKYYYWWMWRRRRYRYRRRWMKRRYWKKRYRYMYMYYRYYWRYRRWRWRMYYSRWRYRYYRMKRMWSRRKRRYYRYRKWWMSYRRRRSWKYSMKYWRSYSYSSYRKRRKSYYYRWRYWYYRYRRRYYYRYRYRRYRWWYYRYRSYRRYRMRKYSYWYMWYRYYRYYYSWYYRMRKRYSYWYRWYRYRYRYYYYRRMYRYWYYYYRYRSMRYKRYRYKMYYYYYRSWSMRRYRSWYYWWYYWYRYRRYSRRYYRYKYRYRYYYRRRSYKRMYRMYWWSRWYYMYKYRRMYWRKYRYRRYYRKRYYWYYWYRRRRSRYSYYRYYRWYRRWKRRYRWSYRRYYRYYRYRRRYRKMYWYMSRYRYYWSWKRWYYMSMRWRRYKYYYRYRKWYYRRKYRRMWMWYRSKKKKYRSKWWMYYSMYRRMWRKMKRYYRRKMRKKYYWRKWKYYRRRRMYRWRRWKSRKKYMRWYRSRRKYKSWRSRRRYKYWYWYYMYRYYRRYSRRRYRYKWRRYYKMWWKYYWMRMYRRRYSYYRWMYYRYSSYYYRSRWWRSKRYRRMKYWRRSRRYYSRYRYYRYRYMSYRRYMRYRYYRMRWYRYMWKWRYKYRRKRRKSYYMRRYYYYYYMYRRRRKYYKRRWYYWMRYYRRRYYYKRYYRYYRYMKRRYRYWWYRRRRYRYYSWMMWYWMYWSYYRWWWYRRRRYWYKSKRYSSYYYWRRRMMRSYYKYRKYYMYKRYYYKRYSMYYWYRSWRRWRYKRYRYYYRYRKYYYRYKYYYYRSYKWYMWSMRRYRMKRMRYSRYSRYYYRMYKRRRWRYYRRRRYYWYYWRRKWKKRYRWRWYSMYRRSMRWWWSMRMSRKYWRRYYYWYRYSYRMWMRSWRYKKRRWSSYYYYYRYYYMYYYYMRYKYYRWYYYRYYYYYYRMYYYRRRYYYYRRYKWRYRRRYWWYKMRKSYYYYYRYWRRRRKSYSYM |
b |
diff -r 420b57c3c185 -r feb40a9a8eae PedToFasta/test-data/sample.ped --- a/PedToFasta/test-data/sample.ped Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,1 +0,0 @@\n-CATB1\tCATB1\t0\t0\t0\t0\tC\tT\tT\tA\tC\tT\tA\tT\tA\tT\tA\tG\tG\tA\tC\tG\tC\tT\tT\tC\tA\tC\tT\tG\tA\tG\tA\tG\tT\tG\tA\tC\tC\tT\tA\tG\tT\tG\tC\tG\tA\tG\tT\tG\tT\tC\tA\tG\tC\tT\tG\tA\tT\tC\tG\tT\tG\tA\tG\tT\tG\tA\tG\tC\tT\tG\tT\tG\tC\tG\tC\tT\tA\tG\tA\tT\tC\tT\tG\tC\tT\tC\tG\tA\tA\tG\tC\tT\tG\tA\tA\tG\tG\tA\tA\tT\tT\tC\tA\tT\tA\tT\tC\tT\tC\tT\tA\tT\tA\tG\tG\tA\tC\tT\tA\tG\tG\tC\tA\tG\tA\tT\tG\tC\tC\tG\tG\tA\tC\tA\tT\tC\tA\tG\tA\tG\tT\tG\tG\tC\tA\tT\tA\tC\tG\tC\tG\tT\tA\tT\tG\tA\tG\tA\tC\tT\tC\tT\tA\tT\tC\tA\tC\tT\tT\tG\tT\tC\tA\tT\tG\tA\tC\tG\tT\tC\tG\tA\tT\tA\tA\tG\tC\tT\tC\tA\tT\tA\tC\tT\tC\tT\tT\tC\tA\tC\tG\tT\tC\tT\tT\tG\tC\tT\tA\tT\tA\tG\tC\tT\tA\tG\tC\tT\tA\tG\tC\tT\tT\tC\tT\tG\tG\tT\tC\tT\tT\tA\tA\tG\tC\tA\tA\tC\tA\tT\tT\tG\tC\tT\tC\tT\tC\tG\tC\tT\tT\tC\tT\tA\tA\tG\tT\tC\tC\tG\tT\tG\tA\tG\tA\tT\tG\tA\tG\tC\tA\tG\tA\tG\tG\tA\tC\tT\tT\tC\tT\tA\tC\tT\tT\tC\tT\tC\tT\tG\tT\tC\tT\tC\tG\tT\tT\tC\tA\tG\tA\tC\tT\tG\tG\tT\tG\tA\tC\tT\tC\tT\tA\tG\tT\tG\tT\tC\tT\tG\tT\tG\tA\tT\tT\tA\tT\tG\tG\tA\tC\tT\tT\tA\tA\tG\tC\tA\tT\tC\tG\tA\tC\tG\tA\tT\tC\tT\tT\tA\tT\tC\tT\tC\tT\tA\tG\tA\tA\tG\tA\tG\tA\tG\tA\tG\tT\tC\tG\tA\tG\tC\tA\tG\tG\tT\tG\tA\tA\tT\tT\tG\tA\tT\tA\tG\tG\tT\tT\tG\tC\tT\tT\tC\tT\tC\tG\tT\tA\tC\tG\tT\tC\tT\tT\tC\tT\tC\tG\tC\tT\tA\tC\tG\tC\tG\tC\tT\tA\tG\tG\tA\tT\tG\tG\tA\tA\tC\tC\tG\tG\tT\tA\tC\tT\tG\tA\tG\tG\tA\tG\tT\tA\tG\tA\tT\tG\tA\tG\tA\tA\tG\tG\tA\tT\tC\tA\tT\tA\tG\tG\tT\tT\tG\tC\tT\tT\tA\tA\tG\tT\tG\tG\tA\tT\tC\tT\tA\tA\tT\tT\tA\tG\tT\tA\tC\tG\tA\tG\tA\tA\tG\tC\tT\tA\tG\tC\tT\tT\tC\tT\tA\tA\tT\tA\tG\tT\tC\tT\tC\tA\tG\tT\tG\tG\tT\tT\tA\tT\tC\tT\tC\tA\tG\tG\tA\tT\tA\tT\tC\tT\tC\tA\tG\tT\tG\tC\tT\tC\tG\tA\tG\tG\tA\tC\tT\tA\tT\tA\tG\tT\tC\tA\tT\tG\tC\tA\tT\tT\tC\tC\tT\tA\tT\tA\tT\tG\tT\tT\tC\tT\tC\tG\tT\tT\tC\tA\tT\tT\tC\tG\tA\tC\tA\tA\tG\tC\tT\tA\tG\tT\tC\tC\tT\tA\tG\tA\tG\tA\tT\tC\tT\tC\tT\tT\tC\tG\tC\tG\tA\tC\tT\tA\tG\tG\tA\tT\tG\tA\tG\tG\tA\tC\tT\tA\tC\tC\tA\tG\tC\tC\tG\tG\tA\tA\tG\tC\tT\tC\tT\tC\tT\tC\tG\tA\tG\tA\tC\tA\tT\tG\tA\tG\tA\tA\tG
\tA\tG\tG\tC\tA\tT\tG\tA\tC\tT\tT\tC\tC\tA\tC\tT\tT\tC\tA\tG\tT\tC\tA\tT\tC\tT\tG\tA\tG\tA\tG\tC\tC\tG\tC\tT\tC\tT\tA\tC\tT\tC\tA\tG\tT\tG\tG\tT\tA\tG\tT\tG\tC\tT\tT\tA\tG\tT\tA\tG\tG\tA\tC\tT\tT\tC\tG\tA\tT\tC\tA\tG\tC\tA\tA\tC\tA\tT\tT\tC\tT\tA\tG\tA\tT\tC\tG\tC\tG\tC\tT\tA\tC\tT\tT\tC\tT\tC\tA\tG\tT\tC\tA\tT\tA\tT\tT\tA\tA\tT\tT\tA\tG\tA\tT\tC\tG\tA\tA\tG\tT\tC\tG\tA\tC\tT\tA\tT\tA\tT\tA\tC\tA\tC\tC\tT\tA\tG\tT\tC\tT\tC\tT\tC\tA\tG\tC\tG\tC\tT\tC\tT\tG\tA\tG\tA\tG\tA\tG\tA\tG\tA\tC\tT\tT\tC\tC\tT\tA\tG\tC\tT\tT\tC\tA\tG\tA\tG\tG\tT\tG\tT\tC\tA\tC\tT\tC\tA\tG\tA\tG\tA\tG\tA\tA\tG\tC\tG\tT\tC\tT\tG\tT\tC\tG\tA\tA\tG\tG\tA\tT\tC\tT\tA\tT\tC\tA\tG\tG\tA\tT\tC\tT\tA\tA\tG\tC\tT\tG\tC\tC\tT\tC\tT\tG\tA\tT\tC\tG\tA\tG\tA\tC\tT\tG\tC\tA\tG\tA\tC\tA\tC\tG\tT\tG\tA\tG\tT\tT\tC\tT\tC\tT\tC\tC\tA\tC\tA\tG\tA\tA\tT\tC\tT\tT\tG\tT\tA\tT\tC\tA\tC\tA\tG\tT\tC\tG\tA\tC\tG\tA\tT\tA\tG\tT\tC\tA\tG\tT\tC\tA\tC\tC\tT\tC\tA\tA\tC\tG\tA\tA\tG\tT\tC\tG\tC\tG\tA\tC\tT\tG\tA\tG\tA\tA\tG\tG\tC\tA\tG\tA\tG\tG\tC\tA\tG\tA\tG\tA\tC\tG\tT\tG\tA\tG\tT\tC\tT\tC\tT\tT\tC\tG\tA\tA\tG\tC\tT\tA\tC\tC\tT\tC\tT\tA\tG\tC\tT\tG\tA\tG\tA\tG\tT\tC\tT\tG\tA\tG\tA\tG\tA\tG\tA\tG\tA\tC\tT\tT\tC\tG\tA\tC\tT\tT\tC\tA\tT\tC\tT\tT\tC\tG\tC\tG\tA\tG\tA\tA\tC\tG\tT\tG\tA\tC\tT\tC\tT\tT\tC\tC\tG\tT\tC\tG\tA\tA\tG\tT\tC\tT\tC\tA\tT\tG\tA\tC\tA\tT\tC\tG\tC\tG\tA\tC\tT\tC\tT\tC\tT\tC\tA\tC\tA\tG\tA\tG\tA\tC\tT\tC\tT\tG\tA\tT\tA\tT\tG\tA\tC\tA\tT\tT\tA\tA\tT\tC\tT\tA\tG\tG\tA\tT\tA\tC\tT\tT\tC\tT\tC\tC\tG\tT\tA\tG\tA\tG\tA\tT\tC\tA\tG\tT\tC\tG\tA\tC\tT\tA\tG\tA\tG\tG\tA\tT\tC\tG\tA\tT\tC\tA\tG\tC\tT\tG\tA\tT\tC\tC\tT\tC\tT\tT\tC\tG\tA\tA\tG\tG\tA\tA\tT\tC\tA\tT\tC\tT\tG\tC\tA\tA\tT\tA\tG\tG\tA\tC\tT\tC\tT\tC\tT\tC\tT\tG\tA\tC\tA\tT\tC\tC\tG\tG\tC\tA\tG\tG\tA\tG\tA\tT\tA\tC\tT\tG\tA\tT\tA\tA\tG\tG\tA\tA\tG\tA\tT\tT\tC\tA\tG\tA\tT\tT\tC\tA\tG\tT\tG\tA\tT\tC\tT\tC\tT\tC\tA\tG\tT\tG\tA\tC\tT\tG\tA\tC\tA\tC\tT\tG\tA\tC\tT\tC\tA\tG\tA\tG\tA\tC\tT\tA\tG\tT\tC\tT\tC\tC\tT\tC\tT\tC\tT\tG\tA\tA\tC\tA\tG\tA\tG\tA\tT\tC\tT\tC\tT\t
G\tA\tG\tC\tC\tT\tT\tC\tC\tG\tT\tC\tA\tG\tT\tC\tC\tT\tT\tC\tC\tT\tG\tC\tA\tT\tG\tT\tC\tT\tA\tC\tA\tG\tC\tT\tT\tC\tC\tT\tC\tT\tC\tT\tT\tC\tC\tT\tG\tA\tG\tA\tG\tT\tA\tT\tC\tT\tT\tC\tC\tA\tC\tT\tG\tA\tC\tT\tC\tA\tT\tA\tC\tT\tC\tT\tT\tC\tC\tG\tC\tT\tT\tC\tA\tC\tT\tA\tC\tT\tA\tC\tA\tT\tC\tG\tT\tC\tT\tA\tA\tT\tT\tA\tT\tC\tG\tA\tC\tT\tC\tT\tT\tC\tC\tG\tT\tC\tG\tA\tC\tT\tA\tC\tC\tA\tG\tA\tG\tA\tG\tA\tT\tC\tT\tG\tT\tC\tA\tG\tA\tT\tG\tA\tT\tC\tC\tT\tG\tA\tA\tG\tA\tG\tT\tG\tC\tT\tC\tT\tT\tC\tT\tG\tG\tA\tC\tT\tC\tT\tG\tA\tC\tT\tC\tT\tC\tA\tT\tC\tG\tA\tC\tA\tG\tA\tC\tT\tT\tC\tT\tC\tG\tT\tA\tT\tC\tT\tT\tA\tT\tA\tT\tC\tC\tT\tT\tG\tC\tT\tG\tA\tT\tC\tA\tT\tT\tA\tA\tG\tT\tC\tG\tA\tA\tC\tG\tC\tG\tC\tC\tT\tT\tC\tC\tT\tC\tT\tC\tT\tT\tG\tT\tC\tA\tG\tT\tC\tA\tG\tA\tG\tC\tT\tC\tT\tA\tG\tC\tT\tT\tC\tG\tA\tC\tT\tG\tA\tG\tT\tG\tA\tC\tT\tC\tT\tG\tT\tC\tT\tC\tT\tC\tT\tT\tA\tC\tT\tA\tG\tT\tC\tG\tA\tG\tA\tC\tT\tT\tC\tC\tT\tG\tA\tA\tC\tC\tT\tC\tT\tA\tG\tG\tA\tG\tA\tC\tT\tA\tT\tA\tG\tA\tG\tG\tT\tA\tG\tC\tT\tC\tT\tC\tT\tC\tT\tA\tG\tA\tG\tG\tA\tC\tT\tG\tA\tC\tT\tC\tT\tG\tT\tC\tT\tT\tC\tG\tA\tG\tA\tG\tA\tG\tA\tT\tA\tC\tT\tG\tT\tT\tA\tG\tA\tG\tA\tG\tA\tC\tA\tT\tC\tA\tT\tG\tA\tT\tC\tA\tG\tG\tA\tC\tG\tA\tT\tC\tG\tG\tC\tC\tT\tC\tT\tC\tT\tC\tT\tG\tA\tT\tC\tC\tT\tC\tT\tT\tC\tC\tT\tC\tG\tA\tG\tT\tC\tT\tC\tC\tA\tG\tA\tA\tG\tC\tT\tC\tG\tG\tA\tA\tT\tA\tC\tT\tC\tA\tG\tC\tT\tC\tT\tC\tT\tG\tA\tG\tA\tC\tT\tA\tG\tC\tT\tT\tA\tC\tT\tT\tC\tC\tT\tA\tC\tA\tT\tA\tT\tG\tA\tG\tA\tC\tT\tA\tC\tG\tA\tT\tC\tC\tT\tC\tT\tG\tA\tG\tA\tA\tC\tC\tT\tG\tA\tG\tA\tG\tC\tT\tG\tG\tA\tA\tG\tC\tT\tC\tT\tG\tC\tG\tA\tT\tC\tG\tT\tG\tA\tG\tA\tG\tA\tA\tC\tA\tG\tT\tA\tT\tC\tT\tC\tC\tA\tT\tA\tC\tT\tC\tT\tA\tC\tG\tA\tT\tA\tC\tT\tC\tT\tT\tA\tT\tA\tT\tC\tT\tC\tT\tC\tG\tT\tC\tG\tT\tC\tT\tG\tT\tC\tC\tT\tC\tT\tT\tC\tC\tT\tC\tT\tT\tC\tT\tA\tC\tT\tT\tC\tC\tG\tA\tG\tT\tA\tC\tG\tC\tT\tC\tT\tC\tT\tT\tC\tG\tA\tG\tA\tT\tC\tG\tC\tA\tG\tA\tC\tT\tC\tC\tT\tT\tC\tG\tA\tG\tA\tT\tC\tC\tG\tA\tT\tC\tT\tC\tT\tT\tC\tC\tT\tA\tT\tG\tA\tC\tT\tC\tT\tC\tG\tA\tG\tT\t'..b'\tG\tG
\tA\tA\tG\tG\tA\tC\tT\tC\tG\tG\tA\tT\tA\tG\tT\tT\tC\tA\tG\tA\tC\tT\tA\tC\tT\tC\tT\tT\tC\tA\tG\tT\tC\tA\tG\tT\tC\tG\tA\tA\tG\tA\tG\tA\tG\tG\tA\tC\tT\tA\tG\tG\tA\tA\tG\tC\tT\tC\tT\tT\tC\tC\tT\tA\tT\tT\tC\tC\tT\tA\tG\tG\tA\tC\tT\tT\tC\tT\tC\tG\tA\tC\tA\tG\tT\tT\tC\tT\tC\tT\tC\tT\tA\tA\tT\tC\tA\tA\tT\tA\tG\tA\tG\tA\tG\tA\tG\tC\tT\tA\tG\tC\tT\tA\tG\tA\tG\tA\tG\tA\tT\tA\tC\tT\tG\tG\tA\tA\tG\tT\tC\tT\tA\tG\tT\tT\tG\tA\tG\tT\tC\tA\tG\tT\tC\tC\tA\tC\tT\tC\tA\tT\tC\tT\tC\tA\tG\tC\tT\tT\tC\tT\tA\tA\tG\tC\tT\tA\tG\tG\tA\tT\tA\tG\tA\tA\tT\tA\tG\tA\tC\tT\tC\tC\tT\tG\tC\tG\tA\tT\tA\tG\tA\tC\tT\tA\tG\tT\tC\tC\tT\tG\tA\tC\tA\tG\tT\tG\tA\tC\tA\tA\tT\tC\tG\tG\tA\tA\tG\tG\tT\tA\tG\tG\tA\tC\tT\tT\tC\tG\tA\tC\tT\tA\tG\tG\tT\tA\tT\tT\tA\tA\tC\tC\tG\tC\tT\tG\tA\tG\tA\tA\tG\tA\tG\tC\tG\tT\tA\tG\tT\tC\tT\tG\tC\tA\tC\tT\tG\tT\tC\tT\tA\tA\tG\tC\tG\tC\tT\tG\tC\tT\tC\tC\tG\tG\tC\tC\tT\tG\tA\tT\tG\tG\tA\tA\tG\tT\tG\tC\tG\tC\tT\tC\tT\tT\tC\tA\tG\tA\tT\tA\tG\tT\tC\tA\tT\tC\tT\tC\tT\tG\tA\tC\tT\tG\tA\tG\tA\tA\tG\tC\tT\tT\tC\tT\tC\tA\tG\tT\tC\tG\tA\tT\tC\tA\tG\tA\tG\tC\tT\tG\tA\tA\tT\tA\tT\tT\tC\tT\tC\tG\tA\tT\tC\tA\tG\tG\tC\tC\tT\tG\tA\tA\tG\tT\tC\tG\tA\tA\tC\tA\tG\tG\tT\tT\tC\tG\tC\tC\tT\tT\tA\tC\tT\tC\tA\tA\tT\tC\tT\tG\tA\tC\tT\tT\tC\tG\tA\tT\tC\tT\tC\tT\tC\tC\tG\tA\tT\tT\tC\tC\tT\tG\tA\tA\tC\tG\tA\tT\tG\tG\tA\tT\tC\tG\tC\tC\tT\tA\tT\tC\tT\tA\tG\tA\tT\tT\tC\tA\tG\tC\tT\tG\tA\tC\tT\tG\tA\tC\tT\tT\tC\tC\tT\tC\tT\tG\tA\tG\tA\tA\tC\tC\tT\tG\tA\tC\tT\tT\tA\tC\tT\tC\tT\tT\tC\tT\tC\tG\tA\tC\tT\tG\tA\tG\tC\tA\tC\tG\tA\tT\tC\tT\tG\tG\tA\tC\tT\tG\tA\tT\tC\tT\tG\tC\tA\tC\tT\tC\tT\tT\tC\tC\tT\tT\tC\tA\tG\tC\tG\tT\tA\tC\tG\tA\tC\tA\tG\tA\tG\tC\tT\tG\tA\tG\tC\tA\tT\tT\tC\tC\tT\tT\tA\tA\tT\tT\tC\tC\tT\tT\tA\tC\tT\tG\tA\tC\tT\tG\tA\tA\tG\tT\tC\tC\tG\tG\tA\tG\tA\tT\tC\tT\tC\tA\tG\tC\tT\tT\tG\tC\tT\tA\tG\tC\tT\tA\tG\tT\tC\tT\tC\tT\tC\tA\tG\tG\tA\tG\tA\tC\tG\tC\tT\tT\tG\tA\tG\tC\tA\tC\tT\tA\tG\tC\tA\tT\tC\tA\tT\tT\tA\tG\tC\tA\tG\tT\tA\tT\tC\tC\tT\tC\tA\tT\tC\tT\tG\tT\tC\tA\tG\tA\tG\tC\tA\tC\tT\tA\tT\tA\tG\tG\tT\tT\t
C\tG\tA\tC\tT\tG\tA\tG\tA\tC\tT\tC\tT\tG\tA\tG\tT\tG\tA\tT\tC\tT\tC\tT\tA\tC\tT\tC\tT\tA\tT\tC\tT\tG\tA\tA\tG\tG\tA\tG\tA\tG\tC\tA\tG\tT\tC\tC\tG\tC\tT\tC\tT\tG\tA\tC\tT\tC\tT\tG\tA\tA\tT\tC\tT\tG\tA\tA\tG\tT\tA\tT\tG\tG\tA\tG\tA\tC\tT\tG\tA\tT\tA\tG\tC\tC\tT\tG\tA\tG\tA\tT\tC\tC\tT\tG\tA\tT\tC\tT\tC\tG\tA\tC\tT\tG\tA\tG\tA\tG\tA\tT\tC\tA\tG\tT\tG\tA\tC\tC\tT\tT\tA\tT\tC\tC\tA\tG\tC\tG\tA\tC\tT\tG\tA\tC\tT\tC\tT\tT\tA\tC\tG\tT\tA\tT\tG\tG\tA\tT\tA\tC\tT\tC\tT\tA\tC\tG\tC\tA\tC\tG\tA\tA\tT\tA\tG\tA\tG\tT\tC\tG\tT\tC\tT\tC\tT\tT\tC\tA\tG\tT\tC\tA\tG\tT\tG\tA\tT\tT\tC\tC\tT\tG\tA\tA\tG\tG\tT\tT\tC\tA\tG\tG\tA\tA\tC\tA\tT\tA\tC\tT\tA\tC\tT\tA\tG\tG\tC\tT\tG\tG\tT\tG\tT\tG\tT\tT\tC\tA\tG\tG\tC\tT\tG\tT\tA\tA\tT\tA\tC\tT\tC\tC\tT\tG\tC\tC\tA\tC\tT\tA\tG\tA\tG\tA\tC\tT\tA\tA\tG\tT\tG\tA\tC\tT\tG\tG\tA\tT\tC\tT\tC\tA\tG\tG\tA\tT\tG\tC\tA\tA\tG\tG\tT\tG\tT\tC\tT\tC\tT\tT\tA\tG\tA\tT\tG\tA\tT\tT\tG\tC\tT\tC\tT\tA\tG\tG\tA\tG\tA\tG\tA\tA\tC\tC\tT\tG\tA\tT\tA\tA\tG\tG\tA\tA\tT\tT\tG\tG\tC\tA\tG\tT\tG\tG\tT\tT\tC\tA\tC\tG\tA\tT\tA\tT\tC\tG\tA\tC\tG\tA\tG\tG\tA\tG\tT\tC\tT\tG\tT\tC\tG\tA\tT\tG\tA\tG\tC\tG\tA\tA\tG\tG\tA\tT\tC\tG\tT\tC\tT\tA\tT\tT\tC\tA\tT\tT\tC\tT\tC\tA\tC\tT\tC\tG\tA\tT\tC\tC\tT\tG\tA\tG\tA\tC\tT\tC\tG\tG\tA\tG\tA\tA\tG\tC\tT\tG\tA\tT\tC\tT\tG\tA\tT\tA\tG\tA\tG\tC\tT\tT\tC\tG\tT\tA\tC\tT\tA\tT\tA\tG\tT\tC\tT\tC\tT\tT\tA\tA\tC\tG\tA\tC\tA\tC\tT\tA\tG\tG\tA\tA\tG\tC\tT\tC\tG\tC\tT\tC\tT\tG\tA\tT\tA\tC\tA\tT\tC\tC\tT\tA\tG\tT\tC\tG\tC\tC\tG\tC\tT\tC\tT\tC\tT\tG\tA\tC\tG\tG\tA\tA\tT\tA\tT\tG\tA\tC\tG\tG\tT\tA\tG\tC\tT\tA\tG\tA\tG\tC\tA\tG\tT\tC\tT\tA\tT\tA\tG\tA\tG\tC\tG\tG\tA\tA\tG\tC\tT\tC\tT\tG\tC\tG\tA\tC\tT\tA\tG\tT\tC\tT\tC\tA\tG\tT\tC\tG\tA\tC\tT\tA\tC\tC\tG\tC\tT\tA\tG\tA\tG\tC\tT\tC\tA\tG\tA\tT\tC\tA\tG\tC\tT\tT\tC\tG\tA\tC\tA\tA\tG\tA\tT\tT\tC\tA\tG\tC\tT\tC\tA\tT\tA\tG\tT\tT\tA\tG\tA\tT\tC\tT\tG\tC\tT\tG\tA\tG\tA\tG\tT\tG\tA\tG\tA\tG\tT\tG\tC\tC\tT\tC\tT\tA\tC\tA\tG\tG\tA\tC\tT\tC\tT\tC\tT\tC\tT\tC\tT\tT\tC\tC\tA\tC\tT\tG\tA\tG\tA\tG\tA\tG\tA\tG\tT\tT\tC\
tT\tC\tG\tT\tA\tG\tG\tA\tA\tT\tT\tC\tT\tC\tA\tT\tA\tC\tA\tG\tT\tC\tC\tT\tG\tA\tG\tA\tA\tG\tC\tT\tT\tC\tT\tC\tG\tT\tG\tA\tT\tC\tC\tT\tA\tG\tT\tC\tT\tC\tA\tG\tC\tT\tA\tC\tT\tG\tG\tA\tA\tG\tC\tT\tA\tG\tC\tT\tT\tA\tT\tA\tC\tT\tG\tA\tA\tG\tA\tG\tG\tA\tC\tT\tG\tA\tC\tT\tT\tC\tC\tG\tA\tT\tA\tC\tC\tA\tT\tA\tC\tT\tT\tA\tC\tA\tT\tC\tA\tT\tC\tG\tT\tC\tT\tC\tG\tA\tT\tA\tT\tA\tT\tA\tT\tC\tG\tA\tA\tG\tA\tG\tG\tA\tC\tT\tT\tA\tC\tT\tG\tT\tG\tC\tT\tG\tG\tA\tT\tC\tG\tC\tG\tC\tC\tT\tC\tT\tT\tC\tA\tT\tG\tA\tG\tA\tG\tA\tA\tC\tA\tC\tA\tG\tC\tG\tT\tC\tT\tC\tT\tG\tC\tT\tA\tG\tG\tT\tC\tT\tC\tT\tC\tA\tC\tT\tT\tG\tA\tG\tT\tC\tC\tT\tT\tC\tT\tG\tG\tA\tC\tT\tG\tC\tC\tA\tT\tC\tT\tC\tT\tA\tT\tC\tG\tA\tG\tC\tA\tT\tG\tA\tA\tG\tT\tA\tA\tG\tC\tT\tG\tT\tA\tG\tC\tT\tA\tG\tT\tC\tT\tC\tT\tC\tA\tG\tT\tC\tA\tG\tG\tT\tC\tT\tT\tC\tT\tC\tA\tG\tC\tT\tT\tG\tC\tT\tC\tT\tC\tT\tC\tT\tG\tA\tG\tC\tC\tT\tG\tT\tT\tA\tC\tT\tA\tC\tT\tA\tG\tC\tA\tC\tG\tA\tA\tG\tC\tT\tA\tG\tC\tA\tT\tG\tG\tA\tA\tC\tA\tG\tT\tC\tG\tC\tG\tA\tC\tT\tC\tG\tA\tG\tC\tT\tC\tT\tT\tC\tG\tA\tC\tA\tT\tC\tT\tG\tA\tG\tG\tA\tG\tA\tA\tT\tA\tG\tC\tT\tC\tT\tG\tA\tG\tA\tG\tA\tA\tG\tT\tC\tT\tC\tA\tT\tT\tC\tT\tC\tA\tT\tA\tG\tG\tA\tT\tG\tA\tT\tG\tT\tT\tG\tA\tG\tT\tC\tA\tG\tA\tT\tA\tG\tA\tT\tC\tT\tC\tG\tC\tA\tC\tT\tG\tA\tG\tA\tG\tC\tC\tA\tG\tA\tA\tT\tT\tA\tA\tT\tC\tG\tC\tA\tA\tG\tC\tA\tC\tG\tG\tA\tG\tT\tC\tT\tT\tA\tA\tG\tG\tA\tC\tT\tT\tC\tC\tT\tA\tT\tC\tT\tA\tG\tT\tC\tC\tG\tC\tT\tA\tG\tA\tC\tT\tA\tA\tC\tA\tG\tG\tC\tA\tT\tA\tG\tC\tT\tT\tG\tT\tG\tA\tG\tA\tG\tT\tA\tC\tG\tG\tC\tC\tT\tT\tC\tT\tC\tC\tT\tT\tC\tG\tA\tT\tC\tT\tC\tT\tC\tC\tA\tC\tT\tC\tT\tT\tC\tT\tC\tA\tC\tA\tG\tC\tT\tG\tT\tT\tC\tT\tC\tA\tG\tT\tA\tC\tT\tT\tC\tC\tT\tA\tG\tC\tT\tC\tT\tC\tT\tC\tT\tC\tT\tC\tT\tG\tA\tA\tC\tC\tT\tC\tT\tC\tT\tG\tA\tG\tA\tG\tA\tC\tT\tT\tC\tC\tT\tT\tC\tA\tG\tA\tG\tC\tT\tT\tG\tT\tA\tG\tA\tC\tT\tG\tA\tA\tG\tG\tA\tC\tT\tA\tT\tA\tT\tT\tC\tT\tG\tA\tC\tG\tA\tG\tT\tC\tG\tC\tT\tT\tC\tT\tC\tT\tC\tT\tC\tG\tA\tC\tT\tT\tA\tG\tA\tG\tA\tA\tG\tA\tG\tT\tG\tG\tC\tT\tC\tC\tG\tT\tC\tC\tA\tG\tA\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae Rooting/Rooting.pl --- a/Rooting/Rooting.pl Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,59 +0,0 @@ -#!/usr/bin/perl - -use strict; -use Switch; -use Getopt::Long; -use Bio::SeqIO; -use Cwd ; -use FindBin qw ( $Bin $Script ); - -my $CURRENT_DIR = $Bin; - -my $ROOTING_EXE = "java -jar ". $CURRENT_DIR . "/Rootings_54.jar"; - -my $usage = qq~Usage:$0 <args> [<opts>] -where <args> are: - -i, --input <newick input> - -o, --output <newick output> -~; -$usage .= "\n"; - -my ($input,$outfile); - - -GetOptions( - "input=s" => \$input, - "output=s" => \$outfile -); - - -die $usage - if ( !$input || !$outfile); - -my $treefile = $input; - - -# replace negative values by 0 -open(T,$treefile); -open(T2,">$treefile.2"); -while(<T>) -{ - my $line = $_; - $line =~s/\-\d+\.*\d*\,/0,/g; - $line =~s/\-\d+\.*\d*\)/0\)/g; - print T2 $line; -} -close(T); -close(T2); - -my $rooting_command = $ROOTING_EXE . " -input $treefile.2 -output $treefile.all -midpoint $treefile.midpoint >>$treefile.rooting.log 2>&1"; -system($rooting_command); - -unlink("$treefile.all"); -unlink("$treefile.2"); -rename("$treefile.midpoint",$outfile); - - - - - |
b |
diff -r 420b57c3c185 -r feb40a9a8eae Rooting/Rootings_54.jar |
b |
Binary file Rooting/Rootings_54.jar has changed |
b |
diff -r 420b57c3c185 -r feb40a9a8eae Rooting/rooting.xml --- a/Rooting/rooting.xml Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,193 +0,0 @@ -<tool id="sniplay_rooting" name="Rooting" version="1.0.2"> - - <!-- [REQUIRED] Tool description displayed after the tool name --> - <description> Midpoint rooting of newick tree </description> - - <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> - <requirements> - <requirement type="binary">perl</requirement> - <requirement type="package">Rootings_54.jar</requirement> - </requirements> - - <!-- [OPTIONAL] Command to be executed to get the tool's version string --> - <version_command> -<!-- - tool_binary -v ---> - </version_command> - - <!-- [REQUIRED] The command to execute --> - <command interpreter="perl"> - Rooting.pl -i $filein -o $fileout && mv ${filein}.rooting.log $fileout_log - </command> - - <!-- [REQUIRED] Input files and tool parameters --> - <inputs> - <param name="filein" type="data" format="txt" optional="false" label="Fasta alignment input" /> - <param name="fileout_label" type="text" value="out tree" label="Output name" help="Output name for files" /> - </inputs> - - <!-- [REQUIRED] Output files --> - <outputs> - <data name="fileout_log" type="data" format="txt" label="${fileout_label}.log" /> - <data name="fileout" type="data" format="txt" label="${fileout_label}" /> - </outputs> - - <!-- [STRONGLY RECOMMANDED] Exit code rules --> - <stdio> - <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> - <exit_code range="1:" level="fatal" /> - </stdio> - - <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> - <tests> - <!-- [HELP] Test files have to be in the ~/test-data directory --> - - <test> - <param name="filein" value="newick" /> - <output name="fileout" file="out_tree" /> - <output name="fileout_log" file="out_tree.log" /> - </test> - - <!-- [HELP] Multiple tests can be defined with different parameters --> -<!-- - <test> - </test> ---> - </tests> - - <!-- [OPTIONAL] Help displayed in Galaxy --> - <help> - - -.. 
class:: infomark - -**Authors** Jean-François Dufayard, CIRAD, South Green platform - - -.. class:: infomark - -**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. - -.. class:: infomark - -**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr - -.. class:: infomark - -**Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). - ---------------------------------------------------- - - - - -======= -Rooting -======= - ------------ -Description ------------ - - Compute a midpoint newick rooted tree. - - ------------------ -Workflow position ------------------ - -**Upstream tool** - -=========== ========================== ======= -Name output file(s) format -=========== ========================== ======= -fastme Newick tree Newick -=========== ========================== ======= - - - ----------- -Input file ----------- - -Newick file - - ----------- -Parameters ----------- - -Output name - Output base name for the ouput files - - ------------- -Output files ------------- - -Output_name - Resulting tree rooted in newick format - -Output_name.log - Log file - ------------- -Dependencies ------------- -Rooting - CIRAD tool. Contact jean-francois.dufayard@cirad.fr - ---------------------------------------------------- - ---------------- -Working example ---------------- - -Input files -=========== - -Newick file ------------ - -:: - - (((((((((((((((((((((((((GOGOLEMPUK:0.001198,GOGOLEMPAK:0.002128):0.030378,TREMBESE:0.013258):0.055246,(((JIMBRUKJOL:0.045219,KETANKONIR:0.035298):0.006267, ... - - -Parameters -========== - -Output name -> out tree - - -Output files -============ - -out tree --------- - -:: - - (ref:0.9384270000000001,(((((((((((((((((((((((((((((((((((IRAT257:0.044246,IRAT112:0.023421):0.009006,ARAGUAIA:0.093061):0.004662... 
- - - </help> - <citations> - <!-- [HELP] As DOI or BibTex entry --> - <citation type="bibtex">@article{Dereeper03062015, -author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, -title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, -year = {2015}, -doi = {10.1093/nar/gkv351}, -abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. 
SNiPlay is accessible at http://sniplay.southgreen.fr.}, -URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, -eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, -journal = {Nucleic Acids Research} -} - - </citation> - - </citations> - -</tool> |
b |
diff -r 420b57c3c185 -r feb40a9a8eae Rooting/rooting.xml~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Rooting/rooting.xml~ Fri Jul 10 10:38:11 2015 -0400 |
[ |
@@ -0,0 +1,193 @@ +<tool id="sniplay_rooting" name="Rooting" version="1.0.2"> + + <!-- [REQUIRED] Tool description displayed after the tool name --> + <description> Midpoint rooting of newick tree </description> + + <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> + <requirements> + <requirement type="binary">perl</requirement> + <requirement type="package">Rootings_54.jar</requirement> + </requirements> + + <!-- [OPTIONAL] Command to be executed to get the tool's version string --> + <version_command> +<!-- + tool_binary -v +--> + </version_command> + + <!-- [REQUIRED] The command to execute --> + <command interpreter="perl"> + Rooting.pl -i $filein -o $fileout && mv ${filein}.rooting.log $fileout_log + </command> + + <!-- [REQUIRED] Input files and tool parameters --> + <inputs> + <param name="filein" type="data" format="txt" optional="false" label="Fasta alignment input" /> + <param name="fileout_label" type="text" value="out tree" label="Output name" help="Output name for files" /> + </inputs> + + <!-- [REQUIRED] Output files --> + <outputs> + <data name="fileout_log" type="data" format="txt" label="${fileout_label}.log" /> + <data name="fileout" type="data" format="txt" label="${fileout_label}" /> + </outputs> + + <!-- [STRONGLY RECOMMANDED] Exit code rules --> + <stdio> + <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> + <exit_code range="1:" level="fatal" /> + </stdio> + + <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> + <tests> + <!-- [HELP] Test files have to be in the ~/test-data directory --> + + <test> + <param name="filein" value="newick" /> + <output name="fileout" file="out_tree" /> + <output name="fileout_log" file="out_tree.log" /> + </test> + + <!-- [HELP] Multiple tests can be defined with different parameters --> +<!-- + <test> + </test> +--> + </tests> + + <!-- [OPTIONAL] Help displayed in Galaxy --> + <help> + + +.. 
class:: infomark + +**Authors** Jean-François Dufayard, CIRAD, South Green platform + + +.. class:: infomark + +**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. + +.. class:: infomark + +**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr + +.. class:: infomark + +**Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). + +--------------------------------------------------- + + + + +======= +Rooting +======= + +----------- +Description +----------- + + Compute a midpoint newick rooted tree. + + +----------------- +Workflow position +----------------- + +**Upstream tool** + +=========== ========================== ======= +Name output file(s) format +=========== ========================== ======= +fastme Newick tree Newick +=========== ========================== ======= + + + +---------- +Input file +---------- + +Newick file + + +---------- +Parameters +---------- + +Output name + Output base name for the ouput files + + +------------ +Output files +------------ + +Output_name + Resulting tree rooted in newick format + +Output_name.log + Log file + +------------ +Dependencies +------------ +Rooting + CIRAD tool. Contact jean-francois.dufayard@cirad.fr + +--------------------------------------------------- + +--------------- +Working example +--------------- + +Input files +=========== + +Newick file +----------- + +:: + + (((((((((((((((((((((((((GOGOLEMPUK:0.001198,GOGOLEMPAK:0.002128):0.030378,TREMBESE:0.013258):0.055246,(((JIMBRUKJOL:0.045219,KETANKONIR:0.035298):0.006267, ... + + +Parameters +========== + +Output name -> out tree + + +Output files +============ + +out tree +-------- + +:: + + (ref:0.9384270000000001,(((((((((((((((((((((((((((((((((((IRAT257:0.044246,IRAT112:0.023421):0.009006,ARAGUAIA:0.093061):0.004662... 
+ + + </help> + <citations> + <!-- [HELP] As DOI or BibTex entry --> + <citation type="bibtex">@article{Dereeper03062015, +author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, +title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, +year = {2015}, +doi = {10.1093/nar/gkv351}, +abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. 
SNiPlay is accessible at http://sniplay.southgreen.fr.}, +URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, +eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, +journal = {Nucleic Acids Research} +} + + </citation> + + </citations> + +</tool> |
b |
diff -r 420b57c3c185 -r feb40a9a8eae Rooting/test-data/newick --- a/Rooting/test-data/newick Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -(((((((((((((((((((((((((GOGOLEMPUK:0.001198,GOGOLEMPAK:0.002128):0.030378,TREMBESE:0.013258):0.055246,(((JIMBRUKJOL:0.045219,KETANKONIR:0.035298):0.006267,KANIRANGA:0.042518):0.020121,(VARYMADINI:0.039903,BENGALYVAK:0.032305):0.012408):0.004740):0.001841,(((YANCAOUSSA:0.006364,PATEBLANCM:0.008832):0.000858,MOROBEREKA:0.004615):0.013114,VARYMALADY:0.026086):0.024153):0.015663,WAB706-3-4:0.050598):0.004174,(MAINTIMOLO:0.044761,(FOHISOMOTR:0.005656,VARYSOMOTR:0.006949):0.030574):0.009217):0.006248,IRAT364:0.047033):0.003654,((IRAT366:0.033773,CIRAD358:0.093382):0.015822,(((MANANELATR:0.040437,(ARROZCEBAD:0.027110,GRAZI:0.022678):0.000763):0.009870,KOMOJAMANI:0.049216):0.006713,((TSIPALA89:-0.000887,VARYLAVADE:0.037778):0.044182,MITSANGANA:0.082664):0.068968):0.000971):0.009442):0.002985,(((((((((JUMALI:0.077650,LAMBAYQUE1:0.170057):0.027742,(GOMPA2:0.049418,KAKANI2:0.052752):0.024526):0.081123,JUMULA2:0.075284):0.027607,ref:0.722923):0.015245,((((NPE826:0.019317,NPE253:0.017161):0.033863,((GIZA171:0.023894,NIPPONBARE:0.027998):0.013954,M202:0.053155):0.016691):0.020398,NEPHOAVANG:0.097913):0.011042,BABER:0.155657):0.024483):0.043699,((((((((DAM:0.054359,((((VIETNAM3:0.030951,SENG:0.038876):0.007914,(NABESHI:0.010252,KARASUKARA:0.010553):0.032650):0.012575,VIETNAM1:0.049323):0.003110,(PEHPINUO:0.070737,(CHUAN3:0.020631,CHUAN4:0.024135):0.014450):0.010970):0.020849):0.002227,(((((JAOHAW:0.044517,KHAODAM:0.031174):0.017389,MAHAE:0.041633):0.006695,(KHAOKAPXAN:0.005767,HAWMOM:0.009429):0.031367):0.015885,CHAPHUMA:0.071266):0.005503,KU115:0.043819):0.007631):0.009768,NHTA10:0.061745):0.006241,INDANE:0.079646):0.007679,(((GEMJYAJYAN:0.012403,YANGKUMRED:0.014267):0.008479,DANGREY:0.024493):0.020552,DAWASANRED:0.030369):0.065249):0.006240,CHALOYOE:0.058075):0.012844,NHTA5:0.138764):0.005686,YUNLU7:0.079119):0.012627):0.020665,CUIABANA:0.066182):0.018877,((GANIGI:0.040448,PULULAPA:0.039840):0.020258,GOGO:0.060904):0.005609):0.001416,(((((GUNDILKUNI:0.019719,(C
ICIHBETON:0.021620,BULUPANDAK:0.016924):0.008554):0.028704,PADIBOENAR:0.052396):0.002882,(REKETMAUN:0.059430,((POENOETHIT:0.042637,KETANLUMBU:0.035696):0.016617,MOLOK:0.055674):0.015506):0.001164):0.024179,PADIKASALL:0.093595):0.063014,RATHAL:0.090729):0.015609):0.008281):0.005079,RT1031-69:0.043520):0.002593,MANDRIRAVI:0.049833):0.005647,IRAT234:0.037316):0.007912,((((EARLYMUTAN:0.015218,(IAC25:0.007986,(DOURADOAGU:0.001563,DOURADOPRE:0.001763):0.010062):0.002688):0.011250,(((IAC47:0.007321,(IAC165:0.000500,VARYLAVA90:0.001650):0.006263):0.003063,HD1-4:0.015932):0.006876,GUARANI:0.013831):0.005817):0.007843,IGUAPECATE:0.036421):0.002898,BICOBRANCO:0.031026):0.008082):0.001457,((((((((IR60080-46:0.102166,IR71525-19:0.037083):0.071364,IR68704-14:0.144370):0.009188,((IR66421-10:0.104309,IR66421-09:0.112363):0.044923,CNA-7_BO_1:0.097756):0.030658):0.043160,(((((((CIRAD403:0.043568,MARAVILHA:0.105095):0.009135,CURINCA:0.140813):0.023139,CIRAD409:0.038172):0.045565,PCT4_SA_4_:0.177389):0.024197,(PCT11_0_0_:0.075858,P5589-1-1-:0.044714):0.022844):0.006949,ESPERANZA:0.077139):0.022033,((CT13582-15:0.086985,((((((IRAT216:0.017956,IR47684-05:0.006483):0.014180,((IR63372-08:0.008257,IR65907-18:0.049347):0.060968,IR65907-20:0.025334):0.004085):0.028888,COLOMBIA1:0.106747):0.009488,WAB56-50:0.048646):0.019308,IDSA77:0.026889):0.022776,WAB56-125:0.057970):0.025793):0.017149,IRAT362:0.030612):0.021338):0.005273):0.007474,((IR65261-19:0.088569,IR63380-16:0.060828):0.017377,ORYZICASAB:0.059793):0.020771):0.010205,((((IRAT2:0.006656,(62667:0.002564,(IRAT177:0.003065,IRAT13:0.006564):0.001422):0.001435):0.014034,IRAT109:0.079089):0.006041,IRAT170:0.013368):0.020262,63-104:0.022825):0.010789):0.003440,(((((KUROKA:0.096901,IRAT144:0.032372):0.040630,CIRAD488:0.062971):0.026055,(IRAT212:0.041268,PRIMAVERA:0.059109):0.012806):0.027141,((CIRAD392:0.025487,IRAT380:0.024721):0.028822,CIRAD394:0.055046):0.034180):0.011060,CAIAPO:0.072220):0.009469):0.006119,((ARAGUAIA:0.084755,(IRAT335:0.077
784,IRAT112:0.027346):0.006445):0.007072,IRAT257:0.036873):0.020075):0.002575):0.004673,((IR47686-09:0.045565,IRAT104:0.041351):0.019777,IR71524-44:0.058741):0.008141):0.006303,((OS4:0.021016,OS6:0.015670):0.010953,(CANAROXA:0.018385,KINANDANGP:0.047364):0.022379):0.007502):0.001209,TRESMESES:0.028972):0.002988,PACHOLINHA:0.042509):0.002897,CANELADEFE:0.050931):0.011439,CAAWA/FORT:0.055630):0.006838,((LUDAN:0.054230,((CUBA65:0.037658,(IR53236-27:0.026784,AZUCENA:0.025318):0.020859):0.017556,DAVAO:0.045931):0.005774):0.010335,(BINULAWAN:0.059589,MALAGKITPI:0.062856):0.004216):0.000088):0.004898,DINORADO:0.065206):0.004869,(BAGANANASA:0.049455,BAKUNGH:0.048193):0.021502):0.004581,((SPEAKER:0.057882,(KEDAYAN:0.065817,ARIAS:0.074161):0.019154):0.012584,(TANDUI:0.078177,KETANMENAH:0.079797):0.013169):0.003598):0.031721,PALAWAN:0.032789,KENDINGA5H:0.028863); - |
b |
diff -r 420b57c3c185 -r feb40a9a8eae Rooting/test-data/out_tree --- a/Rooting/test-data/out_tree Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,1 +0,0 @@ -(ref:0.575788,(((((((((((((((((((((((PALAWAN:0.032789,KENDINGA5H:0.028863):0.031721,((SPEAKER:0.057882,(KEDAYAN:0.065817,ARIAS:0.074161):0.019154):0.012584,(TANDUI:0.078177,KETANMENAH:0.079797):0.013169):0.003598):0.004581,(BAGANANASA:0.049455,BAKUNGH:0.048193):0.021502):0.004869,DINORADO:0.065206):0.004898,((LUDAN:0.05423,((CUBA65:0.037658,(IR53236-27:0.026784,AZUCENA:0.025318):0.020859):0.017556,DAVAO:0.045931):0.005774):0.010335,(BINULAWAN:0.059589,MALAGKITPI:0.062856):0.004216):8.8E-5):0.006838,CAAWA/FORT:0.05563):0.011439,CANELADEFE:0.050931):0.002897,PACHOLINHA:0.042509):0.002988,TRESMESES:0.028972):0.001209,((OS4:0.021016,OS6:0.01567):0.010953,(CANAROXA:0.018385,KINANDANGP:0.047364):0.022379):0.007502):0.006303,((IR47686-09:0.045565,IRAT104:0.041351):0.019777,IR71524-44:0.058741):0.008141):0.004673,((((((((IR60080-46:0.102166,IR71525-19:0.037083):0.071364,IR68704-14:0.14437):0.009188,((IR66421-10:0.104309,IR66421-09:0.112363):0.044923,CNA-7_BO_1:0.097756):0.030658):0.04316,(((((((CIRAD403:0.043568,MARAVILHA:0.105095):0.009135,CURINCA:0.140813):0.023139,CIRAD409:0.038172):0.045565,PCT4_SA_4_:0.177389):0.024197,(PCT11_0_0_:0.075858,P5589-1-1-:0.044714):0.022844):0.006949,ESPERANZA:0.077139):0.022033,((CT13582-15:0.086985,((((((IRAT216:0.017956,IR47684-05:0.006483):0.01418,((IR63372-08:0.008257,IR65907-18:0.049347):0.060968,IR65907-20:0.025334):0.004085):0.028888,COLOMBIA1:0.106747):0.009488,WAB56-50:0.048646):0.019308,IDSA77:0.026889):0.022776,WAB56-125:0.05797):0.025793):0.017149,IRAT362:0.030612):0.021338):0.005273):0.007474,((IR65261-19:0.088569,IR63380-16:0.060828):0.017377,ORYZICASAB:0.059793):0.020771):0.010205,((((IRAT2:0.006656,(62667:0.002564,(IRAT177:0.003065,IRAT13:0.006564):0.001422):0.001435):0.014034,IRAT109:0.079089):0.006041,IRAT170:0.013368):0.020262,63-104:0.022825):0.010789):0.00344,(((((KUROKA:0.096901,IRAT144:0.032372):0.04063,CIRAD488:0.062971):0.026055,(IRAT212:0.041268,PRIMAVERA:0.059109):0.012806):0.027141,((CIRAD392:0.
025487,IRAT380:0.024721):0.028822,CIRAD394:0.055046):0.03418):0.01106,CAIAPO:0.07222):0.009469):0.006119,((ARAGUAIA:0.084755,(IRAT335:0.077784,IRAT112:0.027346):0.006445):0.007072,IRAT257:0.036873):0.020075):0.002575):0.001457,((((EARLYMUTAN:0.015218,(IAC25:0.007986,(DOURADOAGU:0.001563,DOURADOPRE:0.001763):0.010062):0.002688):0.01125,(((IAC47:0.007321,(IAC165:5.0E-4,VARYLAVA90:0.00165):0.006263):0.003063,HD1-4:0.015932):0.006876,GUARANI:0.013831):0.005817):0.007843,IGUAPECATE:0.036421):0.002898,BICOBRANCO:0.031026):0.008082):0.007912,IRAT234:0.037316):0.005647,MANDRIRAVI:0.049833):0.002593,RT1031-69:0.04352):0.005079,((((((((GOGOLEMPUK:0.001198,GOGOLEMPAK:0.002128):0.030378,TREMBESE:0.013258):0.055246,(((JIMBRUKJOL:0.045219,KETANKONIR:0.035298):0.006267,KANIRANGA:0.042518):0.020121,(VARYMADINI:0.039903,BENGALYVAK:0.032305):0.012408):0.00474):0.001841,(((YANCAOUSSA:0.006364,PATEBLANCM:0.008832):8.58E-4,MOROBEREKA:0.004615):0.013114,VARYMALADY:0.026086):0.024153):0.015663,WAB706-3-4:0.050598):0.004174,(MAINTIMOLO:0.044761,(FOHISOMOTR:0.005656,VARYSOMOTR:0.006949):0.030574):0.009217):0.006248,IRAT364:0.047033):0.003654,((IRAT366:0.033773,CIRAD358:0.093382):0.015822,(((MANANELATR:0.040437,(ARROZCEBAD:0.02711,GRAZI:0.022678):7.63E-4):0.00987,KOMOJAMANI:0.049216):0.006713,((TSIPALA89:0.0,VARYLAVADE:0.037778):0.044182,MITSANGANA:0.082664):0.068968):9.71E-4):0.009442):0.002985):0.008281,(((((GUNDILKUNI:0.019719,(CICIHBETON:0.02162,BULUPANDAK:0.016924):0.008554):0.028704,PADIBOENAR:0.052396):0.002882,(REKETMAUN:0.05943,((POENOETHIT:0.042637,KETANLUMBU:0.035696):0.016617,MOLOK:0.055674):0.015506):0.001164):0.024179,PADIKASALL:0.093595):0.063014,RATHAL:0.090729):0.015609):0.001416,((GANIGI:0.040448,PULULAPA:0.03984):0.020258,GOGO:0.060904):0.005609):0.018877,CUIABANA:0.066182):0.020665,((((((((DAM:0.054359,((((VIETNAM3:0.030951,SENG:0.038876):0.007914,(NABESHI:0.010252,KARASUKARA:0.010553):0.03265):0.012575,VIETNAM1:0.049323):0.00311,(PEHPINUO:0.070737,(CHUAN3:0.020631,CHUAN4
:0.024135):0.01445):0.01097):0.020849):0.002227,(((((JAOHAW:0.044517,KHAODAM:0.031174):0.017389,MAHAE:0.041633):0.006695,(KHAOKAPXAN:0.005767,HAWMOM:0.009429):0.031367):0.015885,CHAPHUMA:0.071266):0.005503,KU115:0.043819):0.007631):0.009768,NHTA10:0.061745):0.006241,INDANE:0.079646):0.007679,(((GEMJYAJYAN:0.012403,YANGKUMRED:0.014267):0.008479,DANGREY:0.024493):0.020552,DAWASANRED:0.030369):0.065249):0.00624,CHALOYOE:0.058075):0.012844,NHTA5:0.138764):0.005686,YUNLU7:0.079119):0.012627):0.043699,((((NPE826:0.019317,NPE253:0.017161):0.033863,((GIZA171:0.023894,NIPPONBARE:0.027998):0.013954,M202:0.053155):0.016691):0.020398,NEPHOAVANG:0.097913):0.011042,BABER:0.155657):0.024483):0.015245,(((JUMALI:0.07765,LAMBAYQUE1:0.170057):0.027742,(GOMPA2:0.049418,KAKANI2:0.052752):0.024526):0.081123,JUMULA2:0.075284):0.027607):0.14713500000000007); |
b |
diff -r 420b57c3c185 -r feb40a9a8eae SNP_density/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl --- a/SNP_density/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
#!/usr/bin/perl

# Count SNPs per fixed-size window along each chromosome of a HapMap file.
#
# Two files are written:
#   <out>            one row per (chromosome, window) with the number of SNPs
#                    whose position falls in that window;
#   <out>.by_sample  one row per (chromosome, window) with, for every sample
#                    column, the number of SNPs whose genotype string differs
#                    from the value of the "ref" column.
#
# The input is read as tab-separated text whose first line is a header.
# Column indices (0-based) used below: 2 = chromosome, 3 = position,
# 11 = reference genotype, 12 and up = one column per sample.
#
# NOTE(review): the separators inside the output string literals are
# reproduced exactly as they appear in the reviewed copy (single spaces).
# That copy had its whitespace collapsed and the Galaxy wrapper declares the
# outputs as "tabular", so the original literals may have contained tab
# characters -- confirm against the repository before relying on them.

use strict;
use warnings;

# NOTE(review): nothing in this script appears to use Switch or Bio::SeqIO.
# They are kept so the script's module requirements are unchanged; consider
# dropping them.
use Switch;
use Getopt::Long;
use Bio::SeqIO;
use Scalar::Util qw(looks_like_number);

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
    -i, --input <Hapmap input>
    -o, --out <output in tabular format>
    -s, --step <step (in bp)>
~;
$usage .= "\n";

my ($input, $out, $step);

GetOptions(
    "input=s" => \$input,
    "out=s"   => \$out,
    "step=s"  => \$step,
);

die $usage
    if ( !$input || !$step || !$out );

# A non-numeric or non-positive step would otherwise end in a division by
# zero (or an empty window range) further down.
die "Step must be a positive number (got '$step')\n"
    unless looks_like_number($step) && $step > 0;

# Column layout of the HapMap input (0-based).
my $CHROM_COL        = 2;
my $POS_COL          = 3;
my $REF_COL          = 11;
my $FIRST_SAMPLE_COL = 12;

# At most this many chromosomes (in sorted name order) are written out.
my $max_chr_num = 100;

my %counts;           # $counts{chrom}{window}              -> SNP count
my %counts_by_ind;    # $counts_by_ind{chrom}{window}{col}  -> non-ref count
my %maximums;         # $maximums{chrom}                    -> highest position seen

open(my $HAPMAP, '<', $input)
    or die "Cannot open HapMap input '$input': $!\n";

# Header line: everything from the first sample column on is a sample name.
my $headers = <$HAPMAP>;
$headers = '' unless defined $headers;    # empty file: no samples, no data
$headers =~ s/[\r\n]//g;
my @ind_names        = split(/\t/, $headers);
my @individual_names = @ind_names[ $FIRST_SAMPLE_COL .. $#ind_names ];

while (my $line = <$HAPMAP>)
{
    $line =~ s/[\r\n]//g;
    my @infos = split(/\t/, $line);

    # Blank or truncated lines carry no chromosome/position; counting them
    # would create a bogus chromosome with an empty name.
    next if @infos <= $POS_COL;

    my $chrom    = $infos[$CHROM_COL];
    my $position = $infos[$POS_COL];

    $maximums{$chrom} = 0 unless defined $maximums{$chrom};
    $maximums{$chrom} = $position if $position > $maximums{$chrom};

    # Index of the window this SNP falls in.
    my $classe_position = int($position / $step);
    $counts{$chrom}{$classe_position}++;

    my $ref_allele = $infos[$REF_COL];
    for my $i ($FIRST_SAMPLE_COL .. $#infos)
    {
        # Samples are keyed by their column index, not by name.
        $counts_by_ind{$chrom}{$classe_position}{$i} ||= 0;
        if ($infos[$i] ne $ref_allele)
        {
            $counts_by_ind{$chrom}{$classe_position}{$i}++;
        }
    }
}
close($HAPMAP);

#######################################################
# global
#######################################################
open(my $OUT, '>', $out)
    or die "Cannot write '$out': $!\n";
print $OUT "Chromosome Position SNPs\n";
my $chr_num = 0;
foreach my $chrom (sort keys(%counts))
{
    $chr_num++;
    my $x = 0;

    # Every window from 0 up to the one holding the highest position is
    # reported, including windows without any SNP.
    for my $classe_position (0 .. int($maximums{$chrom} / $step))
    {
        my $nb = $counts{$chrom}{$classe_position} || 0;
        $x += $step;    # reported position is the end of the window
        print $OUT "$chrom $x $nb\n";
    }
    if ($chr_num >= $max_chr_num) { last; }
}
close($OUT) or die "Error while writing '$out': $!\n";

#######################################################
# For each individual
#######################################################
open(my $OUT2, '>', "$out.by_sample")
    or die "Cannot write '$out.by_sample': $!\n";
$chr_num = 0;
print $OUT2 "Chromosome " . join("\t", @individual_names) . "\n";
foreach my $chrom (sort keys(%counts_by_ind))
{
    $chr_num++;
    for my $classe_position (0 .. int($maximums{$chrom} / $step))
    {
        print $OUT2 "$chrom";

        # One value per header sample, looked up by its column index.
        for my $num_ind ($FIRST_SAMPLE_COL .. $FIRST_SAMPLE_COL + $#individual_names)
        {
            my $val = $counts_by_ind{$chrom}{$classe_position}{$num_ind} || 0;
            print $OUT2 " $val";
        }
        print $OUT2 "\n";
    }
    if ($chr_num >= $max_chr_num) { last; }
}
close($OUT2) or die "Error while writing '$out.by_sample': $!\n";
b |
diff -r 420b57c3c185 -r feb40a9a8eae SNP_density/calculateSlidingWindowsSNPdensitiesFromHapMap_wrapper.xml --- a/SNP_density/calculateSlidingWindowsSNPdensitiesFromHapMap_wrapper.xml Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,207 +0,0 @@ -<tool id="sniplay_density" name="SNP density" version="1.2.0"> - - <!-- [REQUIRED] Tool description displayed after the tool name --> - <description> Calculate SNP densities along chromosome from HapMap</description> - - <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> - <requirements> - <requirement type="binary">perl</requirement> - </requirements> - - <!-- [OPTIONAL] Command to be executed to get the tool's version string --> - <version_command> -<!-- - tool_binary -v ---> - </version_command> - - <!-- [REQUIRED] The command to execute --> - <command interpreter="bash"> - calculateSlidingWindowsSNPdensitiesFromHapmap.sh $filein $fileout $fileout_bysample $step - </command> - - <!-- [REQUIRED] Input files and tool parameters --> - <inputs> - <param name="filein" type="data" format="txt" optional="false" label="Hapmap input" /> - <param name="step" type="integer" value="200000" label="Step" help="Step in bp"/> - <param name="fileout_label" type="text" value="densities" label="Output name" help="Output name for tabular files" /> - </inputs> - - <!-- [REQUIRED] Output files --> - <outputs> - <data name="fileout" type="data" format="tabular" label="${fileout_label}" /> - <data name="fileout_bysample" type="data" format="tabular" label="${fileout_label}.by_sample" /> - </outputs> - - <!-- [STRONGLY RECOMMANDED] Exit code rules --> - <stdio> - <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> - <exit_code range="1:" level="fatal" /> - </stdio> - - <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> - <tests> - <!-- [HELP] Test files have to be in the ~/test-data directory --> -<!-- - <test> - <param name="input" value="input_test.txt" /> - <output name="output" file="output_test.txt" /> - </test> ---> - <!-- [HELP] Multiple tests can be defined with different parameters --> -<!-- - <test> - </test> ---> - </tests> - - <!-- [OPTIONAL] Help displayed 
in Galaxy --> - <help> - -.. class:: infomark - -**Authors** Dereeper Alexis (alexis.dereeper@ird.fr), IRD, South Green platform - - | **Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). - - -.. class:: infomark - -**Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique. - -.. class:: infomark - -**Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr - ---------------------------------------------------- - - - -============= -SNP densities -============= - ------------ -Description ------------ - - Calculate SNP densities along chromosome from HapMap - - ------------------ -Workflow position ------------------ - -**Upstream tool** - -=============== ====================== =========== -Name output file(s) format -=============== ====================== =========== -VCF to Hapmap Hapmap file hapmap -=============== ====================== =========== - - ----------- -Input file ----------- - -Hapmap file - File with SNPs - - ----------- -Parameters ----------- - -Step - Step in bp for the window to calculate SNP density - -Output name - Output base name for the two output files - - ------------- -Output files ------------- - -Output_name - Tabular file with SNP density in each position - -Output_name.by_sample - Tabular file with SNP density for each sample - - ---------------------------------------------------- - ---------------- -Working example ---------------- - -Input files -=========== - -hapmap file ------------ - -:: - - rs# alleles chrom pos strand assembly# center protLSID assayLSID panelLSID QCcode ref BA58 BA59 BD54 - chr1:1774 [G/T] chr1 1774 Cc01_g00010:47 exon NON_SYNONYMOUS_CODING gCg/gAg A/E 25.0% 4 GG TT TT TT - chr1:1640 [G/A] chr1 1640 Cc01_g00010:127 exon NON_SYNONYMOUS_CODING Ccg/Tcg P/S 37.5% 4 GG GG AA GA - chr1:1629 [A/C] chr1 1629 
Cc01_g00010:138 exon SYNONYMOUS_CODING ctT/ctG L/L 37.5% 4 AA CC CC AC - chr1:1628 [C/G] chr1 1628 Cc01_g00010:139 exon NON_SYNONYMOUS_CODING Ggg/Cgg G/R 12.5% 4 CC CC CC CG - chr1:1619 [T/G] chr1 1619 Cc01_g00010:148 exon NON_SYNONYMOUS_CODING Aaa/Caa K/Q 37.5% 4 TT TT GG TG - chr1:1405 [C/T] chr1 1405 Cc01_g00010:362 exon NON_SYNONYMOUS_CODING cGg/cAg R/Q 16.7% 3 CC CC NN CT - -Parameters -========== - -Step -> 200000 - -Output name -> densities - - -Output files -============ - -densities ---------- - -:: - - Chromosome Position SNPs - chr1 200000 355 - chr1 400000 228 - chr1 600000 63 - chr1 800000 191 - - -densities.by_sample -------------------- - -:: - - Chromosome BA58 BA59 BD54 - chr1 220 197 225 - chr1 130 119 133 - chr1 43 43 40 - chr1 139 167 141 - - </help> - <citations> - <!-- [HELP] As DOI or BibTex entry --> - <citation type="bibtex">@article{Dereeper03062015, -author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, -title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, -year = {2015}, -doi = {10.1093/nar/gkv351}, -abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. 
Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, -URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, -eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, -journal = {Nucleic Acids Research} -} - - </citation> - - </citations> -</tool> |
b |
diff -r 420b57c3c185 -r feb40a9a8eae SNP_density/calculateSlidingWindowsSNPdensitiesFromHapmap.sh --- a/SNP_density/calculateSlidingWindowsSNPdensitiesFromHapmap.sh Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
#!/bin/bash
#
# Galaxy wrapper around CalculateSlidingWindowsSNPdensitiesFromHapmap.pl.
#
# Usage:
#   calculateSlidingWindowsSNPdensitiesFromHapmap.sh <hapmap> <fileout> <fileout_bysample> <step>
#
#   hapmap            HapMap input file
#   fileout           destination of the global per-window table
#   fileout_bysample  destination of the per-sample table
#   step              window size in bp
#
# The Perl script writes its per-sample table next to <fileout> as
# "<fileout>.by_sample"; it is copied to <fileout_bysample> and the
# temporary copy is removed.

# Stop at the first failing command so a Perl error is reported with a
# non-zero exit status instead of being followed by cp/rm on a missing file.
set -e

if [ "$#" -ne 4 ]; then
    echo "Usage: $0 <hapmap> <fileout> <fileout_bysample> <step>" >&2
    exit 1
fi

# All expansions are quoted so paths containing spaces survive word splitting.
tool_path=$(dirname "$0")
hapmap=$1
fileout=$2
fileout_bysample=$3
step=$4

perl "$tool_path/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl" -i "$hapmap" -o "$fileout" -s "$step"

cp "$fileout.by_sample" "$fileout_bysample"
rm "$fileout.by_sample"
b |
diff -r 420b57c3c185 -r feb40a9a8eae SNP_density/test-data/hapmap --- a/SNP_density/test-data/hapmap Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,3584 +0,0 @@\n-rs#\talleles\tchrom\tpos\tstrand\tassembly#\tcenter\tprotLSID\tassayLSID\tpanelLSID\tQCcode\tref\tBA58\tBA59\tBD54\n-chr1:1774\t[G/T]\tchr1\t1774\tCc01_g00010:47\texon\tNON_SYNONYMOUS_CODING\tgCg/gAg\tA/E\t25.0%\t4\tGG\tTT\tTT\tTT\n-chr1:1640\t[G/A]\tchr1\t1640\tCc01_g00010:127\texon\tNON_SYNONYMOUS_CODING\tCcg/Tcg\tP/S\t37.5%\t4\tGG\tGG\tAA\tGA\n-chr1:1629\t[A/C]\tchr1\t1629\tCc01_g00010:138\texon\tSYNONYMOUS_CODING\tctT/ctG\tL/L\t37.5%\t4\tAA\tCC\tCC\tAC\n-chr1:1628\t[C/G]\tchr1\t1628\tCc01_g00010:139\texon\tNON_SYNONYMOUS_CODING\tGgg/Cgg\tG/R\t12.5%\t4\tCC\tCC\tCC\tCG\n-chr1:1619\t[T/G]\tchr1\t1619\tCc01_g00010:148\texon\tNON_SYNONYMOUS_CODING\tAaa/Caa\tK/Q\t37.5%\t4\tTT\tTT\tGG\tTG\n-chr1:1405\t[C/T]\tchr1\t1405\tCc01_g00010:362\texon\tNON_SYNONYMOUS_CODING\tcGg/cAg\tR/Q\t16.7%\t3\tCC\tCC\tNN\tCT\n-chr1:1398\t[T/G]\tchr1\t1398\tCc01_g00010:369\texon\tSYNONYMOUS_CODING\tgcA/gcC\tA/A\t16.7%\t3\tTT\tTT\tNN\tTG\n-chr1:1394\t[G/C]\tchr1\t1394\tCc01_g00010:373\texon\tNON_SYNONYMOUS_CODING\tCgt/Ggt\tR/G\t16.7%\t3\tGG\tGG\tNN\tGC\n-chr1:1379\t[A/T]\tchr1\t1379\tCc01_g00010:388\texon\tNON_SYNONYMOUS_CODING\tTac/Aac\tY/N\t16.7%\t3\tAA\tAA\tNN\tAT\n-chr1:1352\t[T/C]\tchr1\t1352\tCc01_g00010:415\texon\tNON_SYNONYMOUS_CODING\tAct/Gct\tT/A\t33.3%\t3\tTT\tCC\tNN\tCC\n-chr1:1252\t[T/A]\tchr1\t1252\tCc01_g00010:515\texon\tNON_SYNONYMOUS_CODING\taAg/aTg\tK/M\t12.5%\t4\tTT\tTT\tTT\tTA\n-chr1:17176\t[T/G]\tchr1\t17176\tCc01_g00030:504\texon\tSYNONYMOUS_CODING\tggT/ggG\tG/G\t12.5%\t4\tTT\tTT\tTG\tTT\n-chr1:18041\t[A/G]\tchr1\t18041\tCc01_g00030:661\texon\tNON_SYNONYMOUS_CODING\tAtc/Gtc\tI/V\t37.5%\t4\tAA\tAA\tGG\tAG\n-chr1:18442\t[C/A]\tchr1\t18442\tCc01_g00030:929\texon\tNON_SYNONYMOUS_CODING\ttCc/tAc\tS/Y\t25.0%\t4\tCC\tCC\tCA\tCA\n-chr1:18478\t[T/C]\tchr1\t18478\tCc01_g00030:965\texon\tNON_SYNONYMOUS_CODING\tcTa/cCa\tL/P\t25.0%\t4\tTT\tTT\tTC\tTC\n-chr1:18554\t[T/G]\tchr1\t18554\tCc01_g00030:1041\texon\tSYNONYMOUS_CODING\ttcT/tcG\tS/S\t25.0%\t4\tTT\tTT\tTG\
tTG\n-chr1:19062\t[T/A]\tchr1\t19062\tCc01_g00030:1151\texon\tNON_SYNONYMOUS_CODING\ttTc/tAc\tF/Y\t33.3%\t3\tTT\tNN\tTT\tAA\n-chr1:19078\t[A/G]\tchr1\t19078\tCc01_g00030:1167\texon\tSYNONYMOUS_CODING\taaA/aaG\tK/K\t12.5%\t4\tAA\tAA\tAG\tAA\n-chr1:19138\t[T/C]\tchr1\t19138\tCc01_g00030:1227\texon\tSYNONYMOUS_CODING\ttaT/taC\tY/Y\t25.0%\t4\tTT\tTT\tCT\tTC\n-chr1:22125\t[C/T]\tchr1\t22125\tCc01_g00030:1773\texon\tSYNONYMOUS_CODING\tccC/ccT\tP/P\t25.0%\t4\tCC\tCC\tCT\tCT\n-chr1:22212\t[T/C]\tchr1\t22212\tCc01_g00030:1860\texon\tSYNONYMOUS_CODING\tagT/agC\tS/S\t12.5%\t4\tTT\tTT\tTC\tTT\n-chr1:22342\t[T/A]\tchr1\t22342\tCc01_g00030:1990\texon\tNON_SYNONYMOUS_CODING\tTca/Aca\tS/T\t25.0%\t4\tTT\tTT\tTA\tTA\n-chr1:22404\t[C/T]\tchr1\t22404\tCc01_g00030:2052\texon\tSYNONYMOUS_CODING\ttgC/tgT\tC/C\t25.0%\t4\tCC\tCC\tCT\tCT\n-chr1:22478\t[C/G]\tchr1\t22478\tCc01_g00030:2126\texon\tNON_SYNONYMOUS_CODING\taCt/aGt\tT/S\t25.0%\t4\tCC\tGG\tCC\tCC\n-chr1:22613\t[C/T]\tchr1\t22613\tCc01_g00030:2261\texon\tNON_SYNONYMOUS_CODING\tgCg/gTg\tA/V\t25.0%\t4\tCC\tTT\tCC\tCC\n-chr1:22657\t[C/T]\tchr1\t22657\tCc01_g00030:2305\texon\tSYNONYMOUS_CODING\tCta/Tta\tL/L\t12.5%\t4\tCC\tCC\tCT\tCC\n-chr1:22758\t[A/G]\tchr1\t22758\tCc01_g00030:2406\texon\tSYNONYMOUS_CODING\tgaA/gaG\tE/E\t25.0%\t4\tAA\tAA\tAG\tAG\n-chr1:22903\t[C/A]\tchr1\t22903\tCc01_g00030:2551\texon\tNON_SYNONYMOUS_CODING\tCtt/Att\tL/I\t25.0%\t4\tCC\tCC\tCA\tCA\n-chr1:22965\t[G/T]\tchr1\t22965\tCc01_g00030:2613\texon\tNON_SYNONYMOUS_CODING\tatG/atT\tM/I\t12.5%\t4\tGG\tGG\tTG\tGG\n-chr1:23037\t[A/G]\tchr1\t23037\tCc01_g00030:2685\texon\tSYNONYMOUS_CODING\taaA/aaG\tK/K\t25.0%\t4\tAA\tAA\tAG\tAG\n-chr1:24151\t[A/G]\tchr1\t24151\tCc01_g00030:3063\texon\tSYNONYMOUS_CODING\tgcA/gcG\tA/A\t25.0%\t4\tAA\tAA\tAG\tAG\n-chr1:24436\t[T/C]\tchr1\t24436\tCc01_g00030:3240\texon\tSYNONYMOUS_CODING\tagT/agC\tS/S\t25.0%\t4\tTT\tTT\tTC\tTC\n-chr1:25763\t[G/C]\tchr1\t25763\tCc01_g00030:3610\texon\tNON_SYNONYMOUS_CODING\tGta/Cta\tV/L\t25.0%\t4\tGG\tGG\tGC\
tGC\n-chr1:26910\t[T/G]\tchr1\t26910\tCc01_g00030:3889\texon\tNON_SYNONYMOUS_CODING\tTct/Gct\tS/A\t25.0%\t4\tTT\tTT\tTG\tTG\n-chr1:26996\t[C/T]\tchr1\t26996\tCc01_g00030:3975\texon\tSYNONYMOUS_CODING\tgaC/gaT\tD/D\t25.0%\t4\tCC\tCC\tCT\tCT\n-chr1:27095\t[A/T]\tchr1\t27095\tCc01_g00030:4074\texon\tSYNONYMOUS_CODING\tccA/ccT\tP/P\t37.5%\t4\tAA\tAA\tTT\tAT\n-chr1:27755\t[C/T]\tchr1\t27755\tCc01_g00030:4734\texon\tSYNONYMOUS_CODING\tagC/agT\tS/S\t25.0%\t4\tCC\tCC\tCT\tCT\n-chr1:30493\t[C/T]\tchr1\t30493\tCc01_g00030:4992\texon\tSYNONYMOUS_CODING\tgaC/gaT\tD/D\t12.5%\t4\tCC\tCC\tCT\tC'..b'n\tSYNONYMOUS_CODING\tacG/acA\tT/T\t25.0%\t4\tCC\tTT\tCC\tCC\n-chr1:3940278\t[G/A]\tchr1\t3940278\tCc01_g02140:861\texon\tSYNONYMOUS_CODING\taaC/aaT\tN/N\t25.0%\t4\tGG\tAA\tGG\tGG\n-chr1:3940190\t[T/C]\tchr1\t3940190\tCc01_g02140:949\texon\tNON_SYNONYMOUS_CODING\tAtc/Gtc\tI/V\t12.5%\t4\tTT\tTT\tTC\tTT\n-chr1:3940062\t[G/A]\tchr1\t3940062\tCc01_g02140:1077\texon\tSYNONYMOUS_CODING\taaC/aaT\tN/N\t37.5%\t4\tGG\tAA\tGA\tAA\n-chr1:3939962\t[C/A]\tchr1\t3939962\tCc01_g02140:1177\texon\tNON_SYNONYMOUS_CODING\tGcc/Tcc\tA/S\t12.5%\t4\tCC\tCA\tCC\tCC\n-chr1:3939959\t[G/T]\tchr1\t3939959\tCc01_g02140:1180\texon\tNON_SYNONYMOUS_CODING\tCtt/Att\tL/I\t25.0%\t4\tGG\tTT\tGG\tGG\n-chr1:3939916\t[C/T]\tchr1\t3939916\tCc01_g02140:1223\texon\tNON_SYNONYMOUS_CODING\taGt/aAt\tS/N\t25.0%\t4\tCC\tTT\tCC\tCC\n-chr1:3938977\t[G/C]\tchr1\t3938977\tCc01_g02140:1335\texon\tSYNONYMOUS_CODING\ttcC/tcG\tS/S\t12.5%\t4\tGG\tGC\tGG\tGG\n-chr1:3938798\t[T/G]\tchr1\t3938798\tCc01_g02140:1369\texon\tNON_SYNONYMOUS_CODING\tAgt/Cgt\tS/R\t37.5%\t4\tTT\tGG\tTG\tTT\n-chr1:3938797\t[C/G]\tchr1\t3938797\tCc01_g02140:1370\texon\tNON_SYNONYMOUS_CODING\taGt/aCt\tS/T\t12.5%\t4\tCC\tCC\tCC\tCG\n-chr1:3938792\t[G/A]\tchr1\t3938792\tCc01_g02140:1375\texon\tSYNONYMOUS_CODING\tCta/Tta\tL/L\t12.5%\t4\tGG\tGG\tGG\tAG\n-chr1:3938784\t[C/T]\tchr1\t3938784\tCc01_g02140:1383\texon\tSYNONYMOUS_CODING\tccG/ccA\tP/P\t25.0%\t4\tCC\tTC\tCT\tCC\n-chr1
:3938738\t[C/T]\tchr1\t3938738\tCc01_g02140:1429\texon\tNON_SYNONYMOUS_CODING\tGtt/Att\tV/I\t12.5%\t4\tCC\tCC\tCT\tCC\n-chr1:3938734\t[T/C]\tchr1\t3938734\tCc01_g02140:1433\texon\tNON_SYNONYMOUS_CODING\tcAa/cGa\tQ/R\t25.0%\t4\tTT\tTT\tTC\tCT\n-chr1:3938731\t[A/G]\tchr1\t3938731\tCc01_g02140:1436\texon\tNON_SYNONYMOUS_CODING\tgTg/gCg\tV/A\t12.5%\t4\tAA\tAA\tAG\tAA\n-chr1:3938674\t[C/T]\tchr1\t3938674\tCc01_g02140:1493\texon\tNON_SYNONYMOUS_CODING\taGt/aAt\tS/N\t12.5%\t4\tCC\tCC\tCT\tCC\n-chr1:3950327\t[A/C]\tchr1\t3950327\tCc01_g02150:300\texon\tNON_SYNONYMOUS_CODING\tcaT/caG\tH/Q\t37.5%\t4\tAA\tCC\tAC\tAA\n-chr1:3949508\t[G/T]\tchr1\t3949508\tCc01_g02150:438\texon\tSYNONYMOUS_CODING\ttcC/tcA\tS/S\t12.5%\t4\tGG\tGT\tGG\tGG\n-chr1:3949500\t[T/C]\tchr1\t3949500\tCc01_g02150:446\texon\tNON_SYNONYMOUS_CODING\ttAt/tGt\tY/C\t12.5%\t4\tTT\tTT\tTC\tTT\n-chr1:3949478\t[G/A]\tchr1\t3949478\tCc01_g02150:468\texon\tSYNONYMOUS_CODING\tcgC/cgT\tR/R\t12.5%\t4\tGG\tGG\tGA\tGG\n-chr1:3949470\t[C/T]\tchr1\t3949470\tCc01_g02150:476\texon\tNON_SYNONYMOUS_CODING\taGg/aAg\tR/K\t25.0%\t4\tCC\tTT\tCC\tCC\n-chr1:3949465\t[C/G]\tchr1\t3949465\tCc01_g02150:481\texon\tNON_SYNONYMOUS_CODING\tGgt/Cgt\tG/R\t37.5%\t4\tCC\tGG\tCG\tCC\n-chr1:3949086\t[C/T]\tchr1\t3949086\tCc01_g02150:541\texon\tNON_SYNONYMOUS_CODING\tGtc/Atc\tV/I\t12.5%\t4\tCC\tCC\tCT\tCC\n-chr1:3967535\t[C/T]\tchr1\t3967535\tCc01_g02160:57\texon\tSYNONYMOUS_CODING\tcgG/cgA\tR/R\t12.5%\t4\tCC\tCT\tCC\tCC\n-chr1:3967530\t[A/T]\tchr1\t3967530\tCc01_g02160:62\texon\tNON_SYNONYMOUS_CODING\tcTg/cAg\tL/Q\t12.5%\t4\tAA\tAA\tAA\tAT\n-chr1:3967524\t[G/A]\tchr1\t3967524\tCc01_g02160:68\texon\tNON_SYNONYMOUS_CODING\tcCa/cTa\tP/L\t37.5%\t4\tGG\tAG\tGA\tAG\n-chr1:3967512\t[A/G]\tchr1\t3967512\tCc01_g02160:80\texon\tNON_SYNONYMOUS_CODING\taTa/aCa\tI/T\t12.5%\t4\tAA\tAA\tAG\tAA\n-chr1:3967435\t[A/G]\tchr1\t3967435\tCc01_g02160:157\texon\tNON_SYNONYMOUS_CODING\tTcc/Ccc\tS/P\t25.0%\t4\tAA\tAG\tAA\tAG\n-chr1:3967421\t[G/A]\tchr1\t3967421\tCc01_g02160:
171\texon\tSYNONYMOUS_CODING\tacC/acT\tT/T\t12.5%\t4\tGG\tAG\tGG\tGG\n-chr1:3967412\t[A/C]\tchr1\t3967412\tCc01_g02160:180\texon\tSTOP_GAINED\ttaT/taG\tY/0\t37.5%\t4\tAA\tAC\tAC\tAC\n-chr1:3967382\t[A/G]\tchr1\t3967382\tCc01_g02160:210\texon\tSYNONYMOUS_CODING\tccT/ccC\tP/P\t25.0%\t4\tAA\tGA\tGA\tAA\n-chr1:3967354\t[G/T]\tchr1\t3967354\tCc01_g02160:238\texon\tNON_SYNONYMOUS_CODING\tCaa/Aaa\tQ/K\t12.5%\t4\tGG\tGG\tGG\tGT\n-chr1:3967352\t[T/C]\tchr1\t3967352\tCc01_g02160:240\texon\tSYNONYMOUS_CODING\tcaA/caG\tQ/Q\t37.5%\t4\tTT\tCC\tCT\tCC\n-chr1:3967310\t[G/C]\tchr1\t3967310\tCc01_g02160:282\texon\tNON_SYNONYMOUS_CODING\tatC/atG\tI/M\t37.5%\t4\tGG\tGC\tGC\tGC\n-chr1:3967300\t[C/T]\tchr1\t3967300\tCc01_g02160:292\texon\tNON_SYNONYMOUS_CODING\tGat/Aat\tD/N\t12.5%\t4\tCC\tCC\tCC\tTC\n-chr1:3967295\t[G/A]\tchr1\t3967295\tCc01_g02160:297\texon\tSYNONYMOUS_CODING\tatC/atT\tI/I\t12.5%\t4\tGG\tGG\tGA\tGG\n-chr1:3966909\t[T/A]\tchr1\t3966909\tCc01_g02160:683\texon\tNON_SYNONYMOUS_CODING\ttAt/tTt\tY/F\t12.5%\t4\tTT\tTA\tTT\tTT\n-chr1:3966784\t[G/A]\tchr1\t3966784\tCc01_g02160:808\texon\tNON_SYNONYMOUS_CODING\tCtt/Ttt\tL/F\t16.7%\t3\tGG\tNN\tGA\tGG\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae SNP_density/test-data/result.txt --- a/SNP_density/test-data/result.txt Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,21 +0,0 @@ -Chromosome Position SNPs -chr1 200000 355 -chr1 400000 228 -chr1 600000 63 -chr1 800000 191 -chr1 1000000 88 -chr1 1200000 90 -chr1 1400000 190 -chr1 1600000 267 -chr1 1800000 226 -chr1 2000000 163 -chr1 2200000 357 -chr1 2400000 132 -chr1 2600000 158 -chr1 2800000 104 -chr1 3000000 146 -chr1 3200000 331 -chr1 3400000 181 -chr1 3600000 36 -chr1 3800000 167 -chr1 4000000 110 |
b |
diff -r 420b57c3c185 -r feb40a9a8eae SNP_density/test-data/result_bysample.txt --- a/SNP_density/test-data/result_bysample.txt Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,21 +0,0 @@ -Chromosome BA58 BA59 BD54 -chr1 220 197 225 -chr1 130 119 133 -chr1 43 43 40 -chr1 139 167 141 -chr1 62 72 78 -chr1 35 76 38 -chr1 98 123 82 -chr1 133 205 135 -chr1 94 141 151 -chr1 112 116 117 -chr1 228 238 204 -chr1 86 95 96 -chr1 60 106 82 -chr1 60 74 64 -chr1 106 112 115 -chr1 199 220 228 -chr1 151 94 64 -chr1 18 32 27 -chr1 87 90 100 -chr1 54 75 47 |
b |
diff -r 420b57c3c185 -r feb40a9a8eae VCF2Hapmap/VCF2FastaAndHapmap.pl --- a/VCF2Hapmap/VCF2FastaAndHapmap.pl Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,525 +0,0 @@\n-\n-#!/usr/bin/perl\n-\n-use strict;\n-use Getopt::Long;\n-\n-my $usage = qq~Usage:$0 <args> [<opts>]\n-\n-where <args> are:\n-\n- -v, --vcf <VCF input>\n- -o, --out <Output basename>\n- \n-<opts> are:\n-\n- -r, --reference <Reference fasta file>\n- -g, --gff <GFF input file to create alignments of genes>\n-~;\n-$usage .= "\\n";\n-\n-my ($input,$out,$reference,$gff);\n-\n-\n-\n-GetOptions(\n-\t"vcf=s" => \\$input,\n-\t"out=s" => \\$out,\n-\t"reference=s" => \\$reference,\n-\t"gff=s" => \\$gff\n-);\n-\n-\n-die $usage\n- if ( !$input || !$out);\n- \n-if ($gff && !$reference)\n-{\n-\tdie "You must provide a Fasta reference file when providing GFF annotation\\n";\n-}\n-\n- \n-my %ref_sequences; \n-if ($reference)\n-{\n-\tmy $id;\n-\tmy $sequence = "";\n-\topen(my $R,$reference) or die "cannot open file: $reference";\n-\twhile(<$R>)\n-\t{\n-\t\tmy $line =$_;\n-\t\t$line =~s/\\n//g;\n-\t\t$line =~s/\\r//g;\n-\t\tif ($line =~ />([^\\s]+)/){\n-\t\t\t$ref_sequences{$id} = $sequence;\n-\t\t\t$id=$1;$sequence="";\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\t$sequence .= $line;\n-\t\t}\n-\t}\n-\tclose($R);\n-\t$ref_sequences{$id} = $sequence;\n-}\n-\n-\n-my %chr_of_gene;\n-my %ann;\n-if ($gff)\n-{\n-\topen(my $G,$gff) or die "cannot open file: $gff";\n-\twhile(<$G>)\n-\t{\n-\t\tmy $line =$_;\n-\t\t$line =~s/\\n//g;\n-\t\t$line =~s/\\r//g;\n-\t\tmy @i = split(/\\t/,$line);\n-\t\tmy $chr = $i[0];\n-\t\tmy $feature = $i[2];\n-\t\tmy $strand = $i[6];\n-\t\tmy $start = $i[3];\n-\t\tmy $stop = $i[4];\n-\t\tmy $inf = $i[8];\n-\t\tif ($feature eq \'gene\')\n-\t\t{\n-\t\t\t if ($inf =~/Name=([\\w\\-\\.]+)[;\\s]*/){$inf = $1;}\n-\t\t\t$ann{$inf}{"start"}=$start;\n-\t\t\t$ann{$inf}{"stop"}=$stop;\n-\t\t\t$ann{$inf}{"strand"}=$strand;\n-\t\t\t$chr_of_gene{$inf} = $chr;\n-\t\t}\n-\t}\n-\tclose($G);\n-}\n-\n-\n-\n-my %IUPAC =\n-(\n- \'[A/G]\'=> "R",\n- \'[G/A]\'=> "R",\n- \'[C/T]\'=> "Y",\n- \'[T/C]\'=> "Y",\n- \'[T/G]\'=> "K",\n- \'[G/T]\'=> "K",\n- \'[C/G]\'=> "S",\n- 
\'[G/C]\'=> "S",\n- \'[A/T]\'=> "W",\n- \'[T/A]\'=> "W",\n- \'[A/C]\'=> "M",\n- \'[C/A]\'=> "M",\n- \'[C/A/T]\'=> "H",\n- \'[A/T/C]\'=> "H",\n- \'[A/C/T]\'=> "H",\n- \'[C/T/A]\'=> "H",\n- \'[T/C/A]\'=> "H",\n- \'[T/A/C]\'=> "H",\n- \'[C/A/G]\'=> "V",\n- \'[A/G/C]\'=> "V",\n- \'[A/C/G]\'=> "V",\n- \'[C/G/A]\'=> "V",\n- \'[G/C/A]\'=> "V",\n- \'[G/A/C]\'=> "V",\n- \'[C/T/G]\'=> "B",\n- \'[T/G/C]\'=> "B",\n- \'[T/C/G]\'=> "B",\n- \'[C/G/T]\'=> "B",\n- \'[G/C/T]\'=> "B",\n- \'[G/T/C]\'=> "B",\n- \'[T/A/G]\'=> "D",\n- \'[A/G/T]\'=> "D",\n- \'[A/T/G]\'=> "D",\n- \'[T/G/A]\'=> "D",\n- \'[G/T/A]\'=> "D",\n- \'[G/A/T]\'=> "D",\n-);\n-\n-my %snps_of_gene;\n-my %snps_of_gene2;\n-my %indiv_order;\n-my $indiv_list;\n-my %genotyping_infos;\n-my $num_line = 0;\n-my $genename_rank_in_snpeff = 4;\n-\n-my $find_annotations = `grep -c \'EFF=\' $input`;\n-\n-open(my $HAPMAP,">$out.hapmap");\n-print $HAPMAP "rs#\talleles\tchrom\tpos\tgene\tfeature\teffect\tcodon_change\tamino_acid_change\tMAF\tmissing_data";\n-open(my $VCF,$input);\n-while(<$VCF>)\n-{\n-\tmy $line = $_;\n-\tchomp($line);\n-\tmy @infos = split(/\\t/,$line);\n-\t\n-\tif (/^##INFO=\\<ID=EFF/ && /Amino_Acid_length \\| Gene_Name \\| Transcript_BioType \\| Gene_Coding/)\n-\t{\n-\t\t$genename_rank_in_snpeff = 8;\n-\t}\n-\n-\tif (scalar @infos > 9)\n-\t{\n-\t\tif (/#CHROM/)\n-\t\t{\n-\t\t\tfor (my $j=9;$j<=$#infos;$j++)\n-\t\t\t{\n-\t\t\t\tmy $individu = $infos[$j];\n-\t\t\t\t$indiv_list .= "\t$individu";\n-\t\t\t\t$indiv_order{$j} = $individu;\n-\t\t\t}\n-\t\t\tprint $HAPMAP "$indiv_list\\n";\n-\t\t}\n-\t\telsif (!/^#/)\n-\t\t{\n-\t\t\t$num_line++;\n-\n-\t\t\tmy $chromosome = $infos[0];\n-\t\t\tmy $chromosome_position = $infos[1];\n-\t\t\tmy $ref_allele = $infos[3];\n-\t\t\tmy $alt_allele = $infos[4];\n- \t\n-\t\t\tif ($ref_allele =~/\\w\\w+/)\n-\t\t\t{\n-\t\t\t\t$ref_allele = "A";\n-\t\t\t\t$alt_allele = "T";\n-\t\t\t}\n-\t\t\telsif ($alt_allele =~/\\w\\w+/)\n-\t\t\t{\n-\t\t\t\t$ref_allele = "T";\n-\t\t\t\t$alt_allele = 
"A";\n-\t\t\t}\n-\t\t\t\n-\t\t\tmy $info = $infos[7];\n-\t\t\tmy $is_in_exon = "#";\n-\t\t\tmy $is_synonyme = "#";\n-\t\t\tmy $gene;\n-\t\t\tif ($find_annotations > 1)\n-\t\t\t{\n-'..b'\n-\t\t\tmy $stop = $ann{$seq}{"stop"};\n-\t\t\tmy $strand = $ann{$seq}{"strand"};\n-\t\t\tmy $genelength = $stop - $start+1;\n-\t\t\tmy $chr = $chr_of_gene{$seq};\n-\t\tmy $refseq = substr($ref_sequences{$chr},$start-1,$genelength);\n-\t\tif ($strand eq \'-\')\n-\t\t{\n-\t\t\t$refseq =~ tr /atcgATCG/tagcTAGC/; $refseq = reverse($refseq);\n-\t\t}\t\n-\t\t#print "$seq $chr $start $stop $refseq \\n";\n-\t\tmy $previous = 0;\n-\t\tforeach my $pos(sort {$a<=>$b} keys(%hashreal))\n-\t\t{\n-\t\t\tmy $length = $pos - $previous - 1;\n-\t\t\t$flanking5{$pos} = substr($refseq,$previous,$length);\n-\t\t\t$previous = $pos;\n-\t\t}\n-\t\tmy $length = length($refseq) - $previous;\n-\t\tmy $flanking3 = substr($refseq,$previous,$length);\n-\t\tforeach my $ind(@individuals_list)\n-\t\t{\n-\t\t\tmy $nb_missing_data_for_this_individual = 0;\n-\t\t\tif ($ind)\n-\t\t\t{\n- my $alignment_for_ind = "";\n- my $seq_without_underscore = $seq;\n- $seq_without_underscore =~s/_//g;\n- $alignment_for_ind .= ">$seq_without_underscore" . "_$ind" . "_1\\n";\n- foreach my $pos(sort {$a<=>$b} keys(%hashreal))\n- {\n- $alignment_for_ind .= $flanking5{$pos};\n- my $geno = $snps_of_gene{$seq}{$pos}{$ind};\n- $geno =~s/N/?/g;\n- if ($geno =~/\\?/){$nb_missing_data_for_this_individual++;}\n- my @alleles = split("",$geno);\n- $alignment_for_ind .= $alleles[0];\n- if ($alleles[0] eq $alleles[1])\n- {\n- $alignments_ind{$ind} .= $alleles[1];\n- }\n- else\n- {\n- my $snp_type = "[" . $alleles[0] . "/" . $alleles[1] . "]";\n- $alignments_ind{$ind} .= $IUPAC{$snp_type};\n- }\n- }\n- $alignment_for_ind .= $flanking3;\n-\t\t\t\t\t\t$alignment_for_ind .= "\\n";\n-\t\t\t\n-\t\t\t\n- $alignment_for_ind .= ">$seq_without_underscore" . "_$ind" . 
"_2\\n";\n- foreach my $pos(sort {$a<=>$b} keys(%hashreal))\n- {\n- $alignment_for_ind .= $flanking5{$pos};\n- my $geno = $snps_of_gene{$seq}{$pos}{$ind};\n- $geno =~s/N/?/g;\n- my @alleles = split("",$geno);\n- $alignment_for_ind .= $alleles[1];\n- }\n- $alignment_for_ind .= $flanking3;\n-\t\t\t\t\t\t$alignment_for_ind .= "\\n";\n- if (keys(%hashreal) != $nb_missing_data_for_this_individual)\n- {\n- print $ALIGN_EGGLIB $alignment_for_ind;\n- }\n-\t\t\t}\n-\t\t}\n-\t}\n-}\n-close($ALIGN_EGGLIB);\n-\n-\n-\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae VCF2Hapmap/vcf2FastaAndHapmap.sh --- a/VCF2Hapmap/vcf2FastaAndHapmap.sh Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,45 +0,0 @@ -#!/bin/bash - -tool_path=$(dirname $0) - - - -filein=$1 -fileout_label=$2 -fileout=$3 -option=$4 - -option_text='' - - -if [ "$option" != "none" ] -then fileout_seq=$5 - fileout_fa1=$6 - filefasta=$7 - if [ "$option" == "fasta_gff" ] - then filegff=$8 - fi -fi - -if [ "$option" == "fasta" ] -then option_text="--reference $filefasta" -fi - -if [ "$option" == "fasta_gff" ] -then option_text="--reference $filefasta --gff $filegff" -fi - - -perl $tool_path/VCF2FastaAndHapmap.pl --vcf $filein --out $fileout_label $option_text - - -cp $fileout_label.hapmap $fileout ; rm $fileout_label.hapmap - -if [ "$option" == "fasta_gff" ] -then cp $fileout_label.flanking.txt $fileout_seq ; rm $fileout_label.flanking.txt ; cp $fileout_label.gene_alignment.fas $fileout_fa1 ; rm $fileout_label.gene_alignment.fas ; -fi - -if [ "$option" == "fasta" ] -then cp $fileout_label.flanking.txt $fileout_seq ; rm $fileout_label.flanking.txt ; -fi - |
b |
diff -r 420b57c3c185 -r feb40a9a8eae VCF2Hapmap/vcf2FastaAndHapmap.xml --- a/VCF2Hapmap/vcf2FastaAndHapmap.xml Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,251 +0,0 @@\n-<tool id="sniplay_vcf2fastaandhapmap" name="VCF to Hapmap" version="1.1.0">\n- \n- <!-- [REQUIRED] Tool description displayed after the tool name -->\n- <description> Convert VCF to Hapmap </description>\n- \n- <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->\n- <requirements>\n- <requirement type="binary">perl</requirement>\n- </requirements>\n- \n- <!-- [OPTIONAL] Command to be executed to get the tool\'s version string -->\n- <version_command>\n-<!--\n- tool_binary -v\n--->\n- </version_command>\n- \n- <!-- [REQUIRED] The command to execute -->\n- <command interpreter="bash">\n-\tvcf2FastaAndHapmap.sh $filein $fileout_label $fileout $optional.file_opt \n-\t#if str( $optional.file_opt ) != "none":\n-\t\t$fileout_seq $fileout_fa1 $filefasta \n-\t\t#if str( $optional.file_opt ) == "fasta_gff":\n-\t\t$filegff\n-\t\t#end if\n-\t#end if\n- </command>\n- \n- <!-- [REQUIRED] Input files and tool parameters -->\n- <inputs>\n-\t<param name="filein" type="data" format="vcf" optional="false" label="VCF input" />\n-\t<param name="fileout_label" type="text" value="input" optional="false" label="Output file basename"/>\n-\t<conditional name="optional" >\n-\t <param name="file_opt" type="select" label="Optional files" >\n-\t \t<option value="none" selected="true">No</option>\n-\t \t<option value="fasta">Fasta</option>\n- \t<option value="fasta_gff">Fasta and GFF</option>\n- </param>\n-\t <when value="none" />\n- <when value="fasta">\n-\t\t<param name="filefasta" type="data" format="fasta" optional="false" label="Fasta file input" />\n- </when>\n-\t <when value="fasta_gff">\n-\t\t<param name="filefasta" type="data" format="fasta" optional="false" label="Fasta file input" />\n-\t\t<param name="filegff" type="data" format="gff" optional="false" label="GFF file input" help="VCF file must be annotated" />\n- </when>\n- </conditional>\n- </inputs>\n- \n- <!-- [REQUIRED] Output files -->\n- <outputs>\n-\t<data name="fileout" 
format="txt" label="${fileout_label}.hapmap" />\n-\t<data name="fileout_seq" format="txt" label="${fileout_label}.flanking.txt">\n-\t\t<filter>(optional[\'file_opt\'] != \'none\')</filter>\n-\t</data>\n-\t<data name="fileout_fa1" format="fasta" label="${fileout_label}.gene_alignment.fas">\n-\t\t<filter>(optional[\'file_opt\'] == \'fasta_gff\')</filter>\n-\t</data>\n- </outputs>\n- \n- <!-- [STRONGLY RECOMMANDED] Exit code rules -->\n- <stdio>\n- <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->\n- <exit_code range="1:" level="fatal" />\n- </stdio>\n- \n- <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->\n- <tests>\n- <!-- [HELP] Test files have to be in the ~/test-data directory -->\n- <test>\n- <param name="filein" value="sample.vcf" />\n- <param name="otpional.file_opt" value="none" />\n- <output name="fileout" file="result1.hapmap" />\n- </test>\n-\t<test>\n- <param name="filein" value="sample.vcf" />\n-\t <param name="otpional.file_opt" value="fasta" />\n- <param name="filefasta" value="reference.fa" />\n- <output name="fileout" file="result2.hapmap" />\n- <output name="fileout_seq" file="result2.flanking.txt" />\n- <output name="fileout_fa1" file="result2.gene_alignment.fas" />\n- </test>\n- </tests>\n- \n- <!-- [OPTIONAL] Help displayed in Galaxy -->\n- <help>\n-\n-\n-.. class:: infomark\n-\n-**Authors** Dereeper Alexis (alexis.dereeper@ird.fr), IRD, South Green platform\n-\n- | **Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1).\n-\n-.. class:: infomark\n-\n-**Galaxy integration** Andres Gwendoline, Institut Fran\xc3\xa7ais de Bioinformatique. \n-\n-.. 
class:: infomark\n-\n-**Support** For any questions, please send an e-mail '..b's fasta file and GFF file.\n-\n-------------\n-Output files\n-------------\n-\n-Hapmap file \n-\tHapmap converted file\n-\n-Additional files \n-\tIf you add fasta and/or GFF file as reference, you obtain 3 more files : One with flanking sequence and a fasta file\n-\n----------------------------------------------------\n-\n----------------\n-Working example\n----------------\n-\n-Input files\n-===========\n-\n-VCF file\n----------\n-\n-::\n-\n-\t#fileformat=VCFv4.1\n-\t#FILTER=<ID=LowQual,Description="Low quality">\n-\t#FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n-\t[...]\n-\tCHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tCATB1\n-\tchr1\t2209\t.\tG\tT\t213.84\t.\tAC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)\tGT:AD:DP:GQ:PL\t1/1:0,7:7:18:242,18,0\n-\n-Fasta file\n-----------\n-\n-\n-::\n-\n-\t>chr1\n-\tCAGTAAAGTTTGCAAAGAGATTCTGGCAAAGTT\n-\n-Parameters\n-==========\n-\n-Output name -> input\n-\n-Optional files -> Fasta\n-\n-\n-Output files\n-============\n-\n-input.hapmap\n-------------\n-\n-::\n-\n- 
rs#\talleles\tchrom\tpos\tstrand\tassembly#\tcenter\tprotLSID\tassayLSID\tpanelLSID\tQCcode\tCATB1\n-\tchr1:2209\tG/T\tchr1\t2209\t+\tNA\tNA\tNA\tNA\tNA\tNA\tGG\tTT\n-\tchr1:2232\tA/C\tchr1\t2232\t+\tNA\tNA\tNA\tNA\tNA\tNA\tAA\tCC\n-\n-input.flanking.txt\n-------------------\n-\n-::\n-\n-\tchr1-2209,GTCGCATCTGCAGCATATAGCCAACCTTCAACTTGCAGCTAAAACTCATCATCTCTTTCT[G/T]ACTGGCTTAACGATATTGTAAGMTGACTCAGAGGCCCACTTTTTTTTTAAAAATYAGCCT,0,0,0,Project_name,0,diploid,Other,Forward\n-\tchr1-2232,ACCTTCAACTTGCAGCTAAAACTCATCATCTCTTTCTKACTGGCTTAACGATATTGTAAG[A/C]TGACTCAGAGGCCCACTTTTTTTTTAAAAATYAGCCTGTCCCCAGCCGTGCTGACTGGGC,0,0,0,Project_name,0,diploid,Other,Forward\n-\n-input.gene_alignment.fas\n-------------------------\n-\n-::\n-\n-\t>chr1_CATB1_1\n-\tTCCTCAAACTTTCTTCAGCGCCTATGAATACAGCGTGCTATAGTTACGTGGGGCGTTT\n-\n-\t\n- </help>\n-\n- <citations>\n- <!-- [HELP] As DOI or BibTex entry -->\n- \t<citation type="bibtex">@article{Dereeper03062015,\n-author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-Fran\xc3\xa7ois and Ruiz, Manuel}, \n-title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations},\n-year = {2015}, \n-doi = {10.1093/nar/gkv351}, \n-abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. 
Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, \n-URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, \n-eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, \n-journal = {Nucleic Acids Research} \n-}\n-\n- \t}</citation>\n-\n- </citations>\n- \n-</tool>\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae check_gwas_inputs/CheckGWASInputs.pl --- a/check_gwas_inputs/CheckGWASInputs.pl Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,184 +0,0 @@ -#!/usr/bin/perl - -use strict; -use Switch; -use Getopt::Long; - -my $usage = qq~Usage:$0 <args> [<opts>] -where <args> are: - -h, --hapmap <Hapmap input file> - -t, --trait <Trait input file> - -o, --out <Output base name> -~; -$usage .= "\n"; - -my ($hapmap,$trait,$out); - - -GetOptions( - "trait=s" => \$trait, - "out=s" => \$out, - "hapmap=s" => \$hapmap -); - - -die $usage - if ( !$trait || !$out || !$hapmap); - -my %inds; - -####################################### -# get individuals in trait file -####################################### -my %traits; -my $head_trait = `head -1 $trait`; -open(my $T,$trait); -<$T>; -while(<$T>) -{ - my @infos = split(/\t/,$_); - my $ind = $infos[0]; - $inds{$ind}++; - $traits{$ind} = $_; -} -close($T); -my $nb_ind_trait = scalar keys(%traits); - -####################################### -# get individuals in hapmap file -####################################### -my $line_ind = `head -1 $hapmap`; -chomp($line_ind); -my @infos = split(/\t/,$line_ind); -for (my $i = 11; $i <= $#infos; $i++) -{ - my $ind = $infos[$i]; - $inds{$ind}++; -} -my $nb_ind_hapmap = scalar @infos - 11; - -################################################################# -# create trait output by keeping individuals found in both files -################################################################# -open(my $O,">$out.trait"); -print $O $head_trait; -my $nb_common = 0; -foreach my $ind(keys(%inds)) -{ - my $nb_found = $inds{$ind}; - if ($nb_found == 2) - { - $nb_common++; - print $O $traits{$ind}; - } -} -close($O); - - -##################################################################### -# create hapmap output after keeping individuals found in both files -# and removing monomorphic positions -##################################################################### -open(my $O2,">$out.hapmap"); -my $numline = 0; -my %genotypes; -my %columns_to_keep; -my $nb_monomorphic = 0; -my $not_biallelic = 0; -my $diff_variation = 0; -open(my 
$H,$hapmap); -while(<$H>) -{ - $numline++; - my $line = $_; - $line =~s/\n//g; - $line =~s/\r//g; - my @infos = split(/\t/,$line); - if ($numline == 1) - { - my @titles; - for (my $i = 0; $i <= 10; $i++) - { - my $title = $infos[$i]; - push(@titles,$title); - } - print $O2 join("\t",@titles); - for (my $i = 11; $i <= $#infos; $i++) - { - my $ind = $infos[$i]; - my $nb_found = $inds{$ind}; - if ($nb_found == 2) - { - print $O2 " $ind"; - $columns_to_keep{$i} = 1; - } - } - print $O2 "\n"; - } - else - { - my $to_be_printed = ""; - my $variation = $infos[1]; - for (my $i = 0; $i <= 10; $i++) - { - my $title = $infos[$i]; - $to_be_printed .= "$title "; - } - my %letters; - for (my $i = 11; $i <= $#infos; $i++) - { - if ($columns_to_keep{$i}) - { - my $genotype = $infos[$i]; - if ($genotype ne 'NN') - { - my ($allele1,$allele2) = split(//,$genotype); - $letters{$allele1}=1; - $letters{$allele2}=1; - } - $to_be_printed .= "$genotype "; - } - } - chop($to_be_printed); - - my $variation_obs = join("/",sort keys(%letters)); - - # print only if polymorphic - if (scalar keys(%letters) < 2) - { - $nb_monomorphic++; - } - elsif (scalar keys(%letters) > 2) - { - $not_biallelic++; - } - else - { - if ($variation ne $variation_obs) - { - $to_be_printed =~s/$variation/$variation_obs/; - $diff_variation++; - } - - print $O2 $to_be_printed . 
"\n"; - } - } -} -close($H); -close($O2); - -print "==============================================\n"; -print "Individuals\n"; -print "==============================================\n"; -print "Individuals in hapmap file: $nb_ind_hapmap\n"; -print "Individuals in trait file: $nb_ind_trait\n"; -print "Individuals found in both files: $nb_common\n"; -print "==============================================\n"; -print "Markers\n"; -print "==============================================\n"; -print "Discarded markers:\n"; -print "Monomorphic: $nb_monomorphic\n"; -print "Not biallelic: $not_biallelic\n"; -print "Modified markers:\n"; -print "Difference in variation: $diff_variation\n"; - |
b |
diff -r 420b57c3c185 -r feb40a9a8eae check_gwas_inputs/CheckGWASInputs.sh --- a/check_gwas_inputs/CheckGWASInputs.sh Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,17 +0,0 @@ -#!/bin/bash -hapmap=$1 -trait=$2 -out_hapmap=$3 -out_trait=$4 -stats=$5 - -directory=`dirname $0` -mkdir tmpdir$$ -#cp -rf $input tmpdir$$/input - -perl $directory/CheckGWASInputs.pl -h $hapmap -t $trait -o tmpdir$$/out >>$stats 2>&1 - -mv tmpdir$$/out.hapmap $out_hapmap -mv tmpdir$$/out.trait $out_trait - - |
b |
diff -r 420b57c3c185 -r feb40a9a8eae check_gwas_inputs/CheckGWASInputs.xml --- a/check_gwas_inputs/CheckGWASInputs.xml Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,184 +0,0 @@ -<tool id="check_GWAS_inputs" name="Check GWAS Inputs" version="1.0"> - <description>checks concordance between input files for GWAS analysis</description> - <command interpreter="bash">./CheckGWASInputs.sh $hapmap $trait $out_hapmap $out_trait $stats - </command> - <inputs> - <param format="text" name="hapmap" type="data" label="Allelic file in Hapmap format" help="Allelic file in Hapmap format"/> - <param format="text" name="trait" type="data" label="Trait file" help="Phenotypic file"/> - </inputs> - <outputs> - <data format="txt" name="out_hapmap" label="Hapmap output"/> - <data format="txt" name="out_trait" label="Trait output"/> - <data format="txt" name="stats" label="Logfile and statistics"/> - </outputs> - <!-- [STRONGLY RECOMMANDED] Exit code rules --> - <stdio> - <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> - <exit_code range="1:" level="fatal" /> - </stdio> - - <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> - <tests> - <!-- [HELP] Test files have to be in the ~/test-data directory --> - <!-- [HELP] Multiple tests can be defined with different parameters --> -<!-- - <test> - </test> ---> - </tests> - <help> - - <![CDATA[ - - -.. class:: infomark - -**Authors** South Green - - | **Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). - -.. class:: infomark - -**Galaxy integration** South Green. - ---------------------------------------------------- - - -=============== -CheckGWASInputs -=============== - ------------ -Description ------------ - - | CheckGWASInputs checks concordance between input files for GWAS analysis. 
- - ------------------ -Workflow position ------------------ - -**Upstream tool** - -=============== ====================== =========== -Name output file(s) format -=============== ====================== =========== -VCF to Hapmap Hapmap file hapmap -=============== ====================== =========== - - - - ----------- -Input file ----------- - -Hapmap file - Allelic file in Hapmap format - -Trait file - Phenotypic file - ------------- -Output files ------------- - -Hapmap output - -Trait output - -Logfile and statistics - ---------------------------------------------------- - ---------------- -Working example ---------------- - -Input files -=========== - -Hapmap file ------------ - -:: - - rs# alleles chrom pos strand assembly# center protLSID assayLSID panel QCcode Ind1 Ind2 - SNP1 A/T 1 3102 + assembly NA NA NA speciesname NA AA AA AA - SNP2 A/T 1 4648 + assembly NA NA NA speciesname NA AA AA AA - -Trait file ----------- - -:: - - <Trait> Test - Ind1 -2.9985936006411 - Ind2 -2.68669426456267 - -Output files -============ - -Hapmap output -------------- - -:: - - rs# alleles chrom pos strand assembly# center protLSID assayLSID panel QCcode Ind1 Ind2 Ind3 Ind4 - SNP1 A/T 1 3102 + assembly NA NA NA speciesname NA AA AA AA AA AA - SNP2 A/T 1 4648 + assembly NA NA NA speciesname NA AA AA AA AA TT - - -Trait output ------------- - -:: - - <Trait> Test - Ind429 -26.2142525264157 - Ind373 12.0306115988504 - Ind81 1.98118654229534 - -Logfile and statistics ----------------------- - -:: - - - ============================================== - Individuals - ============================================== - Individuals in hapmap file: 500 - Individuals in trait file: 500 - Individuals found in both files: 500 - ============================================== - Markers - ============================================== - Discarded markers: - Monomorphic: 0 - Not biallelic: 0 - Modified markers: - Difference in variation: 0 - - ]]> - - - </help> - <citations> - <!-- [HELP] As DOI 
or BibTex entry --> - <citation type="bibtex">@article{Dereeper03062015, -author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, -title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, -year = {2015}, -doi = {10.1093/nar/gkv351}, -abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, -URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, -eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, -journal = {Nucleic Acids Research} -} - - </citation> - - </citations> -</tool> |
b |
diff -r 420b57c3c185 -r feb40a9a8eae check_gwas_inputs/CheckGWASInputs.xml~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/check_gwas_inputs/CheckGWASInputs.xml~ Fri Jul 10 10:38:11 2015 -0400 |
[ |
@@ -0,0 +1,184 @@ +<tool id="check_GWAS_inputs" name="Check GWAS Inputs" version="1.0"> + <description>checks concordance between input files for GWAS analysis</description> + <command interpreter="bash">./CheckGWASInputs.sh $hapmap $trait $out_hapmap $out_trait $stats + </command> + <inputs> + <param format="text" name="hapmap" type="data" label="Allelic file in Hapmap format" help="Allelic file in Hapmap format"/> + <param format="text" name="trait" type="data" label="Trait file" help="Phenotypic file"/> + </inputs> + <outputs> + <data format="txt" name="out_hapmap" label="Hapmap output"/> + <data format="txt" name="out_trait" label="Trait output"/> + <data format="txt" name="stats" label="Logfile and statistics"/> + </outputs> + <!-- [STRONGLY RECOMMANDED] Exit code rules --> + <stdio> + <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> + <exit_code range="1:" level="fatal" /> + </stdio> + + <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> + <tests> + <!-- [HELP] Test files have to be in the ~/test-data directory --> + <!-- [HELP] Multiple tests can be defined with different parameters --> +<!-- + <test> + </test> +--> + </tests> + <help> + + <![CDATA[ + + +.. class:: infomark + +**Authors** South Green + + | **Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). + +.. class:: infomark + +**Galaxy integration** South Green. + +--------------------------------------------------- + + +=============== +CheckGWASInputs +=============== + +----------- +Description +----------- + + | CheckGWASInputs checks concordance between input files for GWAS analysis. 
+ + +----------------- +Workflow position +----------------- + +**Upstream tool** + +=============== ====================== =========== +Name output file(s) format +=============== ====================== =========== +VCF to Hapmap Hapmap file hapmap +=============== ====================== =========== + + + + +---------- +Input file +---------- + +Hapmap file + Allelic file in Hapmap format + +Trait file + Phenotypic file + +------------ +Output files +------------ + +Hapmap output + +Trait output + +Logfile and statistics + +--------------------------------------------------- + +--------------- +Working example +--------------- + +Input files +=========== + +Hapmap file +----------- + +:: + + rs# alleles chrom pos strand assembly# center protLSID assayLSID panel QCcode Ind1 Ind2 + SNP1 A/T 1 3102 + assembly NA NA NA speciesname NA AA AA AA + SNP2 A/T 1 4648 + assembly NA NA NA speciesname NA AA AA AA + +Trait file +---------- + +:: + + <Trait> Test + Ind1 -2.9985936006411 + Ind2 -2.68669426456267 + +Output files +============ + +Hapmap output +------------- + +:: + + rs# alleles chrom pos strand assembly# center protLSID assayLSID panel QCcode Ind1 Ind2 Ind3 Ind4 + SNP1 A/T 1 3102 + assembly NA NA NA speciesname NA AA AA AA AA AA + SNP2 A/T 1 4648 + assembly NA NA NA speciesname NA AA AA AA AA TT + + +Trait output +------------ + +:: + + <Trait> Test + Ind429 -26.2142525264157 + Ind373 12.0306115988504 + Ind81 1.98118654229534 + +Logfile and statistics +---------------------- + +:: + + + ============================================== + Individuals + ============================================== + Individuals in hapmap file: 500 + Individuals in trait file: 500 + Individuals found in both files: 500 + ============================================== + Markers + ============================================== + Discarded markers: + Monomorphic: 0 + Not biallelic: 0 + Modified markers: + Difference in variation: 0 + + ]]> + + + </help> + <citations> + <!-- [HELP] As DOI 
or BibTex entry --> + <citation type="bibtex">@article{Dereeper03062015, +author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, +title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, +year = {2015}, +doi = {10.1093/nar/gkv351}, +abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, +URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, +eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, +journal = {Nucleic Acids Research} +} + + </citation> + + </citations> +</tool> |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/CalculateDiversityIndexes.pl --- a/egglib/CalculateDiversityIndexes.pl Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,81 +0,0 @@ -#!/usr/bin/perl - -use strict; -use Switch; -use Getopt::Long; -use Bio::SeqIO; - - -my $usage = qq~Usage:$0 <args> [<opts>] -where <args> are: - -i, --input <FASTA input> - -o, --output <output filename> - -d, --directory <directory of egglib package> -~; -$usage .= "\n"; - -my ($infile,$outfile,$dir_exe); - - -GetOptions( - "input=s" => \$infile, - "output=s" => \$outfile, - "directory=s"=> \$dir_exe -); - - -die $usage - if ( !$infile || !$outfile || !$dir_exe); - - -my $EGGSTATS_EXE = "$dir_exe/egglib-2.1.5/bin/eggstats"; - -my %gene_alignments; -my $in = Bio::SeqIO->new(-file => $infile , '-format' => 'Fasta'); -while ( my $seq = $in->next_seq() ) -{ - my $id = $seq -> id(); - my $sequence = $seq -> seq(); - my ($gene,$ind,$num_allele) = split("_",$id); - $gene_alignments{$gene}.= ">$id\n$sequence\n"; -} - -open(OUT,">$outfile"); -foreach my $gene(keys(%gene_alignments)) -{ - open(F,">$gene.egglib_input.fa"); - print F $gene_alignments{$gene}; - close(F); - - my $results_egglib = `$EGGSTATS_EXE $gene.egglib_input.fa`; - - # parse Seqlib output - if ($results_egglib) - { - my %egglig_stats; - my @eggstats = split(/^/,$results_egglib); - foreach my $eggstat(@eggstats) - { - my ($desc,$value) = split(/: /,$eggstat); - chomp($value); - $egglig_stats{$desc} = $value; - } - print OUT "$gene;"; - print OUT $egglig_stats{"Total number of sequences"} . ";"; - print OUT $egglig_stats{"Total number of sites"} . ";"; - print OUT $egglig_stats{"Number of analyzed sites"} . ";"; - print OUT $egglig_stats{"S"} . ";"; - print OUT $egglig_stats{"thetaW"} . ";"; - print OUT $egglig_stats{"Pi"} . ";"; - print OUT $egglig_stats{"D"} . ";"; - print OUT $egglig_stats{"number of haplotypes"} . ";"; - print OUT $egglig_stats{"haplotypes diversity"} . ";"; - print OUT $egglig_stats{"Fay and Wu H"} . ";"; - print OUT $egglig_stats{"Fst"} . ";"; - print OUT $egglig_stats{"Snn"} . ";"; - print OUT "\n"; - unlink("$gene.egglib_input.fa"); - } -} -close(OUT); - |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/CalculateDiversityIndexes.sh --- a/egglib/CalculateDiversityIndexes.sh Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,11 +0,0 @@ -#!/bin/bash -input=$1 -output=$2 -log=$3 - -directory=`dirname $0` - -perl $directory/CalculateDiversityIndexes.pl -i $input -o $output -d $directory >>$log 2>&1 - - - |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/CalculateDiversityIndexes.xml --- a/egglib/CalculateDiversityIndexes.xml Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,175 +0,0 @@ -<tool id="calculate_diversity" name="Diversity by gene" version="2.1.6"> - <description>calculates various diversity indexes with EggLib.</description> - <command interpreter="bash">./CalculateDiversityIndexes.sh $input $output $log - </command> - <inputs> - <param format="fasta" name="input" type="data" label="Fasta alignment" help="..."/> - </inputs> - <outputs> - <data format="txt" name="output" label="Diversity"/> - <data format="txt" name="log" label="Logfile"/> - </outputs> - <!-- [STRONGLY RECOMMANDED] Exit code rules --> - <stdio> - <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> - <exit_code range="1:" level="fatal" /> - </stdio> - - <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> - <tests> - <!-- [HELP] Test files have to be in the ~/test-data directory --> - <!-- [HELP] Multiple tests can be defined with different parameters --> -<!-- - <test> - </test> ---> - </tests> - <help> - - - -.. class:: infomark - -**Authors** EggLib_ - -.. _EggLib: http://egglib.sourceforge.net/ - - | "EggLib: processing, analysis and simulation tools for population genetics and genomics.", **De Mita S. and M. Siol.**, BMC Genet. 2012. 13:27. - -.. class:: infomark - -**Galaxy integration** South Green. - -.. class:: infomark - -**Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). - ---------------------------------------------------- - - - -================== -Diversity by genes -================== - ------------ -Description ------------ - - | Provides various diversity indexes using EggLib library. - | For further informations, please visite the EggLib website_. - -.. 
_website: http://egglib.sourceforge.net/ - - ------------------ -Workflow position ------------------ - -**Upstream tool** - -=============== ====================== =========== -Name output file(s) format -=============== ====================== =========== -VCF to Hapmap Fasta alignment fasta -=============== ====================== =========== - - - ----------- -Input file ----------- - -Fasta file - Fasta alignment - - - ------------- -Output files ------------- - -Diversity - -Log file - - ------------- -Dependencies ------------- -EggLib - version 2.1.5 - ---------------------------------------------------- - ---------------- -Working example ---------------- - -Input files -=========== - -Fasta file ----------- - -:: - - >LOCOs11g09160_AZUCENA_1 - ATGTGGCGGGCGAGGCGGTGGCCGTGGCCGTGGCGGTCGCAGCGGCGCGCGGCGAGGATGCTCTCGTCGGGTGAGCCAGCGGCGGGGCGGCGGAGGGTGGCGGCGCTGTGGGGGAACGGGGA - CTACGGGCGGCTGGGGATGGGGGCGCTGGAGTCGCGGTGGAGCCCCACGGCGTGCCCCTTCTTCCTCACCGGCCGCCCCGGCGACGACGACGACGACCCGCCCGCCTCCCTCGCCTGCGGCG - GCGCCCACACCCTCTTCCTCACCGGTACTCCTGAATTGAAATCGCCTGCTTGTGTAATTACTCGAGCGAGAGAGAGATTGTGTTGAATCGAGATTAATGTGGGAGTATGTGATTTTTGGCCT - GCTGGTTTTGCTCAGAGAGCGGGCGCGTGTTCGCCGCGGGTCTCAACGACTTCGGGCAGCTCGGGATAGGCTCCTCCGTGACTCATTCCCTGGTACTGAGCTTCTTGTACATCATGCCTCCA - TGTGAAATTTTCATCTACATTGTGAGCCAGCCTACTTTTACACAGTAAGCGAAAGCTGGCTGGACATATCAGAGTTGCAATGGGGATTGACCAAATCAATTCTGACTCCTGTTACATGTTGC - >LOCOs11g09160_AZUCENA_2 - ATGAGGCGGGCGAGGCGGTGGCCGTGGCCGTGGCGGTCGCAGCGGCGAGCGGCGAGGATGCTCTCGTCGGGTGAGCCAGCGGCGGGGCGGCGGAGGGTGGCGGCGCTGTGGGGGAACGGGGA - CTACGGGCGGCTGGGGATGGGGGCGCTGGAGTCGCGGTGGAGCCCCACGGCGTGCCCCTTCTTCCTCACCGGCCGCCCCGGCGACGACGACGACGACCCGCCCGCCTCCCTCGCCTGCGGCG - GCGCCCACACCCTCTTCCTCACCGGTACTCCTGAATTGAAATCGCCAGCTTGTGTAATTACTCGAGCGAGAGAGAGATTGTGTTGAATCGAGATTAATGTGGGAGTATGTGATTTTTGGCCT - GCTGGTTTTGCTCAGAGAGCGGGCGCGTGTTCGCCGCGGGTCTCAACGACTTCGGGCAGCTCGGGATAGGCTCCTCCGTGACTCATTCCCTGGTACTGAGCTTCTTGTACATCATGCCTCCA - 
TGTGAAATTTTCATCTACATTGTGAGCCAGCCTACTTTTACACAGTAAGCGAAAGCTGGCTGGACATATCAGAGTTGCAATGGGGATTGACCAAATCAATTCTGACTCCTGTTACATGTTGC - >LOCOs11g09160_BULUPANDAK_1 - ATGTGGCGGGCGAGGCGGTGGCCGTGGCCGTGGCGGTCGCAGCGGCGCGCGGCGAGGATGCTCTCGTCGGGTGAGCCAGCGGCGGGGCGGCGGAGGGTGGCGGCGCTGTGGGGGAACGGGGA - CTACGGGCGGCTGGGGATGGGGGCGCTGGAGTCGCGGTGGAGCCCCACGGCGTGCCCCTTCTTCCTCACCGGCCGCCCCGGCGACGACGACGACGACCCGCCCGCCTCCCTCGCCTGCGGCG - GCGCCCACACCCTCTTCCTCACCGGTACTCCTGAATTGAAATCGCCTGCTTGTGTAATTACTCGAGCGAGAGAGAGATTGTGTTGAATCGAGATTAATGTGGGAGTATGTGATTTTTGGCCT - GCTGGTTTTGCTCAGAGAGCGGGCGCGTGTTCGCCGCGGGTCTCAACGACTTCGGGCAGCTCGGGATAGGCTCCTCCGTGACTCATTCCCTGGTACTGAGCTTCTTGTACATCATGCCTCCA - TGTGAAATTTTCATCTACATTGTGAGCCAGCCTACTTTTACACAGTAAGCGAAAGCTGGCTGGACATATCAGAGTTGCAATGGGGATTGACCAAATCAATTCTGACTCCTGTTACATGTTGC - >LOCOs11g09160_BULUPANDAK_2 - ATGAGGCGGGCGAGGCGGTGGCCGTGGCCGTGGCGGTCGCAGCGGCGAGCGGCGAGGATGCTCTCGTCGGGTGAGCCAGCGGCGGGGCGGCGGAGGGTGGCGGCGCTGTGGGGGAACGGGGA - CTACGGGCGGCTGGGGATGGGGGCGCTGGAGTCGCGGTGGAGCCCCACGGCGTGCCCCTTCTTCCTCACCGGCCGCCCCGGCGACGACGACGACGACCCGCCCGCCTCCCTCGCCTGCGGCG - GCGCCCACACCCTCTTCCTCACCGGTACTCCTGAATTGAAATCGCCAGCTTGTGTAATTACTCGAGCGAGAGAGAGATTGTGTTGAATCGAGATTAATGTGGGAGTATGTGATTTTTGGCCT - GCTGGTTTTGCTCAGAGAGCGGGCGCGTGTTCGCCGCGGGTCTCAACGACTTCGGGCAGCTCGGGATAGGCTCCTCCGTGACTCATTCCCTGGTACTGAGCTTCTTGTACATCATGCCTCCA - TGTGAAATTTTCATCTACATTGTGAGCCAGCCTACTTTTACACAGTAAGCGAAAGCTGGCTGGACATATCAGAGTTGCAATGGGGATTGACCAAATCAATTCTGACTCCTGTTACATGTTGC - >LOCOs07g23169_AZUCENA_1 - ACTAGTGAAGTGTTGCCCGTTGGGTTTGGTGGAGTTTGAAGAGCTCTGTCGCCGTCTTCACTCCTCGCAGTCAGACGGGAGTGGACTACTGGAGGGAGAGAGAGGGTGAGCGAGGTGTGGGA - GCTGGTTTTGCTCAGAGAGCGGGCGCGTGTTCGCCGCGGGTCTCAACGACTTCGGGCAGCTCGGGATAGGCTCCTCCGTGACTCATTCCCTGGTACTGAGCTTCTTGTACATCATGCCTCCA - TGTGAAATTTTCATCTACATTGTGAGCCAGCCTACTTTTACACAGTAAGCGAAAGCTGGCTGGACATATCAGAGTTGCAATGGGGATTGACCAAATCAATTCTGACTCCTGTTACATGTTGC - -Output files -============ - -Diversity ---------- - -:: - - LOCOs07g23169;8;10494;10494;2;7.35039e-05;8.16793e-05;0.414213;2;0.428571;0.857143;0;1; - 
LOCOs11g09160;8;6577;6577;2;0.00011728;0.000130324;0.414213;2;0.428571;0.857143;0;1; - - - </help> - <citations> - <!-- [HELP] As DOI or BibTex entry --> - <citation type="bibtex">@article{Dereeper03062015, -author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, -title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, -year = {2015}, -doi = {10.1093/nar/gkv351}, -abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. 
SNiPlay is accessible at http://sniplay.southgreen.fr.}, -URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, -eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, -journal = {Nucleic Acids Research} -} - - </citation> - - </citations> -</tool> |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/bin/eggstats |
b |
Binary file egglib/egglib-2.1.5/bin/eggstats has changed |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Align.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Align.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,378 +0,0 @@\n-/*\r\n- Copyright 2008-2009 St\xc3\xa9phane De Mita, Mathieu Siol\r\n-\r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-\r\n-#ifndef EGGLIB_ALIGN_HPP\r\n-#define EGGLIB_ALIGN_HPP\r\n-\r\n-#include "Container.hpp"\r\n-#include "CharMatrix.hpp"\r\n-#include <vector>\r\n-\r\n-/** \\mainpage Summary\r\n- * \r\n- * This is the automatically-generated reference manual of the C++\r\n- * egglib-cpp library. The library is presented as several modules, but\r\n- * note that they are only used to structure the documentation.\r\n- * \r\n- * There is a single namespace (egglib) in which all classes are\r\n- * defined. See an example of programming with egglib-cpp in the\r\n- * EggLib package main documentation. Use "Modules" or "Classes" above\r\n- * to navigate in the library reference manual.\r\n- * \r\n- */\r\n-\r\n-\r\n-/** \\defgroup core core\r\n- *\r\n- * \\brief Central core of the C++ library of Egglib\r\n- *\r\n- * Data storage classes, parsers/formatters and tools, plus exception\r\n- * types.\r\n- * \r\n- */\r\n-\r\n-namespace egglib {\r\n-\r\n-\r\n- /** \\brief Handles a sequence alignment\r\n- *\r\n- * \\ingroup core\r\n- * \r\n- * Creation from a file or string stream should be performed using\r\n- * the class Fasta. 
Align objects can be created by deep copy from\r\n- * both Align and Container type. In the latter case, the length are\r\n- * artificially equalized by "?" characters. Align objects can be\r\n- * created from a DataMatrix object (and all the way arround) using\r\n- * the specific class DMAConverter.\r\n- *\r\n- * Sequences are represented by two strings (name and sequence) and\r\n- * an integer (group) that can be accessed or modified by index.The\r\n- * order of sequences is guaranteed to be conserved, as if Align was\r\n- * a list of triplets (name, sequence, group).\r\n- *\r\n- * The data matrix is implemented as continuous array (char**) and\r\n- * allows efficient access and modification of data. For very large\r\n- * data matrices you might claim immediately the required memory\r\n- * using the constructor Align(unsigned int, char**).\r\n- * \r\n- */\r\n- class Align : public Container, public CharMatrix {\r\n- public:\r\n- \r\n- /** \\brief Creates an empty alignment\r\n- * \r\n- */\r\n- Align();\r\n-\r\n-\r\n- /** \\brief Creates an alignment from a data matrix.\r\n- * \r\n- * Allows you to create an object from data stored in a char*\r\n- * array. 
The array\'s dimensions must be passed to the\r\n- * constructor, and as a result there is not need to\r\n- * terminate each sequence by a NULL character.\r\n- * \r\n- * \\param number_of_sequences the number of sequences (the\r\n- * length of the first dimension of the array).\r\n- * \r\n- * \\param alignment_length the length of sequences (the\r\n- * length of all lines of the array).\r\n- * \r\n- * \\param cstring_array the pointer to the data matrix.\r\n- * \r\n- */\r\n- Align(unsigned int number_of_sequences, unsigned int alignment_length, char const * const * const cstring_array);\r\n-\r\n-\r\n- /** \\brief Creates an alignment with given dimensions\r\n- * \r\n- * Allows you to allocate directly a data matrix of a given\r\n- '..b'm position the position in the alignment (column).\r\n- * \\param ch the new character value.\r\n- */\r\n- virtual void set(unsigned int sequence, unsigned position, char ch);\r\n-\r\n-\r\n- /** \\brief Reverse a given column in binary data\r\n- *\r\n- * The specified column must contain only "0" ans "1" characters.\r\n- * "0" is replaced by "1" and all the way around\r\n- * \r\n- */\r\n- void binSwitch(unsigned int pos);\r\n-\r\n-\r\n- /** \\brief Extracts specified positions (columns) of the alignment\r\n- *\r\n- * All the specified sites are extracted in the specified\r\n- * order. 
This function is suitable for bootstrap (resample\r\n- * allowing redrawing the same site) and permutations.\r\n- * \r\n- * This function doesn\'t perform out-of-bound checking.\r\n- * \r\n- * \\param list_of_sites a vector containing alignment\r\n- * positions.\r\n- * \r\n- * \\return A copy of the object containing the specified\r\n- * set of positions.\r\n- * \r\n- */\r\n- Align vslice(std::vector<unsigned int> list_of_sites);\r\n-\r\n-\r\n- /** \\brief Extracts a range of positions (columns)\r\n- * \r\n- * \\param a the first position.\r\n- * \r\n- * \\param b the index immediately passed the last sequence to\r\n- * extract.\r\n- * \r\n- * \\return A copy of the object containing the specified\r\n- * range of sequences.\r\n- * \r\n- * Positions a to b-1 are extracted, provided that the\r\n- * indices fit in the current length of sequences. To extract\r\n- * all sequences, use align.vslice(0, align.ls()).\r\n- * \r\n- * Note: invalid ranges will be silently supported. If\r\n- * a>=ls or b<=a, an empty object is returned. 
If b>ns,\r\n- * ls will be substituted to a.\r\n- */\r\n- Align vslice(unsigned int a, unsigned int b);\r\n-\r\n-\r\n- /** \\brief Deletes all the content of the object\r\n- * \r\n- */\r\n- virtual void clear();\r\n-\r\n-\r\n- /** \\brief Same as ns()\r\n- * \r\n- */\r\n- inline unsigned int numberOfSequences() const {\r\n- return _ns;\r\n- }\r\n-\r\n-\r\n- /** \\brief Same as ls()\r\n- * \r\n- */\r\n- inline unsigned int numberOfSites() const {\r\n- return _ls;\r\n- }\r\n-\r\n-\r\n- /** \\brief Gets a group label (insecure)\r\n- * \r\n- */\r\n- inline unsigned int populationLabel(unsigned int sequenceIndex) const {\r\n- return groups[sequenceIndex];\r\n- }\r\n- \r\n- \r\n- /** \\brief Just return the passed value\r\n- *\r\n- */\r\n- inline double sitePosition(unsigned int position) const {\r\n- return (double) position;\r\n- }\r\n-\r\n-\r\n- protected:\r\n- \r\n- /// This function is not available for alignments\r\n- virtual void appendSequence(unsigned int pos, const char* sequence) {}\r\n-\r\n- // Initializer (creates a valid empty alignment)\r\n- virtual void init();\r\n- \r\n- // Makes a deep copy of the specified data matrix - if cstring_array is NULL, then ignores it and pads with ?\'s\r\n- virtual void setFromSource(unsigned int number_of_sequences, unsigned int alignment_length, const char* const * const cstring_array);\r\n-\r\n- // Copies from a Container\r\n- virtual void copyObject(const Container&);\r\n- \r\n- // Copies from an Align\r\n- virtual void copyObject(const Align&);\r\n- \r\n- // Alignment length\r\n- unsigned int _ls;\r\n- };\r\n-}\r\n-\r\n-#endif\r\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Arg.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Arg.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,300 +0,0 @@\n-/*\r\n- Copyright 2009-2010 St\xe9phane De Mita, Mathieu Siol\r\n-\r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-\r\n-#ifndef EGGLIB_ARG_HPP\r\n-#define EGGLIB_ARG_HPP\r\n-\r\n-\r\n-#include "Current.hpp"\r\n-#include "Edge.hpp"\r\n-#include <string>\r\n-\r\n-\r\n-/** \\defgroup coalesce coalesce\r\n- *\r\n- * \\brief Coalescent simulator\r\n- *\r\n- * The set of classes implements a three-scale coalescent simulator with\r\n- * recombination, and a flexible mutation model. The main classes are\r\n- * Controller (the starting point for generating genealogies), ParamSet\r\n- * (that centralizes parameter specification), the Change hierarchy\r\n- * (that implements demographic change specifications), Arg (ancestral\r\n- * recombination graph; the result of generation a genealogy) and\r\n- * Mutator (that generates genotype data from an ARG).\r\n- * \r\n- */\r\n-\r\n-\r\n-namespace egglib {\r\n- \r\n- class Random;\r\n-\r\n- /** \\brief Ancestral recombination graph\r\n- *\r\n- * \\ingroup coalesce\r\n- * \r\n- * This class stores the ARG (genealogical information). It is\r\n- * progressively built by appropriate (especially regarding to the\r\n- * timing) calls to coal() and recomb() methods. 
Then it can be\r\n- * used by a mutator class to generates data, or it can also\r\n- * generate newick trees (one tree by non-recombining segment).\r\n- *\r\n- */\r\n- class Arg {\r\n-\r\n- public:\r\n- \r\n- /** \\brief Default constructor\r\n- *\r\n- * Creates a null, useless, object.\r\n- *\r\n- */\r\n- Arg();\r\n-\r\n- \r\n- /** \\brief Object initialization\r\n- * \r\n- * \\param current address of the Current instance used by\r\n- * the simulator.\r\n- * \r\n- * \\param numberOfSegments number of recombining segments.\r\n- * \r\n- */\r\n- void set(Current* current, unsigned int numberOfSegments);\r\n-\r\n- \r\n- /** \\brief Object reset method\r\n- * \r\n- * This method doesn\'t reset all parameters (the number of\r\n- * segments and associated tables are retained, as well as\r\n- * the Edge object pool).\r\n- * \r\n- * \\param current address of the Current instance used by\r\n- * the simulator.\r\n- * \r\n- */\r\n- void reset(Current* current);\r\n-\r\n-\r\n- /** \\brief Standard constructor\r\n- * \r\n- * \\param current address of the Current instance used by\r\n- * the simulator.\r\n- * \r\n- * \\param numberOfSegments number of recombining segments\r\n- *\r\n- */\r\n- Arg(Current* current, unsigned int numberOfSegments);\r\n-\r\n- \r\n- /** \\brief Destructor\r\n- * \r\n- * Clears all Edge instances referenced in the object.\r\n- * \r\n- */\r\n- virtual ~Arg();\r\n- \r\n-\r\n- /** \\brief Gets the current value of the time counter\r\n- * \r\n- */ \r\n- double time() const;\r\n- \r\n-\r\n- /** \\brief Increments the time counter\r\n- * \r\n- */\r\n- void addTime(double increment);\r\n- \r\n-\r\n- /** \\brief Performs a coalescence ev'..b'\n- * \r\n- * The uMRCA is the ultimate Most Recent Common Ancestor,\r\n- * that is the point where the last segment finds its most\r\n- * recent common ancestor. 
This member will have a meaningful\r\n- * value only if the coalescent process is completed.\r\n- * \r\n- */\r\n- inline double ageUltimateMRCA() const {\r\n- return _time;\r\n- }\r\n- \r\n-\r\n- /** \\brief Age of the MRCA for a given segment\r\n- * \r\n- * The MRCA is the Most Recent Common Ancestor, that is the\r\n- * point where the coalescent process is over (all lineages\r\n- * have coalesced). This member will have a meaningful\r\n- * value only if the coalescent process is completed.\r\n- * \r\n- * Note that the value is cached; it is computed only one\r\n- * upon first call and no again, even if the Arg is modified<\r\n- * \r\n- */\r\n- inline double ageMRCA(unsigned int segmentIndex) {\r\n- return _MRCA[segmentIndex]->bottom;\r\n- }\r\n-\r\n- /** \\brief MRCA for each segment\r\n- * \r\n- * The MRCA is the Most Recent Common Ancestor, that is the\r\n- * point where the coalescent process is over (all lineages\r\n- * have coalesced). This member will have a meaningful\r\n- * value only if the coalescent process is completed.\r\n- * \r\n- * Note that the value is cached; it is computed only one\r\n- * upon first call and no again, even if the Arg is modified\r\n- * \r\n- */\r\n- inline const Edge* MRCA(unsigned int segmentIndex) {\r\n- return _MRCA[segmentIndex];\r\n- }\r\n-\r\n- /// Ultimate MRCA\r\n- \r\n- inline const Edge* uMRCA() {\r\n- return edges[numberOfEdges-1];\r\n- }\r\n- \r\n- \r\n- /// the number of recombining segments\r\n- unsigned int numberOfSegments;\r\n-\r\n- /** \\brief Formats the newick-formatted tree for a segment\r\n- * \r\n- */\r\n- std::string newick(unsigned int segment);\r\n- \r\n-\r\n- /// Number of initial lineages\r\n- unsigned int numberOfSamples;\r\n-\r\n-\r\n- /** \\brief Total tree length (summed over all segments)\r\n- * \r\n- */\r\n- double totalLength;\r\n-\r\n- /** \\brief Segment-specific tree length\r\n- * \r\n- */\r\n- double* segmentLengths;\r\n-\r\n- /// Current number of Edges in the tree (including the MRCA 
node)\r\n- unsigned int numberOfEdges;\r\n-\r\n- /// Total number of recombination events that occurred\r\n- unsigned int numberOfRecombinationEvents;\r\n- \r\n- /// Set the number of actual sites in all branches\r\n- void set_actualNumberOfSites(unsigned int actualNumberOfSites);\r\n- \r\n- \r\n- private:\r\n- \r\n- /// Copy constructor not available\r\n- Arg(const Arg&) { }\r\n- \r\n- /// Assignment operator not available\r\n- Arg& operator=(const Arg&) { return *this; }\r\n-\r\n- void init_stable_parameters();\r\n- void init_variable_parameters();\r\n- void clear();\r\n- void addEdge(Edge*);\r\n- std::string rnewick(Edge* edge, unsigned int segment, double cache);\r\n-\r\n- Current* current;\r\n- double _time;\r\n- Edge** edges;\r\n- \r\n- void findMRCA(unsigned int segmentIndex);\r\n- void computeTotalLength();\r\n- void computeSegmentLength(unsigned int segmentIndex);\r\n-\r\n- unsigned int* numberOfEdgesPerSegment;\r\n- Edge** _MRCA;\r\n- \r\n- EdgePool edgePool;\r\n- };\r\n-\r\n-}\r\n-\r\n-#endif\r\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/BaseDiversity.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/BaseDiversity.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,169 +0,0 @@ -/* - Copyright 2009 St�phane De Mita, Mathieu Siol - - This file is part of the EggLib library. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef EGGLIB_BASEDIVERSITY_HPP -#define EGGLIB_BASEDIVERSITY_HPP - -#include "CharMatrix.hpp" -#include "SitePolymorphism.hpp" -#include <string> - -/** \defgroup polymorphism polymorphism - * - * \brief Diversity analyses - * - * Two classes are contained in this module: NucleotideDiversity, that - * performs site-centered polymorphism analyses, and HaplotypeDiversity, - * that performs haplotype-centered analyses. The detection of - * polymorphic sites is common to both, through the base class - * BaseDiversity. However this phase must be repeated when stats from - * the two classes are needed. To reduce the computational burden, the - * function reserve() can be use, that directly allocates needed memory - * when the eventual number of polymorphic sites is known prior to - * analysis (even if not precisely). For both classes, a set of - * statistics are computed immediately upon load of a data set. For - * NucleotideDiversity, additional statistics are computed per group - * upon use of the corresponding accessors. This number of operations - * performed several times is strictly limited. This is particularly - * useful when different statistics are needed for a given alignment. 
- * However, this system allows not computing unnecessary statistics to - * a certain extend. - * - */ - -namespace egglib { - - /** \brief Base class of diversity classes - * - * Mutualizes the analysis of polymorphic sites through the method - * importSites() and related accessors. - * - * \ingroup polymorphism - * - */ - class BaseDiversity { - - public: - - /** \brief Constructor - * - */ - BaseDiversity(); - - /** \brief Destructor - * - */ - virtual ~BaseDiversity(); - - /** \brief Reserve sufficient memory for a given number of - * polymorphic sites. - * - * This method makes importSite function faster when you - * already know how many polymorphic sites to expect, since - * the necessary memory will be allocated prior the screening - * of data. It is possible to use reserve() even if with a - * number of sites that is not matching what importSites() - * will find. - * - * \param numberOfSites a strictly positive integer. - * - */ - virtual void reserve(unsigned int numberOfSites); - - /// Gets a site - const SitePolymorphism* get_site(unsigned int index) const; - - /// Gets a site position - unsigned int get_position(unsigned int index) const; - - /** \brief Predefined mapping string for DNA data - * - */ - static const std::string dnaMapping; - - - /** \brief Predefined mapping string for RNA data - * - */ - static const std::string rnaMapping; - - - /** \brief Predefined mapping string for amino acid data - * - */ - static const std::string aaMapping; - - - /// Clears and re-initializes object - virtual void reset(); - - - protected: - - virtual void init(); - virtual void clear(); - - // - void importSites(CharMatrix& data, bool allowMultipleMutations, - double minimumExploitableData, unsigned int ignoreFrequency, - std::string characterMapping, bool useZeroAsAncestral, - bool ignoreOutgroup); - - // - void analyzeSite(CharMatrix& data, unsigned int index, double maxMissingData, bool ignoreOutgroup); // analyzes a site, adds a Site to the Site container 
if the site is polymorphic - unsigned int getPopIndex(unsigned int label) const; // returns v_npop if not found - - SitePolymorphism** v_sites; // holder of polymorphic site addresses - bool* v_orientables; // stores whether the sites are orientable or not - unsigned int* v_sitePositions; // stores position of sites - - unsigned int v_reserved; - unsigned int v_ns; // maximum number of sequences analyzed (max of sites' ns) - unsigned int v_S; // number of polymorphic sites - unsigned int v_So; // number of orientable sites - unsigned int v_eta; // number of mutation (whatever multiple) - double v_nseff; // average number of analyzed sequence - unsigned int v_lseff; // number of analyzed sites - double v_nseffo; // average number of analyzed sequences for analyzes with outgroup - unsigned int v_lseffo; // number of analyzed sites for analyzes with outgroup - unsigned int v_npop; // number of populations - unsigned int *v_popLabel; // label of each pop - - // options - bool p_allowMultipleMutations; - double p_minimumExploitableData; - std::string p_characterMapping; - unsigned int p_pos_sep_mapping; - bool p_useZeroAsAncestral; - unsigned int p_ignoreFrequency; - - - - private: - - BaseDiversity(const BaseDiversity& source) { } - - BaseDiversity& operator=(const BaseDiversity& source) { - return *this; - } - - }; -} - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/ChangeTypes.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/ChangeTypes.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,428 +0,0 @@\n-/*\r\n- Copyright 2009-2010 St\xe9phane De Mita, Mathieu Siol\r\n-\r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-#ifndef EGGLIB_CHANGETYPES_HPP\r\n-#define EGGLIB_CHANGETYPES_HPP\r\n-\r\n-#include "ParamSet.hpp"\r\n-#include "Controller.hpp"\r\n-\r\n-namespace egglib {\r\n-\r\n-/**********************************************************************/\r\n-\r\n- /** \\brief Pure virtual base class for parameter changes\r\n- *\r\n- * \\ingroup coalesce\r\n- *\r\n- */\r\n- class Change {\r\n- public:\r\n- \r\n- /** \\brief Default constructor\r\n- *\r\n- * The default date is 0.\r\n- *\r\n- */\r\n- Change();\r\n- \r\n- /** \\brief Standard constructor\r\n- *\r\n- * \\param date the event date.\r\n- *\r\n- */\r\n- Change(double date);\r\n-\r\n- /// Gets the event date value\r\n- double date() const;\r\n- \r\n- /// Sets the event date value\r\n- void date(double value);\r\n- \r\n- /** \\brief Applies the event\r\n- *\r\n- * \\param paramSet the ParamSet instance to which the Change\r\n- * instance is attached.\r\n- * \\param controller the Controller instance of the\r\n- * simulation.\r\n- *\r\n- */\r\n- virtual void apply(ParamSet* paramSet, Controller* controller) const = 0;\r\n- \r\n- protected:\r\n- double _date;\r\n- \r\n- 
};\r\n-\r\n-/**********************************************************************/\r\n-\r\n- /** \\brief Pure virtual base class for single parameter changes\r\n- *\r\n- * \\ingroup coalesce\r\n- *\r\n- */\r\n- class SingleParamChange : public Change {\r\n- public:\r\n- \r\n- /** \\brief Default constructor\r\n- *\r\n- * The default date is 0., the default parameter value is 0.\r\n- *\r\n- */\r\n- SingleParamChange();\r\n- \r\n- /** \\brief Standard constructor\r\n- *\r\n- * \\param date the event date.\r\n- * \\param value the parameter value.\r\n- *\r\n- */\r\n- SingleParamChange(double date, double value);\r\n-\r\n- /// Gets the parameter value\r\n- double value() const;\r\n- \r\n- /// Sets the parameter value\r\n- void value(double value);\r\n-\r\n- protected:\r\n- double _value;\r\n-\r\n- };\r\n-\r\n-/**********************************************************************/\r\n-\r\n- /** \\brief Single parameter changes applied to a single population\r\n- *\r\n- * \\ingroup coalesce\r\n- *\r\n- */\r\n- class PopulationParamChange : public SingleParamChange {\r\n- public:\r\n-\r\n- /** \\brief Default constructor\r\n- *\r\n- * The default date is 0., the default parameter value is 0.,\r\n- * the default population is 0\r\n- *\r\n- */\r\n- PopulationParamChange();\r\n- \r\n- /** \\brief Standard constructor\r\n- *\r\n- * \\param date the event date.\r\n- * \\param population the population index.\r\n- * \\param value the parameter value.\r\n- *\r\n- */\r\n- PopulationParamCha'..b' */\r\n- PopulationSplit();\r\n-\r\n- /** \\brief Standard constructor\r\n- *\r\n- * A the time given by date, the specified population is\r\n- * split in two. An additional population (whose index is\r\n- * incremented from the current total number of population)\r\n- * is created and lineages are randomly picked and moved to\r\n- * the new population. 
The parameter proba gives the\r\n- * probability that a lineage from the population number pop\r\n- * moves instantly to the new population. If proba is 0,\r\n- * the program emulates the creation of an empty population\r\n- * (thinking forward in time, this is a population\r\n- * extinction). In general, forward in time, this is a\r\n- * population fusion.\r\n- * \r\n- * \\param date the date of the event.\r\n- * \\param pop population index.\r\n- * \\param proba the probability that lineages move to the\r\n- * new population.\r\n- *\r\n- */\r\n- PopulationSplit(double date, unsigned int pop, double proba);\r\n- \r\n- void apply(ParamSet* paramSet, Controller* controller) const;\r\n- \r\n- /// Gets the population index\r\n- unsigned int population() const;\r\n- \r\n- /// Sets the population index\r\n- void population(unsigned int);\r\n- \r\n- /// Gets the probability of instant migration\r\n- double probability() const;\r\n-\r\n- /// Sets the probability of instant migration\r\n- void probability(double);\r\n- \r\n- protected:\r\n- unsigned int _population;\r\n- double _probability;\r\n- };\r\n-\r\n-/**********************************************************************/\r\n-\r\n- /** \\brief Change of the migration rate of all population pairs\r\n- *\r\n- * The parameter is the new rate (applied to all population pairs)\r\n- *\r\n- * \\ingroup coalesce\r\n- *\r\n- */\r\n- class AllMigrationRateChange : public SingleParamChange {\r\n- public:\r\n- AllMigrationRateChange(double date, double value) : SingleParamChange(date, value) {}\r\n- void apply(ParamSet* paramSet, Controller* controller) const;\r\n- };\r\n-\r\n-/**********************************************************************/\r\n- \r\n- /** \\brief Change of a single migration rate\r\n- *\r\n- * \\ingroup coalesce\r\n- *\r\n- */\r\n- class SingleMigrationRateChange : public SingleParamChange {\r\n- public:\r\n- /** \\brief Default constructor\r\n- *\r\n- * The default date is 0., the default parameter 
value is 0.,\r\n- * the default source population is 0, the default\r\n- * destination population 1.\r\n- *\r\n- */\r\n- SingleMigrationRateChange();\r\n-\r\n- /** \\brief Standard constructor\r\n- *\r\n- * \\param date the date of the event.\r\n- * \\param source index of the source population.\r\n- * \\param dest index of the destination population.\r\n- * \\param migr new value of the pairwise migration rate.\r\n- *\r\n- */\r\n- SingleMigrationRateChange(double date, unsigned int source, unsigned int dest, double migr);\r\n- \r\n- /// Gets the source population index\r\n- unsigned source() const;\r\n- \r\n- /// Sets the source population index\r\n- void source(unsigned int);\r\n-\r\n- /// Gets the dest population index\r\n- unsigned dest() const;\r\n- \r\n- /// Sets the dest population index\r\n- void dest(unsigned int);\r\n-\r\n- void apply(ParamSet* paramSet, Controller* controller) const;\r\n- \r\n- protected:\r\n- unsigned int _source;\r\n- unsigned int _dest;\r\n- };\r\n-}\r\n-\r\n-#endif\r\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/CharMatrix.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/CharMatrix.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,70 +0,0 @@ -/* - Copyright 2009 St�phane De Mita, Mathieu Siol - - This file is part of the EggLib library. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef EGGLIB_CHARMATRIX_HPP -#define EGGLIB_CHARMATRIX_HPP - - -namespace egglib { - - /** \brief Interface for classes usable as a square matrix of characters - * - * \ingroup core - * - */ - class CharMatrix { - - public: - - /** \brief Gets number of rows or sequences - * - */ - virtual unsigned int numberOfSequences() const = 0; - - - /** \brief Gets number of columns or sites - * - */ - virtual unsigned int numberOfSites() const = 0; - - - /** \brief Gets the character at a given position - * - * The accessor should be "fast" and does not guarantee to - * perform out-of-bounds checks - * - */ - virtual char character(unsigned int sequence, unsigned int site) const = 0; - - - /** \brief Gets population index - * - */ - virtual unsigned int populationLabel(unsigned int row) const = 0; - - - /** \brief Get site position - * - */ - virtual double sitePosition(unsigned int column) const = 0; - - }; -} - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Consensus.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Consensus.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,421 +0,0 @@\n-/*\r\n- Copyright 2008-2009 St\xe9phane De Mita, Mathieu Siol\r\n-\r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-#ifndef EGGLIB_CONSENSUS_HPP\r\n-#define EGGLIB_CONSENSUS_HPP\r\n-\r\n-#include "Align.hpp"\r\n-#include <sstream>\r\n-#include <string>\r\n-#include <vector>\r\n-\r\n-namespace egglib {\r\n-\r\n- /** \\brief Generates consensus sequences\r\n- *\r\n- * \\ingroup polymorphism\r\n- * \r\n- *\r\n- * A consensus is generated when two sequences have the same name, \r\n- * ignoring everything after the first separator character (by\r\n- * default, "_"). Hence, the names "foo", "foo_goo" and "foo_third"\r\n- * will be treated as identical and the root will be "foo". The root\r\n- * will be used to name the resulting sequence. Note that the\r\n- * class works only for DNA sequences.\r\n- *\r\n- * Symbol convention:\r\n- * - A: adenosine\r\n- * - C: cytosine\r\n- * - G: guanine\r\n- * - T: thymine\r\n- * - M: A or C\r\n- * - R: A or G\r\n- * - W: A or T (weak)\r\n- * - S: C or G (strong)\r\n- * - Y: C or T\r\n- * - K: G or T\r\n- * - B: C or G or T(not A)\r\n- * - D: A or G or T (not C)\r\n- * - H: A or C or T (not G)\r\n- * - V: A or C or G (not T)\r\n- * - N: A or C or G or T\r\n- * - ?: nonsequenced position\r\n- * \r\n- * Other symbols will be treated as ? 
(lowercase are supported).\r\n- * \r\n- * Rigorous (alias liberal or strong) mode:\r\n- * - If two characters are the same, it is retained whatever it is\r\n- * (A + A = A)\r\n- * - Otherwise:\r\n- * - If one is the missing character (?) the other is retained\r\n- * whatever it is (A + ? = A).\r\n- * - If characters are consistent, that is one contains\r\n- * more information, that one is retained (A + M = A).\r\n- * - If characters are not consistent, the closest \r\n- * generic symbol is retained (A + C = M).\r\n- * .\r\n- * Note that the feedback of inconsistent characters in the\r\n- * outcome is not garanteed.\r\n- * In fact, (A + A + G) will result in R (as expected) but (A +\r\n- * G + A) will result in A, masking the problem.\r\n- * However, the position will indeed be counted as inconsistent.\r\n- * \r\n- * Not rigorous (conservative/weak) mode:\r\n- * - If two characters are the same, it is retained whatever it\r\n- * is (A + A = A).\r\n- * - Otherwise:\r\n- * - If one is ? the other is retained whatever it is (A + ?\r\n- * = A).\r\n- * - Otherwise an inconsistent character (by default, Z) is\r\n- * retained (A + C = Z).\r\n- * \r\n- * Iterative process of consensus:\r\n- * - Each sequence is taken in turn.\r\n- * - Each pair involving the focus sequence is processed and a\r\n- * consensus is generated.\r\n- * - When all pair have been processsed, the consensus already\r\n- * generated are themselves iteratively processed until only one\r\n- * remains.\r\n- * - Note that at each time the last two are taken first.\r\n- * \r\n- * A transparent interface gives access to the data for all steps of\r\n- * the consensus process, as vectors that covers all pairs (including\r\n- * intermediate steps of the iterative procedure described above) as\r\n- * well as singleton sequences. 
For the latter'..b'\r\n- // Code for disgrement\r\n- char DISAGREEMENT;\r\n- \r\n- public:\r\n- // This class manages relationships different symbols\r\n- class CharacterContainer {\r\n- public:\r\n- // Default value: @\r\n- CharacterContainer();\r\n- \r\n- // Initiates to a given symbol\r\n- CharacterContainer(const char&);\r\n- \r\n- // Assignment operator\r\n- CharacterContainer& operator=(const char&);\r\n- \r\n- // Sets the symbol\r\n- void setValue(char);\r\n- \r\n- // Set the descendants\r\n- void setSons(std::vector<CharacterContainer>);\r\n- \r\n- // Tests whether the symbol is the same\r\n- bool is(CharacterContainer);\r\n- \r\n- // Tests if the query is contained amongst the sons\r\n- bool has(CharacterContainer);\r\n- \r\n- // Tests if the query is contained amongst the sons\r\n- bool has(char);\r\n- \r\n- /* Tests whether the left character has the left one\r\n- * Should be called on the N object only.\r\n- */\r\n- char lhas(CharacterContainer,CharacterContainer);\r\n- \r\n- /* Creates the object with the proper sons\r\n- * Should be called on the N object only.\r\n- */\r\n- CharacterContainer init(char);\r\n- \r\n- // The symbol\r\n- char value;\r\n- \r\n- // The descendants\r\n- std::vector<CharacterContainer> sons;\r\n- };\r\n-\r\n- private:\r\n- // Symbol ?\r\n- CharacterContainer ccQ;\r\n- \r\n- // Symbol A\r\n- CharacterContainer ccA;\r\n- \r\n- // Symbol C\r\n- CharacterContainer ccC;\r\n- \r\n- // Symbol G\r\n- CharacterContainer ccG;\r\n- \r\n- // Symbol T\r\n- CharacterContainer ccT;\r\n- \r\n- // Symbol U\r\n- CharacterContainer ccU;\r\n- \r\n- // Symbol M\r\n- CharacterContainer ccM;\r\n- \r\n- // Symbol R\r\n- CharacterContainer ccR;\r\n- \r\n- // Symbol W\r\n- CharacterContainer ccW;\r\n- \r\n- // Symbol S\r\n- CharacterContainer ccS;\r\n- \r\n- // Symbol Y\r\n- CharacterContainer ccY;\r\n- \r\n- // Symbol K\r\n- CharacterContainer ccK;\r\n- \r\n- // Symbol B\r\n- CharacterContainer ccB;\r\n- \r\n- // Symbol D\r\n- CharacterContainer 
ccD;\r\n- \r\n- // Symbol H\r\n- CharacterContainer ccH;\r\n- \r\n- // Symbol V\r\n- CharacterContainer ccV;\r\n- \r\n- // Symbol N\r\n- CharacterContainer ccN;\r\n- \r\n- // Symbol -\r\n- CharacterContainer ccGAP;\r\n- };\r\n- };\r\n-}\r\n-\r\n-#endif\r\n-\r\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Container.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Container.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b"@@ -1,326 +0,0 @@\n-/*\r\n- Copyright 2008-2009 St\xe9phane De Mita, Mathieu Siol\r\n- \r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-\r\n-#ifndef EGGLIB_CONTAINER_HPP\r\n-#define EGGLIB_CONTAINER_HPP\r\n-\r\n-\r\n-namespace egglib {\r\n-\r\n- /** \\brief Handles a set of sequence alignment (aligned or not)\r\n- *\r\n- * \\ingroup core\r\n- * \r\n- * Creation from a file or string stream should be performed using\r\n- * the class Fasta.\r\n- *\r\n- * Sequences are represented by two strings (name and sequence) and\r\n- * an integer (group) that can be accessed or modified by index.The\r\n- * order of sequences is guaranteed to be conserved, as if Container\r\n- * was a list of triplets (name, sequence, group).\r\n- *\r\n- * The data matrix is implemented as continuous arrays (char**) and\r\n- * allows efficient access and modification of data. 
For very large\r\n- * data matrices you might claim immediately the required memory\r\n- * using the constructor Container(unsigned int, char**).\r\n- *\r\n- */\r\n- class Container {\r\n- \r\n- public:\r\n- \r\n- /** \\brief Creates an empty object\r\n- * \r\n- */\r\n- Container();\r\n- \r\n- \r\n- /** \\brief Copy constructor\r\n- * \r\n- */\r\n- Container(const Container& source);\r\n- \r\n- \r\n- /** \\brief Assignment operator\r\n- * \r\n- */\r\n- Container& operator= (const Container& source);\r\n-\r\n-\r\n- /** \\brief Creates an object from a data matrix\r\n- * \r\n- * Allows you to create an object from data stored in a char*\r\n- * array. The array's size must be passed to the constructor.\r\n- * Since sequences can have different lengths, you need to\r\n- * terminate each sequence by a NULL character. This constructor\r\n- * is dedicated to very performance-critical tasks. For usual\r\n- * tasks, using the default constructor and subsequently adding\r\n- * sequences with addSeq should be enough.\r\n- * \r\n- * \\param number_of_sequences the number of sequences (the length\r\n- * of the first dimension of the array).\r\n- * \r\n- * \\param cstring_array the pointer to the data matrix.\r\n- * \r\n- */\r\n- Container(unsigned int number_of_sequences, char const* const* const cstring_array);\r\n-\r\n-\r\n- /** \\brief Destructor\r\n- * \r\n- */\r\n- virtual ~Container();\r\n-\r\n- \r\n- /** \\brief Clears all content of the object\r\n- * \r\n- */\r\n- virtual void clear();\r\n-\r\n-\r\n- /** \\brief Adds a sequence to the object\r\n- *\r\n- * \\param name the name of the sequence, as a c-string.\r\n- * \\param sequence the sequence string, as a c-string.\r\n- * \\param group the group index of the sequence.\r\n- *\r\n- * \\return The new number of sequences.\r\n- * \r\n- */\r\n- virtual unsigned int append(const char* name, const char* sequence, unsigned int group=0);\r\n- \r\n- \r\n- /** \\brief Removes a sequence from the object\r\n- *\r\n- * \\param pos 
the index of the seque"..b"r of sequences\r\n- * \r\n- */\r\n- unsigned int ns() const;\r\n- \r\n- \r\n- /** \\brief Gets the length of a given sequence\r\n- * \r\n- * \\param pos the index of the sequence.\r\n- * \r\n- * \\return The length of that particular sequence.\r\n- * \r\n- */\r\n- virtual unsigned int ls(unsigned int pos) const ;\r\n- \r\n- \r\n- /** \\brief Gets the name of the a given sequence\r\n- * \r\n- * \\param pos the index of the sequence.\r\n- * \r\n- * \\return The name of that particular sequence.\r\n- * \r\n- */\r\n- virtual const char* name(unsigned int pos) const;\r\n-\r\n- \r\n- /** \\brief Gets the name of a given sequence\r\n- * \r\n- * \\param pos the index of the sequence.\r\n- * \r\n- * \\return The sequence string for that particular sequence.\r\n- * \r\n- */\r\n- virtual const char* sequence(unsigned int pos) const;\r\n-\r\n-\r\n-\r\n- /** \\brief Gets the group index of a given sequence\r\n- * \r\n- * \\param pos the index of the sequence.\r\n- * \r\n- * \\return The group index of that particular sequence.\r\n- * \r\n- */\r\n- virtual unsigned int group(unsigned int pos) const;\r\n- \r\n- \r\n- /** \\brief Checks if all lengths are equal\r\n- * \r\n- * Returns true if the length of all sequences are equal or\r\n- * if there is less thant two sequences.\r\n- * \r\n- */\r\n- bool isEqual() const;\r\n-\r\n-\r\n- /** \\brief Equalizes sequence lengths\r\n- *\r\n- * Extends sequences as need to ensure that all sequences\r\n- * have the same length.\r\n- *\r\n- * \\param ch the character to use for padding.\r\n- * \r\n- * \\return The final length obtained, which is the length of\r\n- * the longest sequence before the operation.\r\n- * \r\n- */\r\n- unsigned int equalize(char ch='?');\r\n-\r\n- \r\n- /** \\brief Finds a sequence by its name\r\n- * \r\n- * Gets the position of the first sequence with the specified\r\n- * name.\r\n- * \r\n- * \\param string a sequence name.\r\n- * \r\n- * \\param strict if true, seeks an exact match. 
If false,\r\n- * compares only until the end of the requested name (for\r\n- * example: ATCFF will match ATCFF_01 if strict is false).\r\n- * \r\n- * \\return The lowest index where the name matches, -1 if no\r\n- * sequence has such name.\r\n- * \r\n- */\r\n- int find(const char* string, bool strict=true) const;\r\n-\r\n-\r\n- protected:\r\n- // The number of sequences\r\n- unsigned int _ns;\r\n- \r\n- // The array of name lengths\r\n- unsigned int* lnames;\r\n- \r\n- // The array of names\r\n- char** names;\r\n- \r\n- // The array of sequences (as c-strings)\r\n- char** sequences;\r\n- \r\n- // The array of groups\r\n- unsigned int* groups;\r\n- \r\n- // Imports an array of c-strings\r\n- virtual void setFromSource(unsigned int number_of_sequences, const char* const* const cstring_array);\r\n- \r\n- // Constructor helper\r\n- virtual void copyObject(const Container&);\r\n- \r\n- // Constructor partial helper\r\n- virtual void getNamesAndGroups(const Container&);\r\n- \r\n- private:\r\n- \r\n- // The array of sequence lengths\r\n- unsigned int* lsequences;\r\n- \r\n- // Setup a valid empty object\r\n- virtual void init();\r\n- };\r\n-}\r\n- \r\n-#endif\r\n" |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Controller.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Controller.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,156 +0,0 @@ -/* - Copyright 2009-2010 St�phane De Mita, Mathieu Siol - - This file is part of the EggLib library. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef EGGLIB_CONTROLLER_HPP -#define EGGLIB_CONTROLLER_HPP - -#include "Current.hpp" -#include "Arg.hpp" -#include "ParamSet.hpp" - -namespace egglib { - - class Random; - - /** \brief Controls a coalescent simulation - * - * \ingroup coalesce - * - * This class generates the gene genealogy, based on the parameters - * stocked in a ParamSet object. - * - */ - class Controller { - - public: - - /** \brief Default constructor - * - * Uses a default ParamSet object that will not allow - * performing simulations. - * - */ - Controller(); - - /** \brief Destructor - * - */ - ~Controller(); - - /** \brief Standard constructor - * - * \param paramSet a ParamSet object containing run - * parameters (it is taken as a reference and stored as this - * so it must not be modified during simulations). - * - * \param random the address of the random number generator. - * - */ - Controller(const ParamSet* paramSet, Random* random); - - /** \brief Reset for a new simulation - * - * Object is reinitiliazed for a new simulation. - * - */ - void reset(); - - /** \brief Increments the coalescent model - * - * \return The number of lineages. 
- * - */ - unsigned int step(); - - /** \brief Gets the Ancestral Recombination Graph - * - * \return The address of the ARG contained in the object. - * - */ - Arg* getArg(); - - /** \brief Applies a bottleneck to a given population - * - * The bottleneck is applied following Galtier, Depaulis and - * Barton (Genetics, 2000): the general time counter is - * stopped, and coalescence events are performed during a - * time (of normal coalescent process) given by the parameter - * strength. All coalescent events are instantaneous. - * - * \param populationIndex index of the population concerned - * by the event. - * - * \param strength strength of the bottleneck given by a - * number of time units (2N generations times the size of - * the population). - * - */ - void bottleneck(unsigned int populationIndex, double strength); - - /** \brief Migrate a complete population - * - * Takes all the lineages currently present in the population - * source to the population dest. - * - */ - void moveAllLineages(unsigned int source, unsigned int dest); - - /** \brief Migrate a complete population - * - * Takes all the lineages currently present in the population - * source to the population dest. - * - * \param source source population. - * \param dest destination population. - * \param probability the probability that a lineage of - * source migrates to dest. 
- * - */ - void moveSomeLineages(unsigned int source, unsigned int dest, double probability); - - /// Adds an empty population - void addPopulation(); - - private: - - /// The copy constructor is disabled - Controller(const Controller& source) {} - - /// The assignment operator is disabled - Controller& operator=(const Controller& source) {return *this;} - - void diploids(); - double getMigrationTime(double& migrationParameterDestination); - void getCoalescenceTime(double& destTime, unsigned int& destPopIndex); - double getCoalescenceTimeForPopulation(unsigned int populationIndex); - double getRecombinationTime() const; - void migrate(double migrationParameter); - - const ParamSet* initialParamSet; - ParamSet paramSet; - Current current; - Arg arg; - - Random* random; - - }; - -} - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Convert.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Convert.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,234 +0,0 @@\n-/*\r\n- Copyright 2009 St\xc3\xa9phane De Mita, Mathieu Siol\r\n-\r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-\r\n-#ifndef EGGLIB_CONVERT_HPP\r\n-#define EGGLIB_CONVERT_HPP\r\n-\r\n-\r\n-#include "DataMatrix.hpp"\r\n-#include "Align.hpp"\r\n-#include "EggException.hpp"\r\n-#include "Random.hpp"\r\n-#include <string>\r\n-\r\n-#include "config.h"\r\n-\r\n-#ifdef HAVE_LIBBPP_SEQ\r\n-#include <Bpp/Seq/Alphabet.all>\r\n-#include <Bpp/Seq/Sequence.h>\r\n-#include <Bpp/Seq/Container.all>\r\n-#endif\r\n-\r\n-\r\n-\r\n-namespace egglib {\r\n-\r\n-\r\n- /** \\brief Performs conversion between sequence holder types\r\n- *\r\n- * \\ingroup core\r\n- * \r\n- * Static methods of this class allows conversion between sequence\r\n- * holder types implying parametrizable modifications.\r\n- * \r\n- */\r\n- class Convert {\r\n-\r\n- public:\r\n- \r\n- /** \\brief DataMatrix to Align conversion\r\n- * \r\n- * By defaut, this method generates an Align instance\r\n- * containing only the polymorphic sites. The integers of\r\n- * the DataMatrix will be converted as follow: 0 to A, 1 to\r\n- * C, 2 to G and 3 to T. This behaviour can be largely\r\n- * modified using options.\r\n- * \r\n- * \\param dataMatrix DataMatrix instance.\r\n- * \r\n- * \\param length length of the desired alignment. 
Non-varying\r\n- * stretches of data will be introduced to reach the\r\n- * specified length. By default the positions of segregating\r\n- * sites will be determined from the positions given by the\r\n- * DataMatrix object. Those positions are expressed in a\r\n- * continuous range, and will be discretized. Mutations\r\n- * falling on the same site will be moved of one position\r\n- * left or right (always preserving the order of mutation\r\n- * sites). If positions are all zero (the default of the\r\n- * DataMatrix class) and if length is larger than the number\r\n- * of segregating sites, then all segregating sites will\r\n- * cluster on the left-hand side of the alignment.\r\n- * \r\n- * \\param random the address to a Random object allowing to \r\n- * draw random numbers (for randomizing positions and/or\r\n- * non-varying states). If an address is provided but no\r\n- * random numbers are required, it is ignored. If no address\r\n- * if provided and random numbers are required, a Random\r\n- * instance is built internally.\r\n- * \r\n- * \\param randomizePositions if true, the positions specified\r\n- * in the DataMatrix objects are ignored and the positions of\r\n- * mutations are drawn randomly along the interval (only if\r\n- * the specified length is larger than the number of\r\n- * segregating sites). If randomizePositions and false and\r\n- * positions are not\r\n- * \r\n- * \\param enforceLength specify whether a\r\n- * EggRuntimeError should be thrown when the number of\r\n- * polymorphic sites is larger than the specified length. If\r\n- * false (the default) and in cases where the specified\r\n- * length is too short to harbor all polymorphic '..b'rt).\r\n- * \r\n- * \\param nonVaryingState character to use for conserved\r\n- * stretches of data. It doesn\'t have to be included in the\r\n- * mapping. 
If randomizeNonVaryingState is true, this\r\n- * argument is ignored.\r\n- * \r\n- * \\return The resulting Align object.\r\n- * \r\n- */\r\n- static Align align(\r\n- DataMatrix& dataMatrix,\r\n- unsigned int length=0,\r\n- Random* random=NULL,\r\n- bool randomizePositions=false,\r\n- bool randomizeNonVaryingStates=false,\r\n- bool randomizeAlleles=false,\r\n- bool enforceLength=false,\r\n- std::string mapping="ACGT",\r\n- char unknown=\'?\',\r\n- char nonVaryingState=\'A\'\r\n- );\r\n-\r\n-\r\n-#ifdef HAVE_LIBBPP_SEQ\r\n-\r\n- /** \\brief Converts an alignment to the equivalent Bio++ type\r\n- *\r\n- * During conversion, name information is lost (arbitrary\r\n- * names are generated in order toprevent duplicate names).\r\n- * The object is attached to an alphabet matching the passed\r\n- * integer. The names are bare rank integers (starting at the\r\n- * value giving by *offset*).\r\n- *\r\n- * \\param align the source alignment object.\r\n- * \r\n- * \\param alphabetID an integer indicating which alphabet to\r\n- * use:\r\n- * - 1 for DNA\r\n- * - 2 for RNA\r\n- * - 3 for proteins\r\n- * - 4 for standard codon\r\n- * - 5 for vertebrate mitochondrial codon\r\n- * - 6 for invertebrate mitochondrial codon\r\n- * - 7 for echinoderm mitochondrial codon\r\n- * .\r\n- * Other values will result in an exception.\r\n- * \r\n- * \\param outgroupFlag an integer indicating whether to\r\n- * include outgroup sequences:\r\n- * - 0 use all sequences\r\n- * - 1 use only sequences without 999 label (ingroup)\r\n- * - 2 use only sequences with 999 label (outgroup)\r\n- * .\r\n- * Other values will result in an exception.\r\n- * \r\n- * \\param offset enter an integer to shift the names of the\r\n- * resulting alignment (useful to merge alignment and ensure\r\n- * that names are not duplicated).\r\n- * \r\n- * \\return A Bio++ alignment.\r\n- * \r\n- */\r\n- static bpp::AlignedSequenceContainer egglib2bpp(Align& align, unsigned int alphabetID, unsigned int outgroupFlag, unsigned 
int offset=0);\r\n-\r\n-#endif\r\n-\r\n-\r\n-\r\n- protected:\r\n-\r\n- /** \\brief This class cannot be instantiated\r\n- * \r\n- */\r\n- Convert() { }\r\n-\r\n-\r\n- /** \\brief This class cannot be instantiated\r\n- * \r\n- */\r\n- Convert(const Convert& source) { }\r\n-\r\n-\r\n- /** \\brief This class cannot be instantiated\r\n- * \r\n- */\r\n- Convert& operator=(const Convert& source) { return *this; }\r\n-\r\n-\r\n- /** \\brief This class cannot be instantiated\r\n- * \r\n- */\r\n- virtual ~Convert() { }\r\n-\r\n-#ifdef HAVE_LIBBPP_SEQ\r\n- static bpp::DNA dnaAlphabet;\r\n- static bpp::RNA rnaAlphabet;\r\n- static bpp::ProteicAlphabet proteicAlphabet;\r\n- static bpp::StandardCodonAlphabet standardCodonAlphabet;\r\n- static bpp::VertebrateMitochondrialCodonAlphabet vertebrateMitochondrialCodonAlphabet;\r\n- static bpp::InvertebrateMitochondrialCodonAlphabet invertebrateMitochondrialCodonAlphabet;\r\n- static bpp::EchinodermMitochondrialCodonAlphabet echinodermMitochondrialCodonAlphabet;\r\n-#endif\r\n-\r\n- };\r\n-}\r\n-\r\n-#endif\r\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Current.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Current.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,128 +0,0 @@ -/* - Copyright 2009-2010 Stéphane De Mita, Mathieu Siol - - This file is part of the EggLib library. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef EGGLIB_CURRENT_HPP -#define EGGLIB_CURRENT_HPP - - -namespace egglib { - - class Population; - class ParamSet; - - /** \brief Represents the current set of populations - * - * \ingroup coalesce - * - */ - class Current { - - public: - - /** \brief Default constructor - * - */ - Current(); - - /** \brief Standard constructor - * - * \param paramSet allows to initiate the correct structure - * of populations. - * - */ - Current(ParamSet* paramSet); - - /** \brief Rebuilds the object - * - * \param paramSet allows to initiate the correct structure - * of populations. 
- * - */ - void reset(ParamSet* paramSet); - - /** \brief Destructor - * - */ - virtual ~Current(); - - /** \brief Copy constructor - * - */ - Current(const Current&); - - /** \brief Assignment operator - * - */ - Current& operator=(const Current&); - - /** \brief Gets the current number of populations - * - */ - unsigned int numberOfPopulations() const; - - - /** \brief Adds an empty population to the system - * - */ - void addPopulation(); - - - /** \brief Gets the number of lineages contained by a given - * population - * - */ - unsigned int populationNumberOfLineages(unsigned int populationIndex) const; - - - /** \brief Provides access to a given population - * - * The returned pointer can be used to modify the object. - * - */ - Population* population(unsigned int populationIndex); - - - /** \brief Total number of lineages - * - */ - unsigned int totalNumberOfLineages() const; - - - /** \brief Efficient number of lineages - * - * This sums the number of covered segments of each lineage. - * - */ - unsigned int efficientNumberOfLineages() const; - - - private: - - void setPopulationArray(); - void copy(const Current&); - void clear(); - - unsigned int _numberOfPopulations; - unsigned int _numberOfSegments; - Population** populations; - }; - -} - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/DataMatrix.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/DataMatrix.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,219 +0,0 @@ -/* - Copyright 2009-2010 Stéphane De Mita, Mathieu Siol - - This file is part of the EggLib library. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef EGGLIB_DATAMATRIX_HPP -#define EGGLIB_DATAMATRIX_HPP - - -#include "CharMatrix.hpp" - - -namespace egglib { - - /** \brief Data table - * - * \ingroup core - * - * Holds a data matrix representing genotype data from simulations. - * Data are stored as integers, to each site is associated a - * position, and to each sequence is associated a group index (any - * integer labelling, for example, a subpopulation). Supports the - * CharMatrix interface with the condition that allele genotype - * datum is >=0 and <=9. - * - */ - class DataMatrix : public CharMatrix { - - public: - - /** \brief Default constructor - * - * The data table default dimensions are {0,0} - * - */ - DataMatrix(); - - - /** \brief Standard constructor - * - * The data table dimensions must be given. - * Each cell default default is 0, and each site position is 0.. - * - * \param numberOfSequences number of lines of the data table. - * \param numberOfSites number of columns of the data table. 
- * - */ - DataMatrix(unsigned int numberOfSequences, unsigned int numberOfSites); - - - /** \brief Copy constructor - * - */ - DataMatrix(const DataMatrix&); - - - /** \brief Copy constructor - * - */ - DataMatrix(const CharMatrix&); - - - /** \brief Assignment operator - * - */ - virtual DataMatrix& operator=(const DataMatrix&); - - - /** \brief Assignment operator - * - */ - virtual DataMatrix& operator=(const CharMatrix&); - - - /** \brief Destructor - * - */ - virtual ~DataMatrix(); - - - /** \brief Gets number of sites - * - */ - unsigned int numberOfSites() const; - - - /** \brief Gets number of sequences - * - */ - unsigned int numberOfSequences() const; - - - /** \brief Sets a value of the data table - * - */ - void set(unsigned int sequence, unsigned int site, int value); - - - /** \brief Gets a value from the data table - * - */ - int get(unsigned int sequence, unsigned int site) const; - - - /** \brief Faster and unsecure version of get - * - */ - inline int fget(unsigned int sequence, unsigned int site) const { - return dataMatrix[sequence][site]; - } - - - /** \brief Sets the position of a site - * - */ - void sitePosition(unsigned int site, double value); - - - /** \brief Gets the position of a site - * - */ - double sitePosition(unsigned int site) const; - - - /** \brief Sets the group label of a sequence - * - */ - void populationLabel(unsigned int sequence, unsigned int value); - - - /** \brief Gets the group label of a sequence - * - */ - unsigned int populationLabel(unsigned int sequence) const; - - - /** \brief Removes all information from the object - * - */ - void clear(); - - - /** \brief Resizes the data matrix - * - * \param newNumberOfSequences number of sequences (rows) - * \param newNumberOfSites number of sites (columns) - * - * If new values are larger, data already set is left unchanged. - * New data are set to zero. 
- * - */ - void resize(unsigned int newNumberOfSequences, unsigned int newNumberOfSites); - - - /** \brief Shifts allele value - * - * \param minimum the minimum allele value. - * - * Shifts all alleles at all sites to ensure that alleles alleles - * are equal to or larger than minimum. The shifting is specific - * to each site. - * - */ - void shift(int minimum); - - /** \brief Gets the character at a given position - * - * An exception is generated if the allele value at this - * position is not >=0 and <=9. Not out-of-bound check is - * performed. - * - */ - char character(unsigned int sequence, unsigned int site) const; - - - - private: - - // Initializes to default values (for empty object) - void init(); - - // Copies from a source object - virtual void copy(const CharMatrix&); - - // Copies from a source object - virtual void copy(const DataMatrix&); - - // Number of lines of the data matrix - unsigned int _numberOfSequences; - - // Number of columns of the data matrix - unsigned int _numberOfSites; - - // Data matrix - int **dataMatrix; - - // Vector of site positions - double *positions; - - // Vector of group indices - unsigned int *groups; - }; -} - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Edge.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Edge.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,331 +0,0 @@\n-/*\r\n- Copyright 2009-2010 St\xe9phane De Mita, Mathieu Siol\r\n-\r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-#ifndef EGGLIB_EDGE_HPP\r\n-#define EGGLIB_EDGE_HPP\r\n-\r\n-#include <vector>\r\n-#include <climits>\r\n-#include "EggException.hpp"\r\n-\r\n-namespace egglib {\r\n-\r\n- class Random;\r\n-\r\n- /** \\brief Edge of the ancestral recombination graph\r\n- *\r\n- * \\ingroup coalesce\r\n- *\r\n- * Each Edge instance provides access to its 0, 1 or 2 descendants\r\n- * (the former holds for a terminal node, the middle for the parent\r\n- * of a recombined node and the latter for the parent of a coalesced\r\n- * node (most classical node in the coalescent).The Edge also\r\n- * provides to the edge length. Note that the Edge instance must be\r\n- * understood as an ARG node and the branch above it (latter in the\r\n- * coalescence process). Edge instances also keep track of the list\r\n- * of descendants descending from this node (which may differ along\r\n- * recombining segment). Edge instances *must* be created through one\r\n- * of the "default" and "coalescence" constructors or through the \r\n- * recombination method. 
Edge instances should never be copied but\r\n- * manipulated by references.\r\n- * \r\n- */\r\n- class Edge {\r\n- \r\n- public:\r\n- \r\n- /// Destructor\r\n- virtual ~Edge();\r\n- \r\n- /** \\brief Constructor\r\n- * \r\n- * \\param numberOfSegments the number of recombining segments\r\n- * (one for a non-recombining region).\r\n- * \r\n- * Use the Pool, instead. Objects are delivered with a\r\n- * complete coverage.\r\n- * \r\n- */ \r\n- Edge(unsigned int numberOfSegments);\r\n-\r\n-\r\n- /// Restore object to `factory` state\r\n- void reset();\r\n-\r\n-\r\n- /** \\brief Builds for internal node\r\n- * \r\n- * \\param date the date of creation of the edge.\r\n- * \\param son1 first edge descending from this edge.\r\n- * \\param son2 second edge descending from this edge.\r\n- * \\param edgesPerSegments counts the current number of\r\n- * (non-coalesced lineages for each lineages); must have the\r\n- * appropriate size and will be updated.\r\n- * \\param MRCA the list where to place the address of segment\r\n- * MRCA, if it occurs.\r\n- * \\param totalLength the total length of the tree.\r\n- * \\param segmentLengths the table of tree lengths per\r\n- * segment.\r\n- *\r\n- * Assumes the current object has the correct number of\r\n- * segments.\r\n- * \r\n- */\r\n- void coalescence(double date, Edge* son1, Edge* son2,\r\n- unsigned int* edgesPerSegments, Edge** MRCA,\r\n- double& totalLength, double* segmentLengths);\r\n-\r\n-\r\n- /** \\brief Generates a recombination event\r\n- * \r\n- * \\param date the date of the event.\r\n- * \\param dest1 destination for the first resulting edge.\r\n- * \\param dest2 destination for the second resulting edge.\r\n- * \\param random pointer to the Random instance used by the \r\n- * simulator.\r\n- '..b"numberOfSegments) {\r\n- if (segbools[i]==true) {\r\n- totalLength += segments[i]*length;\r\n- for (j=0; j<segments[i]; j++) {\r\n- segmentLengths[i+j] += length;\r\n- }\r\n- }\r\n- i+=segments[i];\r\n- }\r\n- 
}\r\n-\r\n-\r\n- };\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n-\r\n- /** \\brief Pool of Edge objects\r\n- *\r\n- * \\ingroup coalesce\r\n- *\r\n- * Holds a pool of Edge objects that can be recycled to spare the\r\n- * building burden. A construction time, a number of Edge objects\r\n- * equals to the predicted number of needed instances should be\r\n- * requested. The Edge's will be prebuilt immediately and delivered\r\n- * upon request. After use, the Edge's should be released. It is only\r\n- * possible to release the last issued Edge instance or all of them\r\n- * at once.\r\n- * \r\n- */\r\n- class EdgePool {\r\n-\r\n- public:\r\n- \r\n- /// Default constructor (nothing allocated)\r\n- EdgePool();\r\n-\r\n-\r\n- /// Destructor\r\n- virtual ~EdgePool();\r\n-\r\n-\r\n- /** \\brief Configure pool\r\n- * \r\n- * Pre-allocates a given number of Edge objects. The objects\r\n- * will be immediately available.\r\n- * \r\n- * Data previously allocated (by a previous call of this\r\n- * function or by the deliver() method) will be lost so it\r\n- * can be required to use clear() before.\r\n- * \r\n- * \\param numberOfSegments the number of segments of the\r\n- * simulation; all Edge instances will use this value.\r\n- * \r\n- * \\param numberOfPreAllocated the number of Edge that should\r\n- * be kept ready for immediate use.\r\n- * \r\n- */\r\n- void set(unsigned int numberOfSegments, unsigned numberOfPreAllocated);\r\n- \r\n- \r\n- /** \\brief Frees internally stored memory\r\n- * \r\n- * This invalidate all points that have been delivered\r\n- * previously. However, any previously set number of segments\r\n- * (0, by default) is retained.\r\n- * \r\n- */\r\n- void clear();\r\n-\r\n- \r\n- /** \\brief Deliver an Edge\r\n- * \r\n- * The object must not be freed by the client! 
This object is\r\n- * allocated on the heap if the cache is not large enough,\r\n- * only reset if it was previously released, or just delivered\r\n- * if it is one of the initially allocated instances.\r\n- * \r\n- */\r\n- Edge* deliver();\r\n- \r\n- \r\n- /** \\brief Release an Edge\r\n- * \r\n- * Release the last delivered Edge. The instance is only\r\n- * cached for a potential future use; it is not freed nor\r\n- * reset immediately. If no Edge's are in use, nothing is\r\n- * done.\r\n- * \r\n- */\r\n- void releaseLast();\r\n-\r\n- \r\n- /** \\brief Release all Edge's\r\n- * \r\n- * Release all delivered Edges. The instances are only\r\n- * cached for a potential future use; they are not freed nor\r\n- * reset immediately. If no Edge's are in use, nothing is\r\n- * done.\r\n- * \r\n- */\r\n- void releaseAll();\r\n-\r\n- private:\r\n- \r\n- /// Not available\r\n- EdgePool(const EdgePool& ep) {}\r\n- \r\n- /// Not available\r\n- EdgePool& operator=(const EdgePool& ep) { return *this; }\r\n-\r\n- unsigned int numberOfSegments;\r\n- unsigned int used;\r\n- unsigned int released;\r\n- unsigned int ready;\r\n- Edge** cache;\r\n- \r\n- };\r\n-\r\n-}\r\n-\r\n-#endif\r\n" |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/EggException.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/EggException.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,173 +0,0 @@ -/* - Copyright 2009 Stéphane De Mita, Mathieu Siol - - This file is part of the EggLib library. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef EGGLIB_EGGEXCEPTION_HPP -#define EGGLIB_EGGEXCEPTION_HPP - -#include <string> -#include <exception> - -namespace egglib { - - /** \brief Base exception type for errors occurring in this library - * - * \ingroup core - * - */ - class EggException : public std::exception { - public: - /// Constructor with empty error message - EggException(); - /// Creates the exception - EggException(const char* message); - /// Destructor - ~EggException() throw() {} - /// Gets error message - virtual const char* what() const throw(); - - protected: - std::string message; - - }; - - - /** \brief Exception type for memory errors - * - * \ingroup core - * - */ - class EggMemoryError : public EggException { - public: - /// Creates the exception - EggMemoryError(); - /// Destructor - ~EggMemoryError() throw() {} - }; - - - /** \brief Exception type for argument value errors - * - * \ingroup core - * - */ - class EggArgumentValueError : public EggException { - public: - /// Creates the exception - EggArgumentValueError(const char* m ); - /// Destructor - ~EggArgumentValueError() throw() {} - }; - - - /** \brief Exception type for runtime errors - * - * Runtime error definition is rather large. Includes bugs as well - * as logical errors. 
- * - * \ingroup core - * - */ - class EggRuntimeError : public EggException { - public: - /// Creates the exception - EggRuntimeError(const char* m ); - /// Destructor - ~EggRuntimeError() throw() {} - }; - - - /** \brief Exception type for file/string formatting errors - * - * \ingroup core - * - */ - class EggFormatError : public EggException { - public: - /// Creates the exception - EggFormatError(const char* fileName, const char* expectedFormat, const char* m); - /// Destructor - ~EggFormatError() throw() {} - /// Gets the file name - std::string fileName() const; - /// Gets the expected format - std::string expectedFormat() const; - /// Formats a longer string - virtual const char* what_more() const; - - protected: - std::string fname; - std::string eformat; - }; - - - /** \brief Exception type for errors while opening a file - * - * \ingroup core - * - */ - class EggOpenFileError : public EggException { - public: - /// Creates the exception - EggOpenFileError(const char* fileName ); - /// Destructor - ~EggOpenFileError() throw() {} - }; - - - /** \brief Exception type for unaligned sequences - * - * \ingroup core - * - */ - class EggUnalignedError : public EggException { - public: - /** \brief Creates the exception - * - */ - EggUnalignedError(); - - /** \brief Destructor - * - */ - ~EggUnalignedError() throw() {} - }; - - /** \brief Exception type for invalid character - * - * \ingroup core - * - */ - class EggInvalidCharacterError : public EggException { - public: - /** \brief Creates the exception - * - */ - EggInvalidCharacterError(char c, unsigned int seqIndex, unsigned int posIndex); - - /** \brief Destructor - * - */ - ~EggInvalidCharacterError() throw() {} - }; - -} - - - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/FStatistics.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/FStatistics.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,288 +0,0 @@\n-/*\r\n- Copyright 2009 St\xe9phane De Mita, Mathieu Siol\r\n-\r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-#ifndef EGGLIB_FSTATISTICS_HPP\r\n-#define EGGLIB_FSTATISTICS_HPP\r\n-\r\n-\r\n-\r\n-namespace egglib {\r\n-\r\n-\r\n- /** \\brief Computes Fis, Fst and Fit from diploid data\r\n- *\r\n- * The class requires loading data. Data are loaded by individual\r\n- * (two genotypes per individual). The analyses are cached: they are\r\n- * performed upon the first call to statistics accessors. The cache\r\n- * is emptied whenever a datum is loaded.\r\n- * \r\n- * The computations are performed after Weir and Cockerham. The\r\n- * statistics F, theta and f are generalized for multiple alleles.\r\n- * To allow computation of multi-locus statistics, variance\r\n- * components are also available. The three components of the\r\n- * variance are Vpopulation (between-population), Vindividual\r\n- * (within-population, between-individual) and Vallele (within-\r\n- * individual). 
The formulas to compute the F-statistics are as\r\n- * follows:\r\n- * - 1-F = Vallele/(Vpopulation+Vindividual+Vallele)\r\n- * - theta = Vpopulation/(Vpopulation+Vindividual+Vallele)\r\n- * - 1-f = Vallele/(Vindividual+Vallele).\r\n- * \r\n- * \\ingroup polymorphism\r\n- *\r\n- */\r\n- class FStatistics {\r\n- \r\n- public:\r\n- \r\n- /** \\brief Constructor\r\n- * \r\n- */ \r\n- FStatistics();\r\n-\r\n- \r\n- /** \\brief Destructor\r\n- * \r\n- */ \r\n- virtual ~FStatistics();\r\n-\r\n- \r\n- /** \\brief Reserve sufficient memory for a given number of\r\n- * individuals.\r\n- * \r\n- * This method makes the load function faster by allocating\r\n- * all required memory at once.\r\n- * \r\n- * \\param numberOfIndividuals a strictly positive integer.\r\n- * \r\n- */\r\n- void reserve(unsigned int numberOfIndividuals);\r\n-\r\n-\r\n- /** \\brief Loads the data for one individual\r\n- * \r\n- * \\param genotype1 an integer giving the first allele.\r\n- * \\param genotype2 an integer giving the second allele.\r\n- * \\param populationLabel an integer indication belonging to\r\n- * a population.\r\n- * \r\n- * Genotypes and population labels are not required to be\r\n- * consecutive (both are labels, not indices). They are\r\n- * internally mapped to indices (the mapping can be obtained\r\n- * by accessors populationLabel and allele).\r\n- * \r\n- * All genotypes are considered to be valid (no missing data).\r\n- * If statistics were computed previous to call to this\r\n- * function, all data will be erase.\r\n- * \r\n- */\r\n- void loadIndividual(unsigned int genotype1,\r\n- unsigned int genotype2, unsigned int populationLabel);\r\n-\r\n-\r\n- /** \\brief Label of a population\r\n- * \r\n- * The index corresponds to the local mapping of populations\r\n- * regardless of the ranking of population labels. 
(No out\r\n- * of bound checking.)\r\n- * \r\n- */\r\n- unsigne'..b'leIndex);\r\n-\r\n-\r\n- /** \\brief Absolute genotype frequency\r\n- * \r\n- * Note that allele AB is considered different to BA (this\r\n- * means that values can be accessed both sides of the\r\n- * diagonal.\r\n- * \r\n- */\r\n- unsigned int genotypeFrequencyTotal(unsigned int alleleIndex1, unsigned int alleleIndex2);\r\n-\r\n-\r\n- /** \\brief Absolute genotype frequency in a population\r\n- * \r\n- * Note that allele AB is considered different to BA (this\r\n- * means that values can be accessed both sides of the\r\n- * diagonal.\r\n- * \r\n- */\r\n- unsigned int genotypeFrequencyPerPopulation(unsigned int populationIndex, unsigned int alleleIndex1, unsigned int alleleIndex2);\r\n-\r\n- \r\n- /** \\brief Sample size of a population\r\n- * \r\n- */\r\n- unsigned int populationFrequency(unsigned int populationIndex);\r\n-\r\n-\r\n- /** \\brief Weir-Cockerham F-statistic\r\n- * \r\n- * Note: equivalent to Fit.\r\n- * \r\n- */\r\n- double F();\r\n-\r\n-\r\n- /** \\brief Weir-Cockerham theta-statistic\r\n- * \r\n- * Note: equivalent to Fst.\r\n- * \r\n- */\r\n- double theta();\r\n-\r\n-\r\n- /** \\brief Weir-Cockerham f-statistic\r\n- * \r\n- * Note: equivalent to Fis.\r\n- * \r\n- */\r\n- double f();\r\n- \r\n-\r\n- /** \\brief Between-population component of variance\r\n- * \r\n- */\r\n- double Vpopulation();\r\n-\r\n-\r\n- /** \\brief Within-population, between-individual component of variance\r\n- * \r\n- */\r\n- double Vindividual();\r\n- \r\n- \r\n- /** \\brief Within-individual component of variance\r\n- * \r\n- */\r\n- double Vallele();\r\n-\r\n-\r\n- protected:\r\n- \r\n- bool d_flag;\r\n- void d_init();\r\n- void d_clear();\r\n- unsigned int d_reserved;\r\n- unsigned int d_numberOfGenotypes;\r\n- unsigned int *d_genotypes;\r\n- unsigned int *d_populationLabels;\r\n-\r\n- bool s_flag;\r\n- void s_init();\r\n- void s_clear();\r\n- void s_compute();\r\n- void processPopulations();\r\n- void 
processAlleles();\r\n- unsigned int getPopulationIndex(unsigned int) const;\r\n- unsigned int getAlleleIndex(unsigned int) const;\r\n- unsigned int s_numberOfAlleles;\r\n- unsigned int *s_alleleValueMapping;\r\n- unsigned int s_numberOfPopulations;\r\n- unsigned int *s_populationLabelMapping;\r\n- unsigned int *s_populationFrequencies;\r\n- unsigned int *s_alleleFrequenciesTotal;\r\n- unsigned int **s_alleleFrequenciesPerPopulation;\r\n- unsigned int **s_genotypeFrequenciesTotal;\r\n- unsigned int ***s_genotypeFrequenciesPerPopulation;\r\n-\r\n- bool w_flag;\r\n- void w_init();\r\n- void w_clear();\r\n- void w_compute();\r\n- double w_F;\r\n- double w_T;\r\n- double w_f;\r\n- double *w_a;\r\n- double *w_b;\r\n- double *w_c;\r\n- double w_nbar;\r\n- double w_nc;\r\n- double *w_pbar;\r\n- double *w_ssquare;\r\n- double *w_hbar;\r\n- double w_sum_a;\r\n- double w_sum_b;\r\n- double w_sum_c;\r\n- double w_sum_abc;\r\n- double w_sum_bc;\r\n-\r\n- \r\n- private:\r\n- \r\n- FStatistics(const FStatistics& source) { }\r\n- \r\n- FStatistics& operator=(const FStatistics& source) {\r\n- return *this;\r\n- }\r\n-\r\n- };\r\n-}\r\n-\r\n-#endif\r\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Fasta.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Fasta.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,278 +0,0 @@\n-/*\r\n- Copyright 2008-2009 St\xe9phane De Mita, Mathieu Siol\r\n-\r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-#ifndef EGGLIB_FASTA_HPP\r\n-#define EGGLIB_FASTA_HPP\r\n-\r\n-#include <istream>\r\n-#include <iostream>\r\n-#include <string>\r\n-#include "Container.hpp"\r\n-\r\n-namespace egglib {\r\n-\r\n- /** \\brief Fasta parser/formatted\r\n- *\r\n- * \\ingroup core\r\n- *\r\n- * Reads a multifasta sequence file from a string, a stream or a file\r\n- * and returns a Container. See the description of the format below.\r\n- * Formats a fasta string from a sequence container object and places\r\n- * it in a string, a stream of a file. All methods are static and the\r\n- * class cannot be instantiated. The methods parsef and formatf will\r\n- * open the file for you while the others will read/write directly\r\n- * in a string.\r\n- * \r\n- * Specifications of the fasta format:\r\n- * \r\n- * - The number of sequences is not limited.\r\n- * \r\n- * - Each sequence is preceded by a header limited to a single\r\n- * line and starting by a ">" character.\r\n- * \r\n- * - The header length is not limited and all characters are\r\n- * allowed but white spaces and special characters are\r\n- * discouraged.\r\n- * \r\n- * - Group indices are specified by \\@0, \\@1, \\@2... 
strings\r\n- * appearing at the end of the header string (just before the\r\n- * carriage return). Note that group labels are ignored by\r\n- * default.\r\n- * \r\n- * - Group indices are ignored unless specifically specified in a\r\n- * parser\'s options.\r\n- * \r\n- * - The sequence itself continues on following lines until the\r\n- * next ">" character or the end of the file.\r\n- * \r\n- * - White spaces, tab and carriage returns are allowed at any\r\n- * position There is no limitation in length and different\r\n- * sequences can have different lengths.\r\n- * \r\n- * - Although the standard is lower case characters, Fasta\r\n- * assumes upper case characters and only supports lower case\r\n- * characters (and converts them to upper case characters).\r\n- * Information coded by change in case is lost.\r\n- *\r\n- */\r\n- class Fasta {\r\n-\r\n- public:\r\n-\r\n- /** \\brief Imports a fasta file\r\n- *\r\n- * Imports the content of the file as is. Calls the method\r\n- * pase(std::istream*, bool) by creating its own istream.\r\n- *\r\n- * \\param fname the name of a fasta file.\r\n- * \r\n- * \\param importGroupLabels if set to true, scan automatically\r\n- * for groups. The format is @ followed by an integer, placed\r\n- * at the end of the header string(sequences without labels\r\n- * will be treated as \\@0).\r\n- * \r\n- * \\return A Container object containing the sequences.\r\n- * \r\n- */\r\n- static Container parsef(const char* fname, bool importGroupLabels=false);\r\n-\r\n-\r\n- /** \\brief Imports a fasta file\r\n- *\r\n- * Imports the content of the file as is. Calls the method\r\n- * pase(std::istream*, bool) by creating its own istream. 
This\r\n- * method expects a reference to a Container to which the\r\n- * sequences will be appended.\r\n- *\r\n-'..b'ner object containing the sequences.\r\n- * \r\n- */\r\n- static Container parse(std::istream& stream, bool importGroupLabels=false);\r\n-\r\n-\r\n- /** \\brief Imports a fasta file from an open stream\r\n- *\r\n- * Imports the content of the file as is. This\r\n- * method expects a reference to a Container to which the\r\n- * sequences will be appended.\r\n- *\r\n- * \\param stream an open stream (file or string) containing the\r\n- * data.\r\n- * \r\n- * \\param container a Container instance, empty or not.\r\n- * \r\n- * \\param importGroupLabels if set to true, scan automatically\r\n- * for groups. The format is @ followed by an integer, placed\r\n- * at the end of the header string(sequences without labels\r\n- * will be treated as \\@0).\r\n- * \r\n- * \\return Nothing: the new sequences are appended to the\r\n- * Container passed as argument.\r\n- * \r\n- */\r\n- static void parse(std::istream& stream, Container& container, bool importGroupLabels=false);\r\n- \r\n- \r\n- /** \\brief Export sequences as fasta\r\n- *\r\n- * \\param fname the name of the file where to place the result.\r\n- * \r\n- * \\param container Container object to export.\r\n- * \r\n- * \\param exportGroupLabels if set to true, exports group\r\n- * indices as a \\@x at the end of the sequence name, where x is\r\n- * the group index. Otherwise, this information is discarded.\r\n- * \r\n- * \\param lineLength the number of characters to place on a\r\n- * single line. 
If zero, no newlines are inserted within\r\n- * sequences.\r\n- * \r\n- */\r\n- static void formatf(const char* fname, const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50);\r\n-\r\n-\r\n- /** \\brief Export sequences as fasta\r\n- *\r\n- * \\param file an open stream.\r\n- * \r\n- * \\param container Container object to export.\r\n- * \r\n- * \\param exportGroupLabels if set to true, exports group\r\n- * indices as a \\@x at the end of the sequence name, where x is\r\n- * the group index. Otherwise, this information is discarded.\r\n- * \r\n- * \\param lineLength the number of characters to place on a\r\n- * single line. If zero, no newlines are inserted within\r\n- * sequences.\r\n- * \r\n- */\r\n- static void format(std::ostream& file, const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50);\r\n-\r\n-\r\n- /** \\brief Export sequences as fasta\r\n- * \r\n- * This medod creates internally an ostringstream, calls the\r\n- * method format(ostream, container, bool) and returns the\r\n- * resulting string.\r\n- *\r\n- * \\param container Container object to export.\r\n- * \r\n- * \\param exportGroupLabels if set to true, exports group\r\n- * indices as a \\@x at the end of the sequence name, where x is\r\n- * the group index. Otherwise, this information is discarded.\r\n- * \r\n- * \\param lineLength the number of characters to place on a\r\n- * single line. 
If zero, no newlines are inserted within\r\n- * sequences.\r\n- * \r\n- * \\return The formatted string.\r\n- * \r\n- */\r\n- static std::string format(const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50);\r\n-\r\n- \r\n- \r\n- protected:\r\n- \r\n- /// This class cannot be instantiated\r\n- Fasta() { }\r\n- \r\n- /// This class cannot be instantiated\r\n- Fasta(const Fasta& source) { }\r\n- \r\n- /// This class cannot be or copied\r\n- Fasta& operator=(const Fasta& source) { return *this; }\r\n- \r\n- /// This class cannot be instantiated\r\n- virtual ~Fasta() { }\r\n-\r\n- \r\n- };\r\n-}\r\n-\r\n-#endif\r\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/HFStatistics.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/HFStatistics.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,227 +0,0 @@ -/* - Copyright 2010 St�phane De Mita, Mathieu Siol - - This file is part of the EggLib library. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef EGGLIB_HFSTATISTICS_HPP -#define EGGLIB_HFSTATISTICS_HPP - - - -namespace egglib { - - - /** \brief Computes Fst and Fit from haploid data - * - * The class requires loading data. Data are loaded by haploid - * (one genotype per individual). The analyses are cached: they are - * performed upon the first call to statistics accessors. The cache - * is emptied whenever a datum is loaded. - * - * The computations are performed after Weir and Cockerham. The - * statistic theta is generalized for multiple alleles. To allow - * computation of multi-locus statistics, variance components are - * also available. The two components of the variance are T1 and T2 - * and theta is T1/T2 (from Weir 1996 "Genetic Data Analysis II", - * Sinauer associates, Sunderland MA). - * - * \ingroup polymorphism - * - */ - class HFStatistics { - - public: - - /** \brief Constructor - * - */ - HFStatistics(); - - - /** \brief Destructor - * - */ - virtual ~HFStatistics(); - - - /** \brief Reserve sufficient memory for a given number of - * individuals. - * - * This method makes the load function faster by allocating - * all required memory at once. - * - * \param numberOfIndividuals a strictly positive integer. 
- * - */ - void reserve(unsigned int numberOfIndividuals); - - - /** \brief Loads the data for one individual - * - * \param genotype an integer giving the allele. - * \param populationLabel an integer indication belonging to - * a population. - * - * Genotypes and population labels are not required to be - * consecutive (both are labels, not indices). They are - * internally mapped to indices (the mapping can be obtained - * by accessors populationLabel and allele). - * - * All genotypes are considered to be valid (no missing data). - * If statistics were computed previous to call to this - * function, all data will be erased. - * - */ - void loadIndividual(unsigned int genotype, unsigned int populationLabel); - - - /** \brief Label of a population - * - * The index corresponds to the local mapping of populations - * regardless of the ranking of population labels. (No out - * of bound checking.) - * - */ - unsigned int populationLabel(unsigned int populationIndex); - - - /** \brief Value of an allele - * - * The index corresponds to the local mapping of alleles - * regardless of the ranking of allele values. (No out of - * bound checking.) 
- * - */ - unsigned int alleleValue(unsigned int alleleIndex); - - - /// Allele of a given individual (no checking) - unsigned int allele(unsigned int individualIndex) const; - - /// Population label of a given individual (no checking) - unsigned int individualLabel(unsigned int individualIndex) const; - - - /** \brief Number of alleles - * - */ - unsigned int numberOfAlleles(); - - - /** \brief Number of populations - * - */ - unsigned int numberOfPopulations(); - - - /** \brief Number of loaded genotypes - * - */ - unsigned int numberOfGenotypes() const; - - - /** \brief Absolute total allele frequency - * - */ - unsigned int alleleFrequencyTotal(unsigned int alleleIndex); - - - /** \brief Absolute allele frequency in a population - * - */ - unsigned int alleleFrequencyPerPopulation(unsigned int populationIndex, unsigned int alleleIndex); - - - /** \brief Sample size of a population - * - */ - unsigned int populationFrequency(unsigned int populationIndex); - - - /** \brief Weir-Cockerham theta-statistic - * - * Note: equivalent to Fst. 
- * - */ - double theta(); - - - /** \brief Between-population component of variance - * - */ - double T1(); - - - /** \brief Total variance - * - */ - double T2(); - - - protected: - - bool d_flag; - void d_init(); - void d_clear(); - unsigned int d_reserved; - unsigned int d_numberOfGenotypes; - unsigned int *d_genotypes; - unsigned int *d_populationLabels; - - bool s_flag; - void s_init(); - void s_clear(); - void s_compute(); - void processPopulations(); - void processAlleles(); - unsigned int getPopulationIndex(unsigned int) const; - unsigned int getAlleleIndex(unsigned int) const; - unsigned int s_numberOfAlleles; - unsigned int *s_alleleValueMapping; - unsigned int s_numberOfPopulations; - unsigned int *s_populationLabelMapping; - unsigned int *s_populationFrequencies; - unsigned int *s_alleleFrequenciesTotal; - unsigned int **s_alleleFrequenciesPerPopulation; - - bool w_flag; - void w_init(); - void w_clear(); - void w_compute(); - double w_T; - double *w_T1; - double *w_T2; - double w_nbar; - double w_nc; - double *w_pbar; - double *w_ssquare; - double w_sum_T1; - double w_sum_T2; - - - private: - - HFStatistics(const HFStatistics& source) { } - - HFStatistics& operator=(const HFStatistics& source) { - return *this; - } - - }; -} - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/HaplotypeDiversity.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/HaplotypeDiversity.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,176 +0,0 @@ -/* - Copyright 2008-2009 Stéphane De Mita, Mathieu Siol - - This file is part of the EggLib library. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - - -#ifndef EGGLIB_HAPLOTYPEDIVERSITY_HPP -#define EGGLIB_HAPLOTYPEDIVERSITY_HPP - -#include "BaseDiversity.hpp" - -namespace egglib { - - - /** \brief Computes diversity based on haplotype analysis - * - * \ingroup polymorphism - * - * This class relies on detection of polymorphic sites, as does - * NucleotideDiversity, with the exception that sites with missing - * data cannot be processed (minimumExploitableData is enforced to - * 1.). - * - * Like NucleotideDiversity, the same object can be used to analyze - * different data sets. Only the call to load() is required before - * accessing the data. - * - * Hst, Gst and Kst are between population differenciation indices. - * They are respectively defined in equations 2, 5-6 and 9 of Hudson - * et al. 1992a (Molecular Biology and Evolution 9:138-151). Also, - * Fst is defined in equation 3 of Hudson et al. 1992b (Genetics - * 132:583-589). Finally, Snn is from Hudson 2000 Genetics. It is - * computed as the average of Xi for all sequences. Where Xi is the - * ratio of nearest neighbours from the same group to the number of - * nearest neighbours. Nearest neigbours are all the sequences with - * the lowest number of differences to the focal sequence. 
NOTE: - * Gst/Hst are quite similar, but Fst and Kst are more different. Snn - * is a different statistic. Gst and Hst are two ways to estimate the - * between-population fraction of haplotypic diversity. - * - */ - class HaplotypeDiversity : public BaseDiversity { - - public: - - /** \brief Constructor - * - */ - HaplotypeDiversity(); - - /** \brief Destructor - * - */ - virtual ~HaplotypeDiversity(); - - /** \brief Identifies polymorphic sites and computes basis - * statistics - * - * \param data an alignment object (subclass of CharMatrix). - * The presence of outgroup or of different populations will - * be detected based on the populationLabel members of the - * passed object. The populationLabel 999 will be interpreted - * as outgroups. If several outgroups are passed, sites were - * the outgroups are not consistent will be treated as "non- - * orientable". - * - * \param allowMultipleMutations if true, sites with more - * than two alleles will not be ignored. The sum of the - * frequencies of all alleles not matching the outgroup will - * treated as the derived allele frequency (for orientable - * sites). - * - * \param ignoreFrequency removes sites that are polymorph - * because of an allele at absolute frequency smaller than or - * equal to this value. If ignoreFrequency=1, no sites are - * removed, if ignoreFrequency=1, singleton sites are - * ignored. Such sites are completely removed from the - * analysis (not counted in lseff). Note that if more than - * one mutation is allowed, the site is removed only if all - * the alleles but one are smaller than or equal to this - * value. For example, an alignment column AAAAAAGAAT is - * ignored with an ignoreFrequency of 1, but AAAAAAGGAT is - * conserved (including the third allele T which is a - * singleton). - * - * \param characterMapping a string giving the list of - * characters that should be considered as valid data. 
If a - * space is present in the string, the characters left of the - * space will be treated as valid data and the characters - * right of the space will be treated as missing data, that - * is tolerated but ignored. All characters not in the string - * will cause an EggInvalidCharacterError to be raised. - * - */ - void load(CharMatrix& data, - bool allowMultipleMutations=false, - unsigned int ignoreFrequency=0, - std::string characterMapping=dnaMapping - ); - - /// Number of distinct haplotypes - unsigned int K() const; - - /// Haplotype diversity (unbiased) - double He() const; - - /** \brief Returns the allele number of a given sequence - * - * The passed index must be given ignoring any outgroup - * sequence. - * - */ - unsigned int haplotypeIndex(unsigned int) const; - - /// Population differenciation, based on nucleotides (Hudson 1992a) - double Kst() const; - - /// Population differenciation, based on nucleotides (Hudson 1992b) - double Fst() const; - - /// Population differenciation, based on haplotypes (Nei version) - double Gst() const; - - /// Population differenciation, based on haplotypes (Hudson et al. version) - double Hst() const; - - /// Hudson's Snn (nearest neighbor statistics) - double Snn() const; - - - protected: - - void init(); - void clear(); - - inline unsigned int diff(CharMatrix& data, unsigned int ind1, unsigned int ind2) const; - - bool m_loaded; - unsigned int m_K; - double m_He; - double m_Kst; - double m_Fst; - double m_Gst; - double m_Hst; - double m_Snn; - unsigned int *m_haplotypeIndex; - - - private: - - HaplotypeDiversity(const HaplotypeDiversity& source) { - - } - - HaplotypeDiversity& operator=(const HaplotypeDiversity& source) { - return *this; - } - - }; -} - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/LinkageDisequilibrium.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/LinkageDisequilibrium.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,233 +0,0 @@\n-/*\r\n- Copyright 2009 St\xe9phane De Mita, Mathieu Siol\r\n-\r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-#ifndef EGGLIB_LINKAGEDISEQUILIBRUM_HPP\r\n-#define EGGLIB_LINKAGEDISEQUILIBRUM_HPP\r\n-\n-\r\n-#include "BaseDiversity.hpp"\r\n-#include "EggException.hpp"\r\n-\r\n-\r\n-namespace egglib {\r\n-\r\n- /** \\brief Analyzes linkage disequilibrium per pair of polymorphic sites\r\n- *\r\n- * \\ingroup polymorphism\r\n- * \r\n- * The class considers an alignment and detects polymorphic sites\r\n- * using the BaseDiversity functionality (shared with other classes\r\n- * of the module). Only sites with exactly two alleles are\r\n- * considered. Statistics of pairwise linkage disequilibrium can\r\n- * be accessed by pair index (note that out-of-range errors are not\r\n- * checked). 
Population labels are ignored (but outgroups are\r\n- * excluded from the analysis).\r\n- *\r\n- */\r\n- class LinkageDisequilibrium : public BaseDiversity {\r\n-\r\n- public:\n-\n- /// Default constructor\r\n- LinkageDisequilibrium();\n-\n- /// Destructor\r\n- virtual ~LinkageDisequilibrium();\r\n-\r\n- /** \\brief Analyzes polymorphic sites of an alignment\r\n- * \r\n- * \\param data an alignment object (subclass of CharMatrix).\r\n- * The presence of outgroup or of different populations will\r\n- * be detected based on the populationLabel members of the\r\n- * passed object. The populationLabel 999 will be interpreted\r\n- * as outgroups. If several outgroups are passed, sites were\r\n- * the outgroups are not consistent will be treated as "non-\r\n- * orientable".\r\n- * \r\n- * \\param minimumExploitableData site where the non-missing\r\n- * data (as defined by characterMapping) are at a frequency\r\n- * larger than this value will be removed from the analysis.\r\n- * Use 1. to take only \'complete\' sites into account and 0.\r\n- * to use all sites.\r\n- * \r\n- * \\param ignoreFrequency removes sites that are polymorphic\r\n- * because of an allele at absolute frequency smaller than or\r\n- * equal to this value. If ignoreFrequency=1, no sites are\r\n- * removed, if ignoreFrequency=1, singleton sites are\r\n- * ignored. Such sites are completely removed from the\r\n- * analysis (not counted in lseff). Note that if more than\r\n- * one mutation is allowed, the site is removed only if all\r\n- * the alleles but one are smaller than or equal to this\r\n- * value. For example, an alignment column AAAAAAGAAT is\r\n- * ignored with an ignoreFrequency of 1, but AAAAAAGGAT is\r\n- * conserved (including the third allele T which is a\r\n- * singleton).\r\n- * \r\n- * \\param characterMapping a string giving the list of\r\n- * characters that should be considered as valid data. 
If a\r\n- * space is present in the string, the characters left of the\r\n- * space will be treated as valid data and the characters\r\n- * right of the space will be treated as missing data, that\r\n- * is tolerated but ignored. All characters not in the string\r\n- * will cause an EggInvalidCharacterError to be raised.\r\n- */\r\n- void load(CharMatrix& data,\r\n- double minimumExploitableData=1.,\r\n- '..b'ir_index);\r\n-\r\n- /// position of the first site for a given pair\r\n- unsigned int site1(unsigned int pair_index);\r\n-\r\n- /// position of the second site for a given pair\r\n- unsigned int site2(unsigned int pair_index);\r\n-\r\n- /// correlation coefficient between r2 and distance\r\n- double correl() const;\r\n- \r\n- /** \\brief Computes the minimal number of recombination events\r\n- * \r\n- * The computation is performed as described in Hudson, RR and\r\n- * NL Kaplan. 1985. Statistical properties of the number of\r\n- * recombination events in the history of a sample of DNA\r\n- * sequences. Genetics 111: 147-164. The returned parameter is\r\n- * the minimal number of recombination events, given by the\r\n- * number of non-overlapping pairs of segregating sites violating\r\n- * the rule of the four gamete. Only sites with two alleles are\r\n- * considered. Note that homoplasy (multiple mutations) mimicks\r\n- * recombination. The result of this function is not stored\r\n- * in this instance, and re-computed at each call.\r\n- * \r\n- * \\param data the same CharMatrix instance as passed to the load\r\n- * method. 
The instance must not have been modified.\r\n- * \r\n- */\r\n- unsigned int Rmin(CharMatrix& data) const;\r\n-\r\n-\r\n-\r\n- protected:\r\n- \r\n- // adds a pair of polymorphic sites\r\n- // assume position2>position1,\r\n- // sites are polymorphic with exactly 2 alleles\r\n- void add(CharMatrix& data, unsigned int position1, unsigned int position2);\r\n-\r\n- // Constructor help\r\n- void init();\r\n- \r\n- // Destructor helper\r\n- void clear();\r\n- \r\n- // Resizes arrays\r\n- void reset();\r\n- \r\n- // Small helper\r\n- inline double min(double a, double b) { return (a>b)?a:b;}\r\n-\r\n- // Small helper\r\n- inline double max(double a, double b) { return (a>b)?b:a;}\r\n-\r\n- // Small helper\r\n- inline void check(unsigned int pos) { if (pos>=_n) throw EggRuntimeError("tried to access an invalid index"); }\r\n-\r\n- /* Performs correlation\r\n- *\r\n- * This function works independently from the rest of the class.\r\n- *\r\n- * \\param n length of data arrays.\r\n- * \\param x first data vector.\r\n- * \\param y second data vector.\r\n- * \\param r variable to receive the correlation coefficient.\r\n- * \\param a variable to receive the regression slope.\r\n- */\r\n- static void _correl(unsigned int n, const int* x, const double* y, double& r, double& a);\r\n-\r\n- // Distance between pairs\r\n- int* _d;\r\n- \r\n- // D (classical) measure of LD\r\n- double *_D;\r\n- \r\n- // D\'\r\n- double *_Dp;\r\n- \r\n- // r, correlation coefficient\r\n- double *_r;\r\n- \r\n- // square r\r\n- double *_r2;\r\n- \r\n- // Data array (not managed by the instance)\r\n- unsigned int *_site1;\r\n-\r\n- // Data array (not managed by the instance)\r\n- unsigned int *_site2;\r\n- \r\n- // Number of pairs\r\n- unsigned int _n;\r\n-\r\n- private:\r\n- \r\n- /// Copy constructor not available\r\n- LinkageDisequilibrium(const LinkageDisequilibrium&) { }\r\n-\r\n- /// Assignment operator not available\r\n- LinkageDisequilibrium& operator=(const LinkageDisequilibrium&) {\r\n- return 
*this;\r\n- }\r\n-\r\n-\r\n- class Interval {\r\n- public:\r\n- Interval(unsigned int, unsigned int);\r\n- unsigned int a() const;\r\n- unsigned int b() const;\r\n- bool good() const;\r\n- void set_false();\r\n- private:\r\n- unsigned int _a;\r\n- unsigned int _b;\r\n- unsigned int _good;\r\n- };\r\n-\r\n-\r\n- };\r\n-}\r\n-\r\n-#endif\r\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/MicrosatelliteDiversity.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/MicrosatelliteDiversity.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,120 +0,0 @@ -/* - Copyright 2008-2010 St�phane De Mita, Mathieu Siol - - This file is part of the EggLib library. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef EGGLIB_MICROSATELLITEDIVERSITY_HPP -#define EGGLIB_MICROSATELLITEDIVERSITY_HPP - -#include "DataMatrix.hpp" -#include <cstdlib> - -namespace egglib { - - /** \brief Analyzes microsatellite data - * - * \ingroup polymorphism - * - * Use the load() method to analyze data. All sites will be analyzed - * and accessors allow to access the value of a given statistics for - * a given site. There is no out-of-bound checking implemented in - * accessors. - * - */ - class MicrosatelliteDiversity { - - public: - - /** \brief Creates an object - * - */ - MicrosatelliteDiversity(); - - - /** \brief Destroys an object - * - */ - virtual ~MicrosatelliteDiversity(); - - - /** \brief Performs the analysis - * - * \param dataMatrix the object to analyze. - * - * \param missingData the integer identifying missing data. - * - * \param noMissingData if true, no allele will be - * excluded (including the one identified by the argument - * missingData). 
- * - */ - void load(const DataMatrix& dataMatrix, - int missingData=999, bool noMissingData=false); - - - /// Number of sites (or markers) - unsigned int numberOfSites() const; - - /// Heterozygosity - double He(unsigned int siteIndex) const; - - /// Number of alleles - unsigned int numberOfAlleles(unsigned int siteIndex) const; - - /// Variance of allele size - double sizeVariance(unsigned int siteIndex) const; - - /// IAM-based estimator of theta - double thetaAssumingIAM(unsigned int siteIndex) const; - - /// SMM-based estimator of theta, calculated from He - double thetaAssumingSMMfromHe(unsigned int siteIndex) const; - - /// SMM-based estimator of theta, calculated from VarSize - double thetaAssumingSMMfromSizeVariance(unsigned int siteIndex) const; - - - protected: - - unsigned int v_numberOfSites; - double *v_He; - unsigned int *v_numberOfAlleles; - double *v_sizeVariance; - double *v_thetaAssumingIAM; - double *v_thetaAssumingSMMfromHe; - double *v_thetaAssumingSMMfromSizeVariance; - - void init(); - void clear(); - - - private: - - - /// No copy allowed - MicrosatelliteDiversity(const MicrosatelliteDiversity& source) { - } - - /// No copy allowed - MicrosatelliteDiversity& operator=(const MicrosatelliteDiversity& source) { - return *this; - } - - }; -} - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Ms.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Ms.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,199 +0,0 @@\n-/*\r\n- Copyright 2008,2009,2011 St\xe9phane De Mita and Mathieu Siol\r\n-\r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-#ifndef EGGLIB_GMS_HPP\r\n-#define EGGLIB_GMS_HPP\r\n-\r\n-#include "DataMatrix.hpp"\r\n-#include <string>\r\n-#include <istream>\r\n-\r\n-namespace egglib {\r\n-\r\n- /** \\brief ms-like sequence format parser\r\n- * \r\n- * The class provides parsing (input) and formatting (output)\r\n- * operations in ms format, that is the format used by Richard\r\n- * Hudson\'s program ms for outputting genotypes and by the\r\n- * associated program samplestat for reading them. Both types of\r\n- * operations are available through static methods using either\r\n- * a string or a stream (which can be a stream to or from a file\r\n- * or a string). In either case, types from the STL are used.\r\n- * Although ms deals only with data coded with 0 and 1, the class Ms\r\n- * offers the possibility of both importing and exporting data coded\r\n- * with by integer. All methods have an option named "separated". 
If\r\n- * this option is true, the parser or formatter introduces a slight\r\n- * modification of the format: genotypes individual data are\r\n- * separated by a white space ("1 0 1 1" instead of "1011", allowing\r\n- * genotype values larger than 9: "1 0 11 1").\r\n- *\r\n- * \\ingroup core\r\n- *\r\n- */\r\n- class Ms {\r\n-\r\n- public:\r\n- \r\n- /** \\brief Imports a sequence alignment\r\n- * \r\n- * Creates a istringstream from the string and calls the\r\n- * overloaded method.\r\n- * \r\n- * \\param str the string to parse.\r\n- * \\param ns the expected number of sequences.\r\n- * \\param separated true if a white space separator is placed\r\n- * between genotype at each site.\r\n- *\r\n- * \\return A sequence alignment as a data matrix.\r\n- */\r\n- static DataMatrix get(std::string, unsigned int ns, bool separated=false);\r\n-\r\n-\r\n- /** \\brief Imports a sequence alignment\r\n- * \r\n- * Attemps to generate a DataMatrix object from the stream.\r\n- * Reads only one simulation and throws a SeqlibFormatError\r\n- * exception in case of format error.\r\n- * \r\n- * Allows any number of white lines before the //, but no other\r\n- * data. 
Supports \\r at the end of lines (before the \\n).\r\n- * Accepted symbols are all integers (0-9).\r\n- *\r\n- * \\param stream the stream to parse.\r\n- * \\param ns the expected number of sequences.\r\n- * \\param separated true if a white space separator is placed\r\n- * between genotype at each site.\r\n- * \r\n- * \\return A sequence alignment as a data matrix.\r\n- */\r\n- static DataMatrix get(std::istream& stream, unsigned int ns, bool separated=false);\r\n-\r\n-\r\n- /** \\brief Exports a sequence alignment\r\n- * \r\n- * Internally creates a stringstream, calls the overloaded method\r\n- * and returns the outcome.\r\n- *\r\n- * \\param dataMatrix the alignment object to write.\r\n- * \\param separated true if a white space separator must be placed\r\n- * between the genotype at each site.\r\n- * \r\n- */\r\n- static std::string format(DataMatrix& dataMatrix, bool separated=false);\r\n- '..b'd string to the stream \'on the fly\'. The\r\n- * formatted string is guaranteed to starts with a // line and\r\n- * ends with an empty line. The client is expected to take care\r\n- * of writing any header and add an additional white line between\r\n- * simulations if needed. The method throws a SeqlibRuntimeError\r\n- * if the stream is not writable. The data matrix should contain\r\n- * only data within range 0-9 if separated is false (default) and\r\n- * any positive (>=0) integer if separated is true. 
Note that\r\n- * output generated with separated=true is never compatible with\r\n- * the original ms format, and that output generated with\r\n- * separator=false is compatible with the original ms format only\r\n- * if all alleles are 0 or 1 (which is not checked by this\r\n- * formatted).\r\n- * \r\n- * \\param stream the stream (file or string stream) where to\r\n- * write the output.\r\n- * \\param dataMatrix the alignment object to write.\r\n- * \\param separated true if a white space separator must be placed\r\n- * between the genotype at each site.\r\n- * \r\n- */\r\n- static void format(std::ostream& stream, DataMatrix& dataMatrix, bool separated=false);\r\n-\r\n-\r\n- /** \\brief Returns the last tMRCA read by any Ms instance\r\n- * \r\n- * If a tMRCA value was present in the last simulation read by\r\n- * any Ms instance, it will be returned by this method. A value\r\n- * of -1. is returned if no simulation was read, or if the last\r\n- * simulation didn\'t contain a tMRCA value or if the last\r\n- * simulation provoked an exception before reaching the tMRCA\r\n- * line.\r\n- * \r\n- */\r\n- static double tMRCA();\r\n-\r\n-\r\n- /** \\brief Returns the last "prob" read by any Ms instance\r\n- * \r\n- * "prob" is returned by ms when a fixed number of segregating\r\n- * sites is used in conjunction with a theta value. If a "prob"\r\n- * value was present in the last simulation read by any Ms\r\n- * instance, it will be returned by this method. 
A value of -1\r\n- * is returned if no simulation was read, or if the last\r\n- * simulation didn\'t contain a "prob" value or if the last\r\n- * simulation provoked an exception before reaching the "prob"\r\n- * line.\r\n- * \r\n- */\r\n- static double prob();\r\n- \r\n-\r\n- /** \\brief Returns the tree string found in the last simulation read by any Ms instance\r\n- * \r\n- * If one or more trees were present in the last simulation read\r\n- * by any Ms instance, they will be returned as a unique string\r\n- * by this method. An empty string is returned if no simulation\r\n- * was read, or if the last simulation, or if the last simulation\r\n- * didn\'t contain any tree value or if the last simulation\r\n- * provoked an exception before reaching the tree line.\r\n- * \r\n- * Note: the trees are returned as a single line.\r\n- * \r\n- */\r\n- static std::string trees();\r\n-\r\n- \r\n- private:\r\n- // Line parser (the last \\n is extracted and discarded - no error upon EOF)\r\n- std::string next_line(std::istream& stream);\r\n- \r\n- /// tMRCA (-1 if not found in ms output)\r\n- static double _tMRCA;\r\n- \r\n- /// probability (-1 if not found in ms output)\r\n- static double _prob;\r\n- \r\n- /// tree string (maybe contain several trees) (empty string if not found in ms output)\r\n- static std::string _trees;\r\n-\r\n- \r\n- /// No instantiation allowed\r\n- Ms() { }\r\n- \r\n- /// A fortiori no destruction allowed\r\n- ~Ms() { }\r\n-\r\n- /// No copy allowed\r\n- Ms(const Ms&) { }\r\n-\r\n- /// No copy allowed\r\n- Ms& operator=(const Ms&) { return *this; }\r\n- \r\n- };\r\n-}\r\n- \r\n-#endif\r\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Mutation.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Mutation.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,63 +0,0 @@ -/* - Copyright 2009-2010 Stéphane De Mita, Mathieu Siol - - This file is part of the EggLib library. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef EGGLIB_MUTATION_HPP -#define EGGLIB_MUTATION_HPP - -#include <vector> -#include "Edge.hpp" - -namespace egglib { - - /** \brief Very simple container of some information relative to a mutation - * - * \ingroup coalesce - * - */ - class Mutation { - - public: - - /// Default constructor - Mutation(); - - /// Age - //double age; - - /// Mutation index (for finding in Edge) - unsigned int actualSiteIndex; - - /// Position - double position; - - /// Segment index - unsigned int segmentIndex; - - /// Pointer to edge - //const Edge* edge; - - private: - - void init(); - - }; - -} - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Mutator.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Mutator.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,397 +0,0 @@\n-/*\r\n- Copyright 2009, 2010, 2012 St\xe9phane De Mita, Mathieu Siol\r\n-\r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-#ifndef EGGLIB_MUTATOR_HPP\r\n-#define EGGLIB_MUTATOR_HPP\r\n-\r\n-\r\n-#include "DataMatrix.hpp"\r\n-#include "Random.hpp"\r\n-#include "Arg.hpp"\r\n-#include "Mutation.hpp"\r\n-\r\n-\r\n-namespace egglib {\r\n- \r\n-\r\n- /** \\brief Implements mutation models\r\n- *\r\n- * \\ingroup coalesce\r\n- * \r\n- * Works with a previously built Ancestral Reconbination Graph. The\r\n- * user must sets options using the setter-based interface. After\r\n- * that he or she can call the method mute() that will generates\r\n- * a DataMatrix object.\r\n- * \r\n- * Genotype data are represented by integer numbers. Regardless of\r\n- * the mutation model, the ancestral state is always 0. 
The user can\r\n- * set the rate of mutation (or, alternatively, fix the number of\r\n- * mutations that occurred - which is the number of segregating sites\r\n- * only with an infinite site model).\r\n- * \r\n- * Other options fall into two separate groups: the positions of the\r\n- * mutated sites and the process of mutation (how new alleles are\r\n- * generated).\r\n- * \r\n- * Concerning allele generation, several mutation models are available\r\n- * (coded by single letters):\r\n- * - F: fixed number of alleles. Among other markers, this model is\r\n- * appropriate for simulating nucleotides. The user is able\r\n- * to choose the number of alleles (where 2 is the standard\r\n- * for an infinite site model and 4 for a finite site model).\r\n- * Mutator allows assigning independent weights between all\r\n- * different transition types and can draw randomly the\r\n- * ancestral states, providing a way to emulate evolution of\r\n- * nucleotides with multiple mutations at the same site and\r\n- * reversion.\r\n- * - I: infinite number of alleles. At a given site, each mutation\r\n- * raises a new allele. The value of the alleles is therefore\r\n- * irrelevant (it only denotes its order of appearance). This\r\n- * model does not permit homoplasy.\r\n- * - S: stepwise mutation model. In this model the value of the\r\n- * alleles are interpreted as a size (typically for simulating\r\n- * a microsatellite marker). Each mutation either increases\r\n- * or decreases the allele size by an increment of one.\r\n- * - T: two-phase mutation model. This model is a generalization\r\n- * of the stepwise mutation model (S). For a mutation, the\r\n- * increment (either increase or decrease) is 1 with the\r\n- * probability given by the parameter (1-TPMproba). 
With\r\n- * probability TPMproba, the increment is drawn from a\r\n- * geometric distribution of parameter given by the other\r\n- * parameter (TPMparam).\r\n- * \r\n- * By default, the program will assume an infinite site model (ISM).\r\n- * Each mutation will occur to a new position drawn from the interval\r\n- * [0,1]. It is possible to set any mutation model with an ISM \r\n- * (including microsatellite-like models I, S and T). Alternatively,\r\n- * the user can specify a finite number of sites available for\r\n- * mutation. For a microsatellite mark'..b'del). It gives the parameter\r\n- * of the geometric distribution which is used to generate\r\n- * the mutation step (if it is not one).\r\n- * \r\n- * The value must be >=0. and <=1. \r\n- * \r\n- */\r\n- void TPMparam(double value);\r\n-\r\n-\r\n- /** \\brief Gets the number of mutable sites\r\n- * \r\n- * A value a zero must be interpreted as the infinite site\r\n- * model. Note that after all calls all data from the tables\r\n- * sitePositions and siteWeights will be reset.\r\n- * \r\n- */\r\n- unsigned int numberOfSites() const;\r\n- \r\n- \r\n- /** \\brief Sets the number of mutable sites\r\n- * \r\n- * The value of zero is accepted and imposed the infinite\r\n- * site model.\r\n- * \r\n- */\r\n- void numberOfSites(unsigned int);\r\n- \r\n- \r\n- /** \\brief Gets the position of a given site\r\n- * \r\n- */\r\n- double sitePosition(unsigned int siteIndex) const;\r\n-\r\n- \r\n- /** \\brief Set the position of a given site\r\n- * \r\n- * The position must be >=0 and <=1\r\n- * \r\n- */\r\n- void sitePosition(unsigned int siteIndex, double position);\r\n-\r\n-\r\n- /** \\brief Gets the mutation weight of a given site\r\n- * \r\n- */\r\n- double siteWeight(unsigned int siteIndex) const;\r\n-\r\n- \r\n- /** \\brief Set the site weight of a given site\r\n- * \r\n- * The weight must be strictly positive.\r\n- * \r\n- */\r\n- void siteWeight(unsigned int siteIndex, double weight);\r\n-\r\n-\r\n- /** \\brief 
Performs mutation\r\n- * \r\n- * \\param arg Ancestral recombination graph instance. If the\r\n- * ARG is partially built or not a all, or improperly so,\r\n- * the behaviour of this method is not defined.\r\n- * \r\n- * \\param random The address of a Random instance to be\r\n- * used for generating random numbers.\r\n- * \r\n- * \\return A DataMatrix instance containing simulated data.\r\n- * \r\n- */\r\n- DataMatrix mute(Arg* arg, Random* random);\r\n-\r\n-\r\n- /** \\brief Gets the last number of mutations\r\n- *\r\n- * Returns the number of mutations of the last call of mute( ).\r\n- * By default, this method returns 0.\r\n- *\r\n- */\r\n- unsigned int numberOfMutations() const; \r\n-\r\n-\r\n- private:\r\n- \r\n- void clear();\r\n- void init();\r\n- void copy(const Mutator&);\r\n-\r\n- //int nextAllele(int allele, Random* random);\r\n- int TPMstep(double inTPMproba, Random* random);\r\n- void apply_mutation(unsigned int matrixIndex,\r\n- unsigned int actualSite, DataMatrix& data,\r\n- const Edge* edge, int allele,\r\n- unsigned int segment, Random* random);\r\n-\r\n- \r\n- char _model;\r\n- double _mutationRate;\r\n- unsigned int _fixedNumberOfMutations;\r\n- unsigned int _numberOfAlleles;\r\n- double** _transitionWeights;\r\n- bool _randomAncestralAllele;\r\n- unsigned int _numberOfSites;\r\n- double* _sitePositions;\r\n- double* _siteWeights;\r\n- double _TPMproba;\r\n- double _TPMparam;\r\n- int maxAllele;\r\n- unsigned int _numberOfMutations;\r\n- std::vector<Mutation> _cache_mutations;\r\n- unsigned int _cache_mutations_reserved;\r\n-\r\n- };\r\n-\r\n-\r\n- bool compare(Mutation mutation1, Mutation mutation2); // returns True if mutation1 is older\r\n-\r\n-}\r\n-\r\n-\r\n-\r\n-\r\n-#endif\r\n-\r\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/NucleotideDiversity.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/NucleotideDiversity.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,377 +0,0 @@\n-/*\r\n- Copyright 2008-2009 St\xc3\xa9phane De Mita, Mathieu Siol\r\n-\r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-\r\n-#ifndef EGGLIB_NUCLEOTIDEDIVERSITY_HPP\r\n-#define EGGLIB_NUCLEOTIDEDIVERSITY_HPP\r\n-\r\n-\r\n-#include "BaseDiversity.hpp"\r\n-#include <string>\r\n-#include <vector>\r\n-\r\n-\r\n-\r\n-namespace egglib {\r\n-\r\n-\r\n- /** \\brief Performs analyzes of population genetics\r\n- *\r\n- * \\ingroup polymorphism\r\n- * \r\n- * This class computes several summary statistics based on\r\n- * nucleotide analysis. Note that it is possible to use the same\r\n- * object to analyze different data set. Calling the load() method\r\n- * erases all data preivously computed (if any). Calling the load()\r\n- * method is absolutely required to compute any statistics. Some\r\n- * statistics are not computed by default, but are if the\r\n- * corresponding accessor is used (only load() is required).\r\n- * \r\n- * Note that "unsecure" accessors don\'t perform out-of-bound checks.\r\n- * \r\n- * S is the number of varying sites (only in sites that were not\r\n- * rejected).\r\n- * \r\n- * eta is the minimum number of mutations, that is the sum of the\r\n- * number of alleles minus 1 for each varying site. eta = S if all\r\n- * sites have no variant or 2 alleles. 
eta is computed independently\r\n- * of the option multiple and IS NOT computed over lseff sites.\r\n- *\r\n- * Pi is the average number of pairwise differences between sequences\r\n- * (expressed here per site) or (as computed here) the mean per site\r\n- * (unbiased) heterozygosity. Pi is zero if no polymorphic sites.\r\n- *\r\n- * D is the Tajima\'s test of neutrality\r\n- * Ref. Tajima F.: Statistical method for testing the neutral\r\n- * mutation hypothesis by DNA polymorphism. Genetics 1989, 123:585-595.\r\n- * It is arbitrary set to 0 if no polymorphic sites.\r\n- *\r\n- * tW: thetaW: estimator of theta based on polymorphic sites (ref.\r\n- * e.g. Watterson 1975 Theor. Pop. Biol.).\r\n- * Both D and thetaW are computed assuming that rounded nseff samples\r\n- * have been sampled.\r\n- * The variance of D is computed using rounded nseff instead of ns.\r\n- *\r\n- * H is the Fay and Wu\'s test of neutrality.\r\n- * Z is the standardized version and E a similar test.\r\n- * Ref. Fay J. C., Wu C.-I.: Hitchhiking under positive Darwinian\r\n- * selection. Genetics 2000, 155:1405-1413. and Zeng K., Fu Y. X.,\r\n- * Shi S., Wu C.-I.: Statistical tests for detecting positive\r\n- * selection by utilizing high-frequency variants. Genetics 2006,\r\n- * 174:1431-9. Both are arbitrary set to 0 if no polymorphic or\r\n- * orientable sites.\r\n- *\r\n- * tH and tL: theta H: estimators of theta based on derived\r\n- * polymorphic sites (ref in Fay and Wu and Zeng al.). 
The variance\r\n- * of H/Z are computed assuming that rounded nseff samples have\r\n- * been sampled.\r\n- * \r\n- */\r\n- class NucleotideDiversity : public BaseDiversity {\r\n-\r\n- public:\r\n-\r\n- /** \\brief Builds an object\r\n- * \r\n- */\r\n- NucleotideDiversity();\r\n-\r\n-\r\n- /** \\brief Destroys an object\r\n- * \r\n- */\r\n- virtual ~NucleotideDiversity();\r\n-\r\n-\r\n- /** \\brief Identifies polymorphic sites and computes basis\r\n- '..b" * - 1: A&G A G specific 1 + fixed 2-3\r\n- * - 2: A A&G A specific 2\r\n- * - 3: A A&G G specific 2 + fixed 1-3\r\n- * - 4: A A A&G specific 3\r\n- * - 5: A G A&G specific 3 + fixed 1-2\r\n- * - 6: A&G A&G A shared 1-2\r\n- * - 7: A&G A A&G shared 1-3\r\n- * - 8: A A&G A&G shared 2-3\r\n- * - 9: A&G A&G A&G shared 1-2-3\r\n- * - 10: A G G fixed 1\r\n- * - 11: A G A fixed 2\r\n- * - 12: A A G fixed 3\r\n- *\r\n- * \\param index must be an index from 0 to 12.\r\n- * \r\n- */\r\n- unsigned int triConfiguration(unsigned int index);\r\n-\r\n-\r\n- /// Builds and returns the vector of positions of all polymorphic sites\r\n- std::vector<unsigned int> polymorphic_positions() const;\r\n-\r\n-\r\n- /** \\brief Builds and returns the vector of positions of all singleton sites\r\n- * \r\n- * A site singleton when it is polymorphic according to\r\n- * parameter of the diversity analysis, when it has exactly two\r\n- * alleles and one of them is at absolute frequency 1 (one\r\n- * copy) disregarding the outgroup.\r\n- * \r\n- */\r\n- std::vector<unsigned int> singleton_positions() const;\r\n-\r\n-\r\n- protected:\r\n-\r\n- /** \\brief This class cannot be copied\r\n- * \r\n- */\r\n- NucleotideDiversity(const NucleotideDiversity& source) { }\r\n-\r\n-\r\n- /** \\brief This class cannot be copied\r\n- * \r\n- */\r\n- NucleotideDiversity& operator=(const NucleotideDiversity& source) { return *this; }\r\n-\r\n-\r\n- void init(); // initializes values\r\n- void clear(); // free memory but doesn't initializes\r\n- \r\n- // diversity 
(without outgroup)\r\n- void diversity();\r\n- \r\n- // diversity with outgroup\r\n- void outgroupDiversity();\r\n- \r\n- // site patterns\r\n- void differentiation();\r\n- \r\n- // triconfigurations\r\n- void triConfigurations();\r\n- \r\n-\r\n- // holders for statistics, with booleans flagging groups of stats\r\n- \r\n- bool b_analysisSites;\r\n- \r\n- bool b_diversity;\r\n- \r\n- double v_Pi; // nucleotide diversity\r\n- double v_thetaW; // theta (Watterson estimator)\r\n- double v_average_Pi; // average diversity across populations\r\n- double *v_pop_Pi; // diversity per population\r\n- double v_D; // Tajima's D\r\n- \r\n- bool b_outgroupDiversity;\r\n- \r\n- double v_thetaH; // theta (Fay and Wu estimator)\r\n- double v_thetaL; // theta (Zeng estimator)\r\n- double v_H; // Fay and Wu's H\r\n- double v_Z; // normalized Fay and Wu's H\r\n- double v_E; // Zeng et al.'s E\r\n- \r\n- bool b_differentiation;\r\n- \r\n- unsigned int *v_pairwiseFixedDifferences;\r\n- unsigned int *v_pairwiseCommonAlleles;\r\n- unsigned int *v_pairwiseSharedAlleles;\r\n- unsigned int *v_popPolymorphic;\r\n- unsigned int *v_popSpecific;\r\n- unsigned int *v_popSpecificDerived;\r\n- unsigned int v_countFixedDifferences;\r\n- unsigned int v_countCommonAlleles;\r\n- unsigned int v_countSharedAlleles;\r\n- unsigned int v_countSpecificAlleles;\r\n- unsigned int v_countSpecificDerivedAlleles;\r\n- \r\n- \r\n- bool b_triConfigurations;\r\n- \r\n- unsigned int *v_triConfigurations;\r\n-\r\n- };\r\n-}\r\n-\r\n-#endif\r\n" |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/ParamSet.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/ParamSet.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,279 +0,0 @@\n-/*\r\n- Copyright 2009-2010 St\xe9phane De Mita, Mathieu Siol\r\n-\r\n- This file is part of the EggLib library.\r\n-\r\n- EggLib is free software: you can redistribute it and/or modify\r\n- it under the terms of the GNU General Public License as published by\r\n- the Free Software Foundation, either version 3 of the License, or\r\n- (at your option) any later version.\r\n-\r\n- EggLib is distributed in the hope that it will be useful,\r\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\r\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r\n- GNU General Public License for more details.\r\n-\r\n- You should have received a copy of the GNU General Public License\r\n- along with EggLib. If not, see <http://www.gnu.org/licenses/>.\r\n-*/\r\n-\r\n-#ifndef EGGLIB_PARAMSET_HPP\r\n-#define EGGLIB_PARAMSET_HPP\r\n-\r\n-\r\n-#include "DataMatrix.hpp"\r\n-\r\n-\r\n-namespace egglib {\r\n-\r\n- class Change;\r\n- class Controller;\r\n-\r\n-\r\n- /** \\brief Set of parameters\r\n- *\r\n- * \\ingroup coalesce\r\n- *\r\n- */\r\n- class ParamSet {\r\n-\r\n- public:\r\n- \r\n- /** \\brief Default constructor\r\n- *\r\n- * Initializes all parameters to reasonnable values (except\r\n- * that the sample size is null: 1 population, 0 samples,\r\n- * selfing rate of 0, recombination rate of 0, growth rate of\r\n- * 0, population size of 1 and no changes.\r\n- *\r\n- */\r\n- ParamSet();\r\n-\r\n- /** \\brief Destructor\r\n- * \r\n- */\r\n- ~ParamSet();\r\n- \r\n- /** \\brief Copy constructor\r\n- * \r\n- */\r\n- ParamSet(const ParamSet&);\r\n- \r\n- /** \\brief Assignment operator\r\n- * \r\n- */\r\n- ParamSet& operator=(const ParamSet&);\r\n-\r\n- /** \\brief Restores default value of all parameters\r\n- * \r\n- */\r\n- void reset();\r\n-\r\n- /** \\brief Gets the number of populations\r\n- * \r\n- */\r\n- unsigned int numberOfPopulations() const;\r\n- \r\n- /** \\brief Gets a pairwise migration rate\r\n- * \r\n- * It is allowed to 
access a diagonal value. Diagonal\r\n- * values contain the sum of values of the corresponding\r\n- * line (diagonal cell excepted, of course).\r\n- * \r\n- */\r\n- double pairwiseMigrationRate(unsigned int source, unsigned int dest) const;\r\n- \r\n- /** \\brief Sets a pairwise migration rate\r\n- * \r\n- * It is not allowed to set a value on the diagonal (this\r\n- * would raise an exception). The method takes care of\r\n- * modifying the diagonal accordingly (still this is not\r\n- * relevant for the client);\r\n- * \r\n- */\r\n- void pairwiseMigrationRate(unsigned int source, unsigned int dest, double value);\r\n- \r\n- /** \\brief Sets the migration rate for all matrix\r\n- * \r\n- */\r\n- void migrationRate(double value);\r\n- \r\n- /** \\brief Gets a population size\r\n- * \r\n- */\r\n- double populationSize(unsigned int populationIndex) const;\r\n- \r\n- /** \\brief Sets a population size\r\n- * \r\n- * The size must be strictly positive.\r\n- * \r\n- */\r\n- void populationSize(unsigned int populationIndex, double value);\r\n- \r\n- /** \\brief Gets a growth rate\r\n- * \r\n- */\r\n- double growthRate(unsigned int populationIndex) const;\r\n- \r\n- /** \\brief Sets a growth rate\r\n- * \r\n- */\r\n- void growthRate(unsigned int populationIndex, double value);\r\n- \r\n- /** \\brief Gets the recombination rate\r\n-'..b'e is planned.\r\n- * \r\n- */\r\n- double nextChangeDate() const;\r\n- \r\n- /** \\brief Applies the next change event\r\n- * \r\n- * \\param controller the Change event might need to have\r\n- * access to simulation controller (to trigger coalescent\r\n- * events, for example).\r\n- * \r\n- */\r\n- void nextChangeDo(Controller* controller);\r\n- \r\n- /** \\brief Gets the number of single sample from a population\r\n- * \r\n- */\r\n- unsigned int singles(unsigned int populationIndex) const;\r\n-\r\n- /** \\brief Sets the number of single sample from a population\r\n- * \r\n- */\r\n- void singles(unsigned int populationIndex, unsigned int 
value);\r\n-\r\n- /** \\brief Gets the number of double sample from a population\r\n- * \r\n- */\r\n- unsigned int doubles(unsigned int populationIndex) const;\r\n-\r\n- /** \\brief Sets the number of double sample from a population\r\n- * \r\n- */\r\n- void doubles(unsigned int populationIndex, unsigned int value);\r\n- \r\n- /** \\brief Computes the total number of samples\r\n- * \r\n- */\r\n- unsigned int numberOfSamples() const;\r\n- \r\n- /** \\brief Gives the date of the last size change\r\n- * \r\n- * \\param populationIndex the index of the population.\r\n- * \\return The date where the last change occurred, or 0. if\r\n- * no change occurred during the simulation.\r\n- *\r\n- */\r\n- double dateOfLastChange(unsigned int populationIndex) const;\r\n-\r\n-\r\n- /** \\brief Sets the date of the last size change\r\n- * \r\n- * \\param populationIndex the index of the population.\r\n- * \\param date the date where the last change occurred, or 0.\r\n- * if no change occurred during the simulation.\r\n- *\r\n- */\r\n- void dateOfLastChange(unsigned int populationIndex, double date) const;\r\n-\r\n- \r\n- /** \\brief Set groups labels\r\n- * \r\n- * Sets the group labels of the DataMatrix, according to the\r\n- * current state of population structure, and assuming that\r\n- * the DataMatrix was generated by the class Arg.\r\n- * \r\n- * \\param dataMatrix the DataMatrix object to modify. The\r\n- * number of sequences must match the total number of samples\r\n- * defined by the ParamSet object this method is called on.\r\n- * \r\n- * \\param labelIndividuals by default, labels the different\r\n- * samples depending on the population they come from (0\r\n- * being the label of the first population). 
If this flag is\r\n- * set to true, then the samples are labelled depending on\r\n- * the individual they come from, regardless of populations.\r\n- * In that case there can be only one or two genes for a\r\n- * given group label.\r\n- * \r\n- */\r\n- void setGroups(DataMatrix& dataMatrix, bool labelIndividuals=false);\r\n-\r\n- private:\r\n-\r\n- void clear();\r\n- void init();\r\n- void copy(const ParamSet&);\r\n- \r\n- double _selfingRate;\r\n- double _recombinationRate;\r\n- unsigned int _numberOfSegments;\r\n- unsigned int _numberOfPopulations;\r\n- unsigned int* _singles;\r\n- unsigned int* _doubles;\r\n- double* _growthRates;\r\n- double* _populationSize;\r\n- double* _dateOfLastChange;\r\n- double** migrationMatrix;\r\n- unsigned int _numberOfChanges;\r\n- unsigned int nextChangeIndex;\r\n- Change const** changes;\r\n- };\r\n-\r\n-}\r\n-\r\n-#endif\r\n' |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Population.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Population.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,143 +0,0 @@ -/* - Copyright 2009-2010 Stéphane De Mita, Mathieu Siol - - This file is part of the EggLib library. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef EGGLIB_POPULATION_HPP -#define EGGLIB_POPULATION_HPP - - -#include "Edge.hpp" - -namespace egglib { - - class Random; - - /** \brief Handles a single population - * - * \ingroup coalesce - * - */ - class Population { - - public: - - /** \brief Default constructor - * - * Generates an empty population. - * - */ - Population(); - - /** \brief Copy constructor - * - */ - Population(const Population& source); - - /** \brief Assignment operator - * - */ - Population& operator=(const Population& source); - - /** \brief Destructor - * - * The object only cleans Edge objects currently stored in it. - * - */ - ~Population(); - - /** \brief Standard constructor - * - * The Edge instances will be handled by address and they - * MUST be passed using the method set(). - * - * \param numberOfSegments number of recombining segments. - * - * \param numberOfLineages the number of lineages contained - * in this population. - * - * \param firstIndex the absolute index (or ID) of the first - * lineage (the other will have consecutive incremented - * ID's). 
- * - */ - Population(unsigned int numberOfSegments, - unsigned int numberOfLineages, unsigned firstIndex); - - /** \brief Gets the number of lineages - * - */ - unsigned int numberOfLineages() const; - - /** \brief Gets the efficient number of lineages - * - * The number of lineages is multiplied by the number of - * covered segments of each lineages. - * - */ - unsigned int efficientNumberOfLineages() const; - - /** \brief Sets the Edge of a lineage - * - * \param index the index of the lineage within the - * population. - * \param edge the address of the Edge instance representing - * the lineage. - * - */ - void set(unsigned int index, Edge* edge); - - /** \brief Removes and returns a random lineage. - * - * \param random pointer to simulator's random generator - * instance. - * - */ - Edge* extractRandomly(Random* random); - - /** \brief Removes and returns a given lineage. - * - * \param index the relative index of the lineage. - * - */ - Edge* extractByIndex(unsigned int index); - - /** \brief Appends a lineage to the object - * - */ - void push(Edge* edge); - - /** \brief Gets coverage - * - */ - unsigned int coverage(unsigned int edgeIndex) const; - - - private: - - void copy(const Population& source); - void clear(); - Edge* pick(unsigned int index); - void init(); - unsigned int _numberOfLineages; - unsigned int _efficientNumberOfLineages; - Edge** lineages; - }; - -} - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Random.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Random.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,134 +0,0 @@ -/* - Copyright 2008,2009,2012 St�phane De Mita, Mathieu Siol - Adapted from MStrat, developed by Charles-Edouard Coste, - Thomas M. Bataillon, Mathieu Cotisson, Guy Decoux, Chistophe Rozale, - Daniel J. Schoen and Jacques L. David. - - This file is part of the EggLib library. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef EGGLIB_RANDOM_HPP -#define EGGLIB_RANDOM_HPP - -namespace egglib { - - /** \brief Pseudo-random number generator - * - * \ingroup core - * - * Random is a pseudo-random number generator, adapted from a part of MStrat, - * developed by Charles-Edouard Coste, Thomas M. Bataillon, Mathieu Cotisson, - * Guy Decoux, Chistophe Rozale, Daniel J. Schoen and Jacques L. David. - * - * It uses two different seeds. By default, they are initialized to available - * arbitrary values. However, a given sequence can be repeated by passing the - * same two seeds. - * - */ - class Random { - public: - /** \brief Initializes using default seeds - * - * Uses the current system time and the memory address of the object as an attempt to generate unique sequences. - */ - Random(); - - /** \brief Initializes using given seeds - * - * This constructor can be used to reproduce a given sequence. 
- */ - Random(double seed1, double seed2); - - /** \brief Draws a number from an exponential distribution - * - * \param expectation the distribution mean (also 1/lambda - * where lambda is the rate parameter). - * - */ - double erand(double expectation); - - /** \brief Draws an integer from a uniform distribution bound by 0 and max (max is not included) - * - * max is not included. - * - */ - unsigned int irand(unsigned int max); - - /** \brief Draws an integer from a Poisson distribution with parameter p - * - * The Poisson transformation algorithm was taken from (in French) - * http://www.u-picardie.fr/~cochard/IEM/demos/C107/C107_3.htm. - */ - unsigned int prand(double p); - - /** \brief Draws a number from a normal distribution of expectation 0 and variance 1 - * - * The algorithm used is the polar form of the Box-Muller - * algorithm. \todo use the Ziggurat algorithm for the - * nrand() method of Random. - * - */ - double nrand(); - - /** \brief Draws a number from a geometric law - * - * \param param the parameter of the law - * - */ - unsigned int grand(double); - - /** \brief Draws a number from a uniform distribution between 0 and 1 - * - */ - double uniform(); - - /** \brief Gets the current value of the first seed - * - */ - double seed1() const; - - /** \brief Gets the current value of the second seed - * - */ - double seed2() const; - - /** \brief Sets the current value of the first seed - * - */ - void seed1(double); - - /** \brief Sets the current value of the second seed - * - */ - void seed2(double); - - private: - // First seed - double _seed1; - - // Second seed - double _seed2; - - /* since the normal random generator draws two numbers at - * a time, one is cached and returned at any subsequent call - */ - bool b_ncached; - double v_ncached; - - }; -} - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/SitePolymorphism.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/SitePolymorphism.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,207 +0,0 @@ -/* - Copyright 2008-2009 Stéphane De Mita, Mathieu Siol - - This file is part of the EggLib library. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - - -#ifndef EGGLIB_SITEPOLYMORPHISM_HPP -#define EGGLIB_SITEPOLYMORPHISM_HPP - - - -namespace egglib { - - - /** \brief Implements diversity analysis at the site level - * - * \ingroup polymorphism - * - * Data are loaded along with a population index. It is necessary to - * set the number of populations prior to use. - * - * Outgroup sequence must be loaded separetedly. There can be any - * number of outgroups, but they must be all consistent otherwise the - * site will be considered as not orientable. - * - */ - class SitePolymorphism { - - public: - - /** \brief Builds an object - * - */ - SitePolymorphism(); - - - /** \brief Builds an object - * - * \param npop number of populations - * - */ - SitePolymorphism(unsigned int npop); - - - /** \brief Destroys an object - * - */ - virtual ~SitePolymorphism(); - - - /** \brief Copy constructor - * - */ - SitePolymorphism(const SitePolymorphism& source); - - - /** \brief Assignment operator - * - */ - SitePolymorphism& operator=(const SitePolymorphism& source); - - - /** \brief Sets the number of populations - * - * NOTE THAT all previous data is lost. 
- * - */ - void numberOfPopulations(unsigned int npop); - - - /** \brief Adds a character - * - * \param populationIndex the index of the population from - * which is sampled this character (do not use "population - * label"). - * - * \param character the character value (it is assumed it - * represents a valid character. - * - */ - void load(unsigned int populationIndex, char character); - - - /** \brief Loads outgroup state - * - * There can be any number of outgroup states. Only - * characters that are considered as valid (whatever the list - * is) should be loaded. - * - */ - void outgroup(char state); - - - /** \brief Number of different alleles - * - */ - unsigned int numberOfAlleles() const; - - - /** \brief Gets an allele (unsecure) - * - * Assumes that the index provided lies in the valid range - * - */ - char allele(unsigned int index) const; - - - /** \brief Gets a frequency (unsecure) - * - * The sum of of frequencies of the allele over populations - * is computed. Not out-of-bounds check is performed. - * - */ - unsigned int alleleFrequency(unsigned int alleleIndex) const; - - - /** \brief Gets the frequency of an allele in one pop (unsecure) - * - * The frequency of the allele in the given population is - * returned. Not out-of-bounds check is performed. - * - */ - unsigned int alleleFrequency(unsigned int popIndex, unsigned int alleleIndex) const; - - - /** \brief Sums the frequency of derived allele(s) - * - * This method assumes that the site is orientable. It will - * use as outgroup the first outgroup character entered, - * assuming at least one was entered and that all (if more - * than one) were identical. 
- * - */ - unsigned int derivedAlleleFrequency() const; - - - /** \brief Number of sequences that were analyzed - * - */ - unsigned int ns() const; - - - /** \brief Gets the number of analyzed sequences for a population - * - * No out-of-bound check is performed - * - */ - unsigned int ns(unsigned int popIndex) const; - - - /** \brief Checks if the site can be oriented - * - * Returns true if at least one outgroup datum has been - * loaded, if all outgroup data are identical (regardless of - * their value) and if the outgroup allele is one of the - * allele in the sample. - * - */ - bool isOrientable() const; - - bool isPolymorphic(unsigned int popIndex) const; - bool hasSpecificAllele(unsigned int popIndex, bool restrictToDerived) const; - bool haveFixedDifference(unsigned int pop1, unsigned int pop2) const; - bool haveCommonAllele(unsigned int pop1, unsigned int pop2) const; - bool haveSharedAllele(unsigned int pop1, unsigned int pop2) const; - - - - - protected: - - // helpers - void init(); - void clear(); - void copy(const SitePolymorphism& site); - - - // data - unsigned int m_numberOfPopulations; - unsigned int m_numberOfStates; - char * m_states; - unsigned int ** m_frequencies; - unsigned int m_numberOfOutgroups; - char * m_outgroups; - unsigned int m_ns; - unsigned int * m_pop_ns; - - bool m_cache_orientable; - - }; -} - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/Staden.hpp --- a/egglib/egglib-2.1.5/include/egglib-cpp/Staden.hpp Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,140 +0,0 @@ -/* - Copyright 2008-2009 St�phane De Mita, Mathieu Siol - - This file is part of EggLib. - - EggLib is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - EggLib is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with EggLib. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef EGGLIB_STADEN_HPP -#define EGGLIB_STADEN_HPP - -#include <string> -#include <istream> -#include "Align.hpp" - -namespace egglib { - - /** \brief Parser of Staden output format - * - * \ingroup core - * - * The parser is available as a static method. It takes either a - * stream or a string containing data formatted by the program GAP4 - * of the Staden package (command 'dump contig to file'). - * - */ - class Staden { - - public: - - - /** \brief Parses a string - * - * \param string a string containing an alignment formatted - * by the program GAP4 of the Staden package. - * - * \param deleteConsensus if true, the sequence named - * "CONSENSUS" is deleted from the file (if it is present). - * - * \return An Align instance containing the data found in - * the Staden while, after recoding the character following - * the standard codes. - * - * This method opens a stream to the string and calls the - * overloaded method. - * - * The character replacement rules assume Staden default - * convention, as follows: - * - "-" codes for an unknown base and is replaced by "N". - * - "*" codes for an alignment gap and is replaced by "-". - * - A white space represents missing data and is replaced - * by "?". 
- * - */ - static Align parse(const std::string& string, bool deleteConsensus=true); - - - /** \brief Parses an open stream - * - * \param stream the open containing an alignment formatted - * by the program GAP4 of the Staden package. - * - * \param deleteConsensus if true, the sequence named - * "CONSENSUS" is deleted from the file (if it is present). - * - * \return An Align instance containing the data found in - * the Staden while, after recoding the character following - * the standard codes. - * - * The character replacement rules assume Staden default - * convention, as follows: - * - "-" codes for an unknown base and is replaced by "N". - * - "*" codes for an alignment gap and is replaced by "-". - * - A white space represents missing data and is replaced - * by "?". - * - */ - static Align parse(std::istream& stream, bool deleteConsensus=true); - - - private: - - /// Not allowed to instantiate this class - Staden() { } - - /// Not allowed to instantiate this class - Staden(const Staden& source) { } - - /// Not allowed to instantiate this class - ~Staden() { } - - - /* Gets the start position of sequences - * - * The functions gives total number of characters before the start of sequences - * and reads through until the next backspace (ignores the first line). 
- */ - static void getShift(); - - // Translates according to the Staden format - static char transforme(char); - - // Imports one sequence - static bool readOneSequence(); - - // Imports and concatenates one sequence - static bool readAppendOneSequence(); - - // Replaces dots by the matching character from CONSENSUS - static void undot(bool delete_consensus=true); - - // The number of characters before the start of sequences - static int shift; - - // The dynamically filled container (will result in an aligment) - static Container container; - - // The current position - static int currpos; - - // The reading stream - static std::istream* stream; - - // Stores unique 8 characters discriminating readings - static std::vector<std::string> ID; - }; -} - -#endif |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/include/egglib-cpp/config.h --- a/egglib/egglib-2.1.5/include/egglib-cpp/config.h Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,80 +0,0 @@ -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.ac by autoheader. */ - -/* Define to 1 if you have the <inttypes.h> header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the `bpp-core' library (-lbpp-core). */ -/* #undef HAVE_LIBBPP_CORE */ - -/* Define to 1 if you have the `bpp-seq' library (-lbpp-seq). */ -/* #undef HAVE_LIBBPP_SEQ */ - -/* Define to 1 if you have the <memory.h> header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the `sqrt' function. */ -/* #undef HAVE_SQRT */ - -/* Define to 1 if stdbool.h conforms to C99. */ -#define HAVE_STDBOOL_H 1 - -/* Define to 1 if you have the <stdint.h> header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the <stdlib.h> header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the <strings.h> header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the <string.h> header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the <sys/stat.h> header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the <sys/types.h> header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the <unistd.h> header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if the system has the type `_Bool'. */ -#define HAVE__BOOL 1 - -/* Name of package */ -#define PACKAGE "egglib-cpp" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "demita@gmail.com" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "EggLib C++ library" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "EggLib C++ library 2.1.5" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "egglib-cpp" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "2.1.5" - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Version number of package */ -#define VERSION "2.1.5" - -/* Define to `__inline__' or `__inline' if that's what the C compiler - calls it, or to nothing if 'inline' is not supported under any name. */ -#ifndef __cplusplus -/* #undef inline */ -#endif - -/* Define to `unsigned int' if <sys/types.h> does not define. */ -/* #undef size_t */ |
b |
diff -r 420b57c3c185 -r feb40a9a8eae egglib/egglib-2.1.5/lib/libegglib-cpp.a |
b |
Binary file egglib/egglib-2.1.5/lib/libegglib-cpp.a has changed |
b |
diff -r 420b57c3c185 -r feb40a9a8eae hapmap2mlmm/HapmapToMLMMFiles.pl --- a/hapmap2mlmm/HapmapToMLMMFiles.pl Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,117 +0,0 @@ -#!/usr/bin/perl - -use strict; -use Switch; -use Getopt::Long; - -my $usage = qq~Usage:$0 <args> [<opts>] -where <args> are: - -h, --hapmap <Hapmap input file> - -m, --map <Map output file> - -g, --geno <Genotype output file> - -p, --path <Path for transpose executable> -~; -$usage .= "\n"; - -my ($hapmap,$map,$geno,$path); - - -GetOptions( - "geno=s" => \$geno, - "map=s" => \$map, - "hapmap=s" => \$hapmap, - "path=s" => \$path, -); - - -die $usage - if ( !$geno || !$map || !$hapmap || !$path); - -my $TRANSPOSE_EXE = "$path/transpose.awk"; - -my @snps; -my %chrom_pos; -my $num_line = 0; -open(my $O,">geno_transposed"); -open(my $H,$hapmap); -while(<$H>) -{ - $num_line++; - my $line = $_; - chomp($line); - $line =~s/\r//g; - $line =~s/\n//g; - my @infos = split(/\t/,$line); - if ($num_line == 1) - { - print $O "Ind_id"; - for (my $i = 11; $i <= $#infos; $i++) - { - my $individual = $infos[$i]; - print $O " " . $individual; - } - print $O "\n"; - } - elsif ($num_line > 1) - { - my $snp = $infos[0]; - my $variation = $infos[1]; - my %scores; - if ($variation =~/(\w)\/(\w)/) - { - my $allele1 = $1; - my $allele2 = $2; - $scores{$allele1} = 0; - $scores{$allele2} = 1; - } - my $chrom = $infos[2]; - my $pos = $infos[3]; - $chrom_pos{$snp}{"chrom"} = $chrom; - $chrom_pos{$snp}{"pos"} = $pos; - push(@snps,$snp); - print $O "$snp"; - for (my $i = 11; $i <= $#infos; $i++) - { - my $genotype = $infos[$i]; - my @alleles = split("",$genotype); - if ($genotype ne "NN") - { - my $score = $scores{$alleles[0]} + $scores{$alleles[1]}; - print $O " $score"; - } - else - { - print $O " NA"; - } - } - print $O "\n"; - } -} -close($H); -close($O); - -open(my $M,">$map"); -print $M "SNP Chr Pos\n"; -foreach my $snp(@snps) -{ - print $M "$snp " . $chrom_pos{$snp}{"chrom"} . " ". $chrom_pos{$snp}{"pos"} . 
"\n"; -} -close($M); - -system("$TRANSPOSE_EXE geno_transposed >geno_transposed2"); - -open(my $F,">$geno"); -open(my $G,"geno_transposed2"); -while(<$G>) -{ - my $line = $_; - $line =~s/ /\t/g; - print $F $line; -} -close($G); -close($F); - -unlink("geno_transposed"); -unlink("geno_transposed2"); - - |
b |
diff -r 420b57c3c185 -r feb40a9a8eae hapmap2mlmm/HapmapToMLMMFiles.sh --- a/hapmap2mlmm/HapmapToMLMMFiles.sh Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,11 +0,0 @@ -#!/bin/bash -hapmap=$1 -map=$2 -geno=$3 - -directory=`dirname $0` - -perl $directory/HapmapToMLMMFiles.pl -h $hapmap -g $geno -m $map -p $directory - - - |
b |
diff -r 420b57c3c185 -r feb40a9a8eae hapmap2mlmm/HapmapToMLMMFiles.xml --- a/hapmap2mlmm/HapmapToMLMMFiles.xml Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,158 +0,0 @@ -<tool id="hapmap_to_mlmm_files" name="HapmapToMLMMFiles" version="1.1"> - <description>converts a hapmap file into MLMM input files</description> - <command interpreter="bash">./HapmapToMLMMFiles.sh $input $snp_info $genot - </command> - <inputs> - <param format="txt" name="input" type="data" label="Allelic file in Hapmap format" help="Allelic file in Hapmap format"/> - </inputs> - <outputs> - <data format="txt" name="snp_info" label="SNP Info file"/> - <data format="txt" name="genot" label="Genotyping file for MLMM"/> - </outputs> - <!-- [STRONGLY RECOMMANDED] Exit code rules --> - <stdio> - <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> - <exit_code range="1:" level="fatal" /> - </stdio> - - <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> - <tests> - <!-- [HELP] Test files have to be in the ~/test-data directory --> - <!-- [HELP] Multiple tests can be defined with different parameters --> -<!-- - <test> - </test> ---> - </tests> - <help> - - - - -.. class:: infomark - -**Authors** Dereeper Alexis (alexis.dereeper@ird.fr), IRD, South Green platform - - | **Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). - -.. class:: infomark - -**Galaxy integration** South Green. - ---------------------------------------------------- - - -================= -HapmapToMLMMFiles -================= - ------------ -Description ------------ - - | HapmapToMLMMFiles converts a hapmap file into input files compatible with the MLMM software. 
- - ------------------ -Workflow position ------------------ - -**Upstream tool** - -=============== ====================== =========== -Name output file(s) format -=============== ====================== =========== -VCF to Hapmap Fasta alignment fasta -=============== ====================== =========== - - -**Downstream tool** - -=========== ========================== ======= -Name input file(s) format -=========== ========================== ======= -MLMM -=========== ========================== ======= - - - ----------- -Input file ----------- - -Hapmap file - Allelic file in Hapmap format - - - ------------- -Output files ------------- - -SNP Info file - -Genotyping file for MLMM - - - ---------------------------------------------------- - ---------------- -Working example ---------------- - -Input files -=========== - -Hapmap file ------------ - -:: - - rs# alleles chrom pos strand assembly# center protLSID assayLSID panel QCcode Ind1 Ind2 - SNP1 A/T 1 3102 + assembly NA NA NA speciesname NA AA AA AA - SNP2 A/T 1 4648 + assembly NA NA NA speciesname NA AA AA AA - - -Output files -============ - -SNP Info file -------------- - -:: - - SNP Chr Pos - SNP1 1 3102 - SNP2 1 4648 - SNP3 1 7601 - - -Genotyping file for MLMM ------------------------- - -:: - - Ind_id SNP1 SNP2 SNP3 SNP4 SNP5 SNP6 SNP7 SNP8 SNP9 SNP10 SNP11 SNP12 SNP13 SNP14 - Ind1 0 0 0 0 0 0 2 0 2 0 0 0 2 0 - Ind2 0 0 0 0 0 2 2 0 0 0 0 0 0 0 - - - </help> - <citations> - <!-- [HELP] As DOI or BibTex entry --> - <citation type="bibtex">@article{Dereeper03062015, -author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, -title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, -year = {2015}, -doi = {10.1093/nar/gkv351}, -abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single 
nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, -URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, -eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, -journal = {Nucleic Acids Research} -} - - </citation> - - </citations> -</tool> |
b |
diff -r 420b57c3c185 -r feb40a9a8eae hapmap2mlmm/transpose.awk --- a/hapmap2mlmm/transpose.awk Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,27 +0,0 @@ -#!/usr/bin/gawk -f - -BEGIN { - max_x =0; - max_y =0; -} - -{ - max_y++; - for( i=1; i<=NF; i++ ) - { - if (i>max_x) max_x=i; - A[i,max_y] = $i; - } -} - -END { - for ( x=1; x<=max_x; x++ ) - { - for ( y=1; y<=max_y; y++ ) - { - if ( (x,y) in A ) printf "%s",A[x,y]; - if ( y!=max_y ) printf " "; - } - printf "\n"; - } -} |
b |
diff -r 420b57c3c185 -r feb40a9a8eae ped2bed/ped2bed.sh --- a/ped2bed/ped2bed.sh Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,20 +0,0 @@ -#!/bin/bash -ped=$1 -map=$2 -bed=$3 -fam=$4 -bim=$5 -logs=$6 - -directory=`dirname $0` -mkdir tmpdir$$ -cp -rf $ped tmpdir$$/input.ped -cp -rf $map tmpdir$$/input.map - -plink --file tmpdir$$/input --out tmpdir$$/out --make-bed --noweb >>$logs 2>&1 - -mv tmpdir$$/out.bed $bed -mv tmpdir$$/out.fam $fam -mv tmpdir$$/out.bim $bim - - |
b |
diff -r 420b57c3c185 -r feb40a9a8eae ped2bed/ped2bed.xml --- a/ped2bed/ped2bed.xml Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,196 +0,0 @@ -<tool id="ped2bed" name="plink: ped2bed" version="1.24"> - <description>Convert ped to bed</description> - <requirements> - <requirement type="package" version="1.07">plink</requirement> - </requirements> - <command interpreter="bash">./ped2bed.sh $ped $map $bed $fam $bim $logs - </command> - <inputs> - <param format="txt" name="ped" type="data" label="Allelic file in PED format" help="Allelic file in PED format"/> - <param format="txt" name="map" type="data" label="Map file" help="Map file"/> - </inputs> - <outputs> - <data format="txt" name="bed" label="Bed file"/> - <data format="txt" name="fam" label="Fam file"/> - <data format="txt" name="bim" label="Bim file"/> - <data format="txt" name="logs" label="All Logs"/> - </outputs> - <!-- [STRONGLY RECOMMANDED] Exit code rules --> - <stdio> - <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> - <exit_code range="1:" level="fatal" /> - </stdio> - - <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> - <tests> - <!-- [HELP] Test files have to be in the ~/test-data directory --> - <!-- [HELP] Multiple tests can be defined with different parameters --> -<!-- - <test> - </test> ---> - </tests> - <help> - - -.. class:: infomark - -**Authors** plink_ - -.. _plink: http://pngu.mgh.harvard.edu/purcell/plink/ - - | "PLINK: a toolset for whole-genome association and population-based linkage analysis.", **Purcell S, Neale B, Todd-Brown K, Thomas L, Ferreira MAR, Bender D, Maller J, Sklar P, de Bakker PIW, Daly MJ, Sham PC.**, American Journal of Human Genetics, 81, 2007. - -.. class:: infomark - -**Galaxy integration** South Green. - -.. class:: infomark - -**Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 july 2015) 43 (W1). 
- - ---------------------------------------------------- - - - -======= -Ped2Bed -======= - ------------ -Description ------------ - - | PLINK is a free, open-source whole genome association analysis toolset, designed to perform a range of basic, large-scale analyses in a computationally efficient manner. - | For further informations, please visite the plink website_. - -.. _website: http://pngu.mgh.harvard.edu/purcell/plink/ - - ------------------ -Workflow position ------------------ - -**Upstream tool** - -=============== ========================== =========== -Name output file(s) format -=============== ========================== =========== -VCFtools filter PED and map files ped and map -=============== ========================== =========== - - -**Downstream tool** - -=========== ========================== ======= -Name input file(s) format -=========== ========================== ======= -Admixture Bed, fam and bim file txt -=========== ========================== ======= - - ----------- -Input file ----------- - -PED file - Allelic file in PED format - -MAP file - - - ------------- -Output files ------------- - -Bed file - -Fam file - -Bim file - -All logs - Log file - - ------------- -Dependencies ------------- -plink - version 1.07 - ---------------------------------------------------- - ---------------- -Working example ---------------- - -Input files -=========== - -PED file --------- - -:: - - AZUCENA AZUCENA 0 0 0 0 G G A A C C T T T - BULUPANDAK BULUPANDAK 0 0 0 0 G G A A A A T - -MAP file --------- - -:: - - 0 Chr1:4299 0 4299 - 0 Chr1:26710 0 26710 - 0 Chr1:56184 0 56184 - 0 Chr1:93272 0 93272 - - -Output files -============ - -Bed file --------- - -:: - - binary file - -Fam file --------- - -:: - - AZUCENA AZUCENA 0 0 0 -9 - BULUPANDAK BULUPANDAK 0 0 0 -9 - -Bim file --------- - -:: - - 0 Chr1:4299 0 4299 A G - 0 Chr8:18058 0 18058 C T - - - </help> - <citations> - <!-- [HELP] As DOI or BibTex entry --> - <citation 
type="bibtex">@article{Dereeper03062015, -author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel}, -title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations}, -year = {2015}, -doi = {10.1093/nar/gkv351}, -abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.}, -URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract}, -eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html}, -journal = {Nucleic Acids Research} -} - - </citation> - - </citations> -</tool> |
b |
diff -r 420b57c3c185 -r feb40a9a8eae tool_dependencies.xml --- a/tool_dependencies.xml Fri Jul 10 04:39:30 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="plink" version="1.07"> - <repository changeset_revision="65400c333b88" name="package_plink_1_07" owner="dereeper" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency> |