Repository 'sniplay'
hg clone https://toolshed.g2.bx.psu.edu/repos/dereeper/sniplay

Changeset 0:3e19d0dfcf3e (2015-03-23)
Next changeset 1:420b57c3c185 (2015-07-10)
Commit message:
Uploaded
added:
MDSplot/MDSbasedOnIBSmatrix.pl
MDSplot/mdsplot.sh
MDSplot/mdsplot.xml
MDSplot/test-data/analyse.ibs_matrix.txt
MDSplot/test-data/analyse.log
MDSplot/test-data/analyse.mds_plot.txt
MDSplot/test-data/input.map
MDSplot/test-data/input.ped
VCFToolFilter/VCFToolsFilter.pl
VCFToolFilter/test-data/result.log
VCFToolFilter/test-data/result.vcf
VCFToolFilter/test-data/sample.vcf
VCFToolFilter/vcfToolsFilter.sh
VCFToolFilter/vcfToolsFilter.xml
VCFToolsStats/VCFToolsStats.pl
VCFToolsStats/test-data/result.TsTv.summary
VCFToolsStats/test-data/result.annotation
VCFToolsStats/test-data/result.het
VCFToolsStats/test-data/result.imiss
VCFToolsStats/test-data/result.log
VCFToolsStats/test-data/sample.vcf
VCFToolsStats/vcfToolsStats.sh
VCFToolsStats/vcfToolsStats.xml
admixture/Admixture.pl
admixture/admixture.sh
admixture/admixture.xml
admixture/transpose.awk
snpEff/SnpEff.pl
snpEff/snpEff-pipe.sh
snpEff/snpEff.xml
tassel/tassel.sh
tassel/tassel.xml
tassel/tool-data/tool_dependencies.xml
tool_dependencies.xml
b
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/MDSbasedOnIBSmatrix.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MDSplot/MDSbasedOnIBSmatrix.pl Mon Mar 23 05:57:27 2015 -0400
[
@@ -0,0 +1,110 @@
+#!/usr/bin/perl
+
+# Run PLINK IBS clustering and MDS on <in>.ped / <in>.map, then write
+# <out>.mds_plot.txt (one line per individual: population individual C1 C2)
+# and <out>.ibs_matrix.txt (the <out>.mibs matrix labelled with PED column 1).
+# PLINK stdout and stderr are appended to <in>.plink.log.
+
+use strict;
+use warnings;
+# NOTE(review): Switch and Bio::SeqIO look unused in this script; they are kept
+# so that its module requirements stay exactly as before.
+use Switch;
+use Getopt::Long;
+use Bio::SeqIO;
+
+my $PLINK_EXE= "plink";
+
+my $usage = qq~Usage:$0 <args> [<opts>]
+where <args> are:
+    -i, --in         <input>
+    -o, --out        <output>
+~;
+$usage .= "\n";
+
+my ($in,$out);
+
+GetOptions(
+ "in=s"        => \$in,
+ "out=s"       => \$out
+);
+
+die $usage
+  if ( !$in || !$out);
+
+# Quote one word for /bin/sh so that odd characters in a path cannot alter the command.
+sub shell_quote {
+    my ($word) = @_;
+    $word =~ s/'/'\\''/g;
+    return "'$word'";
+}
+
+# Strip LF/CR and split on whitespace runs; leading whitespace yields no empty field.
+sub fields_of {
+    my ($line) = @_;
+    $line =~ s/[\r\n]//g;
+    return split(' ', $line);
+}
+
+# Run PLINK and stop on failure instead of silently writing empty result files.
+my $plink_command = join(' ', $PLINK_EXE, '--file', shell_quote($in),
+    '--noweb --cluster --matrix --mds-plot 2 --out', shell_quote($out),
+    '>>' . shell_quote("$in.plink.log"), '2>&1');
+system($plink_command) == 0
+    or die "plink failed (status $?), see $in.plink.log\n";
+
+# Row and column labels of the IBS matrix: PED column 1, in file order.
+my @individuals;
+open(my $PED, '<', "$in.ped") or die "Cannot read $in.ped: $!\n";
+while (my $line = <$PED>) {
+    my ($id) = fields_of($line);
+    push(@individuals, $id) if defined $id;    # blank lines carry no individual
+}
+close($PED);
+
+# Optional individual;population table; individuals not listed fall back to Pop1.
+my %populations;
+if (-e "$in.individual_info.txt") {
+    open(my $INFO, '<', "$in.individual_info.txt")
+        or die "Cannot read $in.individual_info.txt: $!\n";
+    while (my $line = <$INFO>) {
+        $line =~ s/[\r\n]//g;
+        my ($ind,$pop) = split(/;/,$line);
+        $populations{$ind} = $pop if defined $ind;
+    }
+    close($INFO);
+}
+
+# MDS coordinates: rows after the header (the line containing C1) are used; fields
+# 1, 4 and 5 are kept (presumably FID, C1, C2 in the PLINK .mds layout - TODO confirm).
+open(my $MDS, '<', "$out.mds") or die "Cannot read $out.mds: $!\n";
+open(my $PLOT, '>', "$out.mds_plot.txt") or die "Cannot write $out.mds_plot.txt: $!\n";
+my $header_seen = 0;
+while (my $line = <$MDS>) {
+    if (!$header_seen) {
+        $header_seen = 1 if $line =~ /C1/;
+        next;
+    }
+    my @cols = fields_of($line);
+    next if @cols < 5;
+    my $ind = $cols[0];
+    my $pop = $populations{$ind};
+    $pop = "Pop1" unless defined $pop && length $pop;
+    print $PLOT "$pop $ind $cols[3] $cols[4]\n";
+}
+close($MDS);
+close($PLOT) or die "Cannot write $out.mds_plot.txt: $!\n";
+
+# IBS matrix: prefix each row of <out>.mibs with the matching individual name.
+open(my $MIBS, '<', "$out.mibs") or die "Cannot read $out.mibs: $!\n";
+open(my $IBS, '>', "$out.ibs_matrix.txt") or die "Cannot write $out.ibs_matrix.txt: $!\n";
+print $IBS "Individuals " . join("\t",@individuals)."\n";
+my $row = 0;
+while (my $line = <$MIBS>) {
+    my $name = '';
+    $name = $individuals[$row] if $row < @individuals;
+    print $IBS $name . " " . join("\t", fields_of($line)) . "\n";
+    $row++;
+}
+close($MIBS);
+close($IBS) or die "Cannot write $out.ibs_matrix.txt: $!\n";
b
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/mdsplot.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MDSplot/mdsplot.sh Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# Galaxy wrapper for MDSbasedOnIBSmatrix.pl; exits non-zero when a step fails.
+# Args: ped map output_label matrix_out plot_out log_out
+tool_path=$(dirname "$0")
+ped=$1
+map=$2
+fileout_label=$3
+fileout_matrix=$4
+fileout_plot=$5
+fileout_log=$6
+
+# Expansions are quoted: the label is free text and may contain spaces.
+rsync -a "$ped" input.ped || exit 1
+rsync -a "$map" input.map || exit 1
+status=0
+perl "$tool_path/MDSbasedOnIBSmatrix.pl" --in input --out "$fileout_label" || status=$?
+rm -f input.ped input.map
+cp "$fileout_label.ibs_matrix.txt" "$fileout_matrix" || status=1
+cp "$fileout_label.mds_plot.txt" "$fileout_plot" || status=1
+cp input.plink.log "$fileout_log" || status=1
+rm -f "$fileout_label.ibs_matrix.txt" "$fileout_label.mds_plot.txt" input.plink.log
+exit "$status"
b
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/mdsplot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MDSplot/mdsplot.xml Mon Mar 23 05:57:27 2015 -0400
[
@@ -0,0 +1,203 @@
+<tool id="sniplay_mdsplot" name="MDS plot" version="1.1.1">
+    
+    <!-- [REQUIRED] Tool description displayed after the tool name -->
+    <description> IBS matrix / multi-dimensional scaling</description>
+    
+    <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->
+    <requirements>
+        <requirement type="binary">perl</requirement>
+ <requirement type="package" version="0.1.13">plink</requirement>
+    </requirements>
+    
+    <!-- [OPTIONAL] Command to be executed to get the tool's version string -->
+    <version_command>
+<!--
+        tool_binary -v
+-->
+    </version_command>
+    
+    <!-- [REQUIRED] The command to execute (each parameter is quoted so that a value containing spaces stays one argument) -->
+    <command interpreter="bash">
+ mdsplot.sh '$fileped' '$filemap' '$fileout_label' '$fileout_matrix' '$fileout_plot' '$fileout_log'
+    </command>
+   
+    <!-- [REQUIRED] Input files and tool parameters -->
+    <inputs>
+ <param name="fileped" type="data" format="txt" optional="false" label="PED input" />
+ <param name="filemap" type="data" format="txt" optional="false" label="MAP input" help="4 columns tabular file: chromosome, snp id, genetic distance, bp position"/>
+ <param name="fileout_label" type="text" value="analyse" label="Output name" help="Output name for tabular files" />
+    </inputs>
+    
+    <!-- [REQUIRED] Output files -->
+    <outputs>
+ <data name="fileout_matrix" type="data" format="tabular" label="${fileout_label}.ibs_matrix.txt" />
+ <data name="fileout_plot" type="data" format="tabular" label="${fileout_label}.mds_plot.txt" />
+ <data name="fileout_log" type="data" format="txt" label="${fileout_label}.log" />
+    </outputs>
+    
+    <!-- [STRONGLY RECOMMENDED] Exit code rules -->
+    <stdio>
+        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+    
+    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
+    <tests>
+        <!-- [HELP] Test files have to be in the ~/test-data directory -->
+        <!-- NOTE(review): expected outputs name the analyse.* fixtures shipped in test-data; confirm their content matches the current output format -->
+        <test>
+         <param name="fileped" value="input.ped" />
+  <param name="filemap" value="input.map" />
+         <output name="fileout_matrix" file="analyse.ibs_matrix.txt" />
+  <output name="fileout_plot" file="analyse.mds_plot.txt" />
+  <output name="fileout_log" file="analyse.log" />
+        </test>
+
+        <!-- [HELP] Multiple tests can be defined with different parameters -->
+<!--
+        <test>
+        </test>
+-->
+    </tests>
+    
+    <!-- [OPTIONAL] Help displayed in Galaxy -->
+    <help>
+
+.. class:: infomark
+
+**Authors** 
+
+---------------------------------------------------
+
+.. class:: infomark
+
+**Please cite** If you use this tool, please cite Dereeper et al. 2015 (in prep.).
+
+---------------------------------------------------
+
+========
+MDS plot
+========
+
+-----------
+Description
+-----------
+
+  Compute an IBS matrix and a multi-dimensional scaling.
+
+
+-----------------
+Workflow position
+-----------------
+
+**Upstream tools**
+
+=========== ========================== =======
+Name            output file(s)         format 
+=========== ========================== =======
+=========== ========================== =======
+
+
+**Downstream tools**
+
+=========== ========================== =======
+Name            output file(s)         format
+=========== ========================== =======
+=========== ========================== =======
+
+
+----------
+Input file
+----------
+
+PED file
+
+MAP file
+ 4 columns tabular file: chromosome, snp id, genetic distance, bp position 
+
+
+----------
+Parameters
+----------
+
+Output name
+        Output base name for the output files
+
+
+------------
+Output files
+------------
+
+Output_name.ibs_matrix.txt
+ Tabular file with IBS matrix 
+
+Output_name.mds_plot.txt
+ File to construct mds plot
+
+Output_name.log
+ Log file
+
+
+---------------------------------------------------
+
+---------------
+Working example
+---------------
+
+Input files
+===========
+
+PED file
+-----------
+
+::
+
+ IRAT112 1 0 0 1 1 1 1 4 4 ...
+ IAC25 1 0 0 1 1 1 1 4 4 ...
+ CIRAD409 1 0 0 1 1 3 3 1 1 ...
+
+
+MAP file
+-----------
+
+::
+
+ Chr1 Chr1:4299 0 4299
+ Chr1 Chr1:26710 0 26710
+ Chr1 Chr1:56184 0 56184
+ Chr1 Chr1:93272 0 93272
+
+
+
+Parameters
+==========
+
+Output name -> densities
+
+
+Output files
+============
+
+densities.ibs_matrix.txt
+------------------------
+
+::
+
+ Individuals IRAT112 IAC25 IAC165 KARASUKARASURANKASU DOURADOPRECOCE ...
+ IRAT112 1 0.93691 0.937407 0.734724 0.943368 ...
+ IAC25 0.93691 1 0.958768 0.723299 0.965723 ...
+
+
+densities.mds_plot.txt
+----------------------
+
+::
+
+ Pop1 IRAT112 -0.0969382 0.0376036
+ Pop1 IAC25 -0.0918126 0.0501177
+
+
+
+    </help>
+    
+</tool>
b
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/test-data/analyse.ibs_matrix.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MDSplot/test-data/analyse.ibs_matrix.txt Mon Mar 23 05:57:27 2015 -0400
b
b'@@ -0,0 +1,94 @@\n+Individuals\tIRAT112\tIAC25\tIAC165\tKARASUKARASURANKASU\tDOURADOPRECOCE\tCUIABANA\tBICOBRANCO\tCAAWA/FORTUNA6\tCANELADEFERRO\tCIRAD358\tCOLOMBIA1\tCIRAD409\tBAGANANASALAO\tBAKUNGH\tFOHISOMOTRA\tKAKANI2\tGUARANI\tDOURADOAGULHA\tIRAT13\tDAWASANRED\tGRAZI\tIRAT144\tIAC47\tMOROBEREKAN\tIRAT362\tGIZA171\tIDSA77\tIRAT216\tIRAT177\tCHAPHUMA\tCIRAD392\tARROZCEBADA\tINDANE\tDINORADO\t63-104\tIR63380-16\tCANAROXA\tCICIHBETON\tIR68704-145-1-1-B\tHAWMOM\tIRAT364\tIRAT212\tKHAODAM\t62667\tIRAT234\tNIPPONBARE_D\tJUMULA2\tCUBA65\tBABER\tCIRAD488\tBENGALYVAKARINA\tESPERANZA\tBULUPANDAK\tARIAS\tGOMPA2\tCT13582-15-5-M\tIRAT335\tM202\tIR65907-188-1-B\tIR71525-19-1-1\tCNA-7_BO_1_1_33-13-6-1\tIRAT257\tIRAT109\tKANIRANGA\tIR66421-096-2-1-1\tGOGO\tGEMJYAJYANAM\tDANGREY\tBINULAWAN\tCAIAPO\tIGUAPECATETO\tIRAT170\tIRAT380\tARAGUAIA\tGOGOLEMPAK\tJAOHAW\tCHALOYOE\tGANIGI\tGUNDILKUNING\tIRAT2\tIRAT366\tIRAT104\tDAVAO\tHD1-4\tCURINCA\tDAM\tCIRAD403\tJUMALI\tJIMBRUKJOLOWORO\tGOGOLEMPUK\tCIRAD394\tIR63372-08\tIR60080-46A\n+IRAT112\t1\t0.93691\t0.937407\t0.734724\t0.943368\t0.819672\t0.903626\t0.850969\t0.842524\t0.802285\t0.717834\t0.836562\t0.804272\t0.832588\t0.81073\t0.614009\t0.946846\t0.943368\t0.944362\t0.688525\t0.807253\t0.712866\t0.929955\t0.782414\t0.902136\t0.692499\t0.817188\t0.796821\t0.943865\t0.697963\t0.790859\t0.787879\t0.704918\t0.833582\t0.917039\t0.769001\t0.938897\t0.682563\t0.814704\t0.707402\t0.859911\t0.787879\t0.709389\t0.945355\t0.883259\t0.703924\t0.697963\t0.843517\t0.725286\t0.725782\t0.84302\t0.876304\t0.69995\t0.74466\t0.651267\t0.836066\t0.958271\t0.651764\t0.755092\t0.859911\t0.861898\t0.905614\t0.892201\t0.739692\t0.828117\t0.80924\t0.650273\t0.644312\t0.860407\t0.853949\t0.922504\t0.944858\t0.81073\t0.908097\t0.756085\t0.674118\t0.744163\t0.785892\t0.698957\t0.944858\t0.80775\t0.857427\t0.838053\t0.931446\t0.831098\t0.721311\t0.798808\t0.592151\t0.739692\t0.757079\t0.767014\t0.781918\t0.899155\n+IAC25\t0.93691\t1\t0.958768\t0.723299\t0.96572
3\t0.825137\t0.93691\t0.836562\t0.850969\t0.772976\t0.713363\t0.807253\t0.791853\t0.819175\t0.814208\t0.602583\t0.934426\t0.966716\t0.904123\t0.676105\t0.817685\t0.706408\t0.948336\t0.811724\t0.892697\t0.693989\t0.819672\t0.784401\t0.902633\t0.677596\t0.806259\t0.812221\t0.717337\t0.813214\t0.876801\t0.757576\t0.923497\t0.706905\t0.806259\t0.69995\t0.833582\t0.777447\t0.693989\t0.904123\t0.896672\t0.703428\t0.694486\t0.813214\t0.728763\t0.747144\t0.82464\t0.864878\t0.710383\t0.758073\t0.643815\t0.834575\t0.925981\t0.674118\t0.745653\t0.839543\t0.845504\t0.900149\t0.851962\t0.764034\t0.817685\t0.814704\t0.655738\t0.644809\t0.819175\t0.834575\t0.90313\t0.90462\t0.838053\t0.928465\t0.751615\t0.666667\t0.733731\t0.800298\t0.707402\t0.902633\t0.805266\t0.846995\t0.822653\t0.955787\t0.809737\t0.700944\t0.788376\t0.591654\t0.750124\t0.751615\t0.790363\t0.769498\t0.879781\n+IAC165\t0.937407\t0.958768\t1\t0.722802\t0.929459\t0.839543\t0.932439\t0.862891\t0.85544\t0.786388\t0.739692\t0.817685\t0.790363\t0.817685\t0.822653\t0.596125\t0.968703\t0.929459\t0.920517\t0.677596\t0.832091\t0.704918\t0.9846\t0.822156\t0.92002\t0.688525\t0.834078\t0.794834\t0.921013\t0.685047\t0.805763\t0.823646\t0.717834\t0.826627\t0.895181\t0.769995\t0.945852\t0.688525\t0.825633\t0.694486\t0.846001\t0.77993\t0.697466\t0.920517\t0.90611\t0.694983\t0.689021\t0.809737\t0.723299\t0.745653\t0.810233\t0.87233\t0.692002\t0.76155\t0.636364\t0.849975\t0.91853\t0.66766\t0.755092\t0.866865\t0.855936\t0.916542\t0.868356\t0.746647\t0.822156\t0.813214\t0.651267\t0.632389\t0.82464\t0.844014\t0.927471\t0.921013\t0.832588\t0.963736\t0.774963\t0.661202\t0.743169\t0.796821\t0.702931\t0.921013\t0.800795\t0.864382\t0.80924\t0.98609\t0.821162\t0.709389\t0.797814\t0.588177\t0.735718\t0.774963\t0.783905\t0.780924\t0.901143\n+KARASUKARASURANKASU\t0.734724\t0.723299\t0.722802\t1\t0.73075\t0.748137\t0.745653\t0.766518\t0.747144\t0.796324\t0.673125\t0.738202\t0.774466\t0.753105\t0.780924\t0.630899\t0.731247\t0.73075\t0.769498\t
0.802782\t0.769498\t0.677099\t0.73224\t0.749627\t0.754098\t0.817685\t0.715847\t0.721311\t0.769001\t0.837059\t0.777943\t0.775956\t0.827124\t0.740189\t0.778937\t0.707402\t0.743169\t0.675609\t0.704421\t0.80775\t0.780427\t0.748137\t0.879285\t0.770492\t0.759066\t0.828117\t0.731744\t0.743169\t0.761053\t0.669151\t0.764531\t0.71237\t0.693989\t0.737705\t0.668157\t0.735718\t0.723795\t0.781918\t0.728266\t0.764531\t0.750621\t0.741679\t0.731247\t0.689'..b'\t0.74764\t0.723795\t0.61997\t0.77546\t0.692002\t0.730253\t0.850969\t0.703428\t0.672628\t0.654247\t0.640835\t0.722305\t0.694983\t0.726279\t0.74615\t0.727769\t0.724292\t0.71535\t0.653751\t0.740189\t0.724292\t0.656731\t0.687531\t0.811227\t0.726279\t0.688525\t0.723795\t0.800795\t0.680079\t0.663189\t0.748137\t0.658718\t0.663189\t0.791356\t0.721311\t0.737705\t0.759563\t0.631396\t0.682067\t0.741679\t0.647789\t0.671634\t0.77844\t0.689021\t0.774466\t0.682563\t0.865872\t0.707899\t0.814208\t0.616493\t0.60457\t0.774963\t0.726776\t0.736711\t0.724292\t0.733234\t0.709389\t0.77993\t0.691008\t0.679583\t0.775956\t0.727769\t0.723299\t0.796821\t0.777943\t0.763537\t0.734724\t0.651267\t0.678589\t0.643815\t0.600099\t1\t0.77993\t0.680576\t0.641828\t0.739195\n+GOGOLEMPUK\t0.757079\t0.751615\t0.774963\t0.710383\t0.756085\t0.791356\t0.783905\t0.780924\t0.771485\t0.801788\t0.63686\t0.69995\t0.750124\t0.79533\t0.835072\t0.597615\t0.789369\t0.756085\t0.782911\t0.727769\t0.801788\t0.685544\t0.787382\t0.852459\t0.776453\t0.693989\t0.669647\t0.673125\t0.782414\t0.71535\t0.717834\t0.79533\t0.730253\t0.754595\t0.77546\t0.685047\t0.790363\t0.748634\t0.706905\t0.729757\t0.795827\t0.765524\t0.725782\t0.783905\t0.815201\t0.697466\t0.689518\t0.729757\t0.73075\t0.693492\t0.795827\t0.715847\t0.777943\t0.738202\t0.645802\t0.72926\t0.762047\t0.643318\t0.674118\t0.813711\t0.751118\t0.782911\t0.733731\t0.840537\t0.772976\t0.789866\t0.73224\t0.718331\t0.780427\t0.743169\t0.761053\t0.783408\t0.722802\t0.749627\t0.998013\t0.734228\t0.721808\t0.838053\t0.785892\t0.783408\t0.8
08246\t0.853949\t0.759066\t0.787879\t0.660705\t0.728763\t0.675112\t0.577745\t0.77993\t1\t0.677099\t0.675112\t0.791356\n+CIRAD394\t0.767014\t0.790363\t0.783905\t0.761053\t0.778937\t0.725782\t0.783905\t0.742176\t0.750621\t0.693492\t0.725286\t0.755589\t0.71535\t0.742673\t0.705912\t0.677099\t0.764531\t0.778937\t0.803775\t0.700944\t0.727273\t0.718331\t0.781421\t0.717337\t0.811227\t0.834078\t0.755092\t0.759563\t0.803279\t0.692499\t0.865872\t0.736711\t0.685544\t0.76155\t0.813214\t0.787382\t0.784401\t0.674118\t0.73075\t0.680079\t0.723299\t0.794337\t0.706905\t0.804769\t0.793343\t0.838549\t0.783905\t0.729757\t0.722802\t0.748137\t0.752111\t0.770492\t0.676602\t0.714357\t0.733234\t0.783905\t0.742176\t0.79235\t0.745653\t0.735221\t0.813711\t0.766021\t0.764531\t0.699454\t0.790859\t0.761053\t0.690512\t0.685544\t0.738698\t0.789866\t0.76304\t0.804272\t0.824143\t0.758569\t0.676105\t0.69846\t0.723795\t0.693989\t0.680576\t0.803279\t0.655241\t0.752608\t0.728266\t0.781918\t0.745156\t0.702931\t0.763537\t0.645306\t0.680576\t0.677099\t1\t0.773472\t0.781421\n+IR63372-08\t0.781918\t0.769498\t0.780924\t0.735221\t0.76006\t0.712866\t0.778937\t0.796821\t0.74764\t0.697466\t0.868356\t0.944362\t0.747144\t0.729757\t0.717834\t0.605564\t0.781421\t0.761053\t0.834575\t0.707899\t0.728266\t0.642822\t0.77844\t0.702434\t0.835072\t0.700944\t0.945852\t0.981123\t0.835072\t0.700447\t0.763537\t0.741679\t0.710383\t0.74466\t0.836066\t0.93542\t0.798311\t0.619473\t0.846001\t0.698957\t0.757079\t0.721808\t0.720815\t0.835569\t0.773472\t0.71237\t0.683557\t0.742673\t0.707899\t0.690512\t0.719324\t0.769498\t0.61699\t0.697466\t0.634873\t0.85544\t0.759066\t0.690015\t0.963239\t0.767014\t0.849478\t0.757079\t0.798311\t0.660705\t0.837556\t0.750124\t0.663686\t0.672628\t0.745653\t0.764034\t0.753105\t0.835072\t0.760556\t0.79235\t0.674118\t0.687531\t0.711873\t0.691008\t0.627919\t0.835072\t0.670144\t0.746647\t0.72926\t0.77993\t0.938897\t0.709886\t0.981123\t0.592648\t0.641828\t0.675112\t0.773472\t1\t0.812221\n+IR60080-46A\t0.899155\t0.87
9781\t0.901143\t0.753105\t0.867362\t0.807253\t0.869349\t0.878291\t0.845007\t0.808743\t0.738202\t0.861898\t0.787879\t0.817188\t0.84004\t0.617486\t0.924491\t0.868356\t0.946846\t0.702931\t0.799801\t0.769001\t0.898659\t0.818679\t0.934426\t0.704918\t0.801788\t0.82613\t0.946349\t0.722305\t0.799305\t0.812221\t0.714357\t0.837059\t0.919523\t0.797317\t0.925484\t0.702931\t0.846001\t0.725782\t0.861401\t0.84302\t0.742673\t0.947839\t0.865872\t0.711376\t0.719324\t0.835072\t0.725782\t0.741182\t0.80775\t0.858917\t0.721311\t0.762047\t0.672628\t0.882265\t0.880278\t0.680079\t0.806259\t0.942871\t0.891207\t0.923\t0.903626\t0.743169\t0.832588\t0.813711\t0.674615\t0.66468\t0.842027\t0.871336\t0.87233\t0.947342\t0.812221\t0.871833\t0.790363\t0.718331\t0.743666\t0.798311\t0.722305\t0.947342\t0.79533\t0.910581\t0.837556\t0.901143\t0.785892\t0.726776\t0.828117\t0.603577\t0.739195\t0.791356\t0.781421\t0.812221\t1\n'
b
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/test-data/analyse.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MDSplot/test-data/analyse.log Mon Mar 23 05:57:27 2015 -0400
[
@@ -0,0 +1,56 @@
+
+@----------------------------------------------------------@
+|        PLINK!       |     v1.07      |   10/Aug/2009     |
+|----------------------------------------------------------|
+|  (C) 2009 Shaun Purcell, GNU General Public License, v2  |
+|----------------------------------------------------------|
+|  For documentation, citation & bug-report instructions:  |
+|        http://pngu.mgh.harvard.edu/purcell/plink/        |
+@----------------------------------------------------------@
+
+Skipping web check... [ --noweb ] 
+Writing this text to log file [ analyse.log ]
+Analysis started: Tue Jan 20 09:30:56 2015
+
+Options in effect:
+ --file input
+ --noweb
+ --cluster
+ --matrix
+ --mds-plot 2
+ --out analyse
+
+2013 (of 2013) markers to be included from [ input.map ]
+93 individuals read from [ input.ped ] 
+93 individuals with nonmissing phenotypes
+Assuming a disease phenotype (1=unaff, 2=aff, 0=miss)
+Missing phenotype value is also -9
+0 cases, 93 controls and 0 missing
+93 males, 0 females, and 0 of unspecified sex
+Before frequency and genotyping pruning, there are 2013 SNPs
+Converting data to SNP-major format
+93 founders and 0 non-founders found
+Total genotyping rate in remaining individuals is 1
+0 SNPs failed missingness test ( GENO > 1 )
+0 SNPs failed frequency test ( MAF < 0 )
+Converting data to Individual-major format
+After frequency and genotyping pruning, there are 2013 SNPs
+After filtering, 0 cases, 93 controls and 0 missing
+After filtering, 93 males, 0 females, and 0 of unspecified sex
+
+ **Warning** this analysis typically requires whole-genome level data
+             to give accurate results 
+
+Clustering individuals based on genome-wide IBS
+Merge distance p-value constraint = 0
+IBS(g) calculation: 0 of 4278          IBS(g) calculation: 100 of 4278          IBS(g) calculation: 200 of 4278          IBS(g) calculation: 300 of 4278          IBS(g) calculation: 400 of 4278          IBS(g) calculation: 500 of 4278          IBS(g) calculation: 600 of 4278          IBS(g) calculation: 700 of 4278          IBS(g) calculation: 800 of 4278          IBS(g) calculation: 900 of 4278          IBS(g) calculation: 1000 of 4278          IBS(g) calculation: 1100 of 4278          IBS(g) calculation: 1200 of 4278          IBS(g) calculation: 1300 of 4278          IBS(g) calculation: 1400 of 4278          IBS(g) calculation: 1500 of 4278          IBS(g) calculation: 1600 of 4278          IBS(g) calculation: 1700 of 4278          IBS(g) calculation: 1800 of 4278          IBS(g) calculation: 1900 of 4278          IBS(g) calculation: 2000 of 4278          IBS(g) calculation: 2100 of 4278          IBS(g) calculation: 2200 of 4278          IBS(g) calculation: 2300 of 4278          IBS(g) calculation: 2400 of 4278          IBS(g) calculation: 2500 of 4278          IBS(g) calculation: 2600 of 4278          IBS(g) calculation: 2700 of 4278          IBS(g) calculation: 2800 of 4278          IBS(g) calculation: 2900 of 4278          IBS(g) calculation: 3000 of 4278          IBS(g) calculation: 3100 of 4278          IBS(g) calculation: 3200 of 4278          IBS(g) calculation: 3300 of 4278          IBS(g) calculation: 3400 of 4278          IBS(g) calculation: 3500 of 4278          IBS(g) calculation: 3600 of 4278          IBS(g) calculation: 3700 of 4278          IBS(g) calculation: 3800 of 4278          IBS(g) calculation: 3900 of 4278          IBS(g) calculation: 4000 of 4278          IBS(g) calculation: 4100 of 4278          IBS(g) calculation: 4200 of 4278          Writing IBS similarity matrix to [ analyse.mibs ]
+Of these, 4278 are pairable based on constraints
+Writing cluster progress to [ analyse.cluster0 ]
+Writing cluster solution (1) [ analyse.cluster1 ]
+Writing cluster solution (2) [ analyse.cluster2 ]
+Writing cluster solution (3) [ analyse.cluster3 ]
+Writing MDS solution to [ analyse.mds ] 
+MDS plot of individuals (not clusters)
+
+Analysis finished: Tue Jan 20 09:30:57 2015
+
b
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/test-data/analyse.mds_plot.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MDSplot/test-data/analyse.mds_plot.txt Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,93 @@
+IRAT112 -0.0969382 0.0376036
+IAC25 -0.0918126 0.0501177
+IAC165 -0.104815 0.0453876
+KARASUKARASURANKASU 0.107782 -0.0683546
+DOURADOPRECOCE -0.0831697 0.0516412
+CUIABANA 0.0327877 0.0497752
+BICOBRANCO -0.0554177 0.0501857
+CAAWA/FORTUNA6 -0.052355 0.0170584
+CANELADEFERRO -0.0142283 0.0598394
+CIRAD358 0.0497841 0.0645487
+COLOMBIA1 -0.110347 -0.172742
+CIRAD409 -0.113614 -0.105898
+BAGANANASALAO 0.0247582 0.0186246
+BAKUNGH 0.0199022 0.0613692
+FOHISOMOTRA 0.0344309 0.0697592
+KAKANI2 0.147713 -0.0780455
+GUARANI -0.0892761 0.0387007
+DOURADOAGULHA -0.0831373 0.0514523
+IRAT13 -0.0757215 0.00312413
+DAWASANRED 0.131309 -0.102364
+GRAZI 0.0177091 0.0870472
+IRAT144 0.0146443 0.0067734
+IAC47 -0.0968934 0.0477667
+MOROBEREKAN 0.0395544 0.0952695
+IRAT362 -0.0921075 -0.00860852
+GIZA171 0.14498 -0.0932001
+IDSA77 -0.122827 -0.120219
+IRAT216 -0.120205 -0.136803
+IRAT177 -0.0762395 0.00230332
+CHAPHUMA 0.149543 -0.0680637
+CIRAD392 0.0187265 -0.047646
+ARROZCEBADA 0.0110416 0.0787137
+INDANE 0.108691 -0.0338065
+DINORADO -0.00819237 0.0386578
+63-104 -0.053974 -0.00204494
+IR63380-16 -0.0968028 -0.0931822
+CANAROXA -0.0756689 0.0253328
+CICIHBETON 0.108857 0.142633
+IR68704-145-1-1-B -0.103292 -0.071382
+HAWMOM 0.129651 -0.0473323
+IRAT364 -0.0118523 0.0472973
+IRAT212 0.0231616 -0.00146178
+KHAODAM 0.123936 -0.0654516
+62667 -0.0752552 0.00278556
+IRAT234 -0.0346607 0.0551203
+NIPPONBARE_D 0.133199 -0.0963396
+JUMULA2 0.104558 -0.0524994
+CUBA65 -0.0124835 0.013679
+BABER 0.090299 -0.0460696
+CIRAD488 -0.0201481 -0.0262103
+BENGALYVAKARINA 0.028631 0.0637662
+ESPERANZA -0.108888 -0.00812147
+BULUPANDAK 0.119762 0.143912
+ARIAS 0.0327948 0.0827878
+GOMPA2 0.133341 -0.0543414
+CT13582-15-5-M -0.0922561 -0.0543988
+IRAT335 -0.0886849 0.0522465
+M202 0.13463 -0.106462
+IR65907-188-1-B -0.0755893 -0.136821
+IR71525-19-1-1 -0.0353444 0.0478985
+CNA-7_BO_1_1_33-13-6-1 -0.0823478 -0.0435581
+IRAT257 -0.0606191 0.0564989
+IRAT109 -0.1006 -0.00959445
+KANIRANGA 0.0705459 0.129888
+IR66421-096-2-1-1 -0.00859728 -0.0570793
+GOGO 0.0232414 0.0351889
+GEMJYAJYANAM 0.170032 -0.0857315
+DANGREY 0.175792 -0.113683
+BINULAWAN -0.0208225 0.0611919
+CAIAPO -0.0149329 -0.0122987
+IGUAPECATETO -0.0740464 0.0552386
+IRAT170 -0.0756928 0.00297643
+IRAT380 -0.0210318 -0.00092536
+ARAGUAIA -0.123443 0.0273298
+GOGOLEMPAK 0.0598291 0.107462
+JAOHAW 0.17563 -0.0587865
+CHALOYOE 0.0904761 -0.0565325
+GANIGI 0.0526286 0.0960475
+GUNDILKUNING 0.116788 0.130234
+IRAT2 -0.0762849 0.00262428
+IRAT366 0.0273652 0.13551
+IRAT104 -0.00310702 0.0636479
+DAVAO -0.00538403 0.0564886
+HD1-4 -0.0960109 0.0459137
+CURINCA -0.128876 -0.121141
+DAM 0.129029 -0.0660183
+CIRAD403 -0.117849 -0.134651
+JUMALI 0.123992 -0.0843623
+JIMBRUKJOLOWORO 0.0549906 0.135017
+GOGOLEMPUK 0.0597267 0.10733
+CIRAD394 0.0040454 -0.0885135
+IR63372-08 -0.0951857 -0.140804
+IR60080-46A -0.0732581 0.0197832
b
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/test-data/input.map
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MDSplot/test-data/input.map Mon Mar 23 05:57:27 2015 -0400
b
b'@@ -0,0 +1,2013 @@\n+Chr1 Chr1:4299 0 4299\n+Chr1 Chr1:26710 0 26710\n+Chr1 Chr1:56184 0 56184\n+Chr1 Chr1:93272 0 93272\n+Chr1 Chr1:93274 0 93274\n+Chr1 Chr1:146252 0 146252\n+Chr1 Chr1:171433 0 171433\n+Chr1 Chr1:186286 0 186286\n+Chr1 Chr1:198907 0 198907\n+Chr1 Chr1:205550 0 205550\n+Chr1 Chr1:206108 0 206108\n+Chr1 Chr1:214680 0 214680\n+Chr1 Chr1:214775 0 214775\n+Chr1 Chr1:215711 0 215711\n+Chr1 Chr1:217307 0 217307\n+Chr1 Chr1:238821 0 238821\n+Chr1 Chr1:240448 0 240448\n+Chr1 Chr1:240579 0 240579\n+Chr1 Chr1:300528 0 300528\n+Chr1 Chr1:312532 0 312532\n+Chr1 Chr1:337330 0 337330\n+Chr1 Chr1:351841 0 351841\n+Chr1 Chr1:353617 0 353617\n+Chr1 Chr1:405193 0 405193\n+Chr1 Chr1:405217 0 405217\n+Chr1 Chr1:409167 0 409167\n+Chr1 Chr1:415515 0 415515\n+Chr1 Chr1:424861 0 424861\n+Chr1 Chr1:425116 0 425116\n+Chr1 Chr1:429660 0 429660\n+Chr1 Chr1:439393 0 439393\n+Chr1 Chr1:456979 0 456979\n+Chr1 Chr1:479030 0 479030\n+Chr1 Chr1:479033 0 479033\n+Chr1 Chr1:525146 0 525146\n+Chr1 Chr1:535685 0 535685\n+Chr1 Chr1:536093 0 536093\n+Chr1 Chr1:572628 0 572628\n+Chr1 Chr1:577906 0 577906\n+Chr1 Chr1:630114 0 630114\n+Chr1 Chr1:653644 0 653644\n+Chr1 Chr1:660019 0 660019\n+Chr1 Chr1:660070 0 660070\n+Chr1 Chr1:715091 0 715091\n+Chr1 Chr1:715130 0 715130\n+Chr1 Chr1:725844 0 725844\n+Chr1 Chr1:735420 0 735420\n+Chr1 Chr1:735757 0 735757\n+Chr1 Chr1:735810 0 735810\n+Chr1 Chr1:746855 0 746855\n+Chr1 Chr1:753267 0 753267\n+Chr1 Chr1:754061 0 754061\n+Chr1 Chr1:769014 0 769014\n+Chr1 Chr1:782878 0 782878\n+Chr1 Chr1:787761 0 787761\n+Chr1 Chr1:789186 0 789186\n+Chr1 Chr1:792359 0 792359\n+Chr1 Chr1:805716 0 805716\n+Chr1 Chr1:841793 0 841793\n+Chr1 Chr1:845425 0 845425\n+Chr1 Chr1:847587 0 847587\n+Chr1 Chr1:853188 0 853188\n+Chr1 Chr1:853218 0 853218\n+Chr1 Chr1:854818 0 854818\n+Chr1 Chr1:855663 0 855663\n+Chr1 Chr1:859613 0 859613\n+Chr1 Chr1:859635 0 859635\n+Chr1 Chr1:917609 0 917609\n+Chr1 Chr1:917933 0 917933\n+Chr1 Chr1:925316 0 925316\n+Chr1 Chr1:930060 0 
930060\n+Chr1 Chr1:930069 0 930069\n+Chr1 Chr1:937007 0 937007\n+Chr1 Chr1:937014 0 937014\n+Chr1 Chr1:938444 0 938444\n+Chr1 Chr1:938619 0 938619\n+Chr1 Chr1:961472 0 961472\n+Chr1 Chr1:961537 0 961537\n+Chr1 Chr1:963122 0 963122\n+Chr1 Chr1:963313 0 963313\n+Chr1 Chr1:973080 0 973080\n+Chr1 Chr1:986764 0 986764\n+Chr1 Chr1:1004926 0 1004926\n+Chr1 Chr1:1009979 0 1009979\n+Chr1 Chr1:1011056 0 1011056\n+Chr1 Chr1:1011714 0 1011714\n+Chr1 Chr1:1012017 0 1012017\n+Chr1 Chr1:1012026 0 1012026\n+Chr1 Chr1:1012111 0 1012111\n+Chr1 Chr1:1026043 0 1026043\n+Chr1 Chr1:1083377 0 1083377\n+Chr1 Chr1:1095742 0 1095742\n+Chr1 Chr1:1124378 0 1124378\n+Chr1 Chr1:1125022 0 1125022\n+Chr1 Chr1:1167717 0 1167717\n+Chr1 Chr1:1172746 0 1172746\n+Chr1 Chr1:1173064 0 1173064\n+Chr1 Chr1:1180585 0 1180585\n+Chr1 Chr1:1182388 0 1182388\n+Chr1 Chr1:1184650 0 1184650\n+Chr1 Chr1:1188838 0 1188838\n+Chr1 Chr1:1197390 0 1197390\n+Chr1 Chr1:1197550 0 1197550\n+Chr1 Chr1:1197663 0 1197663\n+Chr1 Chr1:1208561 0 1208561\n+Chr1 Chr1:1229816 0 1229816\n+Chr1 Chr1:1250047 0 1250047\n+Chr1 Chr1:1286631 0 1286631\n+Chr1 Chr1:1303497 0 1303497\n+Chr1 Chr1:1306055 0 1306055\n+Chr1 Chr1:1306058 0 1306058\n+Chr1 Chr1:1306085 0 1306085\n+Chr1 Chr1:1318689 0 1318689\n+Chr1 Chr1:1358972 0 1358972\n+Chr1 Chr1:1359766 0 1359766\n+Chr1 Chr1:1359769 0 1359769\n+Chr1 Chr1:1365854 0 1365854\n+Chr1 Chr1:1404921 0 1404921\n+Chr1 Chr1:1407386 0 1407386\n+Chr1 Chr1:1443131 0 1443131\n+Chr1 Chr1:1446645 0 1446645\n+Chr1 Chr1:1486728 0 1486728\n+Chr1 Chr1:1492913 0 1492913\n+Chr1 Chr1:1496524 0 1496524\n+Chr1 Chr1:1509728 0 1509728\n+Chr1 Chr1:1515087 0 1515087\n+Chr1 Chr1:1546579 0 1546579\n+Chr1 Chr1:1551208 0 1551208\n+Chr1 Chr1:1563026 0 1563026\n+Chr1 Chr1:1563029 0 1563029\n+Chr1 Chr1:1565548 0 1565548\n+Chr1 Chr1:1598133 0 1598133\n+Chr1 Chr1:1627278 0 1627278\n+Chr1 Chr1:1627327 0 1627327\n+Chr1 Chr1:1632085 0 1632085\n+Chr1 Chr1:1655772 0 1655772\n+Chr1 Chr1:1655775 0 1655775\n+Chr1 Chr1:1671697 0 
1671697\n+Chr1 Chr1:1685316 0 1685316\n+Chr1 Chr1:1687456 0 1687456\n+Chr1 Chr1:1703238 0 1703238\n+Chr1 Chr1:1703264 0 1703264\n+Chr1 Chr1:1722207 0 1722207\n+Chr1 Chr'..b'+Chr1 Chr1:41945638 0 41945638\n+Chr1 Chr1:41954934 0 41954934\n+Chr1 Chr1:41958052 0 41958052\n+Chr1 Chr1:41961218 0 41961218\n+Chr1 Chr1:41961221 0 41961221\n+Chr1 Chr1:41965592 0 41965592\n+Chr1 Chr1:41965659 0 41965659\n+Chr1 Chr1:41968928 0 41968928\n+Chr1 Chr1:41968936 0 41968936\n+Chr1 Chr1:41970211 0 41970211\n+Chr1 Chr1:41998459 0 41998459\n+Chr1 Chr1:42001355 0 42001355\n+Chr1 Chr1:42047135 0 42047135\n+Chr1 Chr1:42047157 0 42047157\n+Chr1 Chr1:42061862 0 42061862\n+Chr1 Chr1:42063149 0 42063149\n+Chr1 Chr1:42081350 0 42081350\n+Chr1 Chr1:42081405 0 42081405\n+Chr1 Chr1:42088625 0 42088625\n+Chr1 Chr1:42089983 0 42089983\n+Chr1 Chr1:42097607 0 42097607\n+Chr1 Chr1:42106144 0 42106144\n+Chr1 Chr1:42138453 0 42138453\n+Chr1 Chr1:42150639 0 42150639\n+Chr1 Chr1:42155417 0 42155417\n+Chr1 Chr1:42156013 0 42156013\n+Chr1 Chr1:42164107 0 42164107\n+Chr1 Chr1:42165734 0 42165734\n+Chr1 Chr1:42169797 0 42169797\n+Chr1 Chr1:42169819 0 42169819\n+Chr1 Chr1:42187340 0 42187340\n+Chr1 Chr1:42192288 0 42192288\n+Chr1 Chr1:42196386 0 42196386\n+Chr1 Chr1:42227135 0 42227135\n+Chr1 Chr1:42229251 0 42229251\n+Chr1 Chr1:42231379 0 42231379\n+Chr1 Chr1:42242069 0 42242069\n+Chr1 Chr1:42244104 0 42244104\n+Chr1 Chr1:42276315 0 42276315\n+Chr1 Chr1:42301791 0 42301791\n+Chr1 Chr1:42302347 0 42302347\n+Chr1 Chr1:42324275 0 42324275\n+Chr1 Chr1:42332621 0 42332621\n+Chr1 Chr1:42352040 0 42352040\n+Chr1 Chr1:42352085 0 42352085\n+Chr1 Chr1:42352093 0 42352093\n+Chr1 Chr1:42352397 0 42352397\n+Chr1 Chr1:42354188 0 42354188\n+Chr1 Chr1:42354191 0 42354191\n+Chr1 Chr1:42357534 0 42357534\n+Chr1 Chr1:42357591 0 42357591\n+Chr1 Chr1:42367404 0 42367404\n+Chr1 Chr1:42367442 0 42367442\n+Chr1 Chr1:42419046 0 42419046\n+Chr1 Chr1:42419769 0 42419769\n+Chr1 Chr1:42421769 0 42421769\n+Chr1 Chr1:42423110 0 
42423110\n+Chr1 Chr1:42427556 0 42427556\n+Chr1 Chr1:42460597 0 42460597\n+Chr1 Chr1:42460615 0 42460615\n+Chr1 Chr1:42469025 0 42469025\n+Chr1 Chr1:42471750 0 42471750\n+Chr1 Chr1:42479925 0 42479925\n+Chr1 Chr1:42487487 0 42487487\n+Chr1 Chr1:42487514 0 42487514\n+Chr1 Chr1:42548763 0 42548763\n+Chr1 Chr1:42583428 0 42583428\n+Chr1 Chr1:42586452 0 42586452\n+Chr1 Chr1:42594255 0 42594255\n+Chr1 Chr1:42596478 0 42596478\n+Chr1 Chr1:42602822 0 42602822\n+Chr1 Chr1:42602885 0 42602885\n+Chr1 Chr1:42610308 0 42610308\n+Chr1 Chr1:42612885 0 42612885\n+Chr1 Chr1:42616231 0 42616231\n+Chr1 Chr1:42620187 0 42620187\n+Chr1 Chr1:42620190 0 42620190\n+Chr1 Chr1:42620878 0 42620878\n+Chr1 Chr1:42620881 0 42620881\n+Chr1 Chr1:42631741 0 42631741\n+Chr1 Chr1:42634538 0 42634538\n+Chr1 Chr1:42664013 0 42664013\n+Chr1 Chr1:42665624 0 42665624\n+Chr1 Chr1:42666354 0 42666354\n+Chr1 Chr1:42669650 0 42669650\n+Chr1 Chr1:42669653 0 42669653\n+Chr1 Chr1:42673304 0 42673304\n+Chr1 Chr1:42674707 0 42674707\n+Chr1 Chr1:42682058 0 42682058\n+Chr1 Chr1:42682061 0 42682061\n+Chr1 Chr1:42690508 0 42690508\n+Chr1 Chr1:42706257 0 42706257\n+Chr1 Chr1:42711112 0 42711112\n+Chr1 Chr1:42762398 0 42762398\n+Chr1 Chr1:42763941 0 42763941\n+Chr1 Chr1:42763944 0 42763944\n+Chr1 Chr1:42766241 0 42766241\n+Chr1 Chr1:42777810 0 42777810\n+Chr1 Chr1:42780446 0 42780446\n+Chr1 Chr1:42825046 0 42825046\n+Chr1 Chr1:42826857 0 42826857\n+Chr1 Chr1:42880271 0 42880271\n+Chr1 Chr1:42916070 0 42916070\n+Chr1 Chr1:42916090 0 42916090\n+Chr1 Chr1:42920553 0 42920553\n+Chr1 Chr1:42928342 0 42928342\n+Chr1 Chr1:42959997 0 42959997\n+Chr1 Chr1:42968423 0 42968423\n+Chr1 Chr1:43046967 0 43046967\n+Chr1 Chr1:43048104 0 43048104\n+Chr1 Chr1:43065469 0 43065469\n+Chr1 Chr1:43068624 0 43068624\n+Chr1 Chr1:43072051 0 43072051\n+Chr1 Chr1:43073361 0 43073361\n+Chr1 Chr1:43079457 0 43079457\n+Chr1 Chr1:43079480 0 43079480\n+Chr1 Chr1:43093204 0 43093204\n+Chr1 Chr1:43100601 0 43100601\n+Chr1 Chr1:43123958 0 43123958\n+Chr1 
Chr1:43132577 0 43132577\n+Chr1 Chr1:43141118 0 43141118\n+Chr1 Chr1:43141179 0 43141179\n+Chr1 Chr1:43154143 0 43154143\n+Chr1 Chr1:43158899 0 43158899\n+Chr1 Chr1:43179527 0 43179527\n+Chr1 Chr1:43214669 0 43214669\n+Chr1 Chr1:43229591 0 43229591\n+Chr1 Chr1:43249859 0 43249859\n+Chr1 Chr1:43269458 0 43269458\n'
b
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/test-data/input.ped
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MDSplot/test-data/input.ped Mon Mar 23 05:57:27 2015 -0400
b
b'@@ -0,0 +1,93 @@\n+IRAT112\t1\t0\t0\t1\t1\t1 1\t4 4\t2 2\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t3 3\t4 4\t2 2\t1 1\t3 3\t1 1\t1 1\t4 4\t1 1\t1 1\t2 2\t1 1\t4 4\t1 1\t1 1\t4 4\t2 2\t4 4\t1 1\t1 1\t4 4\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t2 2\t1 1\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t3 3\t4 4\t4 4\t2 2\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t2 2\t4 4\t2 2\t1 1\t1 1\t3 3\t3 3\t4 4\t4 4\t3 3\t3 3\t2 2\t1 1\t2 2\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t4 4\t3 3\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t3 3\t4 4\t4 4\t3 3\t4 4\t1 1\t3 3\t3 3\t3 3\t1 1\t2 2\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t3 3\t2 2\t4 4\t4 4\t1 1\t3 3\t2 2\t3 3\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t2 2\t4 4\t4 4\t1 1\t4 4\t2 2\t3 3\t3 3\t1 1\t3 3\t4 4\t1 1\t3 3\t4 4\t1 1\t4 4\t2 2\t2 2\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t2 2\t2 2\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t3 3\t3 3\t1 1\t4 4\t1 1\t2 2\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t3 3\t4 4\t2 2\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t2 2\t1 1\t2 2\t4 4\t2 2\t3 3\t4 4\t1 1\t4 4\t2 2\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t3 3\t4 4\t4 4\t1 1\t2 2\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t4 4\t3 3\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t2 2\t4 4\t1 1\t3 3\t2 2\t2 2\t3 3\t3 3\t1 1\t2 2\t1 1\t3 3\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t2 2\t2 2\t1 1\t4 4\t4 4\t1 1\t2 2\t4 4\t1 1\t4 4\t1 1\t2 2\t1 1\t2 2\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t3 3\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t3 3\t1 1\t4 4\t3 3\t2 2\t1 1\t4 4\t4 4\t3 3\t1 
1\t2 2\t2 2\t1 1\t3 3\t4 4\t1 1\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t2 2\t4 4\t4 4\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t2 2\t4 4\t2 2\t2 2\t3 3\t4 4\t3 3\t2 2\t1 1\t1 1\t2 2\t1 1\t4 4\t3 3\t2 2\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t2 2\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t3 3\t1 1\t2 2\t1 1\t1 1\t2 2\t1 1\t1 1\t2 2\t1 1\t2 2\t3 3\t4 4\t1 1\t2 2\t1 1\t2 2\t4 4\t4 4\t1 1\t1 1\t4 4\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t3 3\t4 4\t3 3\t4 4\t3 3\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t3 3\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t3 3\t4 4\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t2 2\t4 4\t2 2\t1 1\t3 3\t1 1\t4 4\t1 1\t2 2\t1 1\t3 3\t2 2\t4 4\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t4 4\t1 1\t3 3\t4 4\t4 4\t4 4\t3 3\t4 4\t4 4\t1 1\t2 2\t1 1\t4 4\t4 4\t2 2\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t3 3\t2 2\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t3 3\t2 2\t1 1\t3 3\t4 4\t2 2\t2 2\t4 4\t2 2\t2 2\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t3 3\t4 4\t1 1\t1 1\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3\t1 1\t4 4\t3 3\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t2 2\t1 1\t4 4\t3 3\t4 4\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t3 3\t1 1\t4 4\t1 1\t2 2\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t2 2\t4 4\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t2 2\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t3 3\t1 1\t1 1\t1 1\t4 4\t1 1\t3 3\t2 2\t2 2\t2 2\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t3 3\t4 4\t3 3\t2 2\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t3 3\t4 4\t4 4\t3 3\t4 4\t3 3\t3 3\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t1 1\t4 4\t3 3\t3 3\t3 3\t4 4\t3 3\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t3 3\t1 1\t3 3\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 
4\t4 4\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t3 3\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t1 1\t3 3\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t2 2\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t3 3\t1 1\t1 1\t4 4\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t2 2\t1 1\t4 4\t4 4\t4 4\t3 3\t1 1\t2 2\t1 1\t1 1\t4 4\t4 4\t2 2\t1 1\t1 1\t2 2\t4 4\t1 1\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3\t3 3\t2 2\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t2 2\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t2 2\t4 4\t1 1\t3 3\t3 3\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t2 2\t3 3\t1 1\t3 3\t1 1\t4 4\t4 4\t2 2\t4 4\t4 4\t1 1\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3'..b' 1\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t3 3\t1 1\t3 3\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3\t2 2\t4 4\t1 1\t4 4\t1 1\t4 4\t2 2\t3 3\t4 4\t1 1\t4 4\t1 1\t4 4\t2 2\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t4 4\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t3 3\t2 2\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t2 2\t1 1\t4 4\t1 1\t2 2\t4 4\t1 1\t3 3\t1 1\t1 1\t3 3\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t3 3\t3 3\t2 2\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t4 4\t4 4\t1 1\t4 4\t4 4\t3 3\t1 1\t3 3\t4 4\t3 3\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t2 2\t4 4\t2 2\t2 2\t1 1\t4 4\t4 4\t2 2\t4 4\t4 4\t4 4\t3 3\t1 1\t3 3\t3 3\t3 3\t4 4\t4 4\t2 2\t1 1\t2 2\t4 4\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t1 1\t2 2\t1 1\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t2 2\t3 3\t4 4\t1 1\t3 3\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t2 2\t1 1\t3 3\t3 3\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t2 2\t4 4\t2 
2\t4 4\t1 1\t2 2\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t3 3\t1 1\t4 4\t4 4\t3 3\t3 3\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t3 3\t3 3\t2 2\t1 1\t4 4\t3 3\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t2 2\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t3 3\t4 4\t4 4\t1 1\t3 3\t2 2\t2 2\t1 1\t1 1\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t2 2\t1 1\t3 3\t1 1\t1 1\t4 4\t4 4\t2 2\t4 4\t3 3\t3 3\t1 1\t2 2\t2 2\t4 4\t2 2\t3 3\t1 1\t1 1\t3 3\t3 3\t1 1\t4 4\t1 1\t1 1\t3 3\t4 4\t2 2\t4 4\t4 4\t3 3\t2 2\t3 3\t3 3\t4 4\t4 4\t2 2\t2 2\t4 4\t2 2\t4 4\t4 4\t4 4\t2 2\t4 4\t4 4\t1 1\t4 4\t4 4\t3 3\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t3 3\t1 1\t1 1\t3 3\t1 1\t4 4\t3 3\t3 3\t1 1\t4 4\t1 1\t3 3\t3 3\t3 3\t1 1\t4 4\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t3 3\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t1 1\t3 3\t4 4\t1 1\t1 1\t2 2\t4 4\t3 3\t2 2\t3 3\t1 1\t1 1\t4 4\t4 4\t4 4\t3 3\t2 2\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t2 2\t1 1\t1 1\t4 4\t1 1\t3 3\t1 1\t3 3\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t3 3\t1 1\t3 3\t3 3\t1 1\t3 3\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t4 4\t2 2\t4 4\t2 2\t4 4\t2 2\t1 1\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t4 4\t4 4\t2 2\t1 1\t2 2\t4 4\t1 1\t1 1\t1 1\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t2 2\t3 3\t4 4\t1 1\t4 4\t3 3\t3 3\t3 3\t1 1\t4 4\t3 3\t3 3\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t2 2\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t2 2\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t2 2\t4 4\t4 4\t3 3\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t2 2\t1 1\t4 4\t4 4\t4 4\t1 1\t2 2\t4 4\t1 1\t1 1\t3 3\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t2 2\t4 4\t4 4\t3 3\t1 1\t1 1\t4 4\t2 2\t1 1\t3 3\t1 1\t1 1\t4 4\t4 4\t3 3\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t2 2\t3 3\t4 4\t2 2\t4 4\t4 4\t2 2\t1 1\t2 
2\t1 1\t2 2\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t3 3\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t3 3\t4 4\t2 2\t1 1\t2 2\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t3 3\t1 1\t1 1\t4 4\t4 4\t1 1\t2 2\t4 4\t1 1\t1 1\t2 2\t1 1\t4 4\t4 4\t4 4\t4 4\t3 3\t3 3\t4 4\t4 4\t2 2\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t3 3\t4 4\t3 3\t4 4\t4 4\t1 1\t3 3\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t4 4\t3 3\t2 2\t4 4\t4 4\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t3 3\t1 1\t4 4\t1 1\t2 2\t3 3\t2 2\t4 4\t1 1\t1 1\t4 4\t3 3\t2 2\t4 4\t2 2\t1 1\t3 3\t4 4\t1 1\t1 1\t1 1\t3 3\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t3 3\t4 4\t2 2\t4 4\t1 1\t1 1\t4 4\t2 2\t4 4\t4 4\t4 4\t3 3\t1 1\t1 1\t3 3\t1 1\t2 2\t1 1\t3 3\t2 2\t1 1\t2 2\t1 1\t1 1\t1 1\t2 2\t1 1\t4 4\t3 3\t1 1\t3 3\t1 1\t1 1\t3 3\t4 4\t1 1\t3 3\t4 4\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t2 2\t3 3\t3 3\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t2 2\t1 1\t4 4\t2 2\t2 2\t4 4\t4 4\t1 1\t2 2\t4 4\t3 3\t4 4\t4 4\t4 4\t1 1\t3 3\t4 4\t2 2\t1 1\t4 4\t3 3\t1 1\t4 4\t2 2\t1 1\t4 4\t3 3\t1 1\t1 1\t4 4\t2 2\t4 4\t4 4\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t1 1\t1 1\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t3 3\t1 1\t1 1\t4 4\t3 3\t1 1\t4 4\t2 2\t4 4\t1 1\t2 2\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t2 2\t4 4\t3 3\t1 1\t2 2\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t1 1\t3 3\t1 1\t2 2\t3 3\t2 2\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t3 3\t4 4\t2 2\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t4 4\t4 4\t4 4\t4 4\t2 2\t1 1\t3 3\t4 4\t4 4\t4 4\t1 1\t1 1\t3 3\t1 1\t3 3\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t3 3\t3 3\t1 1\t1 1\t2 2\t4 4\t1 1\t1 1\t2 2\t4 4\t3 3\t2 2\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 
1\t4 4\n'
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolFilter/VCFToolsFilter.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolFilter/VCFToolsFilter.pl Mon Mar 23 05:57:27 2015 -0400
[
@@ -0,0 +1,214 @@
+
+#!/usr/bin/perl
+
+use strict;
+use Switch;
+use Getopt::Long;
+use Bio::SeqIO;
+
+# Help text; the script dies with it when a mandatory argument is missing.
+my $usage = qq~Usage:$0 <args> [<opts>]
+
+where <args> are:
+
+    -i, --input          <VCF input>
+    -o, --out            <Output basename>
+      
+      <opts> are:
+
+    -s, --samples        <Samples to be analyzed. Comma separated list>
+    -c, --chromosomes    <Chromosomes to be analyzed. Comma separated list>
+    -e, --export         <Output format (VCF/freq/plink). Default: VCF>
+    -f, --frequency      <Minimum MAF. Default: 0.001>
+    -m, --max_freq       <Maximum MAF. Default: 0.5>
+    -a, --allow_missing  <Minimum proportion of non-missing genotypes per site (passed to vcftools --max-missing): 0 keeps every site, 1 allows no missing data. Default: 0>
+    -n, --nb_alleles     <Accepted number of alleles (min,max). Default: 1,4>
+    -t, --type           <Type of polymorphisms to keep (ALL/SNP/INDEL). Default: ALL>
+    -b, --bounds         <Lower bound and upper bound for a range of sites to be processed (start,end). Default: 1, 100000000>
+~;
+$usage .= "\n";
+
+# Mandatory arguments: input VCF and output basename
+my ($input,$out);
+
+# Option defaults, used when the corresponding switch is not given
+my $frequency_max = 0.5;      # forwarded as --max-maf
+my $frequency_min = 0.001;    # forwarded as --maf
+my $missing_data = 0;         # forwarded as --max-missing
+my $export = "VCF";           # VCF, freq or plink
+my $type = "ALL";             # ALL, SNP or INDEL
+
+# Optional comma separated lists; undefined means "no restriction"
+my $nb_alleles;
+my $bounds;
+my $samples;
+my $chromosomes;
+
+# Parse the command line. GetOptions returns false when it meets an unknown
+# or malformed option; stop with the usage text instead of running vcftools
+# with a partially understood command line.
+GetOptions(
+ "input=s"        => \$input,
+ "out=s"          => \$out,
+ "samples=s"      => \$samples,
+ "chromosomes=s"  => \$chromosomes,
+ "frequency=s"    => \$frequency_min,
+ "max_freq=s"     => \$frequency_max,
+ "allow_missing=s"=> \$missing_data,
+ "export=s"       => \$export,
+ "type=s"         => \$type,
+ "nb_alleles=s"   => \$nb_alleles,
+ "bounds=s"       => \$bounds,
+) or die $usage;
+
+
+# The input VCF and the output basename are mandatory
+die $usage
+  if ( !$input || !$out);
+
+# Check one option value against a pattern.
+#   $value   : value received from the command line (may be undef)
+#   $pattern : qr// pattern the whole value must match
+#   $error   : message to die with when the value does not match
+# Returns the matched text (trailing whitespace stripped). A false value
+# (undef, "" or "0") is returned untouched without being checked.
+sub validated_option
+{
+        my ($value, $pattern, $error) = @_;
+        return $value if (!$value);
+        return $1 if ($value =~ /^($pattern)\s*$/);
+        die $error;
+}
+
+my $list_re   = qr/[\w,]+/;             # comma separated identifiers
+my $number_re = qr/\d+\.?\d*|\.\d+/;    # one non-negative decimal number
+my $pair_re   = qr/\d+,\d+/;            # exactly two integers: "a,b"
+my $word_re   = qr/\w+/;                # single keyword
+
+$samples       = validated_option($samples,       $list_re,   "Error: Samples must be a comma separated list of string\n");
+$chromosomes   = validated_option($chromosomes,   $list_re,   "Error: Chromosomes must be a comma separated list of string\n");
+# Both values of a pair are interpolated into the vcftools command later on,
+# so a single value or a trailing comma is rejected here.
+$bounds        = validated_option($bounds,        $pair_re,   "Error: Bounds must be two comma separated integers (start,end)\n");
+$frequency_min = validated_option($frequency_min, $number_re, "Error: frequency must be a number\n");
+$frequency_max = validated_option($frequency_max, $number_re, "Error: frequency must be a number\n");
+$missing_data  = validated_option($missing_data,  $number_re, "Error: Missing data must be a number\n");
+$nb_alleles    = validated_option($nb_alleles,    $pair_re,   "Error: Nb alleles must be two comma separated integers (min,max)\n");
+$export        = validated_option($export,        $word_re,   "Error: Export must be a string\n");
+$type          = validated_option($type,          $word_re,   "Error: Type must be a string\n");
+
+
+# Split the comma separated values; each list stays empty when the option
+# was not given.
+my @dnasamples       = $samples     ? split(",",$samples)     : ();
+my @nalleles         = $nb_alleles  ? split(",",$nb_alleles)  : ();
+my @boundaries       = $bounds      ? split(",",$bounds)      : ();
+my @chromosomes_list = $chromosomes ? split(",",$chromosomes) : ();
+
+
+# Build the vcftools command-line fragments from the parsed options.
+
+# One "--indv <name>" per requested sample; empty when no sample is requested
+my $indiv_cmd = join(" ", map { "--indv $_" } @dnasamples);
+
+# One "--chr <name>" per requested chromosome; empty when none is requested
+my $chrom_cmd = join(" ", map { "--chr $_" } @chromosomes_list);
+
+# Output format: "freq" and "plink" select the matching vcftools switch,
+# any other value (including the default "VCF") recodes to VCF.
+my %export_switch = (
+ "freq"  => "--freq",
+ "plink" => "--plink",
+);
+my $export_cmd = $export_switch{$export} || "--recode";
+
+
+# Accepted number of alleles per site
+my $nb_alleles_cmd = "--min-alleles 1 --max-alleles 4";
+if (@nalleles)
+{
+ $nb_alleles_cmd = "--min-alleles $nalleles[0] --max-alleles $nalleles[1]";
+}
+
+# Range of positions to process.
+# NOTE(review): $bounds_cmd is built here but the vcftools call at the end of
+# this script does not include it, so --bounds currently has no effect.
+# vcftools reportedly accepts --from-bp/--to-bp only together with a single
+# --chr - confirm against the installed version before wiring it in.
+my $bounds_cmd = "--from-bp 1 --to-bp 100000000";
+if (@boundaries)
+{
+        $bounds_cmd = "--from-bp $boundaries[0] --to-bp $boundaries[1]";
+}
+
+
+# Polymorphism type: only "INDEL" and "SNP" add a filter, any other value
+# (including the default "ALL") keeps every site.
+my %type_switch = (
+ "INDEL" => "--keep-only-indels",
+ "SNP"   => "--remove-indels",
+);
+my $type_cmd = $type_switch{$type} || "";
+
+
+# Wrap a value in single quotes (escaping embedded single quotes) so that the
+# shell hands it to vcftools as exactly one literal argument.
+sub shell_quote
+{
+ my ($value) = @_;
+ $value =~ s/'/'\\''/g;
+ return "'" . $value . "'";
+}
+
+# The input path and output basename are not pattern-checked by this script,
+# so they are quoted before being placed in the shell command. All vcftools
+# output (stdout and stderr) is appended to vcftools.log in the current
+# directory.
+my $vcftools_cmd = "vcftools --vcf " . shell_quote($input) . " --out " . shell_quote($out)
+ . " --keep-INFO-all --remove-filtered-all $type_cmd $export_cmd $chrom_cmd $indiv_cmd $nb_alleles_cmd"
+ . " --maf $frequency_min --max-maf $frequency_max --max-missing $missing_data >>vcftools.log 2>&1";
+
+# Report a failed run instead of returning as if the output files existed
+if (system($vcftools_cmd) != 0)
+{
+ my $reason = ($? == -1) ? "could not be started: $!" : "exited with status " . ($? >> 8);
+ die "Error: vcftools $reason (see vcftools.log)\n";
+}
+
+
+
+
+
+
+
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolFilter/test-data/result.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolFilter/test-data/result.log Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,21 @@
+
+VCFtools - v0.1.12b
+(C) Adam Auton and Anthony Marcketta 2009
+
+Parameters as interpreted:
+ --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat
+ --chr chr1
+ --recode-INFO-all
+ --maf 0.001
+ --max-alleles 4
+ --max-maf 0.5
+ --min-alleles 2
+ --max-missing 1
+ --out filtered
+ --recode
+ --remove-filtered-all
+
+After filtering, kept 1 out of 1 Individuals
+Outputting VCF file...
+After filtering, kept 3616 out of a possible 4955 Sites
+Run Time = 0.00 seconds
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolFilter/test-data/result.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolFilter/test-data/result.vcf Mon Mar 23 05:57:27 2015 -0400
[
b'@@ -0,0 +1,3661 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=LowQual,Description="Low quality">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n+##INFO=<ID=AF,Number=A,Type'..b'0012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:30,25:55:99:802,0,993\n+chr1\t188173\t.\tG\tA\t697.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.617;DP=42;Dels=0.00;FS=2.786;HaplotypeScore=1.9991;MLEAC=1;MLEAF=0.500;MQ=59.09;MQ0=0;MQRankSum=-0.013;QD=16.61;ReadPosRankSum=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n+chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n+chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n+chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n+chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n+chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n+chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n+chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n+chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n+chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n'
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolFilter/test-data/sample.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolFilter/test-data/sample.vcf Mon Mar 23 05:57:27 2015 -0400
[
b'@@ -0,0 +1,5000 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=LowQual,Description="Low quality">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n+##INFO=<ID=AF,Number=A,Type'..b'm=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n+chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n+chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n+chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n+chr1\t188357\t.\tC\tT\t1327.77\t.\tAC=2;AF=1.00;AN=2;DP=36;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=24.46;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t1/1:0,36:36:99:1356,105,0\n+chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n+chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n+chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n+chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n+chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n+chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n'
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolFilter/vcfToolsFilter.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolFilter/vcfToolsFilter.sh Mon Mar 23 05:57:27 2015 -0400
[
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+tool_path=$(dirname $0)
+
+filein=$1
+fileout_label=$2
+fileout=$3
+filelog=$4
+export=$5
+frequency=$6
+max_freq=$7
+allow_missing=$8
+nb_alleles_min=$9
+nb_alleles_max=${10}
+type=${11}
+bound_start=${12}
+bound_end=${13}
+
+
+if [ "${14}" != "None" ]
+then samples="--samples ${14}"
+fi
+
+if [ "${15}" != "None" ]
+then chromosomes="--chromosomes ${15}"
+fi
+
+if [ "$bound_start" -gt "$bound_end" ]
+then tmp=$bound_start ; bound_start=$bound_end ; bound_end=$tmp ; echo "Warning : Lower bound must be lower than greater bound!" >&2
+fi
+
+if [ "$nb_alleles_min" -gt "$nb_alleles_max" ]
+then tmp=$nb_alleles_min ; nb_alleles_min=$nb_alleles_max ; nb_alleles_max=$tmp ; echo "Warning : Minimum number of alleles must be lower than maximum number of allele!" >&2
+fi
+
+perl $tool_path/VCFToolsFilter.pl --input $filein --out $fileout_label --export $export --frequency $frequency --max_freq $max_freq --allow_missing $allow_missing --nb_alleles $nb_alleles_min','$nb_alleles_max --type $type --bounds $bound_start','$bound_end $samples $chromosomes
+
+if [ "$export" = "VCF" ]
+then cp  $fileout_label.recode.vcf $fileout ; rm $fileout_label.recode.vcf
+elif [ "$export" = "freq" ]
+then cp  $fileout_label.frq $fileout ; rm $fileout_label.frq
+else cp  $fileout_label.ped $fileout; cp $fileout_label.map ${16} ; rm $fileout_label.ped $fileout_label.map
+fi
+
+cp vcftools.log $filelog
+rm vcftools.log
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolFilter/vcfToolsFilter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolFilter/vcfToolsFilter.xml Mon Mar 23 05:57:27 2015 -0400
[
b'@@ -0,0 +1,268 @@\n+<tool id="sniplay_vcftoolsfilter" name="VCF tools filter" version="1.1.1">\n+    \n+    <!-- [REQUIRED] Tool description displayed after the tool name -->\n+    <description> Filter VCF using VCFtools</description>\n+    \n+    <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->\n+    <requirements>\n+        <requirement type="binary">perl</requirement>\n+\t<requirement type="package" version="0.1.12b">VCFtools</requirement>\n+    </requirements>\n+    \n+    <!-- [OPTIONAL] Command to be executed to get the tool\'s version string -->\n+    <version_command>\n+<!--\n+        tool_binary -v\n+-->\n+    </version_command>\n+    \n+    <!-- [REQUIRED] The command to execute -->\n+    <command interpreter="perl">\n+\tvcfToolsFilter.sh $filein $fileout_label $fileout $filelog $export $frequency $max_freq $allow_missing $nb_alleles_min $nb_alleles_max $type_p $bound_start $bound_end\n+\t#if str( $samples ) == "":\n+\t\'None\'\n+\t#else\n+\t$samples\n+\t#end if\n+\t#if str( $chromosomes ) == "":\n+\t\'None\'\n+\t#else\n+\t$chromosomes\n+\t#end if\n+\t#if str( $export ) == "plink":\n+\t$fileout_map\n+\t#else\n+\t\'\'\n+\t#end if\n+    </command>\n+     \n+    <!-- [REQUIRED] Input files and tool parameters -->\n+    <inputs>\n+\t<param name="filein" type="data" format="vcf" optional="false" label="VCF input" />\n+\t<param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>\n+\t<param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">\n+\t\t<validator type="regex" message="Please enter a comma separated list.">^\\w+(,\\w+)*$</validator>\n+\t</param>\n+\t<param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. 
Comma separated list">\n+\t        <validator type="regex" message="Please enter a comma separated list.">^\\w+(,\\w+)*$</validator>\n+        </param>\n+\t<param name="export" type="select" label="Output format" >\n+\t    <option value="VCF" selected="true">VCF</option>\n+\t    <option value="freq">freq</option>\n+            <option value="plink">plink</option>\n+        </param>\n+\t<param name="frequency" type="float" value="0.001" label="Minimum MAF." help="Minimum frequency." />\n+\t<param name="max_freq" type="float" value="0.5" label="Maximum MAF." help="Maximum frequency." />\n+\t<param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />\n+\t<param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />\n+\t<param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />\n+        <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >\n+            <option value="ALL" selected="true">All</option>\n+            <option value="SNP">SNP</option>\n+            <option value="INDEL">Indel</option>\n+        </param>\n+\t<param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />\n+\t<param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." 
/>\n+    </inputs>\n+    \n+    <!-- [REQUIRED] Output files -->\n+    <outputs>\n+\t<data name="fileout" format="vcf" label="${fileout_label}.#if str($export)==\'plink\' then \'ped\' else \'\' # #if str($export)==\'freq\' then \'frq\' else \'\' # #if str($export)==\'VCF\' then \'vcf\' else \'\' #" >\n+\t\t<change_format>\n+                \t<when input="export" value="freq" format="tabular" />\n+\t\t\t<when input="export" value="plink" format="txt" />\n+\t\t</change_format>\t\n+\t</data>\n+\t<data name="fileout_map" format="txt" label="${fileout_label}.map">\n+\t\t<filter>(export == \'plink\')</filter>\n+\t</data>\n+\t<data name="filelog" format="txt" label='..b'alue="0.001" />\n+         <param name="max_freq" value="0.5" />\n+         <param name="allow_missing" value="0" />\n+         <param name="nb_alleles_min" value="2" />\n+\t<param name="nb_alleles_max" value="4" />\n+         <param name="type_p" value="ALL" />\n+         <param name="bound_start" value="1" />\n+         <param name="bound_end" value="100000000" />\n+         <output name="fileout" file="result.vcf" />\n+         <output name="filelog" file="result.log" />\n+        </test>\n+    </tests>\n+    \n+    <!-- [OPTIONAL] Help displayed in Galaxy -->\n+    <help>\n+\n+.. class:: infomark\n+\n+**Authors** \n+\n+---------------------------------------------------\n+\n+.. class:: infomark\n+\n+**Please cite** If you use this tool, please cite Dereeper et al. 
2015 in prep.\n+\n+---------------------------------------------------\n+\n+================\n+VCF tools filter\n+================\n+\n+-----------\n+Description\n+-----------\n+\n+  Filter VCF file \n+\n+-----------------\n+Workflow position\n+-----------------\n+\n+**Upstream tools**\n+\n+=========== ========================== =======\n+Name            output file(s)         format \n+=========== ========================== =======\n+=========== ========================== =======\n+\n+\n+**Downstream tools**\n+\n+=========== ========================== =======\n+Name            output file(s)         format\n+=========== ========================== =======\n+=========== ========================== =======\n+\n+\n+----------\n+Input file\n+----------\n+\n+VCF file\n+\tVCF file with all SNPs\n+\n+----------\n+Parameters\n+----------\n+\n+Output file basename\n+\tPrefix for the output VCF file\n+\n+Samples\n+        Samples to be analyzed. Comma separated list\n+\n+Chromosomes\n+\tChromosomes to be analyzed. Comma separated list\n+\n+Output format\n+\tVCF/freq/plink\n+\n+Minimum MAF\n+\tMinimum frequency\n+\n+Maximum MAF\n+\tMaximum frequency\n+\n+Missing data proportion\n+\tAllowed missing data proportion per site. 
Must be comprised between 0 and 1.\n+\n+Number of alleles\n+\tAccepted number of alleles min and max.\n+\n+Polymorphisms\n+\tType of polymorphisms to keep (ALL/SNP/INDEL).\n+Bounds\n+\tLower bound and upper bound for a range of sites to be processed.\n+\n+------------\n+Output files\n+------------\n+\n+VCF file\n+\tVCF file filtered \n+\n+Log file\n+\n+---------------------------------------------------\n+\n+---------------\n+Working example\n+---------------\n+\n+Input files\n+===========\n+\n+VCF file\n+---------\n+\n+::\n+\n+\t#fileformat=VCFv4.1\n+\t#FILTER=&lt;ID=LowQual,Description="Low quality">\n+\t#FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+\t[...]\n+\tCHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tCATB1\n+\tchr1\t2209\t.\tG\tT\t213.84\t.\tAC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)\tGT:AD:DP:GQ:PL\t1/1:0,7:7:18:242,18,0\n+\n+\n+Parameters\n+==========\n+\n+Output name -> filtered_chr1\n+\n+Chromosomes -> chr1\n+\n+Output format -> VCF\n+\n+Minimum MAF -> 0.001\n+\n+Maximum MAF -> 0.5\n+\n+Missing data proportion -> 1\n+\n+Number of alleles min -> 2\n+\n+Number of alleles max -> 4\n+\n+Polymorphisms -> All\n+\n+Lower bound -> 1\n+\n+Upper bound -> 100000000\n+\n+\n+Output files\n+============\n+\n+filtered_genelist_intron.vcf\n+---------\n+\n+::\n+\n+        #fileformat=VCFv4.1\n+        #FILTER=&lt;ID=LowQual,Description="Low quality"&gt;\n+        #FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+        [...]\n+        CHROM   POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  
CATB1\n+\tchr1\t5059\t.\tC\tG\t146.84\t.\tAC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)\tGT:AD:DP:GQ:PL\t1/1:0,8:8:18:175,18,0\n+\n+\n+    </help>\n+    \n+</tool>\n'
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/VCFToolsStats.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolsStats/VCFToolsStats.pl Mon Mar 23 05:57:27 2015 -0400
[
@@ -0,0 +1,71 @@
+#!/usr/bin/perl
+# Annotation counts and vcftools statistics (--het, --TsTv-summary,
+# --missing-indv) for one VCF; every result is written under the --out basename.
+use strict;
+use warnings;
+use Switch;
+use Getopt::Long;
+use Bio::SeqIO;
+
+my $usage = qq~Usage:$0 <args> [<opts>]
+
+where <args> are:
+
+    -i, --input          <VCF input>
+    -o, --out            <output basename>
+~;
+$usage .= "\n";
+
+my ($input,$out);
+
+GetOptions(
+ "input=s"        => \$input,
+ "out=s"          => \$out
+);
+
+# --out is mandatory as well: it names every file written below.
+die $usage
+  if ( !$input || !$out);
+
+# Count, in one pass, the lines containing each annotation keyword. This gives
+# the same numbers as "grep -c KEYWORD" without handing the path to a shell.
+my @keywords = ("mRNA", "INTERGENIC", "INTRON", "UTR", "NON_SYNONYMOUS_CODING");
+my %nb_lines = map { $_ => 0 } @keywords;
+open(my $V, "<", $input) or die "Error: cannot read $input: $!\n";
+while (my $line = <$V>) {
+    foreach my $keyword (@keywords) {
+        $nb_lines{$keyword}++ if (index($line, $keyword) >= 0);
+    }
+}
+close($V);
+
+my $nb_gene = $nb_lines{"mRNA"};
+my $nb_intergenic = $nb_lines{"INTERGENIC"};
+my $nb_intron = $nb_lines{"INTRON"};
+my $nb_UTR = $nb_lines{"UTR"};
+my $nb_ns = $nb_lines{"NON_SYNONYMOUS_CODING"};
+my $nb_exon = $nb_gene - $nb_intron - $nb_UTR;
+my $nb_s = $nb_exon - $nb_ns;
+
+# Single-quote both values so that the shell keeps each one as one argument.
+my ($q_input, $q_out) = ($input, $out);
+foreach my $value ($q_input, $q_out) {
+    $value =~ s/'/'\\''/g;
+    $value = "'" . $value . "'";
+}
+foreach my $stat ("--het", "--TsTv-summary", "--missing-indv") {
+    system("vcftools --vcf $q_input --remove-filtered-all --out $q_out $stat >>vcftools.log 2>&1") == 0
+        or warn "Warning: vcftools $stat failed (status $?)\n";
+}
+
+open(my $G, ">", "$out.annotation") or die "Error: cannot write $out.annotation: $!\n";
+print $G "Genic $nb_gene\n";
+print $G "Intergenic $nb_intergenic\n";
+print $G "========\n";
+print $G "Intron $nb_intron\n";
+print $G "Exon $nb_exon\n";
+print $G "UTR $nb_UTR\n";
+print $G "========\n";
+print $G "Non-syn $nb_ns\n";
+print $G "Synonym $nb_s\n";
+close($G) or die "Error: cannot write $out.annotation: $!\n";
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/test-data/result.TsTv.summary
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolsStats/test-data/result.TsTv.summary Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,9 @@
+MODEL COUNT
+AC 371
+AG 1467
+AT 562
+CG 330
+CT 1659
+GT 397
+Ts 3126
+Tv 1660
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/test-data/result.annotation
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolsStats/test-data/result.annotation Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,9 @@
+Genic 4489
+Intergenic 466
+========
+Intron 960
+Exon 3248
+UTR 281
+========
+Non-syn 226
+Synonym 3022
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/test-data/result.het
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolsStats/test-data/result.het Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,2 @@
+INDV O(HOM) E(HOM) N_SITES F
+CATB1 0 0.0 3616 0.00000
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/test-data/result.imiss
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolsStats/test-data/result.imiss Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,2 @@
+INDV N_DATA N_GENOTYPES_FILTERED N_MISS F_MISS
+CATB1 4813 0 0 0
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/test-data/result.log
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolsStats/test-data/result.log Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,44 @@
+
+VCFtools - v0.1.12b
+(C) Adam Auton and Anthony Marcketta 2009
+
+Parameters as interpreted:
+ --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat
+ --het
+ --out vcf_stats
+ --remove-filtered-all
+
+After filtering, kept 1 out of 1 Individuals
+Outputting Individual Heterozygosity
+ Individual Heterozygosity: Only using biallelic SNPs.
+After filtering, kept 4813 out of a possible 4955 Sites
+Run Time = 0.00 seconds
+
+VCFtools - v0.1.12b
+(C) Adam Auton and Anthony Marcketta 2009
+
+Parameters as interpreted:
+ --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat
+ --out vcf_stats
+ --TsTv-summary
+ --remove-filtered-all
+
+After filtering, kept 1 out of 1 Individuals
+Outputting Ts/Tv summary
+Ts/Tv ratio: 1.883
+After filtering, kept 4813 out of a possible 4955 Sites
+Run Time = 0.00 seconds
+
+VCFtools - v0.1.12b
+(C) Adam Auton and Anthony Marcketta 2009
+
+Parameters as interpreted:
+ --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat
+ --missing-indv
+ --out vcf_stats
+ --remove-filtered-all
+
+After filtering, kept 1 out of 1 Individuals
+Outputting Individual Missingness
+After filtering, kept 4813 out of a possible 4955 Sites
+Run Time = 0.00 seconds
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/test-data/sample.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolsStats/test-data/sample.vcf Mon Mar 23 05:57:27 2015 -0400
[
b'@@ -0,0 +1,5000 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=LowQual,Description="Low quality">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n+##INFO=<ID=AF,Number=A,Type'..b'm=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n+chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n+chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n+chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n+chr1\t188357\t.\tC\tT\t1327.77\t.\tAC=2;AF=1.00;AN=2;DP=36;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=24.46;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t1/1:0,36:36:99:1356,105,0\n+chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n+chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n+chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n+chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n+chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n+chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n'
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/vcfToolsStats.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolsStats/vcfToolsStats.sh Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+tool_path=$(dirname $0)
+
+filein=$1
+fileout_label=$2
+fileout_annot=$3
+fileout_het=$4
+fileout_imiss=$5
+fileout_sum=$6
+filelog=$7
+
+
+
+perl $tool_path/VCFToolsStats.pl --input $filein --out $fileout_label
+
+cp  $fileout_label.annotation $fileout_annot ; rm $fileout_label.annotation
+cp  $fileout_label.het $fileout_het ; rm $fileout_label.het
+cp  $fileout_label.imiss $fileout_imiss ; rm $fileout_label.imiss
+cp  $fileout_label.TsTv.summary $fileout_sum ; rm $fileout_label.TsTv.summary
+
+cp vcftools.log $filelog
+rm vcftools.log
b
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/vcfToolsStats.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolsStats/vcfToolsStats.xml Mon Mar 23 05:57:27 2015 -0400
[
@@ -0,0 +1,214 @@
+<tool id="sniplay_vcftoolsstats" name="VCF tools Stats" version="1.0.0">
+    
+    <!-- [REQUIRED] Tool description displayed after the tool name -->
+    <description> Various statistics from VCF using VCFtools</description>
+    
+    <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->
+    <requirements>
+        <requirement type="binary">perl</requirement>
+ <requirement type="package" version="0.1.13">VCFtools</requirement>
+    </requirements>
+    
+    <!-- [OPTIONAL] Command to be executed to get the tool's version string -->
+    <version_command>
+<!--
+        tool_binary -v
+-->
+    </version_command>
+    
+    <!-- [REQUIRED] The command to execute -->
+    <command interpreter="perl">
+ vcfToolsStats.sh $filein $fileout_label $fileout_annot $fileout_het $fileout_imiss $fileout_sum $filelog
+    </command>
+     
+    <!-- [REQUIRED] Input files and tool parameters -->
+    <inputs>
+ <param name="filein" type="data" format="vcf" optional="false" label="VCF input" />
+ <param name="fileout_label" type="text" value="vcf_stats" optional="false" label="Output file basename"/>
+    </inputs>
+    
+    <!-- [REQUIRED] Output files -->
+    <outputs>
+ <data name="fileout_annot" format="txt" label="${fileout_label}.annotation" />
+ <data name="fileout_het" format="txt" label="${fileout_label}.het" />
+ <data name="fileout_imiss" format="txt" label="${fileout_label}.imiss" />
+ <data name="fileout_sum" format="txt" label="${fileout_label}.TsTv.summary" />
+ <data name="filelog" format="txt" label="${fileout_label}.log" />
+    </outputs>
+    
+    <!-- [STRONGLY RECOMMENDED] Exit code rules -->
+    <stdio>
+        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+    
+    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
+    <tests>
+        <!-- [HELP] Test files have to be in the ~/test-data directory -->
+        <test>
+         <param name="filein" value="sample.vcf" />
+         <output name="fileout_annot" file="result.annotation" />
+         <output name="fileout_het" file="result.het" />
+         <output name="fileout_imiss" file="result.imiss" />
+         <output name="fileout_sum" file="result.TsTv.summary" />
+         <output name="filelog" file="result.log" />
+        </test>
+    </tests>
+    
+    <!-- [OPTIONAL] Help displayed in Galaxy -->
+    <help>
+
+.. class:: infomark
+
+**Authors** 
+
+---------------------------------------------------
+
+.. class:: infomark
+
+**Please cite** If you use this tool, please cite Dereeper et al. 2015 in prep.
+
+---------------------------------------------------
+
+================
+VCF tools Stats
+================
+
+-----------
+Description
+-----------
+
+  Compute statistics on VCF file 
+
+-----------------
+Workflow position
+-----------------
+
+**Upstream tools**
+
+=========== ========================== =======
+Name            output file(s)         format 
+=========== ========================== =======
+=========== ========================== =======
+
+
+**Downstream tools**
+
+=========== ========================== =======
+Name            output file(s)         format
+=========== ========================== =======
+=========== ========================== =======
+
+
+----------
+Input file
+----------
+
+VCF file
+ VCF file with all SNPs
+
+----------
+Parameters
+----------
+
+Output file basename
+ Prefix for the output files
+
+------------
+Output files
+------------
+
+.annotation file
+ Statistics on annotation/location along genome
+
+.het file 
+ Statistics on heterozygosity of the individuals
+
+.imiss
+ Statistics on missing data of the individuals
+.TsTv.summary
+ Statistics on mutation types and transition/transversion counts
+
+.log file
+
+---------------------------------------------------
+
+---------------
+Working example
+---------------
+
+Input files
+===========
+
+VCF file
+---------
+
+::
+
+ #fileformat=VCFv4.1
+ #FILTER=&lt;ID=LowQual,Description="Low quality">
+ #FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+ [...]
+ CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
+ chr1 2209 . G T 213.84 . AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0
+
+
+Parameters
+==========
+
+Output name -> vcf_stat
+
+
+Output files
+============
+
+.annotation file
+----------------
+
+::
+
+ Genic 4489
+ Intergenic 466
+ ========
+ Intron 960
+ Exon 3248
+ UTR 281
+ ========
+ Non-syn 226
+ Synonym 3022
+        
+.het file
+---------
+
+::
+
+ INDV O(HOM) E(HOM) N_SITES F
+ CATB1 0 0.0 3616 0.00000
+
+.imiss file
+-----------
+
+::
+
+ INDV N_DATA N_GENOTYPES_FILTERED N_MISS F_MISS
+ CATB1 4813 0 0 0
+
+.TsTv.summary file
+------------------
+
+::
+
+ MODEL COUNT
+ AC 371
+ AG 1467
+ AT 562
+ CG 330
+ CT 1659
+ GT 397
+ Ts 3126
+ Tv 1660
+
+
+    </help>
+    
+</tool>
b
diff -r 000000000000 -r 3e19d0dfcf3e admixture/Admixture.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/admixture/Admixture.pl Mon Mar 23 05:57:27 2015 -0400
[
@@ -0,0 +1,159 @@
+#!/usr/bin/perl
+# Runs admixture on a HapMap genotype file for each K from kmin to kmax and
+# keeps the Q matrix and the log of the K with the lowest CV error.
+
+use strict;
+use Switch;
+use Getopt::Long;
+use Bio::SeqIO;
+
+my $usage = qq~Usage:$0 <args> [<opts>]
+where <args> are:
+    -i, --input         <input HAPMAP>
+    -o, --output        <output>
+    -k, --kmin          <K min. int>
+    -m, --maxK          <K max. int>
+    -d, --directory     <temporary directory>
+    -p, --path          <path to executables>
+~;
+$usage .= "\n";
+
+my ($input,$output,$kmin,$kmax,$directory,$path);
+
+GetOptions(
+ "input=s"      => \$input,
+ "output=s"     => \$output,
+ "kmin=s"       => \$kmin,
+ "maxK=s"       => \$kmax,
+ "directory=s"  => \$directory,
+ "path=s"       => \$path
+);
+
+die $usage
+  if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$path);
+
+# Both bounds must be unsigned integers; trailing white space is dropped.
+if ($kmin =~/^(\d+)\s*$/){
+        $kmin = $1;
+}
+else{
+        die "Error: kmin must be an integer\n";
+}
+if ($kmax =~/^(\d+)\s*$/){
+        $kmax = $1;
+}
+else{
+        die "Error: kmax must be an integer\n";
+}
+
+######################
+# create map file
+######################
+open(my $M, ">", "$directory/input.map") or die "Error: cannot write $directory/input.map: $!\n";
+open(my $H, "<", $input) or die "Error: cannot read $input: $!\n";
+<$H>;   # discard the first line of the input
+while(my $row = <$H>)
+{
+    my @infos = split(/\t/,$row);
+    print $M $infos[2] . "\t" . $infos[0] . "\t" . "0" . "\t" . $infos[3] . "\n";
+}
+close($H);
+close($M) or die "Error: cannot write $directory/input.map: $!\n";
+
+######################
+# create ped file
+######################
+system("$path/transpose.awk $input >$directory/input.ped.2");
+
+open(my $P, ">", "$directory/input.ped") or die "Error: cannot write $directory/input.ped: $!\n";
+open(my $P2, "<", "$directory/input.ped.2") or die "Error: cannot read $directory/input.ped.2: $!\n";
+my $n = 0;
+my $ind_num = 0;
+my @individus;
+while(my $row = <$P2>)
+{
+    $n++;
+    # rows 1 to 11 of the transposed table are skipped (presumably the 11 HapMap descriptor columns - to confirm)
+    next if ($n <= 11);
+    if ($row =~ /^([^\s]+)\s+(.*)$/)
+    {
+        $ind_num++;
+        my $ind = $1;
+        push(@individus,$ind);
+        my $genotyping_line = $2;
+        print $P "$ind $ind_num 0 0 1 2";
+        my @genotypes = split(/\s/,$genotyping_line);
+        foreach my $genotype(@genotypes)
+        {
+            $genotype =~s/N/0/g;   # N is recoded as 0 in the genotypes only, never in the name
+            my @alleles = split("",$genotype);
+            print $P " " . join(" ",@alleles);
+        }
+        print $P "\n";
+    }
+}
+close($P2);
+close($P) or die "Error: cannot write $directory/input.ped: $!\n";
+
+unlink("$directory/input.ped.2");
+
+system("plink --file $directory/input --out $directory/out --make-bed --noweb >>$directory/plink.log 2>&1");
+
+###################################
+# launch admixture for different K
+###################################
+my ($best_K,$best_error);
+for (my $k = $kmin; $k <= $kmax; $k++)
+{
+    system("admixture --cv $directory/out.bed $k >>$directory/log.$k 2>&1");
+    my $cv_error_line = `grep -h CV $directory/log.$k`;
+    if ($cv_error_line =~/: (\d+\.*\d*)$/)
+    {
+        my $cv_error = $1;
+        # the lowest error wins; on a tie the largest K is kept
+        if (!defined($best_error) || $cv_error <= $best_error)
+        {
+            $best_error = $cv_error;
+            $best_K = $k;
+        }
+    }
+    system("cat $directory/log.$k >>$directory/logs");
+    system("echo '\n\n====================================\n\n' >>$directory/logs");
+    system("cat out.$k.Q >>$directory/outputs.Q");
+    system("echo '\n\n====================================\n\n' >>$directory/outputs.Q");
+    system("cat out.$k.P >>$directory/outputs.P");
+    system("echo '\n\n====================================\n\n' >>$directory/outputs.P");
+}
+
+# no CV error could be parsed, so there is no K to report
+die "Error: no CV error found in the admixture logs\n"
+  if (!defined($best_K));
+
+open(my $BEST1, "<", "out.$best_K.Q") or die "Error: cannot read out.$best_K.Q: $!\n";
+open(my $BEST2, ">", "$directory/output") or die "Error: cannot write $directory/output: $!\n";
+print $BEST2 "<Covariate>\n";
+print $BEST2 "<Trait>";
+for (my $j=1;$j<=$best_K;$j++)
+{
+    print $BEST2 " Q" . $j;
+}
+print $BEST2 "\n";
+my $i = 0;
+while(my $q_row = <$BEST1>)
+{
+    # prefix the row with the individual stored at the same rank while writing the ped file
+    $q_row =~s/ /\t/g;
+    my $ind = $individus[$i];
+    print $BEST2 "$ind ";
+    print $BEST2 $q_row;
+    $i++;
+}
+close($BEST1);
+close($BEST2) or die "Error: cannot write $directory/output: $!\n";
+
+# keep the log of the best K under a fixed name
+system("cp -rf $directory/log.$best_K $directory/log");
+
+
+
+
b
diff -r 000000000000 -r 3e19d0dfcf3e admixture/admixture.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/admixture/admixture.sh Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,21 @@
+#!/bin/bash
+input=$1
+outputs=$2
+logs=$3
+best_k_output=$4
+best_k_logfile=$5
+kmin=$6
+kmax=$7
+
+directory=`dirname $0`
+mkdir tmpdir$$
+cp -rf $input tmpdir$$/input

+/usr/bin/perl $directory/Admixture.pl -i $input -o $outputs -k $kmin -m $kmax -d tmpdir$$ -p $directory
+
+mv tmpdir$$/output $best_k_output
+mv tmpdir$$/log $best_k_logfile
+mv tmpdir$$/outputs.Q $outputs
+mv tmpdir$$/logs $logs
+
+
b
diff -r 000000000000 -r 3e19d0dfcf3e admixture/admixture.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/admixture/admixture.xml Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,55 @@
+<tool id="admixture" name="Admixture" version="1.23">
+ <description>a population structure from large SNP genotype datasets</description>
+ <requirements>
+ <requirement type="package" version="1.07">plink</requirement>
+ <requirement type="package" version="1.23">admixture</requirement>
+ </requirements>
+ <command interpreter="bash">./admixture.sh $input $outputs $logs $best_k_output $best_k_logfile $kmin $kmax
+    </command>
+ <inputs>
+ <param format="txt" name="input" type="data" label="Allelic file in Hapmap format" help="Allelic file in Hapmap format"/>
+ <!-- K bounds drive a numeric loop and are placed on the admixture command line, so only whole numbers >= 1 are accepted -->
+ <param type="integer" name="kmin" label="K min" value="1" min="1"/>
+ <param type="integer" name="kmax" label="K max" value="5" min="1"/>
+ </inputs>
+ <outputs>
+ <data format="txt" name="best_k_output" label="Best K Output"/>
+ <data format="txt" name="best_k_logfile" label="Best K Logfile"/>
+ <data format="txt" name="outputs" label="All Outputs"/>
+ <data format="txt" name="logs" label="All Logs"/>
+ </outputs>
+ <help>
+
+
+.. class:: infomark
+
+**Program encapsulated in Galaxy by Southgreen**
+
+.. class:: infomark
+
+**Admixture version 1.23**
+
+-----
+
+==============
+ Please cite: 
+==============
+
+"Fast model-based estimation of ancestry in unrelated individuals.", **D.H. Alexander, J. Novembre, and K. Lange.**, Genome Research, 19:1655-1664, 2009.
+
+-----
+
+===========
+ Overview:
+===========
+
+ADMIXTURE is a program for estimating ancestry in a model-based manner from large autosomal SNP genotype datasets, where the individuals are unrelated (for example, the individuals in a case-control association study).
+
+-----
+
+For further information, please visit the Admixture_ website.
+
+
+.. _Admixture: http://www.genetics.ucla.edu/software/admixture/index.html
+ </help>
+
+</tool>
b
diff -r 000000000000 -r 3e19d0dfcf3e admixture/transpose.awk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/admixture/transpose.awk Mon Mar 23 05:57:27 2015 -0400
[
@@ -0,0 +1,27 @@
+#!/usr/bin/gawk -f
+
+# Transpose a whitespace-separated table: input column c becomes output
+# line c, with the values of successive input records separated by one space.
+BEGIN {
+    n_cols = 0;    # widest record seen so far
+    n_rows = 0;    # number of records read
+}
+
+# Remember every field of the current record under (column, row).
+{
+    n_rows += 1;
+    if (NF > n_cols) n_cols = NF;
+    for (col = NF; col >= 1; col--)
+        cell[col, n_rows] = $col;
+}
+
+# One output line per input column; a cell missing from a short record is
+# left empty but still gets its separator.
+END {
+    for (col = 1; col <= n_cols; col++)
+    {
+        out = "";
+        for (row = 1; row <= n_rows; row++)
+        {
+            if ((col, row) in cell) out = out cell[col, row];
+            if (row < n_rows) out = out " ";
+        }
+        printf "%s\n", out;
+    }
+}
b
diff -r 000000000000 -r 3e19d0dfcf3e snpEff/SnpEff.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpEff/SnpEff.pl Mon Mar 23 05:57:27 2015 -0400
[
@@ -0,0 +1,82 @@
+#!/usr/bin/perl
+
+# Annotate a VCF with snpEff against a genome supplied by the caller.
+# A private snpEff data directory is created under a directory named after
+# the process id, the installed snpEff.config is copied with its data_dir
+# and genome entries rewritten, a "myspecies" database is built from the
+# given Fasta and GFF, "eff" is run on the VCF, and the directory is removed.
+# The snpEff installation is located through the SNPEFF_JAR_PATH variable.
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $usage = qq~Usage:$0 <args> [<opts>]
+where <args> are:
+    -i, --input         <input VCF>
+    -o, --output        <output>
+    -g, --gff           <GFF annotation>
+    -f, --fasta         <Fasta of chromosomes>
+    -h, --html          <HTML output>
+~;
+$usage .= "\n";
+
+my ($input,$output,$gff,$fasta,$html);
+
+
+GetOptions(
+    "input=s"      => \$input,
+    "output=s"     => \$output,
+    "gff=s"        => \$gff,
+    "fasta=s"      => \$fasta,
+    "html=s"       => \$html
+);
+
+
+die $usage
+  if ( !$input || !$output || !$fasta || !$gff || !$html);
+
+
+if (!-e $gff){
+    die "Error: GFF input does not exist\n"
+}
+if (!-e $fasta){
+    die "Error: Fasta input does not exist\n"
+}
+if (!-e $input){
+    die "Error: VCF input does not exist\n"
+}
+
+#my $SNPEFF_PATH = "/usr/local/bioinfo/galaxy/galaxy_dist/tools/SNiPlay/SnpEff/snpEff";
+my $SNPEFF_PATH = $ENV{SNPEFF_JAR_PATH};
+die "Error: SNPEFF_JAR_PATH environment variable is not set\n"
+    unless defined $SNPEFF_PATH && length $SNPEFF_PATH;
+
+
+# Per-process working directory holding the private snpEff data tree
+my $session = $$;
+foreach my $dir ($session, "$session/data", "$session/data/genomes", "$session/data/myspecies")
+{
+    -d $dir or mkdir($dir) or die "Error: cannot create directory $dir: $!\n";
+}
+
+system("cp", "-rf", $fasta, "$session/data/genomes/myspecies.fa") == 0
+    or die "Error: cannot copy Fasta input\n";
+system("cp", "-rf", $gff, "$session/data/myspecies/genes.gff") == 0
+    or die "Error: cannot copy GFF input\n";
+
+# Copy the installed configuration, pointing it at the private data
+# directory and registering the "myspecies" genome.
+open(my $C, '<', "$SNPEFF_PATH/snpEff.config")
+    or die "Error: cannot read $SNPEFF_PATH/snpEff.config: $!\n";
+open(my $C2, '>', "$session/snpEff.config")
+    or die "Error: cannot write $session/snpEff.config: $!\n";
+while(my $config_line = <$C>)
+{
+    if ($config_line =~ /data_dir/)
+    {
+        print $C2 "data_dir = ./data\n";
+    }
+    elsif ($config_line =~ /^genomes/)
+    {
+        # NOTE(review): "\\n" writes a backslash followed by the letter n,
+        # not a line break - confirm this is what snpEff.config expects.
+        print $C2 "genomes : \\n";
+        print $C2 "myspecies, myspecies \\n";
+    }
+    else
+    {
+        print $C2 $config_line;
+    }
+}
+print $C2 "myspecies.genome : myspecies\n";
+close($C);
+close($C2) or die "Error: cannot finish writing $session/snpEff.config: $!\n";
+
+
+my $build_cmd = "java -jar $SNPEFF_PATH/snpEff.jar build -c $session/snpEff.config -gff3 myspecies";
+system($build_cmd) == 0
+    or warn "Warning: snpEff build exited abnormally\n";
+
+my $eff_cmd = "java -jar $SNPEFF_PATH/snpEff.jar eff -c $session/snpEff.config -o vcf -no-downstream -no-upstream myspecies -s $html $input >$output";
+my $eff_status = system($eff_cmd);
+
+
+# Always remove the working directory, then report a failed annotation.
+system("rm", "-rf", $session);
+die "Error: snpEff eff failed\n" if $eff_status != 0;
b
diff -r 000000000000 -r 3e19d0dfcf3e snpEff/snpEff-pipe.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpEff/snpEff-pipe.sh Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,14 @@
+#!/bin/bash
+vcf=$1
+genome=$2
+gff=$3
+output=$4
+html=$5
+log=$6
+
+directory=`dirname $0`

+/usr/bin/perl $directory/SnpEff.pl -i $vcf -f $genome -g $gff -o $output -h $html >>$log 2>&1
+
+
+
b
diff -r 000000000000 -r 3e19d0dfcf3e snpEff/snpEff.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpEff/snpEff.xml Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,63 @@
+<tool id="snpEff" name="SnpEff" version="4.0">
+ <description>predicts SNP effect from a genomic VCF file</description>
+ <requirements>
+ <requirement type="package" version="4.0">snpEff</requirement>
+ </requirements> 
+
+ <!-- 
+     You will need to change the path to wherever your installation is.
+ You can change the amount of memory used by snpEff, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory)
+ -->
+ <command interpreter="bash">./snpEff-pipe.sh $vcf $genome $gff $output $statsFile $log</command>
+ <inputs>
+ <param format="vcf" name="vcf" type="data" label="VCF input file" help="Positions must be genomic positions"/>
+ <param format="fasta" name="genome" type="data" label="Reference genome in Fasta" help=""/>
+ <param format="gff3" name="gff" type="data" label="GFF annotation of the genome" help=""/>
+
+ </inputs>
+ <outputs>
+ <data format="vcf" name="output" label="Annotated VCF" />
+ <data format="html" name="statsFile" label="HTML statistics output"/>
+ <data format="txt" name="log" label="Log file"/>
+ </outputs>
+
+ <help>
+
+
+
+
+
+.. class:: infomark
+
+**Program encapsulated in Galaxy by Southgreen**
+
+.. class:: infomark
+
+**SnpEff version 4.0**
+
+-----
+
+==============
+ Please cite:
+==============
+
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", **Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM.**, Fly (Austin). 2012 Apr-Jun;6(2):80-92.
+
+-----
+
+===========
+ Overview:
+===========
+
+Genetic variant annotation and effect prediction toolbox. It annotates and predicts the effects of variants on genes (such as amino acid changes).
+
+-----
+
+For further information, please visit the SnpEff_ website.
+
+
+.. _SnpEff: http://snpeff.sourceforge.net/
+        </help>
+
+</tool>
+
b
diff -r 000000000000 -r 3e19d0dfcf3e tassel/tassel.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tassel/tassel.sh Mon Mar 23 05:57:27 2015 -0400
[
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+analyseType=$1;
+out1=$2;
+out2=$3;
+out3=$4;
+log1=$5;
+galaxyOutDir=$6;
+
+
+mkdir $galaxyOutDir
+
+# Suppression des 6 premiers arguments de la liste des arguments $@
+shift; shift; shift; shift; shift; shift;
+
+if [[ $analyseType == glm ]] 
+then
+ run_pipeline.pl $* >> $log1 2>&1
+ mv "$galaxyOutDir/TASSELGLM1.txt" $out1
+ mv "$galaxyOutDir/TASSELGLM2.txt" $out2
+fi
+
+if [[ $analyseType == mlm ]] 
+then
+ run_pipeline.pl $* >> $log1 2>&1
+ mv "$galaxyOutDir/TASSELMLM1.txt" $out1
+ mv "$galaxyOutDir/TASSELMLM2.txt" $out2
+ mv "$galaxyOutDir/TASSELMLM3.txt" $out3
+fi
+
+if [[ $analyseType == ld ]] 
+then
+ run_pipeline.pl $* >> $log1 2>&1
+fi
+
+
+if [[ $analyseType == ck ]]
+then
+ run_pipeline.pl $* >> $log1 2>&1
+ mv "$galaxyOutDir/kinship.txt" $out1
+fi
b
diff -r 000000000000 -r 3e19d0dfcf3e tassel/tassel.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tassel/tassel.xml Mon Mar 23 05:57:27 2015 -0400
[
@@ -0,0 +1,153 @@
+<tool id="Tassel" name="Tassel" version="5.0">
+ <description> Software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium. </description>
+ <requirements>
+                <requirement type="package" version="5.0">tassel</requirement>
+        </requirements>
+ <command interpreter="bash">./tassel.sh $analysis_opts.fonction_selector $output1 $output2 $output3 $log tmpdir$$/
+#if $analysis_opts.fonction_selector == "mlm":
+ -fork1 -h $hapmap -filterAlign -filterAlignMinFreq $filterAlignMinFreq
+ -fork2 -r $analysis_opts.trait
+ #if $analysis_opts.add_structure_file.additional_file_selector == "yes":
+  -fork3 -q $analysis_opts.add_structure_file.structure -excludeLastTrait
+ #end if
+ -fork4 -k $analysis_opts.kinship
+ -combineA -input1 -input2 
+ #if $analysis_opts.add_structure_file.additional_file_selector == "yes":
+  -input3
+ #end if
+ -intersect -combineB -inputA -input4 -mlm -mlmVarCompEst $analysis_opts.mlmVarCompEst -mlmCompressionLevel $analysis_opts.mlmCompressionLevel -export tmpdir$$/TASSELMLM -runfork1 -runfork2 
+ #if $analysis_opts.add_structure_file.additional_file_selector == "yes":
+  -runfork3
+ #end if
+ -runfork4

+#else if $analysis_opts.fonction_selector == "glm":
+ -fork1 -h $hapmap -filterAlign -filterAlignMinFreq $filterAlignMinFreq
+ -fork2 -r $analysis_opts.trait
+ #if $analysis_opts.add_file.additional_file_selector == "yes":
+  -fork3 -q $analysis_opts.add_file.structure -excludeLastTrait
+ #end if
+ -combineA -input1 -input2 
+ #if $analysis_opts.add_file.additional_file_selector == "yes":
+  -input3
+ #end if
+ -intersect -glm -export tmpdir$$/TASSELGLM -runfork1 -runfork2
+ #if $analysis_opts.add_file.additional_file_selector == "yes":
+  -runfork3
+ #end if 
+#else if $analysis_opts.fonction_selector == "ld":
+ -fork1 -h $hapmap
+ -ld -ldType All -ldd png -ldplotsize 3000 -o $output1 -runfork1
+#else if $analysis_opts.fonction_selector == "ck":
+ -fork1 -h $hapmap
+ -ck -export tmpdir$$/kinship -runfork1
+#end if
+ </command>
+ <inputs>
+ <param format="txt" name="hapmap" type="data" label="HapMap file"/>
+ <conditional name="analysis_opts">
+ <param name="fonction_selector" type="select" label="Type of analysis">
+ <option value="mlm" selected="True">MLM</option>
+ <option value="glm">GLM</option>
+ <option value="ld">Linkage Disequilibrium</option>
+ <option value="ck">Kinship</option>
+ </param>
+ <when value="glm">
+                 <param format="txt" name="trait" type="data" label="Trait file"/>
+ <conditional name="add_file">
+ <param name="additional_file_selector" type="select" label="Add structure file">
+                                  <option value="no" selected="True">no</option>
+                                  <option value="yes">yes</option>
+                          </param>
+ <when value="yes">
+ <param format="txt" name="structure" type="data" label="Structure file"/>
+ </when>
+ </conditional>
+ </when>
+ <when value="mlm">
+                 <param format="txt" name="trait" type="data" label="Trait file"/>
+                 <param format="txt" name="kinship" type="data" label="Kinship file"/>
+ <conditional name="add_structure_file">
+ <param name="additional_file_selector" type="select" label="Add structure file">
+                                  <option value="no" selected="True">no</option>
+                                  <option value="yes">yes</option>
+                          </param>
+ <when value="yes">
+ <param format="txt" name="structure" type="data" label="Structure file"/>
+ </when>
+ </conditional>
+ <param name="mlmVarCompEst" type="select" label="Variance Component Estimation">
+ <option value="P3D" selected="True">P3D</option>
+ <option value="EachMarker">EachMarker</option>
+ </param>
+ <param name="mlmCompressionLevel" type="select" label="Compression Level">
+ <option value="Optimum" selected="True">Optimum</option>
+ <option value="Custom">Custom</option>
+ <option value="None">None</option>
+ </param>
+                        </when>
+ <when value="ld"></when>
+ <when value="ck"></when>
+ </conditional>
+ <!-- Minimum allele frequency passed to -filterAlignMinFreq; validated as a fraction between 0 and 1 -->
+ <param type="float" name="filterAlignMinFreq" label="Filter minimal frequency allele" value="0.05" min="0" max="1"/>
+ </inputs>
+ <outputs>
+ <data format="txt" name="output1" label="Tassel output">
+ <change_format>
+ <when input="analysis_opts['fonction_selector']" value="ld" format="png"/>
+ </change_format>
+ </data>
+
+ <data format="txt" name="output2" label="Allele effects">
+ <filter>analysis_opts['fonction_selector'] == "glm" or analysis_opts['fonction_selector'] == "mlm"</filter>
+ </data>
+
+ <data format="txt" name="output3" label="Compression file">
+ <filter>analysis_opts['fonction_selector'] == "mlm"</filter>
+ </data>
+
+ <data format="txt" name="log" label="Log file"/>
+ </outputs>
+ <help>
+
+.. class:: infomark
+
+**Program encapsulated in Galaxy by Southgreen**
+
+.. class:: infomark
+
+**Tassel**
+
+-----
+
+==========
+ Authors:
+==========
+
+**Terry Casstevens**
+
+-----
+
+==========
+ Overview
+==========
+
+Software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium.
+
+-----
+
+For further information, please visit the TASSEL_ website.
+
+
+.. _TASSEL: http://www.maizegenetics.net/tassel/
+
+ </help>
+<!--
+<tests>
+  <test>
+      <param name="input" value="genotyping_file.inp" />
+      <output name="output" file="phase_output" />
+   </test>
+</tests>
+-->
+</tool>
b
diff -r 000000000000 -r 3e19d0dfcf3e tassel/tool-data/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tassel/tool-data/tool_dependencies.xml Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="tassel" version="5.0">
+        <repository changeset_revision="097d4c366e0d" name="package_tassel_5_0" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" />
+    </package>
+</tool_dependency>
+
+
b
diff -r 000000000000 -r 3e19d0dfcf3e tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Mar 23 05:57:27 2015 -0400
b
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="plink" version="1.07">
+        <repository changeset_revision="65400c333b88" name="package_plink_1_07" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" />
+    </package>
+    <package name="admixture" version="1.23">
+        <repository changeset_revision="61e04b2aa621" name="package_admixture_1_23" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" />
+    </package>
+    <package name="tassel" version="5.0">
+        <repository changeset_revision="097d4c366e0d" name="package_tassel_5_0" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" />
+    </package>
+    <package name="vcftools" version="0.1.12b">
+        <repository changeset_revision="a655cb1dfc58" name="package_vcftools_0_1_12b" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu/" />
+    </package>
+    <package name="snpEff" version="4.0">
+        <repository name="package_snpeff_4_0" owner="iuc" changeset_revision="6bc55957927b" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
+
+