Next changeset 1:420b57c3c185 (2015-07-10) |
Commit message:
Uploaded |
added:
MDSplot/MDSbasedOnIBSmatrix.pl MDSplot/mdsplot.sh MDSplot/mdsplot.xml MDSplot/test-data/analyse.ibs_matrix.txt MDSplot/test-data/analyse.log MDSplot/test-data/analyse.mds_plot.txt MDSplot/test-data/input.map MDSplot/test-data/input.ped VCFToolFilter/VCFToolsFilter.pl VCFToolFilter/test-data/result.log VCFToolFilter/test-data/result.vcf VCFToolFilter/test-data/sample.vcf VCFToolFilter/vcfToolsFilter.sh VCFToolFilter/vcfToolsFilter.xml VCFToolsStats/VCFToolsStats.pl VCFToolsStats/test-data/result.TsTv.summary VCFToolsStats/test-data/result.annotation VCFToolsStats/test-data/result.het VCFToolsStats/test-data/result.imiss VCFToolsStats/test-data/result.log VCFToolsStats/test-data/sample.vcf VCFToolsStats/vcfToolsStats.sh VCFToolsStats/vcfToolsStats.xml admixture/Admixture.pl admixture/admixture.sh admixture/admixture.xml admixture/transpose.awk snpEff/SnpEff.pl snpEff/snpEff-pipe.sh snpEff/snpEff.xml tassel/tassel.sh tassel/tassel.xml tassel/tool-data/tool_dependencies.xml tool_dependencies.xml |
b |
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/MDSbasedOnIBSmatrix.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MDSplot/MDSbasedOnIBSmatrix.pl Mon Mar 23 05:57:27 2015 -0400 |
[ |
#!/usr/bin/perl

# MDSbasedOnIBSmatrix.pl
#
# Runs PLINK (--cluster --matrix --mds-plot 2) on a PED/MAP pair and reshapes
# two of its result files into tab-separated tables:
#
#   <out>.mds_plot.txt    population, individual, C1, C2 (one row per individual)
#   <out>.ibs_matrix.txt  the <out>.mibs matrix with the individuals' names
#                         added as header row and as first column
#
# Everything PLINK prints is appended to <in>.plink.log.
#
# If <in>.individual_info.txt exists it is read as "individual;population"
# lines; individuals that are not listed there are reported as "Pop1".

use strict;
use warnings;
# NOTE(review): the Switch and Bio::SeqIO modules do not appear to be used
# anywhere in this script; they are kept so its dependency list is unchanged.
use Switch;
use Getopt::Long;
use Bio::SeqIO;

my $PLINK_EXE          = "plink";
my $DEFAULT_POPULATION = "Pop1";

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
    -i, --in         <input>
    -o, --out        <output>
~;
$usage .= "\n";

my ($in, $out);

GetOptions(
    "in=s"  => \$in,
    "out=s" => \$out,
) or die $usage;

die $usage
    if !defined $in || !length $in || !defined $out || !length $out;

run_plink($in, $out);

my @individuals   = read_individuals("$in.ped");
my %population_of = read_populations("$in.individual_info.txt");

write_mds_plot("$out.mds", "$out.mds_plot.txt", \%population_of);
write_ibs_matrix("$out.mibs", "$out.ibs_matrix.txt", \@individuals);


# Run PLINK on <input_prefix>.ped/.map, writing its results under
# <output_prefix> and appending its console output to <input_prefix>.plink.log.
# Dies if PLINK cannot be started or does not exit with status 0.
sub run_plink {
    my ($input_prefix, $output_prefix) = @_;

    my $log_file = "$input_prefix.plink.log";
    my @command  = (
        $PLINK_EXE,
        '--file', $input_prefix,
        '--noweb',
        '--cluster',
        '--matrix',
        '--mds-plot', 2,
        '--out', $output_prefix,
    );

    # The list form of system() bypasses the shell, so the prefixes are never
    # interpreted as shell syntax.  Without a shell there is no ">> log 2>&1",
    # so point STDOUT and STDERR at the log for the duration of the call.
    open(my $saved_stdout, '>&', \*STDOUT) or die "Cannot duplicate STDOUT: $!\n";
    open(my $saved_stderr, '>&', \*STDERR) or die "Cannot duplicate STDERR: $!\n";
    open(STDOUT, '>>', $log_file)          or die "Cannot append to $log_file: $!\n";
    open(STDERR, '>&', \*STDOUT)           or die "Cannot redirect STDERR: $!\n";

    my $status     = system(@command);
    my $exec_error = $!;

    open(STDOUT, '>&', $saved_stdout) or die "Cannot restore STDOUT: $!\n";
    open(STDERR, '>&', $saved_stderr) or die "Cannot restore STDERR: $!\n";
    close($saved_stdout);
    close($saved_stderr);

    if ($status == -1) {
        die "Cannot run $PLINK_EXE: $exec_error\n";
    }
    if ($status != 0) {
        die "$PLINK_EXE failed (wait status $status), see $log_file\n";
    }
    return;
}

# Return the first whitespace-separated field of every non-blank line of the
# PED file, in file order.  These names label the IBS matrix.
sub read_individuals {
    my ($ped_file) = @_;

    my @names;
    open(my $ped, '<', $ped_file) or die "Cannot read $ped_file: $!\n";
    while (my $line = <$ped>) {
        # split ' ' skips leading whitespace; the limit avoids splitting the
        # rest of the line, which is not needed here.
        my ($name) = split ' ', $line, 2;
        next unless defined $name;
        push @names, $name;
    }
    close($ped);
    return @names;
}

# Read the optional "individual;population" file and return it as a hash
# (individual => population).  Returns an empty hash if the file is absent.
sub read_populations {
    my ($info_file) = @_;

    my %population_of;
    return %population_of unless -e $info_file;

    open(my $info, '<', $info_file) or die "Cannot read $info_file: $!\n";
    while (my $line = <$info>) {
        $line =~ s/[\r\n]//g;
        my ($individual, $population) = split(/;/, $line);
        next unless defined $individual;
        $population_of{$individual} = $population;
    }
    close($info);
    return %population_of;
}

# Convert PLINK's .mds file into "population<TAB>individual<TAB>C1<TAB>C2" rows.
# Rows are only read after the header line (the first line containing "C1").
sub write_mds_plot {
    my ($mds_file, $plot_file, $population_of) = @_;

    open(my $mds,  '<', $mds_file)  or die "Cannot read $mds_file: $!\n";
    open(my $plot, '>', $plot_file) or die "Cannot write $plot_file: $!\n";

    my $header_seen = 0;
    while (my $line = <$mds>) {
        if (!$header_seen) {
            $header_seen = 1 if $line =~ /C1/;
            next;
        }

        $line =~ s/[\r\n]//g;
        # split ' ' ignores leading whitespace, so rows are parsed the same
        # way whether or not PLINK padded them on the left.  After that the
        # individual is field 0 and the two coordinates are fields 3 and 4.
        my @fields = split ' ', $line;
        next if @fields < 5;
        my ($individual, $c1, $c2) = @fields[0, 3, 4];

        my $population = $population_of->{$individual};
        if (!defined $population || !length $population) {
            $population = $DEFAULT_POPULATION;
        }

        # The Galaxy wrapper declares this output as tabular.
        print {$plot} join("\t", $population, $individual, $c1, $c2), "\n";
    }

    close($mds);
    close($plot) or die "Cannot finish writing $plot_file: $!\n";
    return;
}

# Convert PLINK's .mibs file into a labelled, tab-separated matrix: a header
# row listing all individuals, then one row per matrix line prefixed with the
# name of the individual at the same position in the PED file.
sub write_ibs_matrix {
    my ($mibs_file, $matrix_file, $individuals) = @_;

    open(my $mibs,   '<', $mibs_file)   or die "Cannot read $mibs_file: $!\n";
    open(my $matrix, '>', $matrix_file) or die "Cannot write $matrix_file: $!\n";

    print {$matrix} join("\t", "Individuals", @{$individuals}), "\n";

    my $row = 0;
    while (my $line = <$mibs>) {
        $line =~ s/[\r\n]//g;
        my @values = split ' ', $line;
        next unless @values;

        # More matrix rows than PED lines leaves the label empty instead of
        # reading past the end of the list.
        my $label = '';
        if ($row < @{$individuals}) {
            $label = $individuals->[$row];
        }

        print {$matrix} join("\t", $label, @values), "\n";
        $row++;
    }

    close($mibs);
    close($matrix) or die "Cannot finish writing $matrix_file: $!\n";
    return;
}
b |
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/mdsplot.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MDSplot/mdsplot.sh Mon Mar 23 05:57:27 2015 -0400 |
b |
#!/bin/bash
#
# Galaxy wrapper around MDSbasedOnIBSmatrix.pl.
#
# Usage: mdsplot.sh <ped> <map> <output_label> <matrix_out> <plot_out> <log_out>
#
#   ped, map       input PED and MAP files
#   output_label   prefix used for the intermediate result files
#   matrix_out     destination of <output_label>.ibs_matrix.txt
#   plot_out       destination of <output_label>.mds_plot.txt
#   log_out        destination of the PLINK log (input.plink.log)
#
# Exits non-zero if staging the inputs, the analysis, or collecting the
# results fails.

if [ "$#" -lt 6 ]; then
    echo "Usage: $0 <ped> <map> <output_label> <matrix_out> <plot_out> <log_out>" >&2
    exit 1
fi

tool_path=$(dirname "$0")
ped=$1
map=$2
fileout_label=$3
fileout_matrix=$4
fileout_plot=$5
fileout_log=$6

# The Perl script is given the prefix "input" and reads input.ped / input.map,
# so stage both files in the working directory under that common prefix.
rsync -a "$ped" input.ped || exit 1
rsync -a "$map" input.map || exit 1

perl "$tool_path/MDSbasedOnIBSmatrix.pl" --in input --out "$fileout_label"
status=$?

rm -f input.ped input.map

# Hand the log back even when the analysis failed: it explains the failure.
if [ -f input.plink.log ]; then
    cp input.plink.log "$fileout_log"
fi

if [ "$status" -eq 0 ]; then
    cp "$fileout_label.ibs_matrix.txt" "$fileout_matrix" || status=1
    cp "$fileout_label.mds_plot.txt" "$fileout_plot" || status=1
fi

rm -f "$fileout_label.ibs_matrix.txt" "$fileout_label.mds_plot.txt" input.plink.log

# Propagate the outcome so the caller can detect a failed run.
exit "$status"
b |
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/mdsplot.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MDSplot/mdsplot.xml Mon Mar 23 05:57:27 2015 -0400 |
[ |
<tool id="sniplay_mdsplot" name="MDS plot" version="1.1.1">

    <!-- [REQUIRED] Tool description displayed after the tool name -->
    <description> IBS matrix / multi-dimensional scaling</description>

    <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->
    <requirements>
        <requirement type="binary">perl</requirement>
        <requirement type="package" version="0.1.13">plink</requirement>
    </requirements>

    <!-- [OPTIONAL] Command to be executed to get the tool's version string -->
    <version_command>
<!--
        tool_binary -v
-->
    </version_command>

    <!-- [REQUIRED] The command to execute -->
    <!-- Arguments are quoted because the output name is free text and may contain spaces. -->
    <command interpreter="bash">
        mdsplot.sh "$fileped" "$filemap" "$fileout_label" "$fileout_matrix" "$fileout_plot" "$fileout_log"
    </command>

    <!-- [REQUIRED] Input files and tool parameters -->
    <inputs>
        <param name="fileped" type="data" format="txt" optional="false" label="PED input" />
        <param name="filemap" type="data" format="txt" optional="false" label="MAP input" help="4 columns tabular file: chromosome, snp id, genetic distance, bp position"/>
        <param name="fileout_label" type="text" value="analyse" label="Output name" help="Output name for tabular files" />
    </inputs>

    <!-- [REQUIRED] Output files -->
    <outputs>
        <data name="fileout_matrix" type="data" format="tabular" label="${fileout_label}.ibs_matrix.txt" />
        <data name="fileout_plot" type="data" format="tabular" label="${fileout_label}.mds_plot.txt" />
        <data name="fileout_log" type="data" format="txt" label="${fileout_label}.log" />
    </outputs>

    <!-- [STRONGLY RECOMMENDED] Exit code rules -->
    <stdio>
        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
        <exit_code range="1:" level="fatal" />
    </stdio>

    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
    <tests>
        <!-- [HELP] Test files have to be in the ~/test-data directory -->
        <!-- The expected results shipped in test-data are named analyse.* -->
        <!-- NOTE(review): the PLINK log records start and finish timestamps, so an exact
             comparison of fileout_log is likely to differ between runs; confirm whether
             a tolerance (e.g. lines_diff) is needed. -->

        <test>
            <param name="fileped" value="input.ped" />
            <param name="filemap" value="input.map" />
            <output name="fileout_matrix" file="analyse.ibs_matrix.txt" />
            <output name="fileout_plot" file="analyse.mds_plot.txt" />
            <output name="fileout_log" file="analyse.log" />
        </test>

        <!-- [HELP] Multiple tests can be defined with different parameters -->
<!--
        <test>
        </test>
-->
    </tests>

    <!-- [OPTIONAL] Help displayed in Galaxy -->
    <help>

.. class:: infomark

**Authors**

---------------------------------------------------

.. class:: infomark

**Please cite** If you use this tool, please cite Dereeper et al. 2015 in prep.

---------------------------------------------------

========
MDS plot
========

-----------
Description
-----------

    Compute an IBS matrix and a multi-dimensional scaling.


-----------------
Workflow position
-----------------

**Upstream tools**

=========== ========================== =======
Name        output file(s)             format
=========== ========================== =======
=========== ========================== =======


**Downstream tools**

=========== ========================== =======
Name        output file(s)             format
=========== ========================== =======
=========== ========================== =======


----------
Input file
----------

PED file

MAP file
    4 columns tabular file: chromosome, snp id, genetic distance, bp position


----------
Parameters
----------

Output name
    Output base name for the output files


------------
Output files
------------

Output_name.ibs_matrix.txt
    Tabular file with IBS matrix

Output_name.mds_plot.txt
    File to construct MDS plot

Output_name.log
    Log file


---------------------------------------------------

---------------
Working example
---------------

Input files
===========

PED file
-----------

::

    IRAT112 1 0 0 1 1 1 1 4 4 ...
    IAC25 1 0 0 1 1 1 1 4 4 ...
    CIRAD409 1 0 0 1 1 3 3 1 1 ...


MAP file
-----------

::

    Chr1 Chr1:4299 0 4299
    Chr1 Chr1:26710 0 26710
    Chr1 Chr1:56184 0 56184
    Chr1 Chr1:93272 0 93272



Parameters
==========

Output name -> densities


Output files
============

densities.ibs_matrix.txt
------------------------

::

    Individuals IRAT112 IAC25 IAC165 KARASUKARASURANKASU DOURADOPRECOCE ...
    IRAT112 1 0.93691 0.937407 0.734724 0.943368 ...
    IAC25 0.93691 1 0.958768 0.723299 0.965723 ...


densities.mds_plot.txt
----------------------

::

    IRAT112 -0.0969382 0.0376036
    IAC25 -0.0918126 0.0501177



    </help>

</tool>
b |
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/test-data/analyse.ibs_matrix.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MDSplot/test-data/analyse.ibs_matrix.txt Mon Mar 23 05:57:27 2015 -0400 |
b |
b'@@ -0,0 +1,94 @@\n+Individuals\tIRAT112\tIAC25\tIAC165\tKARASUKARASURANKASU\tDOURADOPRECOCE\tCUIABANA\tBICOBRANCO\tCAAWA/FORTUNA6\tCANELADEFERRO\tCIRAD358\tCOLOMBIA1\tCIRAD409\tBAGANANASALAO\tBAKUNGH\tFOHISOMOTRA\tKAKANI2\tGUARANI\tDOURADOAGULHA\tIRAT13\tDAWASANRED\tGRAZI\tIRAT144\tIAC47\tMOROBEREKAN\tIRAT362\tGIZA171\tIDSA77\tIRAT216\tIRAT177\tCHAPHUMA\tCIRAD392\tARROZCEBADA\tINDANE\tDINORADO\t63-104\tIR63380-16\tCANAROXA\tCICIHBETON\tIR68704-145-1-1-B\tHAWMOM\tIRAT364\tIRAT212\tKHAODAM\t62667\tIRAT234\tNIPPONBARE_D\tJUMULA2\tCUBA65\tBABER\tCIRAD488\tBENGALYVAKARINA\tESPERANZA\tBULUPANDAK\tARIAS\tGOMPA2\tCT13582-15-5-M\tIRAT335\tM202\tIR65907-188-1-B\tIR71525-19-1-1\tCNA-7_BO_1_1_33-13-6-1\tIRAT257\tIRAT109\tKANIRANGA\tIR66421-096-2-1-1\tGOGO\tGEMJYAJYANAM\tDANGREY\tBINULAWAN\tCAIAPO\tIGUAPECATETO\tIRAT170\tIRAT380\tARAGUAIA\tGOGOLEMPAK\tJAOHAW\tCHALOYOE\tGANIGI\tGUNDILKUNING\tIRAT2\tIRAT366\tIRAT104\tDAVAO\tHD1-4\tCURINCA\tDAM\tCIRAD403\tJUMALI\tJIMBRUKJOLOWORO\tGOGOLEMPUK\tCIRAD394\tIR63372-08\tIR60080-46A\n+IRAT112\t1\t0.93691\t0.937407\t0.734724\t0.943368\t0.819672\t0.903626\t0.850969\t0.842524\t0.802285\t0.717834\t0.836562\t0.804272\t0.832588\t0.81073\t0.614009\t0.946846\t0.943368\t0.944362\t0.688525\t0.807253\t0.712866\t0.929955\t0.782414\t0.902136\t0.692499\t0.817188\t0.796821\t0.943865\t0.697963\t0.790859\t0.787879\t0.704918\t0.833582\t0.917039\t0.769001\t0.938897\t0.682563\t0.814704\t0.707402\t0.859911\t0.787879\t0.709389\t0.945355\t0.883259\t0.703924\t0.697963\t0.843517\t0.725286\t0.725782\t0.84302\t0.876304\t0.69995\t0.74466\t0.651267\t0.836066\t0.958271\t0.651764\t0.755092\t0.859911\t0.861898\t0.905614\t0.892201\t0.739692\t0.828117\t0.80924\t0.650273\t0.644312\t0.860407\t0.853949\t0.922504\t0.944858\t0.81073\t0.908097\t0.756085\t0.674118\t0.744163\t0.785892\t0.698957\t0.944858\t0.80775\t0.857427\t0.838053\t0.931446\t0.831098\t0.721311\t0.798808\t0.592151\t0.739692\t0.757079\t0.767014\t0.781918\t0.899155\n+IAC25\t0.93691\t1\t0.958768\t0.723299\t0.96572
3\t0.825137\t0.93691\t0.836562\t0.850969\t0.772976\t0.713363\t0.807253\t0.791853\t0.819175\t0.814208\t0.602583\t0.934426\t0.966716\t0.904123\t0.676105\t0.817685\t0.706408\t0.948336\t0.811724\t0.892697\t0.693989\t0.819672\t0.784401\t0.902633\t0.677596\t0.806259\t0.812221\t0.717337\t0.813214\t0.876801\t0.757576\t0.923497\t0.706905\t0.806259\t0.69995\t0.833582\t0.777447\t0.693989\t0.904123\t0.896672\t0.703428\t0.694486\t0.813214\t0.728763\t0.747144\t0.82464\t0.864878\t0.710383\t0.758073\t0.643815\t0.834575\t0.925981\t0.674118\t0.745653\t0.839543\t0.845504\t0.900149\t0.851962\t0.764034\t0.817685\t0.814704\t0.655738\t0.644809\t0.819175\t0.834575\t0.90313\t0.90462\t0.838053\t0.928465\t0.751615\t0.666667\t0.733731\t0.800298\t0.707402\t0.902633\t0.805266\t0.846995\t0.822653\t0.955787\t0.809737\t0.700944\t0.788376\t0.591654\t0.750124\t0.751615\t0.790363\t0.769498\t0.879781\n+IAC165\t0.937407\t0.958768\t1\t0.722802\t0.929459\t0.839543\t0.932439\t0.862891\t0.85544\t0.786388\t0.739692\t0.817685\t0.790363\t0.817685\t0.822653\t0.596125\t0.968703\t0.929459\t0.920517\t0.677596\t0.832091\t0.704918\t0.9846\t0.822156\t0.92002\t0.688525\t0.834078\t0.794834\t0.921013\t0.685047\t0.805763\t0.823646\t0.717834\t0.826627\t0.895181\t0.769995\t0.945852\t0.688525\t0.825633\t0.694486\t0.846001\t0.77993\t0.697466\t0.920517\t0.90611\t0.694983\t0.689021\t0.809737\t0.723299\t0.745653\t0.810233\t0.87233\t0.692002\t0.76155\t0.636364\t0.849975\t0.91853\t0.66766\t0.755092\t0.866865\t0.855936\t0.916542\t0.868356\t0.746647\t0.822156\t0.813214\t0.651267\t0.632389\t0.82464\t0.844014\t0.927471\t0.921013\t0.832588\t0.963736\t0.774963\t0.661202\t0.743169\t0.796821\t0.702931\t0.921013\t0.800795\t0.864382\t0.80924\t0.98609\t0.821162\t0.709389\t0.797814\t0.588177\t0.735718\t0.774963\t0.783905\t0.780924\t0.901143\n+KARASUKARASURANKASU\t0.734724\t0.723299\t0.722802\t1\t0.73075\t0.748137\t0.745653\t0.766518\t0.747144\t0.796324\t0.673125\t0.738202\t0.774466\t0.753105\t0.780924\t0.630899\t0.731247\t0.73075\t0.769498\t
0.802782\t0.769498\t0.677099\t0.73224\t0.749627\t0.754098\t0.817685\t0.715847\t0.721311\t0.769001\t0.837059\t0.777943\t0.775956\t0.827124\t0.740189\t0.778937\t0.707402\t0.743169\t0.675609\t0.704421\t0.80775\t0.780427\t0.748137\t0.879285\t0.770492\t0.759066\t0.828117\t0.731744\t0.743169\t0.761053\t0.669151\t0.764531\t0.71237\t0.693989\t0.737705\t0.668157\t0.735718\t0.723795\t0.781918\t0.728266\t0.764531\t0.750621\t0.741679\t0.731247\t0.689'..b'\t0.74764\t0.723795\t0.61997\t0.77546\t0.692002\t0.730253\t0.850969\t0.703428\t0.672628\t0.654247\t0.640835\t0.722305\t0.694983\t0.726279\t0.74615\t0.727769\t0.724292\t0.71535\t0.653751\t0.740189\t0.724292\t0.656731\t0.687531\t0.811227\t0.726279\t0.688525\t0.723795\t0.800795\t0.680079\t0.663189\t0.748137\t0.658718\t0.663189\t0.791356\t0.721311\t0.737705\t0.759563\t0.631396\t0.682067\t0.741679\t0.647789\t0.671634\t0.77844\t0.689021\t0.774466\t0.682563\t0.865872\t0.707899\t0.814208\t0.616493\t0.60457\t0.774963\t0.726776\t0.736711\t0.724292\t0.733234\t0.709389\t0.77993\t0.691008\t0.679583\t0.775956\t0.727769\t0.723299\t0.796821\t0.777943\t0.763537\t0.734724\t0.651267\t0.678589\t0.643815\t0.600099\t1\t0.77993\t0.680576\t0.641828\t0.739195\n+GOGOLEMPUK\t0.757079\t0.751615\t0.774963\t0.710383\t0.756085\t0.791356\t0.783905\t0.780924\t0.771485\t0.801788\t0.63686\t0.69995\t0.750124\t0.79533\t0.835072\t0.597615\t0.789369\t0.756085\t0.782911\t0.727769\t0.801788\t0.685544\t0.787382\t0.852459\t0.776453\t0.693989\t0.669647\t0.673125\t0.782414\t0.71535\t0.717834\t0.79533\t0.730253\t0.754595\t0.77546\t0.685047\t0.790363\t0.748634\t0.706905\t0.729757\t0.795827\t0.765524\t0.725782\t0.783905\t0.815201\t0.697466\t0.689518\t0.729757\t0.73075\t0.693492\t0.795827\t0.715847\t0.777943\t0.738202\t0.645802\t0.72926\t0.762047\t0.643318\t0.674118\t0.813711\t0.751118\t0.782911\t0.733731\t0.840537\t0.772976\t0.789866\t0.73224\t0.718331\t0.780427\t0.743169\t0.761053\t0.783408\t0.722802\t0.749627\t0.998013\t0.734228\t0.721808\t0.838053\t0.785892\t0.783408\t0.8
08246\t0.853949\t0.759066\t0.787879\t0.660705\t0.728763\t0.675112\t0.577745\t0.77993\t1\t0.677099\t0.675112\t0.791356\n+CIRAD394\t0.767014\t0.790363\t0.783905\t0.761053\t0.778937\t0.725782\t0.783905\t0.742176\t0.750621\t0.693492\t0.725286\t0.755589\t0.71535\t0.742673\t0.705912\t0.677099\t0.764531\t0.778937\t0.803775\t0.700944\t0.727273\t0.718331\t0.781421\t0.717337\t0.811227\t0.834078\t0.755092\t0.759563\t0.803279\t0.692499\t0.865872\t0.736711\t0.685544\t0.76155\t0.813214\t0.787382\t0.784401\t0.674118\t0.73075\t0.680079\t0.723299\t0.794337\t0.706905\t0.804769\t0.793343\t0.838549\t0.783905\t0.729757\t0.722802\t0.748137\t0.752111\t0.770492\t0.676602\t0.714357\t0.733234\t0.783905\t0.742176\t0.79235\t0.745653\t0.735221\t0.813711\t0.766021\t0.764531\t0.699454\t0.790859\t0.761053\t0.690512\t0.685544\t0.738698\t0.789866\t0.76304\t0.804272\t0.824143\t0.758569\t0.676105\t0.69846\t0.723795\t0.693989\t0.680576\t0.803279\t0.655241\t0.752608\t0.728266\t0.781918\t0.745156\t0.702931\t0.763537\t0.645306\t0.680576\t0.677099\t1\t0.773472\t0.781421\n+IR63372-08\t0.781918\t0.769498\t0.780924\t0.735221\t0.76006\t0.712866\t0.778937\t0.796821\t0.74764\t0.697466\t0.868356\t0.944362\t0.747144\t0.729757\t0.717834\t0.605564\t0.781421\t0.761053\t0.834575\t0.707899\t0.728266\t0.642822\t0.77844\t0.702434\t0.835072\t0.700944\t0.945852\t0.981123\t0.835072\t0.700447\t0.763537\t0.741679\t0.710383\t0.74466\t0.836066\t0.93542\t0.798311\t0.619473\t0.846001\t0.698957\t0.757079\t0.721808\t0.720815\t0.835569\t0.773472\t0.71237\t0.683557\t0.742673\t0.707899\t0.690512\t0.719324\t0.769498\t0.61699\t0.697466\t0.634873\t0.85544\t0.759066\t0.690015\t0.963239\t0.767014\t0.849478\t0.757079\t0.798311\t0.660705\t0.837556\t0.750124\t0.663686\t0.672628\t0.745653\t0.764034\t0.753105\t0.835072\t0.760556\t0.79235\t0.674118\t0.687531\t0.711873\t0.691008\t0.627919\t0.835072\t0.670144\t0.746647\t0.72926\t0.77993\t0.938897\t0.709886\t0.981123\t0.592648\t0.641828\t0.675112\t0.773472\t1\t0.812221\n+IR60080-46A\t0.899155\t0.87
9781\t0.901143\t0.753105\t0.867362\t0.807253\t0.869349\t0.878291\t0.845007\t0.808743\t0.738202\t0.861898\t0.787879\t0.817188\t0.84004\t0.617486\t0.924491\t0.868356\t0.946846\t0.702931\t0.799801\t0.769001\t0.898659\t0.818679\t0.934426\t0.704918\t0.801788\t0.82613\t0.946349\t0.722305\t0.799305\t0.812221\t0.714357\t0.837059\t0.919523\t0.797317\t0.925484\t0.702931\t0.846001\t0.725782\t0.861401\t0.84302\t0.742673\t0.947839\t0.865872\t0.711376\t0.719324\t0.835072\t0.725782\t0.741182\t0.80775\t0.858917\t0.721311\t0.762047\t0.672628\t0.882265\t0.880278\t0.680079\t0.806259\t0.942871\t0.891207\t0.923\t0.903626\t0.743169\t0.832588\t0.813711\t0.674615\t0.66468\t0.842027\t0.871336\t0.87233\t0.947342\t0.812221\t0.871833\t0.790363\t0.718331\t0.743666\t0.798311\t0.722305\t0.947342\t0.79533\t0.910581\t0.837556\t0.901143\t0.785892\t0.726776\t0.828117\t0.603577\t0.739195\t0.791356\t0.781421\t0.812221\t1\n' |
b |
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/test-data/analyse.log --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MDSplot/test-data/analyse.log Mon Mar 23 05:57:27 2015 -0400 |
[ |
@@ -0,0 +1,56 @@ + +@----------------------------------------------------------@ +| PLINK! | v1.07 | 10/Aug/2009 | +|----------------------------------------------------------| +| (C) 2009 Shaun Purcell, GNU General Public License, v2 | +|----------------------------------------------------------| +| For documentation, citation & bug-report instructions: | +| http://pngu.mgh.harvard.edu/purcell/plink/ | +@----------------------------------------------------------@ + +Skipping web check... [ --noweb ] +Writing this text to log file [ analyse.log ] +Analysis started: Tue Jan 20 09:30:56 2015 + +Options in effect: + --file input + --noweb + --cluster + --matrix + --mds-plot 2 + --out analyse + +2013 (of 2013) markers to be included from [ input.map ] +93 individuals read from [ input.ped ] +93 individuals with nonmissing phenotypes +Assuming a disease phenotype (1=unaff, 2=aff, 0=miss) +Missing phenotype value is also -9 +0 cases, 93 controls and 0 missing +93 males, 0 females, and 0 of unspecified sex +Before frequency and genotyping pruning, there are 2013 SNPs +Converting data to SNP-major format +93 founders and 0 non-founders found +Total genotyping rate in remaining individuals is 1 +0 SNPs failed missingness test ( GENO > 1 ) +0 SNPs failed frequency test ( MAF < 0 ) +Converting data to Individual-major format +After frequency and genotyping pruning, there are 2013 SNPs +After filtering, 0 cases, 93 controls and 0 missing +After filtering, 93 males, 0 females, and 0 of unspecified sex + + **Warning** this analysis typically requires whole-genome level data + to give accurate results + +Clustering individuals based on genome-wide IBS +Merge distance p-value constraint = 0 +IBS(g) calculation: 0 of 4278 IBS(g) calculation: 100 of 4278 IBS(g) calculation: 200 of 4278 IBS(g) calculation: 300 of 4278 IBS(g) calculation: 400 of 4278 IBS(g) calculation: 500 of 4278 IBS(g) calculation: 600 of 4278 IBS(g) calculation: 700 of 4278 IBS(g) calculation: 800 of 4278 IBS(g) 
calculation: 900 of 4278 IBS(g) calculation: 1000 of 4278 IBS(g) calculation: 1100 of 4278 IBS(g) calculation: 1200 of 4278 IBS(g) calculation: 1300 of 4278 IBS(g) calculation: 1400 of 4278 IBS(g) calculation: 1500 of 4278 IBS(g) calculation: 1600 of 4278 IBS(g) calculation: 1700 of 4278 IBS(g) calculation: 1800 of 4278 IBS(g) calculation: 1900 of 4278 IBS(g) calculation: 2000 of 4278 IBS(g) calculation: 2100 of 4278 IBS(g) calculation: 2200 of 4278 IBS(g) calculation: 2300 of 4278 IBS(g) calculation: 2400 of 4278 IBS(g) calculation: 2500 of 4278 IBS(g) calculation: 2600 of 4278 IBS(g) calculation: 2700 of 4278 IBS(g) calculation: 2800 of 4278 IBS(g) calculation: 2900 of 4278 IBS(g) calculation: 3000 of 4278 IBS(g) calculation: 3100 of 4278 IBS(g) calculation: 3200 of 4278 IBS(g) calculation: 3300 of 4278 IBS(g) calculation: 3400 of 4278 IBS(g) calculation: 3500 of 4278 IBS(g) calculation: 3600 of 4278 IBS(g) calculation: 3700 of 4278 IBS(g) calculation: 3800 of 4278 IBS(g) calculation: 3900 of 4278 IBS(g) calculation: 4000 of 4278 IBS(g) calculation: 4100 of 4278 IBS(g) calculation: 4200 of 4278 Writing IBS similarity matrix to [ analyse.mibs ] +Of these, 4278 are pairable based on constraints +Writing cluster progress to [ analyse.cluster0 ] +Writing cluster solution (1) [ analyse.cluster1 ] +Writing cluster solution (2) [ analyse.cluster2 ] +Writing cluster solution (3) [ analyse.cluster3 ] +Writing MDS solution to [ analyse.mds ] +MDS plot of individuals (not clusters) + +Analysis finished: Tue Jan 20 09:30:57 2015 + |
b |
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/test-data/analyse.mds_plot.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MDSplot/test-data/analyse.mds_plot.txt Mon Mar 23 05:57:27 2015 -0400 |
b |
@@ -0,0 +1,93 @@ +IRAT112 -0.0969382 0.0376036 +IAC25 -0.0918126 0.0501177 +IAC165 -0.104815 0.0453876 +KARASUKARASURANKASU 0.107782 -0.0683546 +DOURADOPRECOCE -0.0831697 0.0516412 +CUIABANA 0.0327877 0.0497752 +BICOBRANCO -0.0554177 0.0501857 +CAAWA/FORTUNA6 -0.052355 0.0170584 +CANELADEFERRO -0.0142283 0.0598394 +CIRAD358 0.0497841 0.0645487 +COLOMBIA1 -0.110347 -0.172742 +CIRAD409 -0.113614 -0.105898 +BAGANANASALAO 0.0247582 0.0186246 +BAKUNGH 0.0199022 0.0613692 +FOHISOMOTRA 0.0344309 0.0697592 +KAKANI2 0.147713 -0.0780455 +GUARANI -0.0892761 0.0387007 +DOURADOAGULHA -0.0831373 0.0514523 +IRAT13 -0.0757215 0.00312413 +DAWASANRED 0.131309 -0.102364 +GRAZI 0.0177091 0.0870472 +IRAT144 0.0146443 0.0067734 +IAC47 -0.0968934 0.0477667 +MOROBEREKAN 0.0395544 0.0952695 +IRAT362 -0.0921075 -0.00860852 +GIZA171 0.14498 -0.0932001 +IDSA77 -0.122827 -0.120219 +IRAT216 -0.120205 -0.136803 +IRAT177 -0.0762395 0.00230332 +CHAPHUMA 0.149543 -0.0680637 +CIRAD392 0.0187265 -0.047646 +ARROZCEBADA 0.0110416 0.0787137 +INDANE 0.108691 -0.0338065 +DINORADO -0.00819237 0.0386578 +63-104 -0.053974 -0.00204494 +IR63380-16 -0.0968028 -0.0931822 +CANAROXA -0.0756689 0.0253328 +CICIHBETON 0.108857 0.142633 +IR68704-145-1-1-B -0.103292 -0.071382 +HAWMOM 0.129651 -0.0473323 +IRAT364 -0.0118523 0.0472973 +IRAT212 0.0231616 -0.00146178 +KHAODAM 0.123936 -0.0654516 +62667 -0.0752552 0.00278556 +IRAT234 -0.0346607 0.0551203 +NIPPONBARE_D 0.133199 -0.0963396 +JUMULA2 0.104558 -0.0524994 +CUBA65 -0.0124835 0.013679 +BABER 0.090299 -0.0460696 +CIRAD488 -0.0201481 -0.0262103 +BENGALYVAKARINA 0.028631 0.0637662 +ESPERANZA -0.108888 -0.00812147 +BULUPANDAK 0.119762 0.143912 +ARIAS 0.0327948 0.0827878 +GOMPA2 0.133341 -0.0543414 +CT13582-15-5-M -0.0922561 -0.0543988 +IRAT335 -0.0886849 0.0522465 +M202 0.13463 -0.106462 +IR65907-188-1-B -0.0755893 -0.136821 +IR71525-19-1-1 -0.0353444 0.0478985 +CNA-7_BO_1_1_33-13-6-1 -0.0823478 -0.0435581 +IRAT257 -0.0606191 0.0564989 +IRAT109 -0.1006 -0.00959445 
+KANIRANGA 0.0705459 0.129888 +IR66421-096-2-1-1 -0.00859728 -0.0570793 +GOGO 0.0232414 0.0351889 +GEMJYAJYANAM 0.170032 -0.0857315 +DANGREY 0.175792 -0.113683 +BINULAWAN -0.0208225 0.0611919 +CAIAPO -0.0149329 -0.0122987 +IGUAPECATETO -0.0740464 0.0552386 +IRAT170 -0.0756928 0.00297643 +IRAT380 -0.0210318 -0.00092536 +ARAGUAIA -0.123443 0.0273298 +GOGOLEMPAK 0.0598291 0.107462 +JAOHAW 0.17563 -0.0587865 +CHALOYOE 0.0904761 -0.0565325 +GANIGI 0.0526286 0.0960475 +GUNDILKUNING 0.116788 0.130234 +IRAT2 -0.0762849 0.00262428 +IRAT366 0.0273652 0.13551 +IRAT104 -0.00310702 0.0636479 +DAVAO -0.00538403 0.0564886 +HD1-4 -0.0960109 0.0459137 +CURINCA -0.128876 -0.121141 +DAM 0.129029 -0.0660183 +CIRAD403 -0.117849 -0.134651 +JUMALI 0.123992 -0.0843623 +JIMBRUKJOLOWORO 0.0549906 0.135017 +GOGOLEMPUK 0.0597267 0.10733 +CIRAD394 0.0040454 -0.0885135 +IR63372-08 -0.0951857 -0.140804 +IR60080-46A -0.0732581 0.0197832 |
b |
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/test-data/input.map --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MDSplot/test-data/input.map Mon Mar 23 05:57:27 2015 -0400 |
b |
b'@@ -0,0 +1,2013 @@\n+Chr1 Chr1:4299 0 4299\n+Chr1 Chr1:26710 0 26710\n+Chr1 Chr1:56184 0 56184\n+Chr1 Chr1:93272 0 93272\n+Chr1 Chr1:93274 0 93274\n+Chr1 Chr1:146252 0 146252\n+Chr1 Chr1:171433 0 171433\n+Chr1 Chr1:186286 0 186286\n+Chr1 Chr1:198907 0 198907\n+Chr1 Chr1:205550 0 205550\n+Chr1 Chr1:206108 0 206108\n+Chr1 Chr1:214680 0 214680\n+Chr1 Chr1:214775 0 214775\n+Chr1 Chr1:215711 0 215711\n+Chr1 Chr1:217307 0 217307\n+Chr1 Chr1:238821 0 238821\n+Chr1 Chr1:240448 0 240448\n+Chr1 Chr1:240579 0 240579\n+Chr1 Chr1:300528 0 300528\n+Chr1 Chr1:312532 0 312532\n+Chr1 Chr1:337330 0 337330\n+Chr1 Chr1:351841 0 351841\n+Chr1 Chr1:353617 0 353617\n+Chr1 Chr1:405193 0 405193\n+Chr1 Chr1:405217 0 405217\n+Chr1 Chr1:409167 0 409167\n+Chr1 Chr1:415515 0 415515\n+Chr1 Chr1:424861 0 424861\n+Chr1 Chr1:425116 0 425116\n+Chr1 Chr1:429660 0 429660\n+Chr1 Chr1:439393 0 439393\n+Chr1 Chr1:456979 0 456979\n+Chr1 Chr1:479030 0 479030\n+Chr1 Chr1:479033 0 479033\n+Chr1 Chr1:525146 0 525146\n+Chr1 Chr1:535685 0 535685\n+Chr1 Chr1:536093 0 536093\n+Chr1 Chr1:572628 0 572628\n+Chr1 Chr1:577906 0 577906\n+Chr1 Chr1:630114 0 630114\n+Chr1 Chr1:653644 0 653644\n+Chr1 Chr1:660019 0 660019\n+Chr1 Chr1:660070 0 660070\n+Chr1 Chr1:715091 0 715091\n+Chr1 Chr1:715130 0 715130\n+Chr1 Chr1:725844 0 725844\n+Chr1 Chr1:735420 0 735420\n+Chr1 Chr1:735757 0 735757\n+Chr1 Chr1:735810 0 735810\n+Chr1 Chr1:746855 0 746855\n+Chr1 Chr1:753267 0 753267\n+Chr1 Chr1:754061 0 754061\n+Chr1 Chr1:769014 0 769014\n+Chr1 Chr1:782878 0 782878\n+Chr1 Chr1:787761 0 787761\n+Chr1 Chr1:789186 0 789186\n+Chr1 Chr1:792359 0 792359\n+Chr1 Chr1:805716 0 805716\n+Chr1 Chr1:841793 0 841793\n+Chr1 Chr1:845425 0 845425\n+Chr1 Chr1:847587 0 847587\n+Chr1 Chr1:853188 0 853188\n+Chr1 Chr1:853218 0 853218\n+Chr1 Chr1:854818 0 854818\n+Chr1 Chr1:855663 0 855663\n+Chr1 Chr1:859613 0 859613\n+Chr1 Chr1:859635 0 859635\n+Chr1 Chr1:917609 0 917609\n+Chr1 Chr1:917933 0 917933\n+Chr1 Chr1:925316 0 925316\n+Chr1 Chr1:930060 0 
930060\n+Chr1 Chr1:930069 0 930069\n+Chr1 Chr1:937007 0 937007\n+Chr1 Chr1:937014 0 937014\n+Chr1 Chr1:938444 0 938444\n+Chr1 Chr1:938619 0 938619\n+Chr1 Chr1:961472 0 961472\n+Chr1 Chr1:961537 0 961537\n+Chr1 Chr1:963122 0 963122\n+Chr1 Chr1:963313 0 963313\n+Chr1 Chr1:973080 0 973080\n+Chr1 Chr1:986764 0 986764\n+Chr1 Chr1:1004926 0 1004926\n+Chr1 Chr1:1009979 0 1009979\n+Chr1 Chr1:1011056 0 1011056\n+Chr1 Chr1:1011714 0 1011714\n+Chr1 Chr1:1012017 0 1012017\n+Chr1 Chr1:1012026 0 1012026\n+Chr1 Chr1:1012111 0 1012111\n+Chr1 Chr1:1026043 0 1026043\n+Chr1 Chr1:1083377 0 1083377\n+Chr1 Chr1:1095742 0 1095742\n+Chr1 Chr1:1124378 0 1124378\n+Chr1 Chr1:1125022 0 1125022\n+Chr1 Chr1:1167717 0 1167717\n+Chr1 Chr1:1172746 0 1172746\n+Chr1 Chr1:1173064 0 1173064\n+Chr1 Chr1:1180585 0 1180585\n+Chr1 Chr1:1182388 0 1182388\n+Chr1 Chr1:1184650 0 1184650\n+Chr1 Chr1:1188838 0 1188838\n+Chr1 Chr1:1197390 0 1197390\n+Chr1 Chr1:1197550 0 1197550\n+Chr1 Chr1:1197663 0 1197663\n+Chr1 Chr1:1208561 0 1208561\n+Chr1 Chr1:1229816 0 1229816\n+Chr1 Chr1:1250047 0 1250047\n+Chr1 Chr1:1286631 0 1286631\n+Chr1 Chr1:1303497 0 1303497\n+Chr1 Chr1:1306055 0 1306055\n+Chr1 Chr1:1306058 0 1306058\n+Chr1 Chr1:1306085 0 1306085\n+Chr1 Chr1:1318689 0 1318689\n+Chr1 Chr1:1358972 0 1358972\n+Chr1 Chr1:1359766 0 1359766\n+Chr1 Chr1:1359769 0 1359769\n+Chr1 Chr1:1365854 0 1365854\n+Chr1 Chr1:1404921 0 1404921\n+Chr1 Chr1:1407386 0 1407386\n+Chr1 Chr1:1443131 0 1443131\n+Chr1 Chr1:1446645 0 1446645\n+Chr1 Chr1:1486728 0 1486728\n+Chr1 Chr1:1492913 0 1492913\n+Chr1 Chr1:1496524 0 1496524\n+Chr1 Chr1:1509728 0 1509728\n+Chr1 Chr1:1515087 0 1515087\n+Chr1 Chr1:1546579 0 1546579\n+Chr1 Chr1:1551208 0 1551208\n+Chr1 Chr1:1563026 0 1563026\n+Chr1 Chr1:1563029 0 1563029\n+Chr1 Chr1:1565548 0 1565548\n+Chr1 Chr1:1598133 0 1598133\n+Chr1 Chr1:1627278 0 1627278\n+Chr1 Chr1:1627327 0 1627327\n+Chr1 Chr1:1632085 0 1632085\n+Chr1 Chr1:1655772 0 1655772\n+Chr1 Chr1:1655775 0 1655775\n+Chr1 Chr1:1671697 0 
1671697\n+Chr1 Chr1:1685316 0 1685316\n+Chr1 Chr1:1687456 0 1687456\n+Chr1 Chr1:1703238 0 1703238\n+Chr1 Chr1:1703264 0 1703264\n+Chr1 Chr1:1722207 0 1722207\n+Chr1 Chr'..b'+Chr1 Chr1:41945638 0 41945638\n+Chr1 Chr1:41954934 0 41954934\n+Chr1 Chr1:41958052 0 41958052\n+Chr1 Chr1:41961218 0 41961218\n+Chr1 Chr1:41961221 0 41961221\n+Chr1 Chr1:41965592 0 41965592\n+Chr1 Chr1:41965659 0 41965659\n+Chr1 Chr1:41968928 0 41968928\n+Chr1 Chr1:41968936 0 41968936\n+Chr1 Chr1:41970211 0 41970211\n+Chr1 Chr1:41998459 0 41998459\n+Chr1 Chr1:42001355 0 42001355\n+Chr1 Chr1:42047135 0 42047135\n+Chr1 Chr1:42047157 0 42047157\n+Chr1 Chr1:42061862 0 42061862\n+Chr1 Chr1:42063149 0 42063149\n+Chr1 Chr1:42081350 0 42081350\n+Chr1 Chr1:42081405 0 42081405\n+Chr1 Chr1:42088625 0 42088625\n+Chr1 Chr1:42089983 0 42089983\n+Chr1 Chr1:42097607 0 42097607\n+Chr1 Chr1:42106144 0 42106144\n+Chr1 Chr1:42138453 0 42138453\n+Chr1 Chr1:42150639 0 42150639\n+Chr1 Chr1:42155417 0 42155417\n+Chr1 Chr1:42156013 0 42156013\n+Chr1 Chr1:42164107 0 42164107\n+Chr1 Chr1:42165734 0 42165734\n+Chr1 Chr1:42169797 0 42169797\n+Chr1 Chr1:42169819 0 42169819\n+Chr1 Chr1:42187340 0 42187340\n+Chr1 Chr1:42192288 0 42192288\n+Chr1 Chr1:42196386 0 42196386\n+Chr1 Chr1:42227135 0 42227135\n+Chr1 Chr1:42229251 0 42229251\n+Chr1 Chr1:42231379 0 42231379\n+Chr1 Chr1:42242069 0 42242069\n+Chr1 Chr1:42244104 0 42244104\n+Chr1 Chr1:42276315 0 42276315\n+Chr1 Chr1:42301791 0 42301791\n+Chr1 Chr1:42302347 0 42302347\n+Chr1 Chr1:42324275 0 42324275\n+Chr1 Chr1:42332621 0 42332621\n+Chr1 Chr1:42352040 0 42352040\n+Chr1 Chr1:42352085 0 42352085\n+Chr1 Chr1:42352093 0 42352093\n+Chr1 Chr1:42352397 0 42352397\n+Chr1 Chr1:42354188 0 42354188\n+Chr1 Chr1:42354191 0 42354191\n+Chr1 Chr1:42357534 0 42357534\n+Chr1 Chr1:42357591 0 42357591\n+Chr1 Chr1:42367404 0 42367404\n+Chr1 Chr1:42367442 0 42367442\n+Chr1 Chr1:42419046 0 42419046\n+Chr1 Chr1:42419769 0 42419769\n+Chr1 Chr1:42421769 0 42421769\n+Chr1 Chr1:42423110 0 
42423110\n+Chr1 Chr1:42427556 0 42427556\n+Chr1 Chr1:42460597 0 42460597\n+Chr1 Chr1:42460615 0 42460615\n+Chr1 Chr1:42469025 0 42469025\n+Chr1 Chr1:42471750 0 42471750\n+Chr1 Chr1:42479925 0 42479925\n+Chr1 Chr1:42487487 0 42487487\n+Chr1 Chr1:42487514 0 42487514\n+Chr1 Chr1:42548763 0 42548763\n+Chr1 Chr1:42583428 0 42583428\n+Chr1 Chr1:42586452 0 42586452\n+Chr1 Chr1:42594255 0 42594255\n+Chr1 Chr1:42596478 0 42596478\n+Chr1 Chr1:42602822 0 42602822\n+Chr1 Chr1:42602885 0 42602885\n+Chr1 Chr1:42610308 0 42610308\n+Chr1 Chr1:42612885 0 42612885\n+Chr1 Chr1:42616231 0 42616231\n+Chr1 Chr1:42620187 0 42620187\n+Chr1 Chr1:42620190 0 42620190\n+Chr1 Chr1:42620878 0 42620878\n+Chr1 Chr1:42620881 0 42620881\n+Chr1 Chr1:42631741 0 42631741\n+Chr1 Chr1:42634538 0 42634538\n+Chr1 Chr1:42664013 0 42664013\n+Chr1 Chr1:42665624 0 42665624\n+Chr1 Chr1:42666354 0 42666354\n+Chr1 Chr1:42669650 0 42669650\n+Chr1 Chr1:42669653 0 42669653\n+Chr1 Chr1:42673304 0 42673304\n+Chr1 Chr1:42674707 0 42674707\n+Chr1 Chr1:42682058 0 42682058\n+Chr1 Chr1:42682061 0 42682061\n+Chr1 Chr1:42690508 0 42690508\n+Chr1 Chr1:42706257 0 42706257\n+Chr1 Chr1:42711112 0 42711112\n+Chr1 Chr1:42762398 0 42762398\n+Chr1 Chr1:42763941 0 42763941\n+Chr1 Chr1:42763944 0 42763944\n+Chr1 Chr1:42766241 0 42766241\n+Chr1 Chr1:42777810 0 42777810\n+Chr1 Chr1:42780446 0 42780446\n+Chr1 Chr1:42825046 0 42825046\n+Chr1 Chr1:42826857 0 42826857\n+Chr1 Chr1:42880271 0 42880271\n+Chr1 Chr1:42916070 0 42916070\n+Chr1 Chr1:42916090 0 42916090\n+Chr1 Chr1:42920553 0 42920553\n+Chr1 Chr1:42928342 0 42928342\n+Chr1 Chr1:42959997 0 42959997\n+Chr1 Chr1:42968423 0 42968423\n+Chr1 Chr1:43046967 0 43046967\n+Chr1 Chr1:43048104 0 43048104\n+Chr1 Chr1:43065469 0 43065469\n+Chr1 Chr1:43068624 0 43068624\n+Chr1 Chr1:43072051 0 43072051\n+Chr1 Chr1:43073361 0 43073361\n+Chr1 Chr1:43079457 0 43079457\n+Chr1 Chr1:43079480 0 43079480\n+Chr1 Chr1:43093204 0 43093204\n+Chr1 Chr1:43100601 0 43100601\n+Chr1 Chr1:43123958 0 43123958\n+Chr1 
Chr1:43132577 0 43132577\n+Chr1 Chr1:43141118 0 43141118\n+Chr1 Chr1:43141179 0 43141179\n+Chr1 Chr1:43154143 0 43154143\n+Chr1 Chr1:43158899 0 43158899\n+Chr1 Chr1:43179527 0 43179527\n+Chr1 Chr1:43214669 0 43214669\n+Chr1 Chr1:43229591 0 43229591\n+Chr1 Chr1:43249859 0 43249859\n+Chr1 Chr1:43269458 0 43269458\n' |
b |
diff -r 000000000000 -r 3e19d0dfcf3e MDSplot/test-data/input.ped --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MDSplot/test-data/input.ped Mon Mar 23 05:57:27 2015 -0400 |
b |
b'@@ -0,0 +1,93 @@\n+IRAT112\t1\t0\t0\t1\t1\t1 1\t4 4\t2 2\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t3 3\t4 4\t2 2\t1 1\t3 3\t1 1\t1 1\t4 4\t1 1\t1 1\t2 2\t1 1\t4 4\t1 1\t1 1\t4 4\t2 2\t4 4\t1 1\t1 1\t4 4\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t2 2\t1 1\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t3 3\t4 4\t4 4\t2 2\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t2 2\t4 4\t2 2\t1 1\t1 1\t3 3\t3 3\t4 4\t4 4\t3 3\t3 3\t2 2\t1 1\t2 2\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t4 4\t3 3\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t3 3\t4 4\t4 4\t3 3\t4 4\t1 1\t3 3\t3 3\t3 3\t1 1\t2 2\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t3 3\t2 2\t4 4\t4 4\t1 1\t3 3\t2 2\t3 3\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t2 2\t4 4\t4 4\t1 1\t4 4\t2 2\t3 3\t3 3\t1 1\t3 3\t4 4\t1 1\t3 3\t4 4\t1 1\t4 4\t2 2\t2 2\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t2 2\t2 2\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t3 3\t3 3\t1 1\t4 4\t1 1\t2 2\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t3 3\t4 4\t2 2\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t2 2\t1 1\t2 2\t4 4\t2 2\t3 3\t4 4\t1 1\t4 4\t2 2\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t3 3\t4 4\t4 4\t1 1\t2 2\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t4 4\t3 3\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t2 2\t4 4\t1 1\t3 3\t2 2\t2 2\t3 3\t3 3\t1 1\t2 2\t1 1\t3 3\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t2 2\t2 2\t1 1\t4 4\t4 4\t1 1\t2 2\t4 4\t1 1\t4 4\t1 1\t2 2\t1 1\t2 2\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t3 3\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t3 3\t1 1\t4 4\t3 3\t2 2\t1 1\t4 4\t4 4\t3 3\t1 
1\t2 2\t2 2\t1 1\t3 3\t4 4\t1 1\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t2 2\t4 4\t4 4\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t2 2\t4 4\t2 2\t2 2\t3 3\t4 4\t3 3\t2 2\t1 1\t1 1\t2 2\t1 1\t4 4\t3 3\t2 2\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t2 2\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t3 3\t1 1\t2 2\t1 1\t1 1\t2 2\t1 1\t1 1\t2 2\t1 1\t2 2\t3 3\t4 4\t1 1\t2 2\t1 1\t2 2\t4 4\t4 4\t1 1\t1 1\t4 4\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t3 3\t4 4\t3 3\t4 4\t3 3\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t3 3\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t3 3\t4 4\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t2 2\t4 4\t2 2\t1 1\t3 3\t1 1\t4 4\t1 1\t2 2\t1 1\t3 3\t2 2\t4 4\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t4 4\t1 1\t3 3\t4 4\t4 4\t4 4\t3 3\t4 4\t4 4\t1 1\t2 2\t1 1\t4 4\t4 4\t2 2\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t3 3\t2 2\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t3 3\t2 2\t1 1\t3 3\t4 4\t2 2\t2 2\t4 4\t2 2\t2 2\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t3 3\t4 4\t1 1\t1 1\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3\t1 1\t4 4\t3 3\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t2 2\t1 1\t4 4\t3 3\t4 4\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t3 3\t1 1\t4 4\t1 1\t2 2\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t2 2\t4 4\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t2 2\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t3 3\t1 1\t1 1\t1 1\t4 4\t1 1\t3 3\t2 2\t2 2\t2 2\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t3 3\t4 4\t3 3\t2 2\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t3 3\t4 4\t4 4\t3 3\t4 4\t3 3\t3 3\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t1 1\t4 4\t3 3\t3 3\t3 3\t4 4\t3 3\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t3 3\t1 1\t3 3\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 
4\t4 4\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t3 3\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t1 1\t3 3\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t2 2\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t3 3\t1 1\t1 1\t4 4\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t2 2\t1 1\t4 4\t4 4\t4 4\t3 3\t1 1\t2 2\t1 1\t1 1\t4 4\t4 4\t2 2\t1 1\t1 1\t2 2\t4 4\t1 1\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3\t3 3\t2 2\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t3 3\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t2 2\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t2 2\t4 4\t1 1\t3 3\t3 3\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t2 2\t3 3\t1 1\t3 3\t1 1\t4 4\t4 4\t2 2\t4 4\t4 4\t1 1\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3'..b' 1\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t3 3\t1 1\t3 3\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t3 3\t2 2\t4 4\t1 1\t4 4\t1 1\t4 4\t2 2\t3 3\t4 4\t1 1\t4 4\t1 1\t4 4\t2 2\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t4 4\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t1 1\t1 1\t3 3\t2 2\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t2 2\t1 1\t4 4\t1 1\t2 2\t4 4\t1 1\t3 3\t1 1\t1 1\t3 3\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t3 3\t3 3\t2 2\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t4 4\t4 4\t1 1\t4 4\t4 4\t3 3\t1 1\t3 3\t4 4\t3 3\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t2 2\t4 4\t2 2\t2 2\t1 1\t4 4\t4 4\t2 2\t4 4\t4 4\t4 4\t3 3\t1 1\t3 3\t3 3\t3 3\t4 4\t4 4\t2 2\t1 1\t2 2\t4 4\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t1 1\t2 2\t1 1\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t2 2\t3 3\t4 4\t1 1\t3 3\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t2 2\t1 1\t3 3\t3 3\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t2 2\t4 4\t2 
2\t4 4\t1 1\t2 2\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t3 3\t1 1\t4 4\t4 4\t3 3\t3 3\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t3 3\t3 3\t2 2\t1 1\t4 4\t3 3\t4 4\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t2 2\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t3 3\t4 4\t4 4\t1 1\t3 3\t2 2\t2 2\t1 1\t1 1\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t2 2\t1 1\t3 3\t1 1\t1 1\t4 4\t4 4\t2 2\t4 4\t3 3\t3 3\t1 1\t2 2\t2 2\t4 4\t2 2\t3 3\t1 1\t1 1\t3 3\t3 3\t1 1\t4 4\t1 1\t1 1\t3 3\t4 4\t2 2\t4 4\t4 4\t3 3\t2 2\t3 3\t3 3\t4 4\t4 4\t2 2\t2 2\t4 4\t2 2\t4 4\t4 4\t4 4\t2 2\t4 4\t4 4\t1 1\t4 4\t4 4\t3 3\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t3 3\t1 1\t1 1\t3 3\t1 1\t4 4\t3 3\t3 3\t1 1\t4 4\t1 1\t3 3\t3 3\t3 3\t1 1\t4 4\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t3 3\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t1 1\t3 3\t4 4\t1 1\t1 1\t2 2\t4 4\t3 3\t2 2\t3 3\t1 1\t1 1\t4 4\t4 4\t4 4\t3 3\t2 2\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t2 2\t1 1\t1 1\t4 4\t1 1\t3 3\t1 1\t3 3\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t3 3\t1 1\t3 3\t3 3\t1 1\t3 3\t4 4\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t4 4\t2 2\t4 4\t2 2\t4 4\t2 2\t1 1\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t4 4\t4 4\t2 2\t1 1\t2 2\t4 4\t1 1\t1 1\t1 1\t3 3\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t2 2\t3 3\t4 4\t1 1\t4 4\t3 3\t3 3\t3 3\t1 1\t4 4\t3 3\t3 3\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t2 2\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t2 2\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t4 4\t2 2\t4 4\t4 4\t3 3\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t4 4\t1 1\t2 2\t1 1\t4 4\t4 4\t4 4\t1 1\t2 2\t4 4\t1 1\t1 1\t3 3\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t2 2\t4 4\t4 4\t3 3\t1 1\t1 1\t4 4\t2 2\t1 1\t3 3\t1 1\t1 1\t4 4\t4 4\t3 3\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t2 2\t3 3\t4 4\t2 2\t4 4\t4 4\t2 2\t1 1\t2 
2\t1 1\t2 2\t1 1\t1 1\t4 4\t1 1\t1 1\t4 4\t3 3\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t3 3\t4 4\t2 2\t1 1\t2 2\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t4 4\t2 2\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t2 2\t3 3\t1 1\t1 1\t4 4\t4 4\t1 1\t2 2\t4 4\t1 1\t1 1\t2 2\t1 1\t4 4\t4 4\t4 4\t4 4\t3 3\t3 3\t4 4\t4 4\t2 2\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t3 3\t4 4\t3 3\t4 4\t4 4\t1 1\t3 3\t4 4\t1 1\t4 4\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t4 4\t3 3\t2 2\t4 4\t4 4\t4 4\t4 4\t4 4\t4 4\t4 4\t1 1\t4 4\t3 3\t1 1\t4 4\t1 1\t2 2\t3 3\t2 2\t4 4\t1 1\t1 1\t4 4\t3 3\t2 2\t4 4\t2 2\t1 1\t3 3\t4 4\t1 1\t1 1\t1 1\t3 3\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t3 3\t4 4\t2 2\t4 4\t1 1\t1 1\t4 4\t2 2\t4 4\t4 4\t4 4\t3 3\t1 1\t1 1\t3 3\t1 1\t2 2\t1 1\t3 3\t2 2\t1 1\t2 2\t1 1\t1 1\t1 1\t2 2\t1 1\t4 4\t3 3\t1 1\t3 3\t1 1\t1 1\t3 3\t4 4\t1 1\t3 3\t4 4\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t1 1\t2 2\t3 3\t3 3\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t4 4\t2 2\t1 1\t4 4\t2 2\t2 2\t4 4\t4 4\t1 1\t2 2\t4 4\t3 3\t4 4\t4 4\t4 4\t1 1\t3 3\t4 4\t2 2\t1 1\t4 4\t3 3\t1 1\t4 4\t2 2\t1 1\t4 4\t3 3\t1 1\t1 1\t4 4\t2 2\t4 4\t4 4\t4 4\t1 1\t4 4\t3 3\t4 4\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t1 1\t1 1\t1 1\t3 3\t4 4\t1 1\t1 1\t4 4\t1 1\t4 4\t3 3\t1 1\t1 1\t4 4\t3 3\t1 1\t4 4\t2 2\t4 4\t1 1\t2 2\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t1 1\t1 1\t2 2\t4 4\t3 3\t1 1\t2 2\t1 1\t1 1\t1 1\t4 4\t4 4\t4 4\t1 1\t4 4\t1 1\t3 3\t1 1\t2 2\t3 3\t2 2\t3 3\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t3 3\t4 4\t2 2\t1 1\t1 1\t4 4\t4 4\t4 4\t2 2\t4 4\t4 4\t4 4\t4 4\t2 2\t1 1\t3 3\t4 4\t4 4\t4 4\t1 1\t1 1\t3 3\t1 1\t3 3\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t3 3\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t1 1\t4 4\t1 1\t1 1\t1 1\t4 4\t1 1\t4 4\t4 4\t4 4\t2 2\t3 3\t3 3\t1 1\t1 1\t2 2\t4 4\t1 1\t1 1\t2 2\t4 4\t3 3\t2 2\t4 4\t4 4\t1 1\t4 4\t4 4\t4 4\t1 1\t1 
1\t4 4\n' |
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolFilter/VCFToolsFilter.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolFilter/VCFToolsFilter.pl Mon Mar 23 05:57:27 2015 -0400 |
[ |
#!/usr/bin/perl

# VCFToolsFilter.pl
#
# Command-line wrapper around vcftools: validates the options described in
# $usage, translates them into vcftools arguments and runs one vcftools
# command. Everything vcftools prints (stdout and stderr) is appended to
# "vcftools.log" in the current working directory.
#
# Exit status: dies (non-zero) on missing/invalid options or when vcftools
# cannot be started or returns a non-zero status.

use strict;
use warnings;
use Switch;
use Getopt::Long;
use Bio::SeqIO;

my $usage = qq~Usage:$0 <args> [<opts>]

where <args> are:

    -i, --input          <VCF input>
    -o, --out            <Output basename>

      <opts> are:

    -s, --samples        <Samples to be analyzed. Comma separated list>
    -c, --chromosomes    <Chromosomes to be analyzed. Comma separated list>
    -e, --export         <Output format (VCF/freq/plink). Default: VCF>
    -f, --frequency      <Minimum MAF. Default: 0.001>
    -m, --max_freq       <Maximum MAF. Default: 0.5>
    -a, --allow_missing  <Allowed missing data proportion per site. Must be comprised between 0 and 1. Default: 0>
    -n, --nb_alleles     <Accepted number of alleles (min,max). Default: 1,4>
    -t, --type           <Type of polymorphisms to keep (ALL/SNP/INDEL). Default: ALL>
    -b, --bounds         <Lower bound and upper bound for a range of sites to be processed (start,end). Only applied when exactly one chromosome is selected. Default: no restriction>
~;
$usage .= "\n";


# Checks an option value against $pattern (which must contain one capture
# group). Unset or false values ("0", "", undef) are returned unchanged;
# otherwise the captured text is returned, or the script dies with $message.
sub validate_option
{
	my ($value, $pattern, $message) = @_;
	if (!$value)
	{
		return $value;
	}
	if ($value =~ $pattern)
	{
		return $1;
	}
	die $message;
}

# Splits "a,b" into exactly two unsigned integers, or dies with $message.
# Guards against a single value (e.g. "--nb_alleles 2"), which would
# otherwise leave the second vcftools argument empty.
sub split_integer_pair
{
	my ($value, $message) = @_;
	my @parts = split(",", $value);
	if (scalar(@parts) != 2)
	{
		die $message;
	}
	foreach my $part (@parts)
	{
		if ($part !~ /^\d+$/)
		{
			die $message;
		}
	}
	return @parts;
}

# Wraps a string in single quotes for /bin/sh, escaping embedded single
# quotes, so file names with spaces or shell metacharacters stay one argument.
sub shell_quote
{
	my ($string) = @_;
	$string =~ s/'/'\\''/g;
	return "'" . $string . "'";
}


my ($input, $out);

# Defaults used when the corresponding option is not given.
my $frequency_max = 0.5;
my $frequency_min = 0.001;
my $missing_data  = 0;
my $export        = "VCF";
my $type          = "ALL";
my $nb_alleles;
my $bounds;
my $samples;
my $chromosomes;

GetOptions(
	"input=s"        => \$input,
	"out=s"          => \$out,
	"samples=s"      => \$samples,
	"chromosomes=s"  => \$chromosomes,
	"frequency=s"    => \$frequency_min,
	"max_freq=s"     => \$frequency_max,
	"allow_missing=s"=> \$missing_data,
	"export=s"       => \$export,
	"type=s"         => \$type,
	"nb_alleles=s"   => \$nb_alleles,
	"bounds=s"       => \$bounds,
);


die $usage
  if ( !$input || !$out);

# Every value that ends up on the vcftools command line is restricted to a
# small character set here; anything else aborts the run.
$samples       = validate_option($samples,       qr/^([\w\,]+)\s*$/,    "Error: Samples must be a comma separated list of string\n");
$chromosomes   = validate_option($chromosomes,   qr/^([\w\,]+)\s*$/,    "Error: Chromosomes must be a comma separated list of string\n");
$bounds        = validate_option($bounds,        qr/^([\d\,]+)\s*$/,    "Error: Bounds must be a comma separated list of integers\n");
$frequency_min = validate_option($frequency_min, qr/^([\d\.]+)\s*$/,    "Error: frequency must be a number\n");
$frequency_max = validate_option($frequency_max, qr/^([\d\.]+)\s*$/,    "Error: frequency must be a number\n");
$missing_data  = validate_option($missing_data,  qr/^([\d\.]+)\s*$/,    "Error: Missing data must be a number\n");
$nb_alleles    = validate_option($nb_alleles,    qr/^([\d\.\,]+)\s*$/,  "Error: Nb alleles must be two integers\n");
$export        = validate_option($export,        qr/^([\w]+)\s*$/,      "Error: Export must be a string\n");
$type          = validate_option($type,          qr/^([\w]+)\s*$/,      "Error: Type must be a string\n");


my @dnasamples;
if ($samples)
{
	@dnasamples = split(",",$samples);
}
my @nalleles;
if ($nb_alleles)
{
	@nalleles = split_integer_pair($nb_alleles, "Error: Nb alleles must be two integers\n");
}
my @boundaries;
if ($bounds)
{
	@boundaries = split_integer_pair($bounds, "Error: Bounds must be two integers (start,end)\n");
}
my @chromosomes_list;
if ($chromosomes)
{
	@chromosomes_list = split(",",$chromosomes);
}


# One "--indv <name>" per requested sample.
my $indiv_cmd = "";
if (@dnasamples)
{
	$indiv_cmd = "--indv " . join(" --indv ",@dnasamples);
}

# One "--chr <name>" per requested chromosome.
my $chrom_cmd = "";
if (@chromosomes_list)
{
	$chrom_cmd = "--chr " . join(" --chr ",@chromosomes_list);
}

# Output format: anything other than "freq" or "plink" writes a recoded VCF.
my $export_cmd = "--recode";
if ($export eq "freq")
{
	$export_cmd = "--freq";
}
if ($export eq "plink")
{
	$export_cmd = "--plink";
}

my $nb_alleles_cmd = "--min-alleles 1 --max-alleles 4";
if (@nalleles)
{
	$nb_alleles_cmd = "--min-alleles $nalleles[0] --max-alleles $nalleles[1]";
}

# The original script computed the position range but never passed it to
# vcftools, so --bounds had no effect. The vcftools manual states that
# --from-bp/--to-bp can only be used together with a single --chr, so the
# range is forwarded only in that case; otherwise it is ignored as before.
my $bounds_cmd = "";
if (@boundaries && scalar(@chromosomes_list) == 1)
{
	$bounds_cmd = "--from-bp $boundaries[0] --to-bp $boundaries[1]";
}

my $type_cmd = "";
if ($type eq "INDEL")
{
	$type_cmd = "--keep-only-indels";
}
if ($type eq "SNP")
{
	$type_cmd = "--remove-indels";
}

# NOTE(review): $missing_data is forwarded unchanged to --max-missing. The
# vcftools manual describes --max-missing as 0 = sites may be completely
# missing, 1 = no missing data allowed, which looks inverted with respect to
# the "allowed missing data proportion" wording in $usage -- confirm.
my @command_parts = (
	"vcftools",
	"--vcf " . shell_quote($input),
	"--out " . shell_quote($out),
	"--keep-INFO-all",
	"--remove-filtered-all",
	$type_cmd,
	$export_cmd,
	$chrom_cmd,
	$indiv_cmd,
	$nb_alleles_cmd,
	$bounds_cmd,
	"--maf $frequency_min",
	"--max-maf $frequency_max",
	"--max-missing $missing_data",
);

# Drop the optional parts that are empty, then let the shell handle the
# log redirection.
my @used_parts;
foreach my $part (@command_parts)
{
	if (length($part) > 0)
	{
		push(@used_parts, $part);
	}
}
my $command = join(" ", @used_parts) . " >>vcftools.log 2>&1";

my $status = system($command);
if ($status == -1)
{
	die "Error: vcftools could not be started: $!\n";
}
if ($status != 0)
{
	die "Error: vcftools failed (wait status $status), see vcftools.log\n";
}
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolFilter/test-data/result.log --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolFilter/test-data/result.log Mon Mar 23 05:57:27 2015 -0400 |
b |
@@ -0,0 +1,21 @@ + +VCFtools - v0.1.12b +(C) Adam Auton and Anthony Marcketta 2009 + +Parameters as interpreted: + --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat + --chr chr1 + --recode-INFO-all + --maf 0.001 + --max-alleles 4 + --max-maf 0.5 + --min-alleles 2 + --max-missing 1 + --out filtered + --recode + --remove-filtered-all + +After filtering, kept 1 out of 1 Individuals +Outputting VCF file... +After filtering, kept 3616 out of a possible 4955 Sites +Run Time = 0.00 seconds |
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolFilter/test-data/result.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolFilter/test-data/result.vcf Mon Mar 23 05:57:27 2015 -0400 |
[ |
b'@@ -0,0 +1,3661 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=LowQual,Description="Low quality">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n+##INFO=<ID=AF,Number=A,Type'..b'0012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:30,25:55:99:802,0,993\n+chr1\t188173\t.\tG\tA\t697.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.617;DP=42;Dels=0.00;FS=2.786;HaplotypeScore=1.9991;MLEAC=1;MLEAF=0.500;MQ=59.09;MQ0=0;MQRankSum=-0.013;QD=16.61;ReadPosRankSum=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n+chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n+chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n+chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n+chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n+chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n+chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n+chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n+chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n+chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n' |
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolFilter/test-data/sample.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolFilter/test-data/sample.vcf Mon Mar 23 05:57:27 2015 -0400 |
[ |
b'@@ -0,0 +1,5000 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=LowQual,Description="Low quality">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n+##INFO=<ID=AF,Number=A,Type'..b'm=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n+chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n+chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n+chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n+chr1\t188357\t.\tC\tT\t1327.77\t.\tAC=2;AF=1.00;AN=2;DP=36;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=24.46;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t1/1:0,36:36:99:1356,105,0\n+chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n+chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n+chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n+chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n+chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n+chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n' |
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolFilter/vcfToolsFilter.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolFilter/vcfToolsFilter.sh Mon Mar 23 05:57:27 2015 -0400 |
[ |
#!/bin/bash
#
# Galaxy wrapper around VCFToolsFilter.pl.
#
# Runs the Perl filter script located next to this wrapper, then copies the
# file(s) it produced (named after the output basename) and vcftools.log to
# the destination paths given on the command line.
#
# Positional arguments:
#   $1   input VCF file
#   $2   output basename handed to the Perl script (--out)
#   $3   destination of the main output file
#   $4   destination of the log file
#   $5   export format: "VCF", "freq", anything else selects ped/map output
#   $6   minimum frequency (--frequency)
#   $7   maximum frequency (--max_freq)
#   $8   allowed missing data proportion (--allow_missing)
#   $9   minimum number of alleles
#   $10  maximum number of alleles
#   $11  polymorphism type (--type)
#   $12  lower bound
#   $13  upper bound
#   $14  comma separated sample list, or "None"
#   $15  comma separated chromosome list, or "None"
#   $16  destination of the .map file (only used for ped/map output)
#
# Exit status: the exit status of VCFToolsFilter.pl.

tool_path=$(dirname "$0")

filein=$1
fileout_label=$2
fileout=$3
filelog=$4
# Named export_format / polymorphism_type rather than export / type so the
# variables do not share a name with shell builtins.
export_format=$5
frequency=$6
max_freq=$7
allow_missing=$8
nb_alleles_min=$9
nb_alleles_max=${10}
polymorphism_type=${11}
bound_start=${12}
bound_end=${13}
fileout_map=${16}

# Optional options are kept in arrays so each value stays a single argument
# and no option is emitted at all when the caller passed "None".
samples_args=()
if [ "${14}" != "None" ]
then
    samples_args=(--samples "${14}")
fi

chromosomes_args=()
if [ "${15}" != "None" ]
then
    chromosomes_args=(--chromosomes "${15}")
fi

# Reversed ranges are swapped (with a warning) instead of being rejected.
if [ "$bound_start" -gt "$bound_end" ]
then
    tmp=$bound_start
    bound_start=$bound_end
    bound_end=$tmp
    echo "Warning : Lower bound must be lower than greater bound!" >&2
fi

if [ "$nb_alleles_min" -gt "$nb_alleles_max" ]
then
    tmp=$nb_alleles_min
    nb_alleles_min=$nb_alleles_max
    nb_alleles_max=$tmp
    echo "Warning : Minimum number of alleles must be lower than maximum number of allele!" >&2
fi

perl "$tool_path/VCFToolsFilter.pl" \
    --input "$filein" \
    --out "$fileout_label" \
    --export "$export_format" \
    --frequency "$frequency" \
    --max_freq "$max_freq" \
    --allow_missing "$allow_missing" \
    --nb_alleles "$nb_alleles_min,$nb_alleles_max" \
    --type "$polymorphism_type" \
    --bounds "$bound_start,$bound_end" \
    "${samples_args[@]}" \
    "${chromosomes_args[@]}"
status=$?

if [ "$status" -eq 0 ]
then
    # The intermediate file is removed only once it has been copied.
    if [ "$export_format" = "VCF" ]
    then
        cp -- "$fileout_label.recode.vcf" "$fileout" && rm -f -- "$fileout_label.recode.vcf"
    elif [ "$export_format" = "freq" ]
    then
        cp -- "$fileout_label.frq" "$fileout" && rm -f -- "$fileout_label.frq"
    else
        cp -- "$fileout_label.ped" "$fileout" \
            && cp -- "$fileout_label.map" "$fileout_map" \
            && rm -f -- "$fileout_label.ped" "$fileout_label.map"
    fi
else
    echo "Error : VCFToolsFilter.pl exited with status $status" >&2
fi

# The log is delivered even when filtering failed, so the failure can be
# inspected; it is skipped only when no log was written at all.
if [ -f vcftools.log ]
then
    cp -- vcftools.log "$filelog"
    rm -f -- vcftools.log
fi

exit "$status"
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolFilter/vcfToolsFilter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolFilter/vcfToolsFilter.xml Mon Mar 23 05:57:27 2015 -0400 |
[ |
b'@@ -0,0 +1,268 @@\n+<tool id="sniplay_vcftoolsfilter" name="VCF tools filter" version="1.1.1">\n+ \n+ <!-- [REQUIRED] Tool description displayed after the tool name -->\n+ <description> Filter VCF using VCFtools</description>\n+ \n+ <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->\n+ <requirements>\n+ <requirement type="binary">perl</requirement>\n+\t<requirement type="package" version="0.1.12b">VCFtools</requirement>\n+ </requirements>\n+ \n+ <!-- [OPTIONAL] Command to be executed to get the tool\'s version string -->\n+ <version_command>\n+<!--\n+ tool_binary -v\n+-->\n+ </version_command>\n+ \n+ <!-- [REQUIRED] The command to execute -->\n+ <command interpreter="perl">\n+\tvcfToolsFilter.sh $filein $fileout_label $fileout $filelog $export $frequency $max_freq $allow_missing $nb_alleles_min $nb_alleles_max $type_p $bound_start $bound_end\n+\t#if str( $samples ) == "":\n+\t\'None\'\n+\t#else\n+\t$samples\n+\t#end if\n+\t#if str( $chromosomes ) == "":\n+\t\'None\'\n+\t#else\n+\t$chromosomes\n+\t#end if\n+\t#if str( $export ) == "plink":\n+\t$fileout_map\n+\t#else\n+\t\'\'\n+\t#end if\n+ </command>\n+ \n+ <!-- [REQUIRED] Input files and tool parameters -->\n+ <inputs>\n+\t<param name="filein" type="data" format="vcf" optional="false" label="VCF input" />\n+\t<param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>\n+\t<param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">\n+\t\t<validator type="regex" message="Please enter a comma separated list.">^\\w+(,\\w+)*$</validator>\n+\t</param>\n+\t<param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. 
Comma separated list">\n+\t <validator type="regex" message="Please enter a comma separated list.">^\\w+(,\\w+)*$</validator>\n+ </param>\n+\t<param name="export" type="select" label="Output format" >\n+\t <option value="VCF" selected="true">VCF</option>\n+\t <option value="freq">freq</option>\n+ <option value="plink">plink</option>\n+ </param>\n+\t<param name="frequency" type="float" value="0.001" label="Minimum MAF." help="Minimum frequency." />\n+\t<param name="max_freq" type="float" value="0.5" label="Maximum MAF." help="Maximum frequency." />\n+\t<param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />\n+\t<param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />\n+\t<param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />\n+ <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >\n+ <option value="ALL" selected="true">All</option>\n+ <option value="SNP">SNP</option>\n+ <option value="INDEL">Indel</option>\n+ </param>\n+\t<param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />\n+\t<param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." 
/>\n+ </inputs>\n+ \n+ <!-- [REQUIRED] Output files -->\n+ <outputs>\n+\t<data name="fileout" format="vcf" label="${fileout_label}.#if str($export)==\'plink\' then \'ped\' else \'\' # #if str($export)==\'freq\' then \'frq\' else \'\' # #if str($export)==\'VCF\' then \'vcf\' else \'\' #" >\n+\t\t<change_format>\n+ \t<when input="export" value="freq" format="tabular" />\n+\t\t\t<when input="export" value="plink" format="txt" />\n+\t\t</change_format>\t\n+\t</data>\n+\t<data name="fileout_map" format="txt" label="${fileout_label}.map">\n+\t\t<filter>(export == \'plink\')</filter>\n+\t</data>\n+\t<data name="filelog" format="txt" label='..b'alue="0.001" />\n+ <param name="max_freq" value="0.5" />\n+ <param name="allow_missing" value="0" />\n+ <param name="nb_alleles_min" value="2" />\n+\t<param name="nb_alleles_max" value="4" />\n+ <param name="type_p" value="ALL" />\n+ <param name="bound_start" value="1" />\n+ <param name="bound_end" value="100000000" />\n+ <output name="fileout" file="result.vcf" />\n+ <output name="filelog" file="result.log" />\n+ </test>\n+ </tests>\n+ \n+ <!-- [OPTIONAL] Help displayed in Galaxy -->\n+ <help>\n+\n+.. class:: infomark\n+\n+**Authors** \n+\n+---------------------------------------------------\n+\n+.. class:: infomark\n+\n+**Please cite** If you use this tool, please cite Dereeper et al. 
2015 in prep.\n+\n+---------------------------------------------------\n+\n+================\n+VCF tools filter\n+================\n+\n+-----------\n+Description\n+-----------\n+\n+ Filter VCF file \n+\n+-----------------\n+Workflow position\n+-----------------\n+\n+**Upstream tools**\n+\n+=========== ========================== =======\n+Name output file(s) format \n+=========== ========================== =======\n+=========== ========================== =======\n+\n+\n+**Downstream tools**\n+\n+=========== ========================== =======\n+Name output file(s) format\n+=========== ========================== =======\n+=========== ========================== =======\n+\n+\n+----------\n+Input file\n+----------\n+\n+VCF file\n+\tVCF file with all SNPs\n+\n+----------\n+Parameters\n+----------\n+\n+Output file basename\n+\tPrefix for the output VCF file\n+\n+Samples\n+ Samples to be analyzed. Comma separated list\n+\n+Chromosomes\n+\tChromosomes to be analyzed. Comma separated list\n+\n+Output format\n+\tVCF/freq/plink\n+\n+Minimum MAF\n+\tMinimum frequency\n+\n+Maximum MAF\n+\tMaximum frequency\n+\n+Missing data proportion\n+\tAllowed missing data proportion per site. 
Must be comprised between 0 and 1.\n+\n+Number of alleles\n+\tAccepted number of alleles min and max.\n+\n+Polymorphisms\n+\tType of polymorphisms to keep (ALL/SNP/INDEL).\n+Bounds\n+\tLower bound and upper bound for a range of sites to be processed.\n+\n+------------\n+Output files\n+------------\n+\n+VCF file\n+\tVCF file filtered \n+\n+Log file\n+\n+---------------------------------------------------\n+\n+---------------\n+Working example\n+---------------\n+\n+Input files\n+===========\n+\n+VCF file\n+---------\n+\n+::\n+\n+\t#fileformat=VCFv4.1\n+\t#FILTER=<ID=LowQual,Description="Low quality">\n+\t#FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+\t[...]\n+\tCHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tCATB1\n+\tchr1\t2209\t.\tG\tT\t213.84\t.\tAC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)\tGT:AD:DP:GQ:PL\t1/1:0,7:7:18:242,18,0\n+\n+\n+Parameters\n+==========\n+\n+Output name -> filtered_chr1\n+\n+Chromosomes -> chr1\n+\n+Output format -> VCF\n+\n+Minimum MAF -> 0.001\n+\n+Maximum MAF -> 0.5\n+\n+Missing data proportion -> 1\n+\n+Number of alleles min -> 2\n+\n+Number of alleles max -> 4\n+\n+Polymorphisms -> All\n+\n+Lower bound -> 1\n+\n+Upper bound -> 100000000\n+\n+\n+Output files\n+============\n+\n+filtered_genelist_intron.vcf\n+---------\n+\n+::\n+\n+ #fileformat=VCFv4.1\n+ #FILTER=<ID=LowQual,Description="Low quality">\n+ #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+ [...]\n+ CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 
CATB1\n+\tchr1\t5059\t.\tC\tG\t146.84\t.\tAC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)\tGT:AD:DP:GQ:PL\t1/1:0,8:8:18:175,18,0\n+\n+\n+ </help>\n+ \n+</tool>\n' |
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/VCFToolsStats.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/VCFToolsStats.pl Mon Mar 23 05:57:27 2015 -0400 |
[ |
#!/usr/bin/perl

# VCFToolsStats.pl - annotation keyword counts and vcftools reports for a VCF.
#
# Counts the lines of the input VCF that contain a few annotation keywords,
# runs vcftools three times (--het, --TsTv-summary, --missing-indv) with the
# given output basename, and writes the derived counts to <out>.annotation.
# Everything vcftools prints is appended to vcftools.log in the current
# working directory.
#
# Exit status: 0 on success, 1 when at least one vcftools run failed.
# Dies on missing arguments or when the input or annotation file cannot be
# opened.

use strict;
use warnings;
# NOTE(review): Switch and Bio::SeqIO are not used anywhere in this script.
# They are kept only so that the declared dependencies stay unchanged.
use Switch;
use Getopt::Long;
use Bio::SeqIO;

# NOTE(review): the exact spacing inside this usage text could not be
# recovered from the reviewed copy - confirm against the original layout.
my $usage = qq~Usage:$0 <args> [<opts>]

where <args> are:

    -i, --input          <VCF input>
    -o, --out            <output basename>
~;
$usage .= "\n";

my $VCFTOOLS_LOG = "vcftools.log";

my ($input,$out);

GetOptions(
    "input=s"        => \$input,
    "out=s"          => \$out
);

# Both arguments are required: $out prefixes every file written below.
if (!$input || !$out)
{
    die $usage;
}

# Run vcftools with the given argument list, appending its standard output
# and standard error to $log_file. The list form of system() is used so that
# no argument is ever interpreted by a shell. Returns the wait status.
sub run_vcftools
{
    my ($log_file, @args) = @_;

    open(my $saved_out, '>&', \*STDOUT) or die "Error: cannot save STDOUT: $!\n";
    open(my $saved_err, '>&', \*STDERR) or die "Error: cannot save STDERR: $!\n";

    open(STDOUT, '>>', $log_file) or die "Error: cannot append to $log_file: $!\n";
    open(STDERR, '>&', \*STDOUT) or die "Error: cannot redirect STDERR: $!\n";

    my $status = system("vcftools", @args);

    # Put the original streams back before reporting anything.
    open(STDOUT, '>&', $saved_out) or die "Error: cannot restore STDOUT: $!\n";
    open(STDERR, '>&', $saved_err) or die "Error: cannot restore STDERR: $!\n";
    close($saved_out);
    close($saved_err);

    return $status;
}

# Count, in a single pass, the lines containing each keyword. Header lines
# are counted too, exactly as one "grep -c KEYWORD" per keyword would do.
my %count_of = (
    "mRNA"                  => 0,
    "INTERGENIC"            => 0,
    "INTRON"                => 0,
    "UTR"                   => 0,
    "NON_SYNONYMOUS_CODING" => 0
);

open(my $VCF, '<', $input) or die "Error: cannot read $input: $!\n";
while (my $line = <$VCF>)
{
    foreach my $keyword (keys(%count_of))
    {
        if (index($line, $keyword) != -1)
        {
            $count_of{$keyword}++;
        }
    }
}
close($VCF);

my $nb_gene       = $count_of{"mRNA"};
my $nb_intergenic = $count_of{"INTERGENIC"};
my $nb_intron     = $count_of{"INTRON"};
my $nb_UTR        = $count_of{"UTR"};

# Exonic and synonymous counts are obtained by subtraction, not by matching.
my $nb_exon = $nb_gene - $nb_intron - $nb_UTR;
my $nb_ns   = $count_of{"NON_SYNONYMOUS_CODING"};
my $nb_s    = $nb_exon - $nb_ns;

# The three reports are independent: a failed run is reported and the
# remaining ones are still attempted. A fourth report (--hardy) was already
# disabled in the original script and stays disabled.
my $failed_runs = 0;
foreach my $report ("--het", "--TsTv-summary", "--missing-indv")
{
    my $status = run_vcftools($VCFTOOLS_LOG, "--vcf", $input, "--remove-filtered-all", "--out", $out, $report);
    if ($status != 0)
    {
        warn "Error: vcftools $report failed (wait status $status), see $VCFTOOLS_LOG\n";
        $failed_runs++;
    }
}

# NOTE(review): label and value are separated by a single space here, as
# shown in the reviewed copy - confirm the original did not use a tab.
open(my $G, '>', "$out.annotation") or die "Error: cannot write $out.annotation: $!\n";
print $G "Genic $nb_gene\n";
print $G "Intergenic $nb_intergenic\n";
print $G "========\n";
print $G "Intron $nb_intron\n";
print $G "Exon $nb_exon\n";
print $G "UTR $nb_UTR\n";
print $G "========\n";
print $G "Non-syn $nb_ns\n";
print $G "Synonym $nb_s\n";
close($G) or die "Error: cannot finish writing $out.annotation: $!\n";

if ($failed_runs)
{
    exit(1);
}
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/test-data/result.TsTv.summary --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/result.TsTv.summary Mon Mar 23 05:57:27 2015 -0400 |
b |
@@ -0,0 +1,9 @@ +MODEL COUNT +AC 371 +AG 1467 +AT 562 +CG 330 +CT 1659 +GT 397 +Ts 3126 +Tv 1660 |
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/test-data/result.annotation --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/result.annotation Mon Mar 23 05:57:27 2015 -0400 |
b |
@@ -0,0 +1,9 @@ +Genic 4489 +Intergenic 466 +======== +Intron 960 +Exon 3248 +UTR 281 +======== +Non-syn 226 +Synonym 3022 |
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/test-data/result.het --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/result.het Mon Mar 23 05:57:27 2015 -0400 |
b |
@@ -0,0 +1,2 @@ +INDV O(HOM) E(HOM) N_SITES F +CATB1 0 0.0 3616 0.00000 |
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/test-data/result.imiss --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/result.imiss Mon Mar 23 05:57:27 2015 -0400 |
b |
@@ -0,0 +1,2 @@ +INDV N_DATA N_GENOTYPES_FILTERED N_MISS F_MISS +CATB1 4813 0 0 0 |
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/test-data/result.log --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/result.log Mon Mar 23 05:57:27 2015 -0400 |
b |
@@ -0,0 +1,44 @@ + +VCFtools - v0.1.12b +(C) Adam Auton and Anthony Marcketta 2009 + +Parameters as interpreted: + --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat + --het + --out vcf_stats + --remove-filtered-all + +After filtering, kept 1 out of 1 Individuals +Outputting Individual Heterozygosity + Individual Heterozygosity: Only using biallelic SNPs. +After filtering, kept 4813 out of a possible 4955 Sites +Run Time = 0.00 seconds + +VCFtools - v0.1.12b +(C) Adam Auton and Anthony Marcketta 2009 + +Parameters as interpreted: + --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat + --out vcf_stats + --TsTv-summary + --remove-filtered-all + +After filtering, kept 1 out of 1 Individuals +Outputting Ts/Tv summary +Ts/Tv ratio: 1.883 +After filtering, kept 4813 out of a possible 4955 Sites +Run Time = 0.00 seconds + +VCFtools - v0.1.12b +(C) Adam Auton and Anthony Marcketta 2009 + +Parameters as interpreted: + --vcf /w/galaxy/galaxy4gwen/galaxy-dist/database/files/000/dataset_21.dat + --missing-indv + --out vcf_stats + --remove-filtered-all + +After filtering, kept 1 out of 1 Individuals +Outputting Individual Missingness +After filtering, kept 4813 out of a possible 4955 Sites +Run Time = 0.00 seconds |
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/test-data/sample.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/test-data/sample.vcf Mon Mar 23 05:57:27 2015 -0400 |
[ |
b'@@ -0,0 +1,5000 @@\n+##fileformat=VCFv4.1\n+##FILTER=<ID=LowQual,Description="Low quality">\n+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##GATKCommandLine=<ID=UnifiedGenotyper,Version=2.7-4-g6f46d11,Date="Fri Nov 01 16:17:42 CET 2013",Epoch=1383319062999,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/scratch/hueber-35211/CATB1.RG.sorted.indelrealigned.bam] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[BadCigar] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/projects/coffee_snp/donnees_genomiques/pseudomolecules.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=4 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null 
read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=SNP pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name= source=UNBOUND) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">\n+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as 
listed">\n+##INFO=<ID=AF,Number=A,Type'..b'm=1.599;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:20,22:42:99:726,0,669\n+chr1\t188266\t.\tA\tG\t878.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.261;DP=56;Dels=0.00;FS=2.268;HaplotypeScore=3.8663;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.827;QD=15.69;ReadPosRankSum=0.412;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:28,27:56:99:907,0,965\n+chr1\t188270\t.\tA\tG\t850.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.747;DP=54;Dels=0.00;FS=3.828;HaplotypeScore=4.8662;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.692;QD=15.75;ReadPosRankSum=0.709;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,27:54:99:879,0,898\n+chr1\t188311\t.\tT\tG\t901.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.508;DP=53;Dels=0.00;FS=2.345;HaplotypeScore=0.7340;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.454;QD=17.01;ReadPosRankSum=0.223;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,28:53:99:930,0,831\n+chr1\t188357\t.\tC\tT\t1327.77\t.\tAC=2;AF=1.00;AN=2;DP=36;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=24.46;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t1/1:0,36:36:99:1356,105,0\n+chr1\t188364\t.\tG\tC\t578.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.285;DP=40;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.585;QD=14.47;ReadPosRankSum=-0.612;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190
|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:22,18:40:99:607,0,770\n+chr1\t188393\t.\tT\tC\t515.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.106;DP=42;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.191;QD=12.28;ReadPosRankSum=-1.385;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:24,18:42:99:544,0,828\n+chr1\t188395\t.\tC\tG\t543.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-1.484;DP=41;Dels=0.00;FS=0.000;HaplotypeScore=2.5781;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.775;QD=13.26;ReadPosRankSum=-1.773;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:23,18:41:99:572,0,791\n+chr1\t188416\t.\tT\tC\t397.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.717;DP=39;Dels=0.00;FS=1.302;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-0.893;QD=10.20;ReadPosRankSum=0.571;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:25,14:39:99:426,0,821\n+chr1\t188438\t.\tC\tA\t930.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.123;DP=50;Dels=0.00;FS=5.900;HaplotypeScore=0.0000;MLEAC=1;MLEAF=0.500;MQ=59.41;MQ0=0;MQRankSum=0.020;QD=18.62;ReadPosRankSum=-0.472;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|),UTR_5_PRIME(MODIFIER||||Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:21,29:50:99:959,0,659\n+chr1\t188621\t.\tG\tA\t704.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.090;DP=49;Dels=0.00;FS=5.986;HaplotypeScore=0.9996;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=0.090;QD=14.38;ReadPosRankSum=-0.774;EFF=DOWNSTREAM(MODIFIER||||Cc01g00190|mRNA||GSCOCT00012415001|)
,SYNONYMOUS_CODING(LOW|SILENT|ttG/ttA|L4|Cc01g00180|mRNA||GSCOCT00012416001|Exon_chr1_188034_188856)\tGT:AD:DP:GQ:PL\t0/1:27,22:49:99:733,0,926\n' |
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/vcfToolsStats.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/vcfToolsStats.sh Mon Mar 23 05:57:27 2015 -0400 |
b |
#!/bin/bash
#
# Galaxy wrapper for VCFToolsStats.pl.
#
# Arguments:
#   $1  VCF input
#   $2  basename used as prefix for the files produced in the working directory
#   $3  destination of <basename>.annotation
#   $4  destination of <basename>.het
#   $5  destination of <basename>.imiss
#   $6  destination of <basename>.TsTv.summary
#   $7  destination of vcftools.log
#
# Exits non-zero when the Perl script fails or an expected file is missing.

if [ "$#" -lt 7 ]; then
    echo "Usage: $0 <vcf> <basename> <annotation> <het> <imiss> <TsTv.summary> <log>" >&2
    exit 1
fi

tool_path=$(dirname "$0")

filein=$1
fileout_label=$2
fileout_annot=$3
fileout_het=$4
fileout_imiss=$5
fileout_sum=$6
filelog=$7

# Copy a produced file onto its destination, then delete the working copy.
# The working copy is kept when the copy fails.
collect() {
    cp -- "$1" "$2" && rm -f -- "$1"
}

status=0

perl "$tool_path/VCFToolsStats.pl" --input "$filein" --out "$fileout_label" || status=$?

collect "$fileout_label.annotation"   "$fileout_annot" || status=1
collect "$fileout_label.het"          "$fileout_het"   || status=1
collect "$fileout_label.imiss"        "$fileout_imiss" || status=1
collect "$fileout_label.TsTv.summary" "$fileout_sum"   || status=1

collect vcftools.log "$filelog" || status=1

exit "$status"
b |
diff -r 000000000000 -r 3e19d0dfcf3e VCFToolsStats/vcfToolsStats.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFToolsStats/vcfToolsStats.xml Mon Mar 23 05:57:27 2015 -0400 |
[ |
@@ -0,0 +1,214 @@ +<tool id="sniplay_vcftoolsstats" name="VCF tools Stats" version="1.0.0"> + + <!-- [REQUIRED] Tool description displayed after the tool name --> + <description> Various statistics from VCF using VCFtools</description> + + <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work --> + <requirements> + <requirement type="binary">perl</requirement> + <requirement type="package" version="0.1.13">VCFtools</requirement> + </requirements> + + <!-- [OPTIONAL] Command to be executed to get the tool's version string --> + <version_command> +<!-- + tool_binary -v +--> + </version_command> + + <!-- [REQUIRED] The command to execute --> + <command interpreter="perl"> + vcfToolsStats.sh $filein $fileout_label $fileout_annot $fileout_het $fileout_imiss $fileout_sum $filelog + </command> + + <!-- [REQUIRED] Input files and tool parameters --> + <inputs> + <param name="filein" type="data" format="vcf" optional="false" label="VCF input" /> + <param name="fileout_label" type="text" value="vcf_stats" optional="false" label="Output file basename"/> + </inputs> + + <!-- [REQUIRED] Output files --> + <outputs> + <data name="fileout_annot" format="txt" label="${fileout_label}.annotation" /> + <data name="fileout_het" format="txt" label="${fileout_label}.het" /> + <data name="fileout_imiss" format="txt" label="${fileout_label}.imiss" /> + <data name="fileout_sum" format="txt" label="${fileout_label}.TsTv.summary" /> + <data name="filelog" format="txt" label="${fileout_label}.log" /> + </outputs> + + <!-- [STRONGLY RECOMMANDED] Exit code rules --> + <stdio> + <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> + <exit_code range="1:" level="fatal" /> + </stdio> + + <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin --> + <tests> + <!-- [HELP] Test files have to be in the ~/test-data directory --> + <test> + <param name="filein" value="sample.vcf" /> + <output name="fileout_annot" 
file="result.annotation" /> + <output name="fileout_het" file="result.het" /> + <output name="fileout_imiss" file="result.imiss" /> + <output name="fileout_sum" file="result.TsTv.summary" /> + <output name="filelog" file="result.log" /> + </test> + </tests> + + <!-- [OPTIONAL] Help displayed in Galaxy --> + <help> + +.. class:: infomark + +**Authors** + +--------------------------------------------------- + +.. class:: infomark + +**Please cite** If you use this tool, please cite Dereeper et al. 2015 in prep. + +--------------------------------------------------- + +================ +VCF tools stats +================ + +----------- +Description +----------- + + Compute statistics on VCF file + +----------------- +Workflow position +----------------- + +**Upstream tools** + +=========== ========================== ======= +Name output file(s) format +=========== ========================== ======= +=========== ========================== ======= + + +**Downstream tools** + +=========== ========================== ======= +Name output file(s) format +=========== ========================== ======= +=========== ========================== ======= + + +---------- +Input file +---------- + +VCF file + VCF file with all SNPs + +---------- +Parameters +---------- + +Output file basename + Prefix for the output files + +------------ +Output files +------------ + +.annotation file + Statistics on annotation/location along genome + +.het file + Statistics on heterozygosity of the individuals + +.imiss + Statistics on missing data of the individuals +.TsTv.summary + Statistics on mutation types and transition/transversion number + +.log file + +--------------------------------------------------- + +--------------- +Working example +--------------- + +Input files +=========== + +VCF file +--------- + +:: + + #fileformat=VCFv4.1 + #FILTER=<ID=LowQual,Description="Low quality"> + #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the 
order listed"> + [...] + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1 + chr1 2209 . G T 213.84 . AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0 + + +Parameters +========== + +Output name -> vcf_stat + + +Output files +============ + +.annotation file +---------------- + +:: + + Genic 4489 + Intergenic 466 + ======== + Intron 960 + Exon 3248 + UTR 281 + ======== + Non-syn 226 + Synonym 3022 + +.het file +--------- + +:: + + INDV O(HOM) E(HOM) N_SITES F + CATB1 0 0.0 3616 0.00000 + +.imiss file +----------- + +:: + + INDV N_DATA N_GENOTYPES_FILTERED N_MISS F_MISS + CATB1 4813 0 0 0 + +.TsTv.summary file +------------------ + +:: + + MODEL COUNT + AC 371 + AG 1467 + AT 562 + CG 330 + CT 1659 + GT 397 + Ts 3126 + Tv 1660 + + + </help> + +</tool> |
b |
diff -r 000000000000 -r 3e19d0dfcf3e admixture/Admixture.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/admixture/Admixture.pl Mon Mar 23 05:57:27 2015 -0400 |
[ |
#!/usr/bin/perl

# Admixture.pl
#
# Converts a HapMap genotype table into PLINK ped/map files, builds the
# binary PLINK fileset, runs "admixture --cv" for every K between --kmin and
# --maxK, and keeps the K with the lowest cross-validation error.
#
# Files written inside --directory:
#   input.map, input.ped   PLINK text fileset derived from the HapMap input
#   out.*                  PLINK binary fileset (plink.log holds plink output)
#   log.<K>                admixture output for each K
#   logs                   all log.<K> files, concatenated
#   outputs.Q, outputs.P   all out.<K>.Q / out.<K>.P files, concatenated
#   output                 Q matrix of the best K, one labelled row per sample
#   log                    copy of log.<K> for the best K
#
# admixture itself writes out.<K>.Q and out.<K>.P to the current directory.

use strict;
use warnings;
use Getopt::Long;
use File::Copy qw(copy);

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
    -i, --input          <input HAPMAP>
    -o, --output         <output>
    -k, --kmin           <K min. int>
    -m, --maxK           <K max. int>
    -d, --directory      <temporary directory>
    -p, --path           <path to executables>
~;
$usage .= "\n";

# Rows of the transposed HapMap table skipped before sample rows are read.
my $HAPMAP_LEADING_ROWS = 11;

# Text written after each file appended to logs / outputs.Q / outputs.P.
my $SEPARATOR = "\n\n====================================\n\n\n";

# Run a command without going through a shell, appending its stdout and
# stderr to $log. Returns the raw wait status ($?).
sub run_logged {
    my ( $log, @command ) = @_;

    my $pid = fork();
    die "Error: cannot fork to run $command[0]: $!\n" if !defined $pid;

    if ( $pid == 0 ) {
        open( STDOUT, '>>', $log )     or die "Error: cannot append to $log: $!\n";
        open( STDERR, '>&', \*STDOUT ) or die "Error: cannot redirect stderr: $!\n";
        exec { $command[0] } @command;
        die "Error: cannot run $command[0]: $!\n";
    }

    waitpid( $pid, 0 );
    return $?;
}

# Append the content of $source, followed by the separator, to $target.
# A missing $source is reported but the separator is still written.
sub append_with_separator {
    my ( $source, $target ) = @_;

    open( my $to, '>>', $target ) or die "Error: cannot append to $target: $!\n";
    if ( open( my $from, '<', $source ) ) {
        while ( my $chunk = <$from> ) {
            print {$to} $chunk;
        }
        close($from);
    }
    else {
        warn "Warning: cannot read $source: $!\n";
    }
    print {$to} $SEPARATOR;
    close($to) or die "Error: cannot close $target: $!\n";
    return;
}

my ( $input, $output, $kmin, $kmax, $directory, $path );

GetOptions(
    "input=s"     => \$input,
    "output=s"    => \$output,
    "kmin=s"      => \$kmin,
    "maxK=s"      => \$kmax,
    "directory=s" => \$directory,
    "path=s"      => \$path
);

die $usage
  if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$path );

if ( $kmin =~ /^(\d+)\s*$/ ) {
    $kmin = $1;
}
else {
    die "Error: kmin must be an integer\n";
}
if ( $kmax =~ /^(\d+)\s*$/ ) {
    $kmax = $1;
}
else {
    die "Error: kmax must be an integer\n";
}

######################
# create map file
######################
open( my $H, '<', $input ) or die "Error: cannot read $input: $!\n";
open( my $M, '>', "$directory/input.map" )
  or die "Error: cannot write $directory/input.map: $!\n";
my $header = <$H>;    # first line is the column header
while ( my $row = <$H> ) {
    my @infos = split( /\t/, $row );
    next if @infos < 4;    # blank or truncated line: no marker to describe
    print {$M} join( "\t", $infos[2], $infos[0], "0", $infos[3] ) . "\n";
}
close($H);
close($M) or die "Error: cannot close $directory/input.map: $!\n";

######################
# create ped file
######################
# The transposed table is read straight from awk, one sample per row.
open( my $P2, '-|', 'awk', '-f', "$path/transpose.awk", $input )
  or die "Error: cannot run awk on $path/transpose.awk: $!\n";
open( my $P, '>', "$directory/input.ped" )
  or die "Error: cannot write $directory/input.ped: $!\n";

my $n = 0;
my @individus;
while ( my $row = <$P2> ) {
    $n++;
    next if $n <= $HAPMAP_LEADING_ROWS;
    next if $row !~ /^([^\s]+)\s+(.*)$/;

    my ( $ind, $genotyping_line ) = ( $1, $2 );
    push( @individus, $ind );
    my $ind_num = scalar @individus;

    print {$P} "$ind $ind_num 0 0 1 2";
    foreach my $genotype ( split( /\s/, $genotyping_line ) ) {
        $genotype =~ s/N/0/g;    # PLINK codes a missing allele as 0
        print {$P} " " . join( " ", split( "", $genotype ) );
    }
    print {$P} "\n";
}
close($P2) or die "Error: transposing $input with awk failed\n";
close($P)  or die "Error: cannot close $directory/input.ped: $!\n";

my $plink_status = run_logged(
    "$directory/plink.log",
    'plink', '--file', "$directory/input", '--out', "$directory/out",
    '--make-bed', '--noweb'
);
die "Error: plink failed, see $directory/plink.log\n" if $plink_status != 0;

###################################
# launch admixture for different K
###################################
my %cv_error_for;    # K => cross-validation error reported by admixture
for my $k ( $kmin .. $kmax ) {
    my $log = "$directory/log.$k";

    my $status =
      run_logged( $log, 'admixture', '--cv', "$directory/out.bed", $k );
    warn "Warning: admixture failed for K=$k, see $log\n" if $status != 0;

    if ( open( my $log_fh, '<', $log ) ) {
        while ( my $log_line = <$log_fh> ) {
            next if index( $log_line, 'CV' ) < 0;
            if ( $log_line =~ /: (\d+\.*\d*)$/ ) {
                $cv_error_for{$k} = $1;
            }
        }
        close($log_fh);
    }

    append_with_separator( $log,        "$directory/logs" );
    append_with_separator( "out.$k.Q", "$directory/outputs.Q" );
    append_with_separator( "out.$k.P", "$directory/outputs.P" );
}

# Lowest cross-validation error wins; the smaller K is preferred on a tie.
my ($best_K) =
  sort { $cv_error_for{$a} <=> $cv_error_for{$b} || $a <=> $b }
  keys(%cv_error_for);
die "Error: no cross-validation error found for K in $kmin..$kmax\n"
  if !defined $best_K;

open( my $best_q, '<', "out.$best_K.Q" )
  or die "Error: cannot read out.$best_K.Q: $!\n";
open( my $best_out, '>', "$directory/output" )
  or die "Error: cannot write $directory/output: $!\n";
print {$best_out} "<Covariate>\n";
print {$best_out} "<Trait>";
for my $j ( 1 .. $best_K ) {
    print {$best_out} " Q" . $j;
}
print {$best_out} "\n";

# Rows of the Q file are in the same order as the samples of the ped file.
my $i = 0;
while ( my $line = <$best_q> ) {
    $line =~ s/ /\t/g;
    my $ind = defined $individus[$i] ? $individus[$i] : '';
    print {$best_out} "$ind ";
    print {$best_out} $line;
    $i++;
}
close($best_q);
close($best_out) or die "Error: cannot close $directory/output: $!\n";

copy( "$directory/log.$best_K", "$directory/log" )
  or die "Error: cannot copy $directory/log.$best_K to $directory/log: $!\n";
b |
diff -r 000000000000 -r 3e19d0dfcf3e admixture/admixture.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/admixture/admixture.sh Mon Mar 23 05:57:27 2015 -0400 |
b |
#!/bin/bash
#
# Galaxy wrapper for Admixture.pl.
#
# Arguments:
#   $1  HapMap input
#   $2  destination of the concatenated Q outputs
#   $3  destination of the concatenated logs
#   $4  destination of the best-K output
#   $5  destination of the best-K log
#   $6  K min
#   $7  K max
#
# Exits non-zero when the Perl script fails or an expected file is missing.

if [ "$#" -lt 7 ]; then
    echo "Usage: $0 <hapmap> <outputs> <logs> <best_k_output> <best_k_logfile> <kmin> <kmax>" >&2
    exit 1
fi

input=$1
outputs=$2
logs=$3
best_k_output=$4
best_k_logfile=$5
kmin=$6
kmax=$7

directory=$(dirname "$0")
workdir="tmpdir$$"

mkdir "$workdir" || exit 1
# The working directory only holds intermediate files; remove it on exit.
trap 'rm -rf "$workdir"' EXIT

status=0

/usr/bin/perl "$directory/Admixture.pl" -i "$input" -o "$outputs" -k "$kmin" -m "$kmax" -d "$workdir" -p "$directory" || status=$?

mv "$workdir/output"    "$best_k_output"  || status=1
mv "$workdir/log"       "$best_k_logfile" || status=1
mv "$workdir/outputs.Q" "$outputs"        || status=1
mv "$workdir/logs"      "$logs"           || status=1

exit "$status"
b |
diff -r 000000000000 -r 3e19d0dfcf3e admixture/admixture.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/admixture/admixture.xml Mon Mar 23 05:57:27 2015 -0400 |
b |
@@ -0,0 +1,55 @@ +<tool id="admixture" name="Admixture" version="1.23"> + <description>a population structure from large SNP genotype datasets</description> + <requirements> + <requirement type="package" version="1.07">plink</requirement> + <requirement type="package" version="1.23">admixture</requirement> + </requirements> + <command interpreter="bash">./admixture.sh $input $outputs $logs $best_k_output $best_k_logfile $kmin $kmax + </command> + <inputs> + <param format="txt" name="input" type="data" label="Allelic file in Hapmap format" help="Allelic file in Hapmap format"/> + <param type="text" name="kmin" label="K min" value="1"/> + <param type="text" name="kmax" label="K max" value="5"/> + </inputs> + <outputs> + <data format="txt" name="best_k_output" label="Best K Output"/> + <data format="txt" name="best_k_logfile" label="Best K Logfile"/> + <data format="txt" name="outputs" label="All Outputs"/> + <data format="txt" name="logs" label="All Logs"/> + </outputs> + <help> + + +.. class:: infomark + +**Program encapsulated in Galaxy by Southgreen** + +.. class:: infomark + +**Admixture version 1.23** + +----- + +============== + Please cite: +============== + +"Fast model-based estimation of ancestry in unrelated individuals.", **D.H. Alexander, J. Novembre, and K. Lange.**, Genome Research, 19:1655-1664, 2009. + +----- + +=========== + Overview: +=========== + +ADMIXTURE is a program for estimating ancestry in a model-based manner from large autosomal SNP genotype datasets, where the individuals are unrelated (for example, the individuals in a case-control association study). + +----- + +For further information, please visit the Admixture_ website. + + +.. _Admixture: http://www.genetics.ucla.edu/software/admixture/index.html + </help> + +</tool> |
b |
diff -r 000000000000 -r 3e19d0dfcf3e admixture/transpose.awk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/admixture/transpose.awk Mon Mar 23 05:57:27 2015 -0400 |
[ |
#!/usr/bin/gawk -f

# Transpose a whitespace-delimited table: field i of input record j becomes
# field j of output line i. Output fields are separated by a single space;
# a record shorter than the widest one contributes an empty field.

{
    # Remember every field under (column, record) and track the widest record.
    if (NF > width) width = NF
    for (col = 1; col <= NF; col++)
        cell[col, NR] = $col
}

END {
    height = NR
    for (col = 1; col <= width; col++) {
        line = ""
        for (row = 1; row <= height; row++) {
            if ((col, row) in cell) line = line cell[col, row]
            if (row < height) line = line " "
        }
        printf "%s\n", line
    }
}
b |
diff -r 000000000000 -r 3e19d0dfcf3e snpEff/SnpEff.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/snpEff/SnpEff.pl Mon Mar 23 05:57:27 2015 -0400 |
[ |
#!/usr/bin/perl

# SnpEff.pl
#
# Annotates a VCF file with snpEff against a user-supplied genome:
#   1. creates a private snpEff data directory (named after the process id)
#      holding the FASTA and GFF under the genome name "myspecies";
#   2. derives a snpEff.config from the one found in $SNPEFF_JAR_PATH,
#      pointing data_dir to that private directory and declaring "myspecies";
#   3. runs "snpEff build -gff3" and then "snpEff eff", writing the annotated
#      VCF to --output and the HTML summary to --html;
#   4. removes the private directory.

use strict;
use warnings;
use Getopt::Long;
use File::Copy qw(copy);
use File::Path qw(make_path remove_tree);

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
    -i, --input          <input VCF>
    -o, --output         <output>
    -g, --gff            <GFF annotation>
    -f, --fasta          <Fasta of chromosomes>
    -h, --html           <HTML output>
~;
$usage .= "\n";

# Run a command without going through a shell. When $stdout_file is defined
# the command's standard output is written to that file; standard error is
# inherited. Returns the raw wait status ($?).
sub run_command {
    my ( $stdout_file, @command ) = @_;

    my $pid = fork();
    die "Error: cannot fork to run $command[0]: $!\n" if !defined $pid;

    if ( $pid == 0 ) {
        if ( defined $stdout_file ) {
            open( STDOUT, '>', $stdout_file )
              or die "Error: cannot write $stdout_file: $!\n";
        }
        exec { $command[0] } @command;
        die "Error: cannot run $command[0]: $!\n";
    }

    waitpid( $pid, 0 );
    return $?;
}

my ( $input, $output, $gff, $fasta, $html );

GetOptions(
    "input=s"  => \$input,
    "output=s" => \$output,
    "gff=s"    => \$gff,
    "fasta=s"  => \$fasta,
    "html=s"   => \$html
);

die $usage
  if ( !$input || !$output || !$fasta || !$gff || !$html );

if ( !-e $input ) {
    die "Error: VCF input does not exist\n";
}
if ( !-e $gff ) {
    die "Error: GFF input does not exist\n";
}
if ( !-e $fasta ) {
    die "Error: Fasta input does not exist\n";
}

# Directory holding snpEff.jar and the snpEff.config used as template.
my $SNPEFF_PATH = $ENV{SNPEFF_JAR_PATH};
die "Error: environment variable SNPEFF_JAR_PATH is not set\n"
  if !defined $SNPEFF_PATH || $SNPEFF_PATH eq '';

# Private data directory, named after the process id.
my $session = $$;
make_path( "$session/data/genomes", "$session/data/myspecies" );
die "Error: cannot create $session/data\n"
  if !-d "$session/data/genomes" || !-d "$session/data/myspecies";

copy( $fasta, "$session/data/genomes/myspecies.fa" )
  or die "Error: cannot copy $fasta: $!\n";
copy( $gff, "$session/data/myspecies/genes.gff" )
  or die "Error: cannot copy $gff: $!\n";

open( my $C, '<', "$SNPEFF_PATH/snpEff.config" )
  or die "Error: cannot read $SNPEFF_PATH/snpEff.config: $!\n";
open( my $C2, '>', "$session/snpEff.config" )
  or die "Error: cannot write $session/snpEff.config: $!\n";
while ( my $config_line = <$C> ) {
    if ( $config_line =~ /data_dir/ ) {
        print {$C2} "data_dir = ./data\n";
    }
    elsif ( $config_line =~ /^genomes/ ) {

        # A trailing backslash continues the value on the next line, so
        # "myspecies" is put at the head of the existing list.
        print {$C2} "genomes : \\\n";
        print {$C2} "myspecies, myspecies \\\n";
    }
    else {
        print {$C2} $config_line;
    }
}
print {$C2} "myspecies.genome : myspecies\n";
close($C);
close($C2) or die "Error: cannot close $session/snpEff.config: $!\n";

my $build_status = run_command(
    undef,
    'java', '-jar', "$SNPEFF_PATH/snpEff.jar", 'build',
    '-c', "$session/snpEff.config", '-gff3', 'myspecies'
);

my $eff_status;
if ( $build_status == 0 ) {
    $eff_status = run_command(
        $output,
        'java', '-jar', "$SNPEFF_PATH/snpEff.jar", 'eff',
        '-c', "$session/snpEff.config", '-o', 'vcf',
        '-no-downstream', '-no-upstream', 'myspecies',
        '-s', $html, $input
    );
}

# The private directory is removed whether or not snpEff succeeded.
remove_tree($session);

die "Error: snpEff build failed (wait status $build_status)\n"
  if $build_status != 0;
die "Error: snpEff eff failed (wait status $eff_status)\n"
  if $eff_status != 0;
b |
diff -r 000000000000 -r 3e19d0dfcf3e snpEff/snpEff-pipe.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/snpEff/snpEff-pipe.sh Mon Mar 23 05:57:27 2015 -0400 |
b |
#!/bin/bash
#
# Galaxy wrapper for SnpEff.pl.
#
# Arguments:
#   $1  VCF input
#   $2  reference genome (FASTA)
#   $3  GFF annotation
#   $4  annotated VCF output
#   $5  HTML statistics output
#   $6  log file; receives stdout and stderr of SnpEff.pl
#
# The exit status is the one of SnpEff.pl.

vcf=$1
genome=$2
gff=$3
output=$4
html=$5
log=$6

directory=$(dirname "$0")

/usr/bin/perl "$directory/SnpEff.pl" -i "$vcf" -f "$genome" -g "$gff" -o "$output" -h "$html" >> "$log" 2>&1
b |
diff -r 000000000000 -r 3e19d0dfcf3e snpEff/snpEff.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/snpEff/snpEff.xml Mon Mar 23 05:57:27 2015 -0400 |
b |
@@ -0,0 +1,63 @@ +<tool id="snpEff" name="SnpEff" version="4.0"> + <description>predicts SNP effect from a genomic VCF file</description> + <requirements> + <requirement type="package" version="4.0">snpEff</requirement> + </requirements> + + <!-- + You will need to change the path to wherever your installation is. + You can change the amount of memory used by snpEff, just change the -Xmx parameter (e.g. use -Xmx2G for 2Gb of memory) + --> + <command interpreter="bash">./snpEff-pipe.sh $vcf $genome $gff $output $statsFile $log</command> + <inputs> + <param format="vcf" name="vcf" type="data" label="VCF input file" help="Positions must be genomic positions"/> + <param format="fasta" name="genome" type="data" label="Reference genome in Fasta" help=""/> + <param format="gff3" name="gff" type="data" label="GFF annotation of the genome" help=""/> + + </inputs> + <outputs> + <data format="vcf" name="output" label="Annotated VCF" /> + <data format="html" name="statsFile" label="HTML statistics output"/> + <data format="txt" name="log" label="Log file"/> + </outputs> + + <help> + + + + + +.. class:: infomark + +**Program encapsulated in Galaxy by Southgreen** + +.. class:: infomark + +**SnpEff version 4.0** + +----- + +============== + Please cite: +============== + +"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", **Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM.**, Fly (Austin). 2012 Apr-Jun;6(2):80-92. + +----- + +=========== + Overview: +=========== + +Genetic variant annotation and effect prediction toolbox. It annotates and predicts the effects of variants on genes (such as amino acid changes). + +----- + +For further information, please visit the SnpEff_ website. + + +.. _SnpEff: http://snpeff.sourceforge.net/ + </help> + +</tool> + |
b |
diff -r 000000000000 -r 3e19d0dfcf3e tassel/tassel.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tassel/tassel.sh Mon Mar 23 05:57:27 2015 -0400 |
[ |
#!/bin/bash
#
# Galaxy wrapper around TASSEL's run_pipeline.pl.
#
# Arguments:
#   $1      analysis type: glm, mlm, ld or ck
#   $2-$4   destinations of the result files (how many are used depends on
#           the analysis type)
#   $5      log file; receives stdout and stderr of run_pipeline.pl
#   $6      directory the TASSEL command line exports its files into
#   $7...   passed unchanged to run_pipeline.pl
#
# Exits non-zero when run_pipeline.pl fails or an expected file is missing.

if [ "$#" -lt 6 ]; then
    echo "Usage: $0 <glm|mlm|ld|ck> <out1> <out2> <out3> <log> <outdir> [TASSEL arguments]" >&2
    exit 1
fi

analyseType=$1
out1=$2
out2=$3
out3=$4
log1=$5
galaxyOutDir=$6

# Remove the six wrapper arguments; what remains is the TASSEL command line.
shift 6

case "$analyseType" in
    glm|mlm|ld|ck)
        ;;
    *)
        echo "Error: unknown analysis type '$analyseType'" >&2
        exit 1
        ;;
esac

mkdir -p "$galaxyOutDir" || exit 1

status=0

run_pipeline.pl "$@" >> "$log1" 2>&1 || status=$?

# Move the exported tables onto the destinations. Nothing is moved for ld.
case "$analyseType" in
    glm)
        mv "$galaxyOutDir/TASSELGLM1.txt" "$out1" || status=1
        mv "$galaxyOutDir/TASSELGLM2.txt" "$out2" || status=1
        ;;
    mlm)
        mv "$galaxyOutDir/TASSELMLM1.txt" "$out1" || status=1
        mv "$galaxyOutDir/TASSELMLM2.txt" "$out2" || status=1
        mv "$galaxyOutDir/TASSELMLM3.txt" "$out3" || status=1
        ;;
    ck)
        mv "$galaxyOutDir/kinship.txt" "$out1" || status=1
        ;;
esac

exit "$status"
b |
diff -r 000000000000 -r 3e19d0dfcf3e tassel/tassel.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tassel/tassel.xml Mon Mar 23 05:57:27 2015 -0400 |
[ |
<tool id="Tassel" name="Tassel" version="5.0">
    <description> Software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium. </description>
    <requirements>
        <requirement type="package" version="5.0">tassel</requirement>
    </requirements>
    <!--
        Command line handed to tassel.sh.
        The first six arguments are consumed by the wrapper itself: analysis
        type, the three output datasets, the log dataset and a working
        directory named after the shell PID (tmpdir$$/). Everything after
        them is the TASSEL pipeline invocation, assembled below according to
        the selected analysis (mlm, glm, ld or ck) and to whether an optional
        structure file was supplied.

        Every Galaxy parameter is single-quoted so that a value containing
        whitespace or shell metacharacters is passed as a single argument.
    -->
    <command interpreter="bash">./tassel.sh '$analysis_opts.fonction_selector' '$output1' '$output2' '$output3' '$log' tmpdir$$/
#if $analysis_opts.fonction_selector == "mlm":
 -fork1 -h '$hapmap' -filterAlign -filterAlignMinFreq '$filterAlignMinFreq'
 -fork2 -r '$analysis_opts.trait'
 #if $analysis_opts.add_structure_file.additional_file_selector == "yes":
 -fork3 -q '$analysis_opts.add_structure_file.structure' -excludeLastTrait
 #end if
 -fork4 -k '$analysis_opts.kinship'
 -combineA -input1 -input2
 #if $analysis_opts.add_structure_file.additional_file_selector == "yes":
 -input3
 #end if
 -intersect -combineB -inputA -input4 -mlm -mlmVarCompEst '$analysis_opts.mlmVarCompEst' -mlmCompressionLevel '$analysis_opts.mlmCompressionLevel' -export tmpdir$$/TASSELMLM -runfork1 -runfork2
 #if $analysis_opts.add_structure_file.additional_file_selector == "yes":
 -runfork3
 #end if
 -runfork4

#else if $analysis_opts.fonction_selector == "glm":
 -fork1 -h '$hapmap' -filterAlign -filterAlignMinFreq '$filterAlignMinFreq'
 -fork2 -r '$analysis_opts.trait'
 #if $analysis_opts.add_file.additional_file_selector == "yes":
 -fork3 -q '$analysis_opts.add_file.structure' -excludeLastTrait
 #end if
 -combineA -input1 -input2
 #if $analysis_opts.add_file.additional_file_selector == "yes":
 -input3
 #end if
 -intersect -glm -export tmpdir$$/TASSELGLM -runfork1 -runfork2
 #if $analysis_opts.add_file.additional_file_selector == "yes":
 -runfork3
 #end if
#else if $analysis_opts.fonction_selector == "ld":
 -fork1 -h '$hapmap'
 -ld -ldType All -ldd png -ldplotsize 3000 -o '$output1' -runfork1
#else if $analysis_opts.fonction_selector == "ck":
 -fork1 -h '$hapmap'
 -ck -export tmpdir$$/kinship
 -runfork1
#end if
    </command>
    <inputs>
        <param format="txt" name="hapmap" type="data" label="HapMap file"/>
        <conditional name="analysis_opts">
            <param name="fonction_selector" type="select" label="Type of analysis">
                <option value="mlm" selected="True">MLM</option>
                <option value="glm">GLM</option>
                <option value="ld">Linkage Disequilibrium</option>
                <option value="ck">Kinship</option>
            </param>
            <when value="glm">
                <param format="txt" name="trait" type="data" label="Trait file"/>
                <conditional name="add_file">
                    <param name="additional_file_selector" type="select" label="Add structure file">
                        <option value="no" selected="True">no</option>
                        <option value="yes">yes</option>
                    </param>
                    <when value="yes">
                        <param format="txt" name="structure" type="data" label="Structure file"/>
                    </when>
                </conditional>
            </when>
            <when value="mlm">
                <param format="txt" name="trait" type="data" label="Trait file"/>
                <param format="txt" name="kinship" type="data" label="Kinship file"/>
                <conditional name="add_structure_file">
                    <param name="additional_file_selector" type="select" label="Add structure file">
                        <option value="no" selected="True">no</option>
                        <option value="yes">yes</option>
                    </param>
                    <when value="yes">
                        <param format="txt" name="structure" type="data" label="Structure file"/>
                    </when>
                </conditional>
                <param name="mlmVarCompEst" type="select" label="Variance Component Estimation">
                    <option value="P3D" selected="True">P3D</option>
                    <option value="EachMarker">EachMarker</option>
                </param>
                <param name="mlmCompressionLevel" type="select" label="Compression Level">
                    <option value="Optimum" selected="True">Optimum</option>
                    <option value="Custom">Custom</option>
                    <option value="None">None</option>
                </param>
            </when>
            <when value="ld"></when>
            <when value="ck"></when>
        </conditional>
        <!-- Only the mlm and glm command lines above reference this value. -->
        <param type="text" name="filterAlignMinFreq" label="Filter minimal frequency allele" value="0.05"/>
    </inputs>
    <outputs>
        <!-- For the ld analysis this dataset is written directly via -o and is a PNG image. -->
        <data format="txt" name="output1" label="Tassel output">
            <change_format>
                <when input="analysis_opts['fonction_selector']" value="ld" format="png"/>
            </change_format>
        </data>

        <data format="txt" name="output2" label="Allele effects">
            <filter>analysis_opts['fonction_selector'] == "glm" or analysis_opts['fonction_selector'] == "mlm"</filter>
        </data>

        <data format="txt" name="output3" label="Compression file">
            <filter>analysis_opts['fonction_selector'] == "mlm"</filter>
        </data>

        <data format="txt" name="log" label="Log file"/>
    </outputs>
    <help>

.. class:: infomark

**Program encapsulated in Galaxy by Southgreen**

.. class:: infomark

**Tassel**

-----

==========
 Authors:
==========

**Terry Casstevens**

-----

==========
 Overview
==========

Software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium.

-----

For further information, please visit the website of TASSEL_.


.. _TASSEL: http://www.maizegenetics.net/tassel/

    </help>
<!--
NOTE(review): disabled placeholder test. The parameter name "input" and the
output name "output" do not match any input or output declared by this tool,
so it must be rewritten before it can be enabled.
<tests>
    <test>
        <param name="input" value="genotyping_file.inp" />
        <output name="output" file="phase_output" />
    </test>
</tests>
-->
</tool>
b |
diff -r 000000000000 -r 3e19d0dfcf3e tassel/tool-data/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tassel/tool-data/tool_dependencies.xml Mon Mar 23 05:57:27 2015 -0400 |
b |
<?xml version="1.0"?>
<!--
    Dependency declaration for the TASSEL wrapper: the "tassel" 5.0 package
    is resolved from the package_tassel_5_0 repository at the pinned
    changeset revision.
-->
<tool_dependency>
    <package name="tassel" version="5.0">
        <repository
            name="package_tassel_5_0"
            owner="dereeper"
            changeset_revision="097d4c366e0d"
            toolshed="https://toolshed.g2.bx.psu.edu/" />
    </package>
</tool_dependency>
b |
diff -r 000000000000 -r 3e19d0dfcf3e tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Mar 23 05:57:27 2015 -0400 |
b |
<?xml version="1.0"?>
<!--
    Repository-wide dependency declarations. Each package is resolved from
    the named Tool Shed repository at a pinned changeset revision.
    All entries use the same Tool Shed URL form (https, trailing slash).
-->
<tool_dependency>
    <package name="plink" version="1.07">
        <repository changeset_revision="65400c333b88" name="package_plink_1_07" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" />
    </package>
    <package name="admixture" version="1.23">
        <repository changeset_revision="61e04b2aa621" name="package_admixture_1_23" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" />
    </package>
    <package name="tassel" version="5.0">
        <repository changeset_revision="097d4c366e0d" name="package_tassel_5_0" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" />
    </package>
    <package name="vcftools" version="0.1.12b">
        <repository changeset_revision="a655cb1dfc58" name="package_vcftools_0_1_12b" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu/" />
    </package>
    <package name="snpEff" version="4.0">
        <repository changeset_revision="6bc55957927b" name="package_snpeff_4_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu/" />
    </package>
</tool_dependency>