Repository 'admixture'
hg clone https://toolshed.g2.bx.psu.edu/repos/dereeper/admixture

Changeset 1:78b98120be91 (2015-07-02)
Previous changeset 0:781ac6e7a3a1 (2015-02-20) Next changeset 2:92ddf4f7c0c6 (2015-07-02)
Commit message:
Uploaded
added:
tool_dependencies.xml
removed:
Admixture.pl
admixture.sh
admixture.xml
tool-data/tool_dependencies.xml
transpose.awk
b
diff -r 781ac6e7a3a1 -r 78b98120be91 Admixture.pl
--- a/Admixture.pl Fri Feb 20 10:09:18 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,159 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use Switch;
-use Getopt::Long;
-use Bio::SeqIO;
-
-my $usage = qq~Usage:$0 <args> [<opts>]
-where <args> are:
-    -i, --input         <input HAPMAP>
-    -o, --output        <output>
-    -k, --kmin          <K min. int>
-    -m, --maxK          <K max. int>
-    -d, --directory     <temporary directory>
-    -p, --path          <path to executables>
-~;
-$usage .= "\n";
-
-my ($input,$output,$kmin,$kmax,$directory,$path);
-
-
-GetOptions(
- "input=s"      => \$input,
- "output=s"     => \$output,
- "kmin=s"       => \$kmin,
- "maxK=s"       => \$kmax,
- "directory=s"  => \$directory,
- "path=s"       => \$path
-);
-
-
-die $usage
-  if ( !$input || !$output || !$kmin || !$kmax || !$directory || !$path);
-
-if ($kmin =~/^(\d+)\s*$/){
-        $kmin = $1;
-}
-else{
-        die "Error: kmin must be an integer\n";
-}
-if ($kmax =~/^(\d+)\s*$/){
-        $kmax = $1;
-}
-else{
-        die "Error: kmax must be an integer\n";
-}
-
-  
-######################
-# create map file
-######################
-open(my $M,">$directory/input.map");
-open(my $H,$input);
-<$H>;
-while(<$H>)
-{
- my @infos = split(/\t/,$_);
- print $M $infos[2] . "\t" . $infos[0] . "\t" . "0" . "\t" . $infos[3] . "\n";
-}
-close($H);
-close($M);
-
-######################
-# create ped file
-######################
-system("$path/transpose.awk $input >$directory/input.ped.2");
-
-open(my $P,">$directory/input.ped");
-open(my $P2,"$directory/input.ped.2");
-my $n = 0;
-my $ind_num = 0;
-my @individus;
-while(<$P2>)
-{
- $n++;
- if ($n > 11)
- {
- my $line = $_;
- $line =~s/N/0/g;
- if (/^([^\s]+)\s+(.*)$/)
- {
- $ind_num++;
- my $ind = $1;
- push(@individus,$ind);
- my $genoyping_line = $2;
- print $P "$ind $ind_num 0 0 1 2";
- my @genotypes = split(/\s/,$genoyping_line);
- foreach my $genotype(@genotypes)
- {
- $genotype =~s/N/0/g;
- my @alleles = split("",$genotype);
- print $P " " . join(" ",@alleles);
- }
-
- print $P "\n";
- }
- }
-}
-close($P2);
-close($P);
-
-unlink("$directory/input.ped.2");
-
-system("plink --file $directory/input --out $directory/out --make-bed --noweb >>$directory/plink.log 2>&1");
-
-
-###################################
-# launch admixture for different K
-###################################
-my %errors;
-for (my $k = $kmin; $k <= $kmax; $k++)
-{
- system("admixture --cv $directory/out.bed $k >>$directory/log.$k 2>&1");
- my $cv_error_line = `grep -h CV $directory/log.$k`;
- if ($cv_error_line =~/: (\d+\.*\d*)$/)
- {
- $errors{$1} = $k;
- }
- system("cat $directory/log.$k >>$directory/logs");
- system("echo '\n\n====================================\n\n' >>$directory/logs");
- system("cat out.$k.Q >>$directory/outputs.Q");
- system("echo '\n\n====================================\n\n' >>$directory/outputs.Q");
- system("cat out.$k.P >>$directory/outputs.P");
- system("echo '\n\n====================================\n\n' >>$directory/outputs.P");
-}
-
-my @sorted_errors = sort {$a<=>$b} keys(%errors);
-my $best_K = $errors{@sorted_errors[0]};
-
-
-#system("cp -rf out.$best_K.Q $directory/output");
-
-open(BEST1,"out.$best_K.Q");
-open(BEST2,">$directory/output");
-print BEST2 "<Covariate>\n";
-print BEST2 "<Trait>";
-for (my $j=1;$j<=$best_K;$j++)
-{
- print BEST2 " Q" . $j;
-}
-print BEST2 "\n";
-my $i = 0;
-while(<BEST1>)
-{
- my $line = $_;
- $line =~s/ /\t/g;
- my $ind = $individus[$i];
- print BEST2 "$ind ";
- print BEST2 $line;
- $i++;
-}
-close(BEST1);
-close(BEST2);
-
-system("cp -rf $directory/log.$best_K $directory/log");
-
-
-
-
b
diff -r 781ac6e7a3a1 -r 78b98120be91 admixture.sh
--- a/admixture.sh Fri Feb 20 10:09:18 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,21 +0,0 @@
-#!/bin/bash
-input=$1
-outputs=$2
-logs=$3
-best_k_output=$4
-best_k_logfile=$5
-kmin=$6
-kmax=$7
-
-directory=`dirname $0`
-mkdir tmpdir$$
-cp -rf $input tmpdir$$/input

-/usr/bin/perl $directory/Admixture.pl -i $input -o $outputs -k $kmin -m $kmax -d tmpdir$$ -p $directory
-
-mv tmpdir$$/output $best_k_output
-mv tmpdir$$/log $best_k_logfile
-mv tmpdir$$/outputs.Q $outputs
-mv tmpdir$$/logs $logs
-
-
b
diff -r 781ac6e7a3a1 -r 78b98120be91 admixture.xml
--- a/admixture.xml Fri Feb 20 10:09:18 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,55 +0,0 @@
-<tool id="admixture" name="Admixture" version="1.23">
- <description>a population structure from large SNP genotype datasets</description>
- <requirements>
- <requirement type="package" version="1.07">plink</requirement>
- <requirement type="package" version="1.23">admixture</requirement>
- </requirements>
- <command interpreter="bash">./admixture.sh $input $outputs $logs $best_k_output $best_k_logfile $kmin $kmax
-    </command>
- <inputs>
- <param format="txt" name="input" type="data" label="Allelic file in Hapmap format" help="Allelic file in Hapmap format"/>
- <param type="text" name="kmin" label="K min" value="1"/>
- <param type="text" name="kmax" label="K max" value="5"/>
- </inputs>
- <outputs>
- <data format="txt" name="best_k_output" label="Best K Output"/>
- <data format="txt" name="best_k_logfile" label="Best K Logfile"/>
- <data format="txt" name="outputs" label="All Outputs"/>
- <data format="txt" name="logs" label="All Logs"/>
- </outputs>
- <help>
-
-
-.. class:: infomark
-
-**Program encapsulated in Galaxy by Southgreen**
-
-.. class:: infomark
-
-**Admixture version 1.23**
-
------
-
-==============
- Please cite: 
-==============
-
-"Fast model-based estimation of ancestry in unrelated individuals.", **D.H. Alexander, J. Novembre, and K. Lange.**, Genome Research, 19:1655{1664, 2009.
-
------
-
-===========
- Overview:
-===========
-
-ADMIXTURE is a program for estimating ancestry in a model-based manner from large autosomal SNP genotype datasets, where the individuals are unrelated (for example, the individuals in a case-control association study).
-
------
-
-For further informations, please visite the Admixture_ website.
-
-
-.. _Admixture: http://www.genetics.ucla.edu/software/admixture/index.html
- </help>
-
-</tool>
b
diff -r 781ac6e7a3a1 -r 78b98120be91 tool-data/tool_dependencies.xml
--- a/tool-data/tool_dependencies.xml Fri Feb 20 10:09:18 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,11 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="plink" version="1.07">
-        <repository changeset_revision="65400c333b88" name="package_plink_1_07" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" />
-    </package>
-    <package name="admixture" version="1.23">
-        <repository changeset_revision="61e04b2aa621" name="package_admixture_1_23" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" />
-    </package>
-</tool_dependency>
-
-
b
diff -r 781ac6e7a3a1 -r 78b98120be91 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Thu Jul 02 04:08:56 2015 -0400
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="admixture" version="1.23">
+        <repository changeset_revision="61e04b2aa621" name="package_admixture_1_23" owner="dereeper" toolshed="https://toolshed.g2.bx.psu.edu/" />
+    </package>
+</tool_dependency>
b
diff -r 781ac6e7a3a1 -r 78b98120be91 transpose.awk
--- a/transpose.awk Fri Feb 20 10:09:18 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,27 +0,0 @@
-#!/usr/bin/gawk -f
-
-BEGIN {
-    max_x =0;
-    max_y =0;
-}
-
-{
-    max_y++;
-    for( i=1; i<=NF; i++ )
-    {
-        if (i>max_x) max_x=i;
-        A[i,max_y] = $i;
-    }
-}
-
-END {
-    for ( x=1; x<=max_x; x++ )
-    {
-        for ( y=1; y<=max_y; y++ )
-        {
-            if ( (x,y) in A ) printf "%s",A[x,y];
-            if ( y!=max_y ) printf " ";
-        }
-        printf "\n";
-    }
-}