Mercurial > repos > dereeper > snmf
changeset 8:c94c31bde773 draft
Uploaded
author | dereeper |
---|---|
date | Tue, 08 Jan 2019 11:19:23 -0500 |
parents | 22aee300afdf |
children | e44eb2ba80ec |
files | Snmf.pl snmf.sh snmf.xml |
diffstat | 3 files changed, 32 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/Snmf.pl Wed Apr 18 05:44:40 2018 -0400 +++ b/Snmf.pl Tue Jan 08 11:19:23 2019 -0500 @@ -2,7 +2,6 @@ use strict; use Getopt::Long; -use Bio::SeqIO; my $usage = qq~Usage:$0 <args> [<opts>] where <args> are: @@ -34,7 +33,7 @@ my $PLINK_EXE = "plink"; -system("$PLINK_EXE --vcf $input --allow-extra-chr --recode vcf-fid --out $directory/input >>$directory/logs 2>&1"); +system("$PLINK_EXE --vcf $input --allow-extra-chr --recode-vcf --const-fid --out $directory/input >>$directory/logs 2>&1"); system("vcf2geno $directory/input.vcf $directory/polymorphisms.geno >>$directory/logs 2>&1"); @@ -47,6 +46,8 @@ # launch admixture for different K ################################### my %errors; +open(E,">$directory/entropy"); +print E "K, number of ancestal populations Cross-entropy\n"; for (my $k = $kmin; $k <= $kmax; $k++) { system("sNMF -x $directory/polymorphisms.geno -K $k -c >>$directory/log.$k 2>&1"); @@ -64,9 +65,7 @@ } close($LOG); - open(E,">>$directory/entropy"); - print E "K=$k $ent\n"; - close(E); + print E "K=$k $ent\n"; print $O2 "Indiv"; print $O3 "Indiv;Group\n"; @@ -115,7 +114,7 @@ system("cat $directory/out.$k.final.Q >>$directory/outputs.Q"); system("echo '\n\n====================================\n\n' >>$directory/outputs.Q"); } - +close(E); my @sorted_errors = sort {$a<=>$b} keys(%errors); my $best_K = $errors{@sorted_errors[0]}; @@ -125,4 +124,27 @@ system("cp -rf $directory/log.$best_K $directory/log"); system("cp -rf $directory/out.$best_K.group $directory/groups"); +#exit; + + +open(BEST1,"$directory/out.$best_K.final.Q"); +<BEST1>; +open(BEST2,">$directory/output"); +print BEST2 "<Covariate>\n"; +print BEST2 "<Trait>"; +for (my $j=1;$j<=$best_K;$j++) +{ + print BEST2 " Q" . $j; +} +print BEST2 "\n"; +my $i = 0; +while(<BEST1>) +{ + my $line = $_; + $line =~s/ /\t/g; + print BEST2 $line; + $i++; +} +close(BEST1); +close(BEST2);
--- a/snmf.sh Wed Apr 18 05:44:40 2018 -0400 +++ b/snmf.sh Tue Jan 08 11:19:23 2019 -0500 @@ -8,6 +8,7 @@ kmax=$7 groups=$8 threshold_group=$9 +entropy=${10} directory=`dirname $0` mkdir tmpdir$$ @@ -20,5 +21,6 @@ mv tmpdir$$/outputs.Q $outputs mv tmpdir$$/logs $logs mv tmpdir$$/groups $groups +mv tmpdir$$/entropy $entropy
--- a/snmf.xml Wed Apr 18 05:44:40 2018 -0400 +++ b/snmf.xml Tue Jan 08 11:19:23 2019 -0500 @@ -10,7 +10,7 @@ <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> <exit_code range="1:" level="fatal" /> </stdio> - <command interpreter="bash">./snmf.sh $vcf $outputs $logs $best_k_output $best_k_logfile $kmin $kmax $best_k_groups $threshold_group + <command interpreter="bash">./snmf.sh $vcf $outputs $logs $best_k_output $best_k_logfile $kmin $kmax $best_k_groups $threshold_group $entropy </command> <inputs> <param format="vcf" name="vcf" type="data" label="VCF file" help="VCF file"/> @@ -23,6 +23,7 @@ <data format="txt" name="best_k_groups" label="Best K Groups"/> <data format="txt" name="best_k_logfile" label="Best K Logfile"/> <data format="txt" name="outputs" label="Structure by sNMF"/> + <data format="tabular" name="entropy" label="Cross-entropy values"/> <data format="txt" name="logs" label="All Logs"/> </outputs>