Mercurial > repos > iuc > structure
changeset 0:a1574aada200 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/structure commit b4d0a8f3dfee920840c77befdf626c52a5d617cb
author | iuc |
---|---|
date | Wed, 15 Nov 2017 16:31:24 -0500 |
parents | |
children | 64e681a1cad5 |
files | structure.xml test-data/testdata1 test-data/testdata1_f |
diffstat | 3 files changed, 1283 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/structure.xml Wed Nov 15 16:31:24 2017 -0500 @@ -0,0 +1,536 @@ +<tool id="structure" name="Structure" version="2.3.4"> + <description>using multi-locus genotype data to investigate population structure</description> + <requirements> + <requirement type="package" version="2.3.4">structure</requirement> + </requirements> + <version_command><![CDATA[ + structure | grep -E -o 'Version.+' + ]]></version_command> + <command detect_errors="exit_code"><![CDATA[ + mv '$mainparams' '$out_mainparams' && + mv '$extraparams' '$out_extraparams' && + + mkdir out log + + #for $run in range(1, int($nb_run) + 1): + && structure -i '$infile' -o outfile -m '$out_mainparams' -e '$out_extraparams' > 'log/run${run}_K_${main.MAXPOPS}.log' + && mv 'outfile_f' 'out/run${run}_K_${main.MAXPOPS}.out' + #end for + + ]]></command> + <configfiles> + <configfile name="mainparams"><![CDATA[ +KEY PARAMETERS FOR THE PROGRAM structure. YOU WILL NEED TO SET THESE +IN ORDER TO RUN THE PROGRAM. VARIOUS OPTIONS CAN BE ADJUSTED IN THE +FILE extraparams. + + +"(int)" means that this takes an integer value. +"(B)" means that this variable is Boolean + (ie insert 1 for True, and 0 for False) +"(str)" means that this is a string (but not enclosed in quotes!) 
+ + +Basic Program Parameters + +#define MAXPOPS $main.MAXPOPS // default:2 // (int) number of populations assumed +#define BURNIN $main.BURNIN // default:10000 // (int) length of burnin period +#define NUMREPS $main.NUMREPS // default:20000 // (int) number of MCMC reps after burnin + +Input/Output files + +#define INFILE $infile // (str) name of input data file +#define OUTFILE outfile //(str) name of output data file + +Data file format + +#define NUMINDS $main.NUMINDS // default:100 // (int) number of diploid individuals in data file +#define NUMLOCI $main.NUMLOCI // default:100 // (int) number of loci in data file +#define PLOIDY $main.PLOIDY // default:2 // (int) ploidy of data +#define MISSING $main.MISSING // default:-9 // (int) value given to missing genotype data +#define ONEROWPERIND $main.ONEROWPERIND // default:0 // (B) store data for individuals in a single line + + +#define LABEL $main.LABEL // default:1 // (B) Input file contains individual labels +#define POPDATA $main.POPDATA // default:1 // (B) Input file contains a population identifier +#define POPFLAG ${extra.usepopinfo_cond.POPFLAG} // default:0 // (B) Input file contains a flag which says + whether to use popinfo when USEPOPINFO==1 +#define LOCDATA $main.LOCDATA // default:0 // (B) Input file contains a location identifier + +#define PHENOTYPE $main.PHENOTYPE // default:0 // (B) Input file contains phenotype information +#define EXTRACOLS $main.EXTRACOLS // default:0 // (int) Number of additional columns of data + before the genotype data start. 
+ +#define MARKERNAMES $main.MARKERNAMES // default:1 // (B) data file contains row of marker names +#define RECESSIVEALLELES $main.recessivealleles_cond.RECESSIVEALLELES // default:0 // (B) data file contains dominant markers (eg AFLPs) + // and a row to indicate which alleles are recessive +#define MAPDISTANCES $main.MAPDISTANCES // default:0 // (B) data file contains row of map distances + // between loci + + +Advanced data file options + +#define PHASED $main.PHASED // default:0 // (B) Data are in correct phase (relevant for linkage model only) +#define PHASEINFO $main.PHASEINFO // default:0 // (B) the data for each individual contains a line + indicating phase (linkage model) +#define MARKOVPHASE $main.MARKOVPHASE // default:0 // (B) the phase info follows a Markov model. +#define NOTAMBIGUOUS $main.recessivealleles_cond.NOTAMBIGUOUS // default:-999 // (int) for use in some analyses of polyploid data + + + +Command line options: + +-m mainparams +-e extraparams +-s stratparams +-K MAXPOPS +-L NUMLOCI +-N NUMINDS +-i input file +-o output file +-D SEED + + ]]></configfile> + <configfile name="extraparams"><![CDATA[ +EXTRA PARAMS FOR THE PROGRAM structure. THESE PARAMETERS CONTROL HOW THE +PROGRAM RUNS. ATTRIBUTES OF THE DATAFILE AS WELL AS K AND RUNLENGTH ARE +SPECIFIED IN mainparams. + +"(int)" means that this takes an integer value. +"(d)" means that this is a double (ie, a Real number such as 3.14). +"(B)" means that this variable is Boolean + (ie insert 1 for True, and 0 for False). 
+ +PROGRAM OPTIONS + +#define NOADMIX $extra.NOADMIX // default:0 // (B) Use no admixture model (0=admixture model, 1=no-admix) +#define LINKAGE $extra.LINKAGE // default:0 // (B) Use the linkage model model +#define USEPOPINFO $extra.usepopinfo_cond.USEPOPINFO // default:0 // (B) Use prior population information to pre-assign individuals + to clusters +#define LOCPRIOR $extra.LOCPRIOR // default:0 //(B) Use location information to improve weak data + +#define FREQSCORR $extra.FREQSCORR // default:1 // (B) allele frequencies are correlated among pops +#define ONEFST $extra.ONEFST // default:0 // (B) assume same value of Fst for all subpopulations. + +#define INFERALPHA $extra.inferalpha_cond.INFERALPHA // default:1 // (B) Infer ALPHA (the admixture parameter) +#define POPALPHAS $extra.POPALPHAS // default:0 // (B) Individual alpha for each population +#define ALPHA $extra.inferalpha_cond.ALPHA // default:1.0 // (d) Dirichlet parameter for degree of admixture + (this is the initial value if INFERALPHA==1). + +#define INFERLAMBDA $extra.inferlambda_cond.INFERLAMBDA // default:0 // (B) Infer LAMBDA (the allele frequencies parameter) +#define POPSPECIFICLAMBDA $extra.inferlambda_cond.POPSPECIFICLAMBDA // default:0 //(B) infer a separate lambda for each pop + (only if INFERLAMBDA=1). +#define LAMBDA $extra.LAMBDA // default:1.0 // (d) Dirichlet parameter for allele frequencies + + + + +PRIORS + +#define FPRIORMEAN $extra.FPRIORMEAN // default:0.01 // (d) Prior mean and SD of Fst for pops. 
+#define FPRIORSD $extra.FPRIORSD // default:0.05 // (d) The prior is a Gamma distribution with these parameters + +#define UNIFPRIORALPHA $extra.unifprioralpha_cond.UNIFPRIORALPHA // default:1 // (B) use a uniform prior for alpha; + otherwise gamma prior +#define ALPHAMAX $extra.ALPHAMAX // default:10.0 // (d) max value of alpha if uniform prior +#define ALPHAPRIORA $extra.unifprioralpha_cond.ALPHAPRIORA // default:1.0 // (only if UNIFPRIORALPHA==0): alpha has a gamma + prior with mean A*B, and +#define ALPHAPRIORB $extra.unifprioralpha_cond.ALPHAPRIORB // default:2.0 // variance A*B^2. + + +#define LOG10RMIN $extra.LOG10RMIN // default:-4.0 //(d) Log10 of minimum allowed value of r under linkage model +#define LOG10RMAX $extra.LOG10RMAX // default:1.0 //(d) Log10 of maximum allowed value of r +#define LOG10RPROPSD $extra.LOG10RPROPSD // default:0.1 //(d) standard deviation of log r in update +#define LOG10RSTART $extra.LOG10RSTART // default:-2.0 //(d) initial value of log10 r + + +USING PRIOR POPULATION INFO (USEPOPINFO) + +#define GENSBACK $extra.GENSBACK // default:2 //(int) For use when inferring whether an indiv- + idual is an immigrant, or has an immigrant an- + cestor in the past GENSBACK generations. eg, if + GENSBACK==2, it tests for immigrant ancestry + back to grandparents. +#define MIGRPRIOR $extra.usepopinfo_cond.MIGRPRIOR // default:0.01 //(d) prior prob that an individual is a migrant + (used only when USEPOPINFO==1). This should + be small, eg 0.01 or 0.1. +#define PFROMPOPFLAGONLY $extra.PFROMPOPFLAGONLY // default:0 // (B) only use individuals with POPFLAG=1 to update P. + This is to enable use of a reference set of + individuals for clustering additional "test" + individuals. 
+ +LOCPRIOR MODEL FOR USING LOCATION INFORMATION + +#define LOCISPOP $extra.LOCISPOP // default:1 //(B) use POPDATA for location information +#define LOCPRIORINIT $extra.LOCPRIORINIT // default:1.0 //(d) initial value for r, the location prior +#define MAXLOCPRIOR $extra.MAXLOCPRIOR // default:20.0 //(d) max allowed value for r + + + + +OUTPUT OPTIONS + +#define PRINTNET $extra.PRINTNET // default:1 // (B) Print the "net nucleotide distance" to screen during the run +#define PRINTLAMBDA $extra.PRINTLAMBDA // default:1 // (B) Print current value(s) of lambda to screen +#define PRINTQSUM $extra.PRINTQSUM // default:1 // (B) Print summary of current population membership to screen + +#define SITEBYSITE $extra.SITEBYSITE // default:0 // (B) whether or not to print site by site results. + (Linkage model only) This is a large file! +#define PRINTQHAT $extra.PRINTQHAT // default:0 // (B) Q-hat printed to a separate file. Turn this + on before using STRAT. +#define UPDATEFREQ $extra.UPDATEFREQ // default:100 // (int) frequency of printing update on the screen. + Set automatically if this is 0. +#define PRINTLIKES $extra.PRINTLIKES // default:0 // (B) print current likelihood to screen every rep +#define INTERMEDSAVE $extra.INTERMEDSAVE // default:0 // (int) number of saves to file during run + +#define ECHODATA $extra.ECHODATA // default:1 // (B) Print some of data file to screen to check + that the data entry is correct. +(NEXT 3 ARE FOR COLLECTING DISTRIBUTION OF Q:) +#define ANCESTDIST $extra.ANCESTDIST // default:0 // (B) collect data about the distribution of an- + cestry coefficients (Q) for each individual +#define NUMBOXES $extra.NUMBOXES // default:1000 // (int) the distribution of Q values is stored as + a histogram with this number of boxes. 
+#define ANCESTPINT $extra.ANCESTPINT // default:0.90 // (d) the size of the displayed probability + interval on Q (values between 0.0--1.0) + + + +MISCELLANEOUS + +#define COMPUTEPROB $extra.COMPUTEPROB // default:1 // (B) Estimate the probability of the Data under + the model. This is used when choosing the + best number of subpopulations. +#define ADMBURNIN $extra.ADMBURNIN // default:500 // (int) [only relevant for linkage model]: + Initial period of burnin with admixture model (see Readme) +#define ALPHAPROPSD $extra.ALPHAPROPSD // default:0.025 // (d) SD of proposal for updating alpha +#define STARTATPOPINFO $extra.STARTATPOPINFO // default:0 // Use given populations as the initial condition + for population origins. (Need POPDATA==1). It + is assumed that the PopData in the input file + are between 1 and k where k<=MAXPOPS. +#define RANDOMIZE $extra.randomize_cond.RANDOMIZE // default:1 // (B) use new random seed for each run +#define SEED $extra.randomize_cond.SEED // default:2245 // (int) seed value for random number generator + (must set RANDOMIZE=0) +#define METROFREQ $extra.METROFREQ // default:10 // (int) Frequency of using Metropolis step to update + Q under admixture model (ie use the metr. move every + i steps). If this is set to 0, it is never used. + (Proposal for each q^(i) sampled from prior. The + goal is to improve mixing for small alpha.) +#define REPORTHITRATE $extra.REPORTHITRATE // default:0 // (B) report hit rate if using METROFREQ + + ]]></configfile> + </configfiles> + <inputs> + <param name="infile" type="data" label="Genotype data" format="tabular" /> + <param name="nb_run" value="1" type="integer" label="Number of runs" min="1" max="10" help="Note that the runs are sequential. 
Please launch separate runs if it's too long" /> + <section name="main" title="mainparams" expanded="True"> + <!--Basic Program Parameters--> + <param argument="MAXPOPS" value="" type="integer" label="Number of populations assumed" help="or [K]"/> + <param argument="BURNIN" value="10000" type="integer" label="Length of burnin period" /> + <param argument="NUMREPS" value="20000" type="integer" label="Number of MCMC reps after burnin" /> + + <!--Data file format--> + <param argument="NUMINDS" value="" type="integer" label="Number of diploid individuals in data file" help="or [N]"/> + <param argument="NUMLOCI" value="" type="integer" label="Number of loci in data file" help="or [L]"/> + <param argument="PLOIDY" value="2" type="integer" label="Ploidy of data" /> + <param argument="MISSING" value="-9" type="integer" label="Value given to missing genotype data" /> + <param argument="ONEROWPERIND" checked="False" type="boolean" label="Store data for individuals in a single line" truevalue="1" falsevalue="0" help=" E.g., for diploid data, this would mean that the two alleles for each locus are in consecutive order in the same row, rather than being arranged in the same column, in two consecutive rows "/> + + + <param argument="LABEL" checked="true" type="boolean" label="Input file contains individual labels" truevalue="1" falsevalue="0" /> + <param argument="POPDATA" checked="true" type="boolean" label="Input file contains a user-defined population-of-origin for each individual" truevalue="1" falsevalue="0" /> + <param argument="LOCDATA" checked="false" type="boolean" label="Input file contains a location identifier" truevalue="1" falsevalue="0" /> + + <param argument="PHENOTYPE" checked="false" type="boolean" label="Input file contains phenotype information" truevalue="1" falsevalue="0" /> + <param argument="EXTRACOLS" value="0" type="integer" label="Number of additional columns of data before the genotype data start." 
/> + + <param argument="MARKERNAMES" checked="true" type="boolean" label="Data file contains row of marker names" truevalue="1" falsevalue="0" /> + <conditional name="recessivealleles_cond"> + <param argument="RECESSIVEALLELES" type="select" label="Data file contains dominant markers (eg AFLPs) and a row to indicate which alleles are recessive" > + <option value="0" selected="True">No</option> + <option value="1">Yes</option> + </param> + <when value="0"> + <param argument="NOTAMBIGUOUS" value="-999" type="hidden" label="Defines the code indicating that genotype data at a marker are unambiguous." help="For use with polyploids when RECESSIVEALLELES=1/True. Must not match MISSING or any allele value in the data." /> + </when> + <when value="1"> + <param argument="NOTAMBIGUOUS" value="-999" type="integer" label="Defines the code indicating that genotype data at a marker are unambiguous." help="For use with polyploids when RECESSIVEALLELES=1/True. Must not match MISSING or any allele value in the data." /> + </when> + </conditional> + <param argument="MAPDISTANCES" checked="false" type="boolean" label="Data file contains row of map distances between loci" truevalue="1" falsevalue="0" /> + + + <!--Advanced data file options--> + + <param argument="PHASED" checked="false" type="boolean" label="Data are in correct phase (relevant for linkage model only)" truevalue="1" falsevalue="0" /> + <param argument="PHASEINFO" checked="false" type="boolean" label="The data for each individual contains a line indicating phase (linkage model)" truevalue="1" falsevalue="0" /> + <param argument="MARKOVPHASE" checked="false" type="boolean" label="The phase info follows a Markov model." 
truevalue="1" falsevalue="0" /> + </section> + <section name="extra" title="extraparams" expanded="False"> + + <param argument="NOADMIX" checked="false" type="boolean" label="Use no admixture model" help="(0/False=admixture model, 1/True=no-admix)" truevalue="1" falsevalue="0" /> + <param argument="LINKAGE" checked="false" type="boolean" label="Use the linkage model model" truevalue="1" falsevalue="0" /> + <conditional name="usepopinfo_cond"> + <param argument="USEPOPINFO" type="select" label="Use prior population information to pre-assign individuals to clusters"> + <option value="0" selected="True">No</option> + <option value="1">Yes</option> + </param> + <when value="0"> + <param argument="POPFLAG" value="0" type="hidden" label="Input file contains a flag which says whether to use popinfo" help="[mainparams] when USEPOPINFO is 1/True" /> + <param argument="MIGRPRIOR" value="0.01" type="hidden" label="Prior prob that an individual is a migrant" help="(used only when USEPOPINFO==1/True). This should be small, eg 0.01 or 0.1." /> + </when> + <when value="1"> + <param argument="POPFLAG" checked="false" type="boolean" label="Input file contains a flag which says whether to use popinfo" help="[mainparams] when USEPOPINFO is 1/True" truevalue="1" falsevalue="0" /> + <param argument="MIGRPRIOR" value="0.01" type="float" label="Prior prob that an individual is a migrant" help="(used only when USEPOPINFO==1/True). This should be small, eg 0.01 or 0.1." 
/> + </when> + </conditional> + <param argument="LOCPRIOR" checked="false" type="boolean" label="Use location information to improve weak data" truevalue="1" falsevalue="0" /> + + <param argument="FREQSCORR" checked="true" type="boolean" label="Allele frequencies are correlated among pops" truevalue="1" falsevalue="0" /> + <param argument="ONEFST" checked="false" type="boolean" label="Assume same value of Fst for all subpopulations" truevalue="1" falsevalue="0" /> + + <conditional name="inferalpha_cond"> + <param argument="INFERALPHA" type="select" label="Infer ALPHA (the admixture parameter)"> + <option value="1" selected="True">Yes</option> + <option value="0">No</option> + </param> + <when value="1"> + <param argument="ALPHA" value="1.0" type="float" label="Dirichlet parameter for degree of admixture" help="this is the initial value if INFERALPHA is 1/True." /> + </when> + <when value="0"> + <param argument="ALPHA" value="1.0" type="hidden" label="Dirichlet parameter for degree of admixture" help="this is the initial value if INFERALPHA is 1/True." /> + </when> + </conditional> + <param argument="POPALPHAS" checked="false" type="boolean" label="Individual alpha for each population" truevalue="1" falsevalue="0" /> + + <conditional name="inferlambda_cond"> + <param argument="INFERLAMBDA" type="select" label="Infer LAMBDA (the allele frequencies parameter)"> + <option value="0" selected="True">No</option> + <option value="1">Yes</option> + </param> + <when value="0"> + <param argument="POPSPECIFICLAMBDA" value="0" type="hidden" label="Infer a separate lambda for each pop" help="(only if INFERLAMBDA=1/True)." /> + </when> + <when value="1"> + <param argument="POPSPECIFICLAMBDA" checked="false" type="boolean" label="Infer a separate lambda for each pop" help="(only if INFERLAMBDA=1/True)." 
truevalue="1" falsevalue="0" /> + </when> + </conditional> + <param argument="LAMBDA" value="1.0" type="float" label="Dirichlet parameter for allele frequencies" /> + + + <!-- PRIORS --> + + <param argument="FPRIORMEAN" value="0.01" type="float" label="The Prior (Gamma distribution) mean of Fst for pops." /> + <param argument="FPRIORSD" value="0.05" type="float" label="The Prior (Gamma distribution) Standard Deviation of Fst for pops." /> + + <conditional name="unifprioralpha_cond"> + <param argument="UNIFPRIORALPHA" type="select" label="Use a uniform prior for alpha; otherwise gamma prior"> + <option value="1" selected="True">Yes</option> + <option value="0">No</option> + </param> + <when value="1"> + <param argument="ALPHAPRIORA" value="1.0" type="hidden" label="Alpha has a gamma prior with mean A*B, and variance A*B^2." help="(only if UNIFPRIORALPHA==0/False)" /> + <param argument="ALPHAPRIORB" value="2.0" type="hidden" label="Alpha has a gamma prior with mean A*B, and variance A*B^2." help="(only if UNIFPRIORALPHA==0/False)" /> + </when> + <when value="0"> + <param argument="ALPHAPRIORA" value="1.0" type="float" label="Alpha has a gamma prior with mean A*B, and variance A*B^2." help="(only if UNIFPRIORALPHA==0/False)"/> + <param argument="ALPHAPRIORB" value="2.0" type="float" label="Alpha has a gamma prior with mean A*B, and variance A*B^2." 
help="(only if UNIFPRIORALPHA==0/False)"/> + </when> + </conditional> + <param argument="ALPHAMAX" value="10.0" type="float" label="Max value of alpha if uniform prior" /> + + + <param argument="LOG10RMIN" value="-4.0" type="float" label="Log10 of minimum allowed value of r under linkage model" /> + <param argument="LOG10RMAX" value="1.0" type="float" label="Log10 of maximum allowed value of r" /> + <param argument="LOG10RPROPSD" value="0.1" type="float" label="Standard deviation of log r in update" /> + <param argument="LOG10RSTART" value="-2.0" type="float" label="Initial value of log10 r" /> + + + <!-- USING PRIOR POPULATION INFO (USEPOPINFO) --> + + <param argument="GENSBACK" value="2" type="integer" label="For use when inferring whether an individual is an immigrant, or has an immigrant an cestor in the past GENSBACK generations." help="eg, if GENSBACK==2, it tests for immigrant ancestry back to grandparents." /> + <param argument="PFROMPOPFLAGONLY" checked="false" type="boolean" label="Only use individuals with POPFLAG=1 to update P." help="This is to enable use of a reference set of individuals for clustering additional 'test' individuals." 
truevalue="1" falsevalue="0" /> + + <!-- LOCPRIOR MODEL FOR USING LOCATION INFORMATION --> + + <param argument="LOCISPOP" checked="true" type="boolean" label="Use POPDATA for location information" truevalue="1" falsevalue="0" /> + <param argument="LOCPRIORINIT" value="1.0" type="float" label="Initial value for r, the location prior" /> + <param argument="MAXLOCPRIOR" value="20.0" type="float" label="Max allowed value for r" /> + + <!-- OUTPUT OPTIONS --> + + <param argument="PRINTNET" checked="true" type="boolean" label="Print the 'net nucleotide distance' to screen during the run" truevalue="1" falsevalue="0" /> + <param argument="PRINTLAMBDA" checked="true" type="boolean" label="Print current value(s) of lambda to screen" truevalue="1" falsevalue="0" /> + <param argument="PRINTQSUM" checked="true" type="boolean" label="Print summary of current population membership to screen" truevalue="1" falsevalue="0" /> + + <param argument="SITEBYSITE" checked="false" type="boolean" label="whether or not to print site by site results." help="(Linkage model only) This is a large file!" truevalue="1" falsevalue="0" /> + <param argument="PRINTQHAT" checked="false" type="boolean" label="Q-hat printed to a separate file." help="Turn this on before using STRAT." truevalue="1" falsevalue="0" /> + <param argument="UPDATEFREQ" value="100" type="integer" label="Frequency of printing update on the screen." help="Set automatically if this is 0/False." /> + <param argument="PRINTLIKES" checked="false" type="boolean" label="Print current likelihood to screen every rep" truevalue="1" falsevalue="0" /> + <param argument="INTERMEDSAVE" value="0" type="integer" label="Number of saves to file during run" /> + + <param argument="ECHODATA" checked="false" type="boolean" label="Print some of data file to screen to check that the data entry is correct." 
help="(NEXT 3 ARE FOR COLLECTING DISTRIBUTION OF Q:)" truevalue="1" falsevalue="0" /> + <param argument="ANCESTDIST" checked="false" type="boolean" label="Collect data about the distribution of ancestry coefficients (Q) for each individual" truevalue="1" falsevalue="0" /> + <param argument="NUMBOXES" value="1000" type="integer" label="The distribution of Q values is stored as a histogram with this number of boxes." /> + <param argument="ANCESTPINT" value="0.90" type="float" label="The size of the displayed probability interval on Q (values between 0.0--1.0)" /> + + + + <!-- MISCELLANEOUS --> + + <param argument="COMPUTEPROB" checked="true" type="boolean" label="Estimate the probability of the Data under the model." help="This is used when choosing the best number of subpopulations." truevalue="1" falsevalue="0" /> + <param argument="ADMBURNIN" value="500" type="integer" label="Initial period of burnin with admixture model" help="[only relevant for linkage model] see Documentation" /> + <param argument="ALPHAPROPSD" value="0.025" type="float" label="SD of proposal for updating alpha" /> + <param argument="STARTATPOPINFO" checked="false" type="boolean" label="Use given populations as the initial condition for population origins." help="(Need POPDATA==1). It is assumed that the PopData in the input file are between 1 and k where k is less or equal MAXPOPS." 
truevalue="1" falsevalue="0" /> + <conditional name="randomize_cond"> + <param argument="RANDOMIZE" type="select" label="=use new random seed for each run"> + <option value="1" selected="True">Yes</option> + <option value="0">No</option> + </param> + <when value="1"> + <param argument="SEED" value="2245" type="hidden" label="Seed value for random number generator" help="(must set RANDOMIZE=0)" /> + </when> + <when value="0"> + <param argument="SEED" value="2245" type="integer" label="seed value for random number generator" help="(must set RANDOMIZE=0)" /> + </when> + </conditional> + <param argument="METROFREQ" value="10" type="integer" label="Frequency of using Metropolis step to update Q under admixture model" help="(ie use the metr. move every i steps). If this is set to 0, it is never used. (Proposal for each q^(i) sampled from prior. The goal is to improve mixing for small alpha.)" /> + <param argument="REPORTHITRATE" checked="false" type="boolean" label="Report hit rate if using METROFREQ" truevalue="1" falsevalue="0" /> + </section> + </inputs> + <outputs> + <data name="out_mainparams" format="txt" label="run_K_${main.MAXPOPS}.mainparams" /> + <data name="out_extraparams" format="txt" label="run_K_${main.MAXPOPS}.extraparams" /> + <collection name="out" type="list" label="run_K_${main.MAXPOPS}.out"> + <discover_datasets pattern="__name__" format="tabular" directory="out" /> + </collection> + <collection name="log" type="list" label="run_K_${main.MAXPOPS}.log"> + <discover_datasets pattern="__name__" format="tabular" directory="log" /> + </collection> + </outputs> + <tests> + <test> + <!-- https://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/html/structure-data.html --> + <param name="infile" value="testdata1" /> + <param name="nb_run" value="2" /> + <section name="main"> + <param name="NUMINDS" value="200" /> + <param name="MAXPOPS" value="2" /> + <param name="LABEL" value="1" /> + <param name="POPDATA" value="1" /> + <param 
name="NUMLOCI" value="5" /> + <param name="LOCDATA" value="1" /> + <param name="PLOIDY" value="2" /> + <param name="MISSING" value="-999" /> + <param name="ONEROWPERIND" value="0" /> + <param name="MARKERNAMES" value="0" /> + </section> + <section name="extra"> + <conditional name="randomize_cond"> + <param name="RANDOMIZE" value="0" /> + </conditional> + </section> + <output_collection name="out" type="list"> + <element name="run1_K_2.out" value="testdata1_f" lines_diff="6" /> + <element name="run2_K_2.out" value="testdata1_f" lines_diff="6" /> + </output_collection> + <output_collection name="log" type="list"> + <element name="run1_K_2.log"> + <assert_contents> + <has_line line="Final results printed to file outfile_f" /> + </assert_contents> + </element> + <element name="run2_K_2.log"> + <assert_contents> + <has_line line="Final results printed to file outfile_f" /> + </assert_contents> + </element> + </output_collection> + </test> + </tests> + <help><![CDATA[ +**Introduction** + +The program structure_ implements a model-based clustering method for inferring population structure +using genotype data consisting of unlinked markers. The method was introduced in a paper +by Pritchard, Stephens and Donnelly (2000a) and extended in sequels by Falush, Stephens and +Pritchard (2003a, 2007). Applications of our method include demonstrating the presence of population +structure, identifying distinct genetic populations, assigning individuals to populations, and +identifying migrants and admixed individuals. + +Briefly, we assume a model in which there are K populations (where K may be unknown), +each of which is characterized by a set of allele frequencies at each locus. Individuals in the +sample are assigned (probabilistically) to populations, or jointly to two or more populations if their +genotypes indicate that they are admixed. It is assumed that within populations, the loci are at +Hardy-Weinberg equilibrium, and linkage equilibrium. 
Loosely speaking, individuals are assigned +to populations in such a way as to achieve this. + +Our model does not assume a particular mutation process, and it can be applied to most of the +commonly used genetic markers including microsatellites, SNPs and RFLPs. The model assumes +that markers are not in linkage disequilibrium (LD) within subpopulations, so we can’t handle +markers that are extremely close together. Starting with version 2.0, we can now deal with weakly +linked markers. + +While the computational approaches implemented here are fairly powerful, some care is needed +in running the program in order to ensure sensible answers. For example, it is not possible to +determine suitable run-lengths theoretically, and this requires some experimentation on the part of +the user. This document describes the use and interpretation of the software and supplements the +published papers, which provide more formal descriptions and evaluations of the methods. + +.. _structure: https://web.stanford.edu/group/pritchardlab/structure.html + +**Documentation** + +Please see the full Structure documentation_ + +.. _documentation: https://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/structure_doc.pdf + +**Upstream** + +Inputs can be produced from: + +- Microsatellite analysis +- RADSeq analysis (eg: using populations_ from Stacks suite) + +.. 
_populations: http://catchenlab.life.illinois.edu/stacks/manual/#export + +**Input** + +======= === ===== ===== ===== ===== ===== + loc_a loc_b loc_c loc_d loc_e +======= === ===== ===== ===== ===== ===== +George 1 -9 145 66 0 92 +George 1 -9 -9 64 0 94 +Paula 1 106 142 68 1 92 +Paula 1 106 148 64 0 94 +Matthew 2 110 145 -9 0 92 +Matthew 2 110 148 66 1 -9 +Bob 2 108 142 64 1 94 +Bob 2 -9 142 -9 0 94 +Anja 1 112 142 -9 1 -9 +Anja 1 114 142 66 1 94 +Peter 1 -9 145 66 0 -9 +Peter 1 110 145 -9 1 -9 +Carsten 2 108 145 62 0 -9 +Carsten 2 110 145 64 1 92 +======= === ===== ===== ===== ===== ===== + +You will find other sample data sets: here_ + +.. _here: https://web.stanford.edu/group/pritchardlab/structure_software/release_versions/v2.3.4/html/structure-data.html + +**Downstream** + +- Clumpp_ +- Distruct_ +- Structure-harvester_ + +.. _Clumpp: https://rosenberglab.stanford.edu/clumpp.html +.. _Distruct: https://rosenberglab.stanford.edu/distruct.html +.. _Structure-harvester: http://taylor0.biology.ucla.edu/structureHarvester/ + + ]]></help> + <citations> + <citation type="doi">10.1111/j.1471-8286.2007.01758.x</citation> + <citation type="doi">10.1111/j.1755-0998.2009.02591.x</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/testdata1 Wed Nov 15 16:31:24 2017 -0500 @@ -0,0 +1,400 @@ +1 1 0 0 1 3 8 9 +1 1 0 1 -1 -1 7 -3 +2 1 0 -1 2 2 6 7 +2 1 0 0 5 0 9 7 +3 1 0 -1 2 0 2 8 +3 1 0 0 2 4 10 9 +4 1 0 -1 2 -1 8 6 +4 1 0 -1 -2 0 9 9 +5 1 0 0 1 3 7 8 +5 1 0 -1 2 1 5 7 +6 1 0 3 3 -1 7 6 +6 1 0 -1 6 0 5 8 +7 1 0 -1 3 -1 5 9 +7 1 0 1 3 -1 8 -3 +8 1 0 -2 6 -1 7 7 +8 1 0 -1 -1 3 8 8 +9 1 0 0 3 0 14 7 +9 1 0 -1 2 3 6 6 +10 1 0 -1 3 2 5 8 +10 1 0 1 2 2 8 8 +11 1 0 2 0 3 9 7 +11 1 0 0 2 0 5 6 +12 1 0 1 3 2 7 3 +12 1 0 -1 1 -1 7 7 +13 1 0 -1 5 -1 4 5 +13 1 0 1 -1 2 7 9 +14 1 0 1 3 2 10 7 +14 1 0 1 2 2 8 7 +15 1 0 -3 5 3 10 6 +15 1 0 -1 7 -1 8 7 +16 1 0 0 3 3 4 7 +16 1 0 1 5 -1 8 7 +17 1 0 1 2 2 9 3 +17 1 0 -1 3 -1 2 -3 +18 1 0 0 5 2 10 -2 +18 1 0 0 -1 2 7 7 +19 1 0 1 6 -1 5 8 +19 1 0 2 6 -1 2 5 +20 1 0 1 -1 3 14 -1 +20 1 0 -1 2 0 2 6 +21 1 0 -1 2 -1 8 8 +21 1 0 1 6 0 2 7 +22 1 0 -1 3 3 14 -3 +22 1 0 1 2 0 8 3 +23 1 0 2 5 2 7 5 +23 1 0 0 2 0 8 7 +24 1 0 -1 2 0 2 8 +24 1 0 1 1 0 2 8 +25 1 0 -1 2 0 6 -3 +25 1 0 -1 0 5 10 7 +26 1 0 2 3 0 10 8 +26 1 0 2 7 0 7 8 +27 1 0 1 2 -1 10 7 +27 1 0 1 3 -1 10 8 +28 1 0 -1 2 2 8 5 +28 1 0 -1 -1 3 6 -1 +29 1 0 2 2 5 10 -3 +29 1 0 -1 2 1 7 9 +30 1 0 1 7 0 10 7 +30 1 0 0 3 3 10 7 +31 1 0 0 3 0 10 8 +31 1 0 -1 3 1 8 7 +32 1 0 1 2 3 7 8 +32 1 0 -1 2 3 10 -1 +33 1 0 -1 0 -1 7 6 +33 1 0 1 2 0 7 9 +34 1 0 -3 3 0 5 7 +34 1 0 -1 2 -1 4 8 +35 1 0 -1 6 2 7 9 +35 1 0 -1 2 2 10 7 +36 1 0 0 5 0 14 6 +36 1 0 1 2 2 6 5 +37 1 0 1 7 4 8 9 +37 1 0 0 5 3 10 8 +38 1 0 -1 -2 2 8 -3 +38 1 0 2 6 2 6 7 +39 1 0 1 0 1 14 9 +39 1 0 1 2 3 8 7 +40 1 0 2 2 2 5 -2 +40 1 0 0 -1 3 4 7 +41 1 0 -1 7 0 2 8 +41 1 0 -1 5 0 13 8 +42 1 0 -1 2 -1 9 -2 +42 1 0 0 2 3 14 9 +43 1 0 1 6 1 4 8 +43 1 0 0 1 0 5 6 +44 1 0 -1 2 0 10 8 +44 1 0 2 4 0 4 5 +45 1 0 -1 3 2 5 7 +45 1 0 -3 3 0 9 7 +46 1 0 -1 3 2 10 7 +46 1 0 -1 2 2 8 8 +47 1 0 0 2 4 10 8 +47 1 0 1 2 -1 5 5 +48 1 0 3 1 2 10 7 +48 1 0 1 2 0 6 8 +49 1 0 0 -1 0 7 7 +49 1 0 2 0 -1 14 6 +50 1 0 1 6 3 5 6 +50 
1 0 2 3 3 5 7 +51 1 1 -1 3 3 9 8 +51 1 1 -1 2 2 10 8 +52 1 1 -1 5 0 8 7 +52 1 1 -1 1 2 7 7 +53 1 1 0 3 2 9 7 +53 1 1 -3 -1 5 6 8 +54 1 1 -1 3 4 7 -3 +54 1 1 2 -2 0 10 -3 +55 1 1 0 2 0 13 7 +55 1 1 1 -1 1 10 5 +56 1 1 0 3 3 14 8 +56 1 1 -1 -2 -1 10 7 +57 1 1 2 4 2 7 9 +57 1 1 2 2 -1 7 7 +58 1 1 0 2 0 5 9 +58 1 1 -1 3 -3 13 4 +59 1 1 3 5 0 9 -3 +59 1 1 2 4 2 7 -3 +60 1 1 -2 2 3 8 9 +60 1 1 0 1 1 10 -1 +61 1 1 -1 -1 2 14 7 +61 1 1 -2 5 4 10 -3 +62 1 1 2 5 4 10 -3 +62 1 1 2 2 0 6 8 +63 1 1 2 3 -2 10 8 +63 1 1 0 5 -1 10 -1 +64 1 1 -1 2 0 5 8 +64 1 1 0 3 -2 6 -4 +65 1 1 0 2 3 10 7 +65 1 1 1 2 0 8 -1 +66 1 1 -1 4 1 7 -1 +66 1 1 -1 2 -1 9 5 +67 1 1 0 2 1 14 8 +67 1 1 -1 2 -1 5 8 +68 1 1 -1 3 -1 10 7 +68 1 1 0 3 -1 10 7 +69 1 1 1 2 3 5 3 +69 1 1 1 4 4 9 7 +70 1 1 -1 -1 0 14 -3 +70 1 1 -1 -2 0 10 -3 +71 1 1 1 1 1 10 5 +71 1 1 -3 2 2 13 5 +72 1 1 0 -1 3 4 7 +72 1 1 1 -1 -1 8 5 +73 1 1 2 6 3 8 7 +73 1 1 2 6 3 6 6 +74 1 1 -3 1 2 7 5 +74 1 1 2 -1 0 5 8 +75 1 1 -1 4 3 10 6 +75 1 1 -3 3 -1 7 9 +76 1 1 -1 2 3 9 7 +76 1 1 2 2 0 8 6 +77 1 1 0 -1 1 10 7 +77 1 1 -1 3 0 10 9 +78 1 1 2 3 3 7 7 +78 1 1 2 3 2 7 8 +79 1 1 -1 -1 -2 8 5 +79 1 1 -1 2 3 10 8 +80 1 1 2 -3 1 10 8 +80 1 1 -1 6 4 7 -3 +81 1 1 1 3 0 2 7 +81 1 1 -1 2 3 4 -3 +82 1 1 2 -1 1 14 8 +82 1 1 -1 2 3 10 -1 +83 1 1 0 5 -1 8 -3 +83 1 1 -1 6 2 4 7 +84 1 1 0 -3 2 8 -2 +84 1 1 1 3 4 14 9 +85 1 1 -1 3 2 8 -3 +85 1 1 -1 3 2 4 7 +86 1 1 -1 -1 0 13 8 +86 1 1 -1 -1 3 10 7 +87 1 1 -1 5 -1 9 8 +87 1 1 1 3 -1 6 -3 +88 1 1 1 2 2 5 5 +88 1 1 -1 6 4 10 -2 +89 1 1 0 2 3 14 7 +89 1 1 0 -1 0 7 8 +90 1 1 0 3 3 8 7 +90 1 1 3 3 -1 9 7 +91 1 1 1 -1 2 10 7 +91 1 1 1 1 0 5 5 +92 1 1 2 5 1 5 5 +92 1 1 1 5 -1 10 -3 +93 1 1 -1 3 4 9 -2 +93 1 1 3 4 3 10 8 +94 1 1 3 1 -1 8 7 +94 1 1 0 -1 2 10 8 +95 1 1 3 2 3 5 7 +95 1 1 1 2 1 8 7 +96 1 1 1 -1 3 8 8 +96 1 1 0 2 2 10 -2 +97 1 1 2 3 2 10 7 +97 1 1 0 2 0 9 7 +98 1 1 -1 2 3 8 7 +98 1 1 -1 -1 0 8 -2 +99 1 1 3 0 -1 8 8 +99 1 1 -1 3 -1 14 7 +100 1 1 -1 6 0 6 7 +100 1 1 -1 2 2 2 -3 +101 2 0 -2 1 1 5 -2 +101 2 0 -4 
2 4 3 7 +102 2 0 -3 -2 2 8 -3 +102 2 0 2 2 4 5 -3 +103 2 0 2 3 2 5 -2 +103 2 0 -2 2 4 15 -2 +104 2 0 -3 -4 1 17 4 +104 2 0 -1 3 -1 5 -4 +105 2 0 1 1 0 5 6 +105 2 0 -1 1 1 17 7 +106 2 0 -3 5 4 15 -2 +106 2 0 -3 2 3 6 -1 +107 2 0 1 -3 3 17 7 +107 2 0 1 2 3 7 -3 +108 2 0 -4 -3 1 5 6 +108 2 0 -2 -1 3 6 -3 +109 2 0 -3 3 5 7 -1 +109 2 0 -3 1 2 17 4 +110 2 0 1 2 7 6 -3 +110 2 0 -4 -4 1 4 -3 +111 2 0 -2 0 1 5 -4 +111 2 0 1 -4 3 7 5 +112 2 0 0 5 5 4 -5 +112 2 0 1 -4 3 7 -2 +113 2 0 2 1 5 7 7 +113 2 0 -2 1 0 15 7 +114 2 0 1 -4 1 9 4 +114 2 0 -1 0 4 16 4 +115 2 0 2 -3 1 6 5 +115 2 0 2 2 1 5 -3 +116 2 0 1 2 4 6 6 +116 2 0 -2 -2 3 17 -4 +117 2 0 -2 2 0 3 -2 +117 2 0 -3 1 4 5 -3 +118 2 0 -3 2 0 17 6 +118 2 0 -4 -3 1 17 -1 +119 2 0 2 5 0 5 5 +119 2 0 -3 2 4 5 4 +120 2 0 1 2 1 6 -2 +120 2 0 -1 -3 1 14 -4 +121 2 0 1 0 3 3 -3 +121 2 0 0 1 1 15 -2 +122 2 0 -2 2 7 6 -1 +122 2 0 -3 -4 4 5 6 +123 2 0 1 -3 7 15 5 +123 2 0 -5 -1 1 6 -3 +124 2 0 -1 -3 0 7 -2 +124 2 0 -3 2 4 5 4 +125 2 0 -4 3 4 15 4 +125 2 0 -3 2 1 14 -3 +126 2 0 1 -4 0 5 -1 +126 2 0 -3 -4 7 14 -2 +127 2 0 1 -4 4 6 -3 +127 2 0 1 2 1 15 -2 +128 2 0 1 2 1 15 -1 +128 2 0 -1 1 3 6 -4 +129 2 0 -5 -4 1 15 -3 +129 2 0 3 -4 3 17 4 +130 2 0 1 1 4 6 -1 +130 2 0 -2 2 2 14 6 +131 2 0 -3 3 0 14 -3 +131 2 0 -4 1 0 5 1 +132 2 0 -4 2 6 17 7 +132 2 0 -2 3 4 15 5 +133 2 0 -2 3 4 14 7 +133 2 0 -2 2 -1 15 -2 +134 2 0 -2 -3 2 16 6 +134 2 0 0 3 3 9 4 +135 2 0 -3 2 0 5 7 +135 2 0 -3 -4 1 6 4 +136 2 0 2 -4 1 15 -2 +136 2 0 -2 2 1 14 -2 +137 2 0 1 -4 0 14 7 +137 2 0 -1 1 1 6 -1 +138 2 0 -3 5 0 17 1 +138 2 0 -3 1 4 15 4 +139 2 0 2 2 1 17 2 +139 2 0 1 1 3 6 -4 +140 2 0 -2 2 4 5 -1 +140 2 0 -2 -4 1 3 4 +141 2 0 3 -3 1 3 4 +141 2 0 0 0 0 6 4 +142 2 0 -4 2 4 5 -2 +142 2 0 1 3 0 16 3 +143 2 0 -5 -3 6 15 -3 +143 2 0 1 -4 7 15 5 +144 2 0 2 1 5 7 -2 +144 2 0 1 -3 1 5 5 +145 2 0 -3 2 3 15 5 +145 2 0 1 -4 1 17 7 +146 2 0 -4 2 1 15 -2 +146 2 0 -3 -4 0 15 -1 +147 2 0 1 5 3 6 5 +147 2 0 1 2 4 3 5 +148 2 0 -2 -3 0 17 -3 +148 2 0 -3 -4 5 6 6 +149 2 0 -3 2 0 17 -2 
+149 2 0 -4 2 4 6 -3 +150 2 0 -2 -1 5 5 -3 +150 2 0 1 1 1 14 6 +151 2 1 2 -3 1 5 -2 +151 2 1 -3 -4 1 14 3 +152 2 1 1 2 2 15 -1 +152 2 1 2 2 2 5 7 +153 2 1 -4 2 3 14 -2 +153 2 1 1 -4 1 16 4 +154 2 1 -2 3 3 17 4 +154 2 1 2 1 4 3 -4 +155 2 1 1 3 5 14 -3 +155 2 1 -1 3 2 6 1 +156 2 1 2 3 1 17 -2 +156 2 1 -3 1 2 14 -4 +157 2 1 -4 2 0 14 -3 +157 2 1 2 2 3 17 -4 +158 2 1 -3 2 4 6 7 +158 2 1 0 2 3 15 -2 +159 2 1 1 1 1 14 7 +159 2 1 -2 1 1 5 -2 +160 2 1 -3 2 1 15 -4 +160 2 1 -2 -3 3 14 5 +161 2 1 -1 2 0 5 5 +161 2 1 -2 -3 0 5 -4 +162 2 1 1 -1 0 17 4 +162 2 1 -4 -1 1 5 -2 +163 2 1 1 2 0 5 -1 +163 2 1 -4 1 0 5 -2 +164 2 1 -3 1 0 16 -3 +164 2 1 1 2 1 7 -3 +165 2 1 -4 1 2 5 1 +165 2 1 -2 -3 1 15 6 +166 2 1 -2 1 3 5 -3 +166 2 1 -1 -4 1 14 6 +167 2 1 -4 1 1 15 -2 +167 2 1 -1 2 2 15 -2 +168 2 1 1 0 7 3 -2 +168 2 1 1 2 1 5 -3 +169 2 1 -3 -4 1 14 -3 +169 2 1 -2 -4 1 15 -3 +170 2 1 0 -4 0 5 7 +170 2 1 1 -4 3 17 -2 +171 2 1 -3 2 1 6 -2 +171 2 1 -2 2 4 5 4 +172 2 1 -2 2 3 14 1 +172 2 1 -3 2 4 3 0 +173 2 1 -3 2 4 17 1 +173 2 1 -3 2 1 17 -3 +174 2 1 1 -4 3 16 -3 +174 2 1 -1 3 3 6 7 +175 2 1 -4 2 3 17 7 +175 2 1 -2 3 0 6 -3 +176 2 1 1 -4 3 3 5 +176 2 1 0 -4 1 5 -2 +177 2 1 1 3 0 5 -3 +177 2 1 -2 -4 4 14 -3 +178 2 1 1 2 1 16 -1 +178 2 1 1 -1 3 15 -2 +179 2 1 1 -3 0 17 4 +179 2 1 -4 2 4 3 -4 +180 2 1 2 0 1 6 -2 +180 2 1 -2 2 3 17 -2 +181 2 1 -3 2 4 16 -2 +181 2 1 -4 1 4 8 -3 +182 2 1 0 2 1 3 -3 +182 2 1 1 3 4 6 -1 +183 2 1 -2 2 -1 17 -2 +183 2 1 -3 -3 7 14 -3 +184 2 1 1 1 1 3 5 +184 2 1 2 1 1 6 4 +185 2 1 -4 -4 3 7 -2 +185 2 1 1 3 1 17 2 +186 2 1 -1 3 4 14 -2 +186 2 1 -3 -3 2 5 -2 +187 2 1 0 2 4 15 -2 +187 2 1 -2 2 1 16 -4 +188 2 1 -2 2 3 17 -2 +188 2 1 -4 1 4 7 -4 +189 2 1 -1 -3 2 15 5 +189 2 1 -3 -3 3 9 4 +190 2 1 -3 2 1 17 -2 +190 2 1 -2 -4 0 14 -4 +191 2 1 1 3 2 15 6 +191 2 1 2 1 1 5 -1 +192 2 1 -3 5 1 7 -3 +192 2 1 3 2 3 6 -2 +193 2 1 -2 -3 4 9 -4 +193 2 1 -4 3 0 5 -3 +194 2 1 -4 2 7 5 4 +194 2 1 1 2 1 8 5 +195 2 1 1 2 3 3 -1 +195 2 1 -2 2 1 17 5 +196 2 1 -2 3 5 15 -2 +196 2 1 1 2 1 7 
-4 +197 2 1 -2 1 0 5 -3 +197 2 1 -1 2 5 14 5 +198 2 1 2 1 3 6 6 +198 2 1 -3 3 -1 5 -2 +199 2 1 -3 1 3 6 5 +199 2 1 -3 5 1 5 -2 +200 2 1 -3 1 1 16 -3 +200 2 1 1 5 4 15 5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/testdata1_f Wed Nov 15 16:31:24 2017 -0500 @@ -0,0 +1,347 @@ + + +---------------------------------------------------- +STRUCTURE by Pritchard, Stephens and Donnelly (2000) + and Falush, Stephens and Pritchard (2003) + Code by Pritchard, Falush and Hubisz + Version 2.3.4 (Jul 2012) +---------------------------------------------------- + + + +Command line arguments: structure -i /tmp/tmpd8muoH/files/000/dataset_1.dat -o outfile -m /tmp/tmpd8muoH/job_working_directory/000/2/tmpKOW8yP -e /tmp/tmpd8muoH/job_working_directory/000/2/tmpLDe27N +Input File: /tmp/tmpd8muoH/files/000/dataset_1.dat + +Run parameters: + 200 individuals + 5 loci + 2 populations assumed + 10000 Burn-in period + 20000 Reps +RANDOMIZE turned off + + +-------------------------------------------- +Proportion of membership of each pre-defined + population in each of the 2 clusters + +Given Inferred Clusters Number of + Pop 1 2 Individuals + + 1: 0.033 0.967 100 + 2: 0.961 0.039 100 +-------------------------------------------- + +Allele-freq. divergence among pops (Net nucleotide distance), +computed using point estimates of P. 
+ + 1 2 + 1 - 0.0691 + 2 0.0691 - + +Average distances (expected heterozygosity) between individuals in same cluster: +cluster 1 : 0.8378 +cluster 2 : 0.8195 + +-------------------------------------------- +Estimated Ln Prob of Data = -3966.7 +Mean value of ln likelihood = -3924.5 +Variance of ln likelihood = 84.4 +Mean value of alpha = 0.0428 + +Mean value of Fst_1 = 0.0712 +Mean value of Fst_2 = 0.0966 + + +Inferred ancestry of individuals: + Label (%Miss) Pop: Inferred clusters + 1 1 (0) 1 : 0.016 0.984 + 2 2 (0) 1 : 0.011 0.989 + 3 3 (0) 1 : 0.010 0.990 + 4 4 (0) 1 : 0.008 0.992 + 5 5 (0) 1 : 0.044 0.956 + 6 6 (0) 1 : 0.009 0.991 + 7 7 (0) 1 : 0.011 0.989 + 8 8 (0) 1 : 0.021 0.979 + 9 9 (0) 1 : 0.036 0.964 + 10 10 (0) 1 : 0.009 0.991 + 11 11 (0) 1 : 0.047 0.953 + 12 12 (0) 1 : 0.016 0.984 + 13 13 (0) 1 : 0.008 0.992 + 14 14 (0) 1 : 0.009 0.991 + 15 15 (0) 1 : 0.012 0.988 + 16 16 (0) 1 : 0.008 0.992 + 17 17 (0) 1 : 0.012 0.988 + 18 18 (0) 1 : 0.012 0.988 + 19 19 (0) 1 : 0.009 0.991 + 20 20 (0) 1 : 0.040 0.960 + 21 21 (0) 1 : 0.007 0.993 + 22 22 (0) 1 : 0.048 0.952 + 23 23 (0) 1 : 0.010 0.990 + 24 24 (0) 1 : 0.012 0.988 + 25 25 (0) 1 : 0.041 0.959 + 26 26 (0) 1 : 0.006 0.994 + 27 27 (0) 1 : 0.008 0.992 + 28 28 (0) 1 : 0.024 0.976 + 29 29 (0) 1 : 0.051 0.949 + 30 30 (0) 1 : 0.007 0.993 + 31 31 (0) 1 : 0.009 0.991 + 32 32 (0) 1 : 0.019 0.981 + 33 33 (0) 1 : 0.013 0.987 + 34 34 (0) 1 : 0.022 0.978 + 35 35 (0) 1 : 0.006 0.994 + 36 36 (0) 1 : 0.173 0.827 + 37 37 (0) 1 : 0.008 0.992 + 38 38 (0) 1 : 0.010 0.990 + 39 39 (0) 1 : 0.062 0.938 + 40 40 (0) 1 : 0.044 0.956 + 41 41 (0) 1 : 0.006 0.994 + 42 42 (0) 1 : 0.030 0.970 + 43 43 (0) 1 : 0.050 0.950 + 44 44 (0) 1 : 0.008 0.992 + 45 45 (0) 1 : 0.026 0.974 + 46 46 (0) 1 : 0.006 0.994 + 47 47 (0) 1 : 0.027 0.973 + 48 48 (0) 1 : 0.018 0.982 + 49 49 (0) 1 : 0.014 0.986 + 50 50 (0) 1 : 0.046 0.954 + 51 51 (0) 1 : 0.007 0.993 + 52 52 (0) 1 : 0.010 0.990 + 53 53 (0) 1 : 0.036 0.964 + 54 54 (0) 1 : 0.024 0.976 + 55 55 (0) 1 : 
0.019 0.981 + 56 56 (0) 1 : 0.008 0.992 + 57 57 (0) 1 : 0.007 0.993 + 58 58 (0) 1 : 0.131 0.869 + 59 59 (0) 1 : 0.016 0.984 + 60 60 (0) 1 : 0.187 0.813 + 61 61 (0) 1 : 0.099 0.901 + 62 62 (0) 1 : 0.032 0.968 + 63 63 (0) 1 : 0.008 0.992 + 64 64 (0) 1 : 0.138 0.862 + 65 65 (0) 1 : 0.017 0.983 + 66 66 (0) 1 : 0.025 0.975 + 67 67 (0) 1 : 0.026 0.974 + 68 68 (0) 1 : 0.006 0.994 + 69 69 (0) 1 : 0.078 0.922 + 70 70 (0) 1 : 0.014 0.986 + 71 71 (0) 1 : 0.252 0.748 + 72 72 (0) 1 : 0.008 0.992 + 73 73 (0) 1 : 0.011 0.989 + 74 74 (0) 1 : 0.079 0.921 + 75 75 (0) 1 : 0.014 0.986 + 76 76 (0) 1 : 0.012 0.988 + 77 77 (0) 1 : 0.009 0.991 + 78 78 (0) 1 : 0.008 0.992 + 79 79 (0) 1 : 0.007 0.993 + 80 80 (0) 1 : 0.147 0.853 + 81 81 (0) 1 : 0.015 0.985 + 82 82 (0) 1 : 0.032 0.968 + 83 83 (0) 1 : 0.007 0.993 + 84 84 (0) 1 : 0.265 0.735 + 85 85 (0) 1 : 0.008 0.992 + 86 86 (0) 1 : 0.006 0.994 + 87 87 (0) 1 : 0.013 0.987 + 88 88 (0) 1 : 0.090 0.910 + 89 89 (0) 1 : 0.011 0.989 + 90 90 (0) 1 : 0.007 0.993 + 91 91 (0) 1 : 0.036 0.964 + 92 92 (0) 1 : 0.053 0.947 + 93 93 (0) 1 : 0.021 0.979 + 94 94 (0) 1 : 0.008 0.992 + 95 95 (0) 1 : 0.069 0.931 + 96 96 (0) 1 : 0.017 0.983 + 97 97 (0) 1 : 0.008 0.992 + 98 98 (0) 1 : 0.017 0.983 + 99 99 (0) 1 : 0.008 0.992 +100 100 (0) 1 : 0.012 0.988 +101 101 (0) 2 : 0.991 0.009 +102 102 (0) 2 : 0.343 0.657 +103 103 (0) 2 : 0.979 0.021 +104 104 (0) 2 : 0.959 0.041 +105 105 (0) 2 : 0.929 0.071 +106 106 (0) 2 : 0.988 0.012 +107 107 (0) 2 : 0.931 0.069 +108 108 (0) 2 : 0.984 0.016 +109 109 (0) 2 : 0.979 0.021 +110 110 (0) 2 : 0.977 0.023 +111 111 (0) 2 : 0.986 0.014 +112 112 (0) 2 : 0.726 0.274 +113 113 (0) 2 : 0.903 0.097 +114 114 (0) 2 : 0.977 0.023 +115 115 (0) 2 : 0.983 0.017 +116 116 (0) 2 : 0.970 0.030 +117 117 (0) 2 : 0.992 0.008 +118 118 (0) 2 : 0.992 0.008 +119 119 (0) 2 : 0.965 0.035 +120 120 (0) 2 : 0.986 0.014 +121 121 (0) 2 : 0.983 0.017 +122 122 (0) 2 : 0.992 0.008 +123 123 (0) 2 : 0.989 0.011 +124 124 (0) 2 : 0.977 0.023 +125 125 (0) 2 : 0.991 0.009 
+126 126 (0) 2 : 0.993 0.007 +127 127 (0) 2 : 0.993 0.007 +128 128 (0) 2 : 0.982 0.018 +129 129 (0) 2 : 0.991 0.009 +130 130 (0) 2 : 0.976 0.024 +131 131 (0) 2 : 0.986 0.014 +132 132 (0) 2 : 0.985 0.015 +133 133 (0) 2 : 0.936 0.064 +134 134 (0) 2 : 0.897 0.103 +135 135 (0) 2 : 0.987 0.013 +136 136 (0) 2 : 0.992 0.008 +137 137 (0) 2 : 0.940 0.060 +138 138 (0) 2 : 0.992 0.008 +139 139 (0) 2 : 0.989 0.011 +140 140 (0) 2 : 0.994 0.006 +141 141 (0) 2 : 0.976 0.024 +142 142 (0) 2 : 0.978 0.022 +143 143 (0) 2 : 0.993 0.007 +144 144 (0) 2 : 0.983 0.017 +145 145 (0) 2 : 0.986 0.014 +146 146 (0) 2 : 0.993 0.007 +147 147 (0) 2 : 0.960 0.040 +148 148 (0) 2 : 0.991 0.009 +149 149 (0) 2 : 0.990 0.010 +150 150 (0) 2 : 0.980 0.020 +151 151 (0) 2 : 0.990 0.010 +152 152 (0) 2 : 0.665 0.335 +153 153 (0) 2 : 0.993 0.007 +154 154 (0) 2 : 0.991 0.009 +155 155 (0) 2 : 0.733 0.267 +156 156 (0) 2 : 0.984 0.016 +157 157 (0) 2 : 0.983 0.017 +158 158 (0) 2 : 0.955 0.045 +159 159 (0) 2 : 0.986 0.014 +160 160 (0) 2 : 0.992 0.008 +161 161 (0) 2 : 0.970 0.030 +162 162 (0) 2 : 0.972 0.028 +163 163 (0) 2 : 0.984 0.016 +164 164 (0) 2 : 0.981 0.019 +165 165 (0) 2 : 0.992 0.008 +166 166 (0) 2 : 0.981 0.019 +167 167 (0) 2 : 0.984 0.016 +168 168 (0) 2 : 0.991 0.009 +169 169 (0) 2 : 0.994 0.006 +170 170 (0) 2 : 0.965 0.035 +171 171 (0) 2 : 0.992 0.008 +172 172 (0) 2 : 0.991 0.009 +173 173 (0) 2 : 0.993 0.007 +174 174 (0) 2 : 0.879 0.121 +175 175 (0) 2 : 0.974 0.026 +176 176 (0) 2 : 0.987 0.013 +177 177 (0) 2 : 0.985 0.015 +178 178 (0) 2 : 0.984 0.016 +179 179 (0) 2 : 0.993 0.007 +180 180 (0) 2 : 0.989 0.011 +181 181 (0) 2 : 0.974 0.026 +182 182 (0) 2 : 0.973 0.027 +183 183 (0) 2 : 0.981 0.019 +184 184 (0) 2 : 0.991 0.009 +185 185 (0) 2 : 0.987 0.013 +186 186 (0) 2 : 0.969 0.031 +187 187 (0) 2 : 0.989 0.011 +188 188 (0) 2 : 0.991 0.009 +189 189 (0) 2 : 0.952 0.048 +190 190 (0) 2 : 0.993 0.007 +191 191 (0) 2 : 0.951 0.049 +192 192 (0) 2 : 0.920 0.080 +193 193 (0) 2 : 0.981 0.019 +194 194 (0) 2 : 0.963 
0.037 +195 195 (0) 2 : 0.990 0.010 +196 196 (0) 2 : 0.987 0.013 +197 197 (0) 2 : 0.953 0.047 +198 198 (0) 2 : 0.821 0.179 +199 199 (0) 2 : 0.987 0.013 +200 200 (0) 2 : 0.989 0.011 + + +Estimated Allele Frequencies in each cluster +First column gives estimated ancestral frequencies + + +Locus 1 : +9 alleles +0.0% missing data + 0 (0.115) 0.049 0.191 + 1 (0.219) 0.245 0.208 + -1 (0.160) 0.077 0.357 + 3 (0.065) 0.018 0.041 + -2 (0.099) 0.189 0.015 + 2 (0.133) 0.088 0.144 + -3 (0.125) 0.206 0.039 + -4 (0.052) 0.112 0.002 + -5 (0.032) 0.016 0.001 + +Locus 2 : +12 alleles +0.0% missing data + 1 (0.117) 0.170 0.060 + -1 (0.082) 0.033 0.123 + 2 (0.241) 0.330 0.306 + 5 (0.083) 0.042 0.091 + -2 (0.046) 0.009 0.030 + 3 (0.141) 0.112 0.216 + 6 (0.038) 0.003 0.073 + 0 (0.064) 0.033 0.031 + 7 (0.031) 0.002 0.025 + 4 (0.033) 0.002 0.035 + -3 (0.068) 0.112 0.008 + -4 (0.056) 0.153 0.003 + +Locus 3 : +11 alleles +0.0% missing data + 3 (0.167) 0.173 0.200 + -1 (0.081) 0.021 0.187 + 2 (0.117) 0.065 0.208 + 0 (0.174) 0.157 0.236 + 4 (0.122) 0.175 0.056 + 1 (0.155) 0.309 0.071 + 5 (0.065) 0.045 0.019 + -3 (0.025) 0.002 0.005 + -2 (0.029) 0.002 0.015 + 7 (0.036) 0.040 0.002 + 6 (0.028) 0.011 0.001 + +Locus 4 : +14 alleles +0.0% missing data + 8 (0.074) 0.015 0.168 + 7 (0.108) 0.061 0.140 + 6 (0.120) 0.145 0.067 + 9 (0.075) 0.022 0.080 + 2 (0.036) 0.002 0.049 + 10 (0.046) 0.004 0.224 + 5 (0.155) 0.208 0.112 + 14 (0.109) 0.123 0.073 + 4 (0.055) 0.011 0.052 + 13 (0.033) 0.003 0.024 + 3 (0.042) 0.069 0.002 + 15 (0.057) 0.149 0.004 + 17 (0.050) 0.140 0.002 + 16 (0.039) 0.049 0.002 + +Locus 5 : +15 alleles +0.0% missing data + 9 (0.037) 0.003 0.082 + -3 (0.131) 0.181 0.116 + 7 (0.131) 0.078 0.312 + 8 (0.043) 0.003 0.215 + 6 (0.085) 0.066 0.071 + 3 (0.046) 0.013 0.021 + 5 (0.106) 0.105 0.082 + -2 (0.117) 0.226 0.041 + -1 (0.083) 0.080 0.042 + 4 (0.055) 0.108 0.006 + -4 (0.050) 0.085 0.005 + -5 (0.024) 0.005 0.002 + 1 (0.037) 0.030 0.002 + 2 (0.028) 0.011 0.001 + 0 (0.025) 0.006 0.001 + +Values 
of parameters used in structure: +DATAFILE=/tmp/tmpd8muoH/files/000/dataset_1.dat, OUTFILE=outfile, NUMINDS=200, NUMLOCI=5, MISSING=-999, LABEL=1, POPDATA=1, POPFLAG=0, PHENOTYPE=0, EXTRACOLS=0, MAXPOPS=2, BURNIN=10000, NUMREPS=20000, USEPOPINFO=0, INFERALPHA=1, INFERLAMBDA=0, POPSPECIFICLAMBDA=0, POPALPHAS=0, COMPUTEPROB=1, NOADMIX=0, ADMBURNIN=500, UPDATEFREQ=100, PRINTLIKES=0, INTERMEDSAVE=0, PRINTKLD=0, PRINTNET=1, PRINTLAMBDA=1, ANCESTDIST=0, NUMBOXES=1000, ANCESTPINT=0.90000, GENSBACK=2, MIGRPRIOR=0.01000, PRINTQHAT=0, PRINTQSUM=1, ALPHA=1.0000, FREQSCORR=1, FPRIORMEAN=0.0100, FPRIORSD=0.0500, ONEFST=0, LAMBDA=1.0000, UNIFPRIORALPHA=1, ALPHAMAX=10.0000, ALPHAPRIORA=1.0000, ALPHAPRIORB=2.0000, ALPHAPROPSD=0.0250, STARTATPOPINFO=0, RANDOMIZE=0, LINKAGE=0, METROFREQ=10, REPORTHITRATE=0, MARKOVPHASE=0, PHASED=0, PLOIDY=2, PHASEINFO=0 LOCPRIOR=0, LOCPRIORINIT=1.000000, LOCDATA=1, LOCISPOP=1, LOCPRIORSTEP=0.100000, MAXLOCPRIOR=20.000000, SEED=2245, +[STRAT parameters]: NUMSIMSTATS=1000, PHENOTYPECOL=-9, POOLFREQ=10, LOCUSxONLY=0, EMERROR=0.00100, MISSINGPHENO=-9, \ No newline at end of file