#!/usr/bin/env python
"""
Structure is a script for model-based clustering method for inferring population structure using genotype data

Galaxy wrapper: runs the ``structure`` program once per requested K value
(optionally three times each), then hands the result files back to Galaxy
either as one zip archive or as individual "primary" datasets.

Created by Yvan LE BRAS
"""
import optparse, os, sys, subprocess, tempfile, glob, shutil
import zipfile, tarfile, gzip
from os.path import basename

# Every command line option of the wrapper; each one takes a single string.
_OPTION_NAMES = (
    "input", "param", "extraparam",
    # multifile management
    "K", "k2", "k3", "k4", "k5", "k6", "k7", "k8", "k9", "k10",
    "t", "N", "L", "D",
    # output management (--logfile is accepted but not used by this script)
    "logfile", "id", "workdir", "compress_output",
    # additional outputs
    "total_output",
)

# (option holding the K value, stem of the result file), in execution order.
# The stems are runtime file names and are kept exactly as they were,
# including the "eigth" spelling.
_K_RUNS = (
    ("K", "outfile"),
    ("k2", "outfilesecondk"),
    ("k3", "outfilethirdk"),
    ("k4", "outfilefourthk"),
    ("k5", "outfilefifthk"),
    ("k6", "outfilesixthk"),
    ("k7", "outfileseventhk"),
    ("k8", "outfileeigthk"),
    ("k9", "outfileninthk"),
    ("k10", "outfiletenthk"),
)

# Suffixes of the two extra runs done for every K when --t is 'true'.
_REPEAT_SUFFIXES = ("_run2_f", "_run3_f")


def _build_commands(options, tmp_output_dir):
    """Return the ``structure`` invocations to run, in order, as argv lists.

    options        -- parsed command line (attributes named as in
                      _OPTION_NAMES; a missing attribute counts as unset)
    tmp_output_dir -- directory that receives the ``-o`` result files

    Order of the returned commands:
      1. a run without -K/-o, only when --t is 'true' and --K is unset;
      2. one ``<stem>_f`` run per K value that is set;
      3. when --t is 'true', a ``_run2_f`` and a ``_run3_f`` run per K value.
    """
    base = ["structure",
            "-m", options.param,
            "-e", options.extraparam,
            "-i", options.input]
    for flag, name in (("-N", "N"), ("-L", "L"), ("-D", "D")):
        value = getattr(options, name, None)
        if value:
            base += [flag, value]

    several_runs = getattr(options, "t", None) == "true"
    k_values = [(name, stem, getattr(options, name, None))
                for name, stem in _K_RUNS]

    def with_k(k_value, out_name):
        return base + ["-K", k_value,
                       "-o", os.path.join(tmp_output_dir, out_name)]

    commands = []
    if several_runs and not getattr(options, "K", None):
        commands.append(list(base))

    for name, stem, k_value in k_values:
        if not k_value:
            continue
        # NOTE(review): the first run for --K has always been gated on
        # --t == 'true', unlike --k2..--k10 which run unconditionally.  That
        # looks unintended but is preserved here -- confirm before changing.
        if name == "K" and not several_runs:
            continue
        commands.append(with_k(k_value, stem + "_f"))

    if several_runs:
        for name, stem, k_value in k_values:
            if k_value:
                for suffix in _REPEAT_SUFFIXES:
                    commands.append(with_k(k_value, stem + suffix))
    return commands


def _run_structure(argv):
    """Run one command with stderr merged into stdout; return its status.

    A non-zero status (or a missing executable) is reported on stdout and
    does not abort the remaining runs.
    """
    sys.stdout.write("\n[INFO] : " + " ".join(argv) + "\n")
    # flush so our log line is not reordered after the child's output
    sys.stdout.flush()
    try:
        returncode = subprocess.call(argv, stderr=subprocess.STDOUT)
    except OSError as error:
        sys.stdout.write("[WARNING] : could not start %s (%s)\n"
                         % (argv[0], error))
        return 127
    if returncode != 0:
        sys.stdout.write("[WARNING] : %s exited with status %d\n"
                         % (argv[0], returncode))
    return returncode


def _collect_outputs(options, tmp_output_dir):
    """Deliver the files found in tmp_output_dir according to --compress_output.

    'total'   -- zip every file (stored under its base name) and move the
                 archive to --total_output;
    'default' -- move every file to
                 <workdir>/primary_<id>_<name with '_' -> '.'>_visible_txt.
    Any other value leaves the files where they are.
    """
    # listed before the archive is created, so the archive never holds itself
    produced = sorted(glob.glob(os.path.join(tmp_output_dir, "*")))

    if options.compress_output == 'total':
        archive_path = os.path.join(tmp_output_dir, 'total.zip.temp')
        archive = zipfile.ZipFile(archive_path, 'w')
        try:
            for path in produced:
                archive.write(path, os.path.basename(path))
        finally:
            # the central directory is only written on close(); it must be
            # on disk before the archive is moved
            archive.close()
        shutil.move(archive_path, options.total_output)

    if options.compress_output == 'default':
        for path in produced:
            visible_name = os.path.basename(path).replace("_", ".")
            target = os.path.join(
                options.workdir,
                "primary_" + options.id + "_" + visible_name + "_visible_txt")
            shutil.move(path, target)


def __main__():
    """Parse the command line, run every requested job, collect the results."""
    # arguments recuperation
    parser = optparse.OptionParser()
    for name in _OPTION_NAMES:
        parser.add_option("--" + name)
    (options, args) = parser.parse_args()

    missing = [name for name in ("input", "param", "extraparam")
               if not getattr(options, name)]
    if missing:
        parser.error("missing required option(s): "
                     + ", ".join("--" + name for name in missing))

    # create the working dir
    tmp_dir = tempfile.mkdtemp(dir=options.workdir)
    try:
        tmp_output_dir = tempfile.mkdtemp(dir=tmp_dir)
        print(tmp_dir)

        # one job per K value; all results land in tmp_output_dir
        for argv in _build_commands(options, tmp_output_dir):
            _run_structure(argv)

        _collect_outputs(options, tmp_output_dir)
    finally:
        # clean up temp files, also when something above failed
        shutil.rmtree(tmp_dir)


if __name__ == "__main__":
    __main__()
$options_pops.options_popsvar.options_popsnumber.k2 +--k3 $options_pops.options_popsvar.options_popsnumber.k3 +--k4 $options_pops.options_popsvar.options_popsnumber.k4 +--k5 $options_pops.options_popsvar.options_popsnumber.k5 +--k6 $options_pops.options_popsvar.options_popsnumber.k6 +#end if +#if str( $options_pops.options_popsvar.options_popsnumber.options_popsnumber_selector ) == "7": +--k2 $options_pops.options_popsvar.options_popsnumber.k2 +--k3 $options_pops.options_popsvar.options_popsnumber.k3 +--k4 $options_pops.options_popsvar.options_popsnumber.k4 +--k5 $options_pops.options_popsvar.options_popsnumber.k5 +--k6 $options_pops.options_popsvar.options_popsnumber.k6 +--k7 $options_pops.options_popsvar.options_popsnumber.k7 +#end if +#if str( $options_pops.options_popsvar.options_popsnumber.options_popsnumber_selector ) == "8": +--k2 $options_pops.options_popsvar.options_popsnumber.k2 +--k3 $options_pops.options_popsvar.options_popsnumber.k3 +--k4 $options_pops.options_popsvar.options_popsnumber.k4 +--k5 $options_pops.options_popsvar.options_popsnumber.k5 +--k6 $options_pops.options_popsvar.options_popsnumber.k6 +--k7 $options_pops.options_popsvar.options_popsnumber.k7 +--k8 $options_pops.options_popsvar.options_popsnumber.k8 +#end if +#if str( $options_pops.options_popsvar.options_popsnumber.options_popsnumber_selector ) == "9": +--k2 $options_pops.options_popsvar.options_popsnumber.k2 +--k3 $options_pops.options_popsvar.options_popsnumber.k3 +--k4 $options_pops.options_popsvar.options_popsnumber.k4 +--k5 $options_pops.options_popsvar.options_popsnumber.k5 +--k6 $options_pops.options_popsvar.options_popsnumber.k6 +--k7 $options_pops.options_popsvar.options_popsnumber.k7 +--k8 $options_pops.options_popsvar.options_popsnumber.k8 +--k9 $options_pops.options_popsvar.options_popsnumber.k9 +#end if +#if str( $options_pops.options_popsvar.options_popsnumber.options_popsnumber_selector ) == "10": +--k2 $options_pops.options_popsvar.options_popsnumber.k2 +--k3 
$options_pops.options_popsvar.options_popsnumber.k3 +--k4 $options_pops.options_popsvar.options_popsnumber.k4 +--k5 $options_pops.options_popsvar.options_popsnumber.k5 +--k6 $options_pops.options_popsvar.options_popsnumber.k6 +--k7 $options_pops.options_popsvar.options_popsnumber.k7 +--k8 $options_pops.options_popsvar.options_popsnumber.k8 +--k9 $options_pops.options_popsvar.options_popsnumber.k9 +--k10 $options_pops.options_popsvar.options_popsnumber.k10 +#end if +#end if +#end if +--t $sevruns +#if str( $options_ind.options_ind_selector ) == "yes": +--N $options_ind.N_value +#end if +#if str( $options_loci.options_loci_selector ) == "yes": +--L $options_loci.L_value +#end if +#if str( $options_seed.options_seed_selector ) == "yes": +--D $options_seed.D_value +#end if +--logfile $output +--id $output.id +--workdir $__new_file_path__ +--compress_output $output_compress +--total_output $total_output + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + output_compress == "total" + + + + + +.. class:: infomark + +**What it does** + +The program structure implements a model-based clustering method for inferring population structure using genotype data consisting of unlinked markers. The method was introduced in a paper by Pritchard, Stephens and Donnelly (2000) and extended in sequels by Falush, Stephens and Pritchard (2003, 2007). Applications of our method include demonstrating the presence of population structure, identifying distinct genetic populations, assigning individuals to populations, and identifying migrants and admixed individuals. + + +Input file need to be in structure data format. + + +The output files resume standard output (log) and results. + +-------- + + +**References:** + +Pritchard, J. 
#!/usr/bin/env python
"""
Galaxy wrapper around structureHarvester.py.

Unpacks the STRUCTURE result archives listed in the Galaxy config file,
runs structureHarvester.py on them and hands the results back to Galaxy.

Created by Yvan Le Bras
yvan.le_bras@irisa.fr

Last modifications : 01/10/2014
"""

import sys, re
import os
import tempfile
import shutil, subprocess, glob
import optparse
from os.path import basename
import zipfile, tarfile, gzip
from galaxy.datatypes.checkers import *
from adlib import *

# Development install; used when present, otherwise the script is looked up
# on PATH (the "dependencies installed" setup).
_DEV_HARVESTER = '/local/galaxy/structureharv/structureHarvester.py'


def _harvester_executable():
    """Return the structureHarvester.py to run (dev path first, then PATH)."""
    if os.path.exists(_DEV_HARVESTER):
        return _DEV_HARVESTER
    return 'structureHarvester.py'


def _collect_outputs(options, tmp_output_dir):
    """Deliver the files found in tmp_output_dir according to --compress_output.

    'total'        -- zip every file (stored under its base name) and move
                      the archive to --total_output;
    anything else  -- move every file to
                      <workdir>/primary_<id>_<name with '_' -> '.'>_visible_tabular.
    """
    # listed before the archive is created, so the archive never holds itself
    produced = sorted(glob.glob(os.path.join(tmp_output_dir, '*')))

    if options.compress_output == 'total':
        archive_path = os.path.join(tmp_output_dir, 'total.zip.temp')
        archive = zipfile.ZipFile(archive_path, 'w')
        try:
            for path in produced:
                archive.write(path, os.path.basename(path))
        finally:
            # the central directory is only written on close()
            archive.close()
        shutil.move(archive_path, options.total_output)
    else:
        for path in produced:
            visible_name = os.path.basename(path).replace("_", ".")
            target = os.path.join(
                options.workdir,
                "primary_" + options.id + "_" + visible_name
                + "_visible_tabular")
            shutil.move(path, target)


def __main__():
    """Parse the command line, run structureHarvester.py, collect results."""
    # arguments recuperation
    parser = optparse.OptionParser()
    for flag in ("-P", "--evanno", "--clumpp",
                 # multifile management
                 "--logfile",
                 # output management
                 "--id", "--workdir", "--compress_output",
                 # additional outputs
                 "--total_output"):
        parser.add_option(flag)
    (options, args) = parser.parse_args()

    # create the working dir
    tmp_dir = tempfile.mkdtemp(dir=options.workdir)
    try:
        tmp_output_dir = tempfile.mkdtemp(dir=tmp_dir)
        print(tmp_dir)

        # parse the config file, then unpack any archive it lists into tmp_dir
        tab_files = galaxy_config_to_tabfiles_for_STACKS(options.P)
        extract_compress_files_from_tabfiles(tab_files, tmp_dir)

        # build the structureHarvester command as an argument list so that
        # paths are passed verbatim (no shell involved)
        argv = [_harvester_executable(),
                "--dir", tmp_dir, "--out", tmp_output_dir]
        if options.evanno == 'true':
            argv.append("--evanno")
        if options.clumpp == 'true':
            argv.append("--clumpp")

        # flush so the child's output is not reordered before ours
        sys.stdout.flush()
        try:
            returncode = subprocess.call(argv, stderr=subprocess.STDOUT)
        except OSError as error:
            returncode = 127
            sys.stdout.write("[WARNING] : could not start %s (%s)\n"
                             % (argv[0], error))
        if returncode != 0:
            sys.stdout.write("[WARNING] : %s exited with status %d\n"
                             % (argv[0], returncode))

        # postprocesses: summary.txt becomes the main Galaxy output
        summary = os.path.join(tmp_output_dir, 'summary.txt')
        if os.path.exists(summary):
            shutil.move(summary, options.logfile)
        else:
            sys.stderr.write('Error in StructureHarvester execution; Please read the additional output (stdout)\n')

        print("\n[INFO] : " + " ".join(argv))

        # hand over everything else, as files or as one archive
        _collect_outputs(options, tmp_output_dir)
    finally:
        # clean up temp files, also when something above failed
        shutil.rmtree(tmp_dir)


if __name__ == "__main__":
    __main__()
Jan 01 00:00:00 1970 +0000 +++ b/structure-923cc9e6aa30/Structureharvester.xml Thu Sep 14 08:33:05 2017 -0400 @@ -0,0 +1,121 @@ + + Run the structureharvester.py script + + + + +#for $input in $Structure_archive: +${input.display_name}::${input} +#end for + + + + + +Structureharvester.py +-P $Structure_archive +#if str( $options_output.options_output_selector ) == "1": +--evanno $options_output.evanno +--clumpp $options_output.clumpp +#end if +--logfile $output +--id $output.id +--workdir $__new_file_path__ +--compress_output $output_compress +--total_output $total_output + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + output_compress == "total" + + + + + + +.. class:: infomark + +**What it does** + +This program (structureHarvester.py) is a Python script capable of extracting all the relevant data from STRUCTURE results files. + +-------- + +**Created by:** + +structureHarvester.py was developed by Dent Earl, soe ucsc edu + +-------- + + +Instructions to add the functionality of archives management in Galaxy on the `eBiogenouest HUB wiki <https://www.e-biogenouest.org/wiki/ManArchiveGalaxy>`_ . + +-------- + +**Project links:** + +`Structure Harvester website <http://users.soe.ucsc.edu/~dearl/software/structureHarvester/>`_ . + +-------- + +**References:** + +-Earl, Dent A. and vonHoldt, Bridgett M. (2012) STRUCTURE HARVESTER: a website and program for visualizing STRUCTURE output and implementing the Evanno method. Conservation Genetics Resources vol. 4 (2) pp. 359-361 doi: 10.1007/s12686-011-9548-7 + +-Evanno et al., 2005. Detecting the number of clusters of individuals using the software STRUCTURE: a simulation study. Molecular Ecology 14 , 2611 - 2620. + +-M. Jakobsson, N. Rosenberg 2007. CLUMPP: a cluster matching and permutation program for dealing with label switching and multimodality in analysis of population structure. Bioinformatics 23(14): 1801-1806. + +-J. Pritchard, M. Stephens, P. Donnelly. 2000. Genetics 155:945-959. 
"""

STACKS METHODS FOR GALAXY

Created by Cyril Monjeaud & Yvan Le Bras
Cyril.Monjeaud@irisa.fr
yvan.le_bras@irisa.fr

Last modifications : 01/22/2014


"""

import os, sys, re
import glob
import collections
import gzip, zipfile, tarfile
import subprocess

try:
    from galaxy.datatypes.checkers import *
except ImportError:
    # Galaxy is not importable (helpers used stand-alone): provide stdlib
    # stand-ins for the two checkers this module calls.
    # NOTE(review): presumably close to Galaxy's own checks -- confirm.
    def check_zip(file_path):
        """Return True when file_path is a zip archive."""
        return zipfile.is_zipfile(file_path)

    def check_gzip(file_path):
        """Return True when file_path starts with the gzip magic bytes."""
        with open(file_path, "rb") as handle:
            return handle.read(2) == b"\x1f\x8b"


# Patterns shared by several helpers (compiled once, raw strings).
_STACKS_NAME = re.compile(r"^STACKS.*\((.*\.[ATCG]*\.fq)\)$")
_BARCODE_IN_NAME = re.compile(r"([ATCG]*)\.fq")
_INFO_LINE = re.compile(r"(^[ATCG]+)\t(.*)\t.*")
_FASTQ_SEQ_ID = re.compile(r"^@([A-Z0-9]+\.[0-9]+)\s.*")
_FASTX_EXTENSIONS = (".fq", ".fastq", ".fa", ".fasta")


# ---------------------------------------------------------------------------
# STACKS COMMON METHODS
#
# galaxy_config_to_tabfiles(input_config)
# galaxy_config_to_tabfiles_for_STACKS(input_config)
# extract_compress_files_from_tabfiles(tab_files, tmp_input_dir)
# create_symlinks_from_tabfiles(tab_files, tmp_input_dir)
# ---------------------------------------------------------------------------

def _sanitize_name(display_name):
    """Turn a Galaxy display name into a file-system friendly name."""
    return (display_name.replace(" (", ".")
                        .replace(" ", ".")
                        .replace(")", "")
                        .replace(":", ".")
                        .replace("/", "."))


def _read_config(input_config, shorten_stacks_names):
    """Parse ``name::path`` lines of input_config into {clean name: path}.

    Blank lines are skipped.  The line is split on its LAST ``::`` so a
    display name that itself contains ``::`` no longer shifts the path.
    A line without ``::`` raises IndexError.
    """
    tab_files = {}
    with open(input_config, "r") as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            fields = line.rsplit("::", 1)
            name, path = fields[0], fields[1]
            if shorten_stacks_names:
                # rename galaxy name in a short name
                parsed = _STACKS_NAME.search(name)
                if parsed:
                    name = parsed.group(1)
            tab_files[_sanitize_name(name)] = path
    return tab_files


def galaxy_config_to_tabfiles(input_config):
    """Return {sanitized display name: path} read from a Galaxy config file.

    input_config -- path of a text file with one ``display name::path`` entry
                    per line.
    """
    # tabfiles[name]-> path
    return _read_config(input_config, False)


def galaxy_config_to_tabfiles_for_STACKS(input_config):
    """Same as galaxy_config_to_tabfiles, with STACKS names shortened.

    A display name of the form ``STACKS ... (<file>.<barcode>.fq)`` is
    replaced by the part between the parentheses before being sanitized.
    """
    # tabfiles[name]-> path
    return _read_config(input_config, True)


def _extract_zip(archive_path, dest_dir):
    """Extract a zip archive into dest_dir; return its member names."""
    archive = zipfile.ZipFile(archive_path, 'r')
    try:
        names = archive.namelist()
        archive.extractall(dest_dir)
    finally:
        archive.close()
    return names


def _extract_tar(archive_path, dest_dir):
    """Extract a tar archive into dest_dir; return its member names.

    Raises ValueError when a member path would land outside dest_dir
    (archives come from users; link members are not inspected).
    """
    archive = tarfile.open(archive_path, 'r')
    try:
        members = archive.getmembers()
        root = os.path.realpath(dest_dir)
        for member in members:
            target = os.path.realpath(os.path.join(root, member.name))
            if target != root and not target.startswith(root + os.sep):
                raise ValueError("archive member %r would be extracted "
                                 "outside %s" % (member.name, dest_dir))
        archive.extractall(dest_dir, members)
    finally:
        archive.close()
    return [member.name for member in members]


def extract_compress_files_from_tabfiles(tab_files, tmp_input_dir):
    """Unpack every archive listed in tab_files into tmp_input_dir.

    tab_files is updated in place: each zip / gzipped-tar entry is removed
    and replaced by one ``member name -> tmp_input_dir/member name`` entry
    per archive member.  Other entries are left untouched.
    """
    # iterate over a snapshot: the dict is modified inside the loop
    for key in list(tab_files):
        path = tab_files[key]
        if check_zip(path):
            members = _extract_zip(path, tmp_input_dir)
        elif tarfile.is_tarfile(path) and check_gzip(path):
            members = _extract_tar(path, tmp_input_dir)
        else:
            continue

        # remove the archive first, so a member that happens to carry the
        # same name as the archive entry is not deleted again
        del tab_files[key]
        for member in members:
            tab_files[member] = tmp_input_dir + "/" + member


def create_symlinks_from_tabfiles(tab_files, tmp_input_dir):
    """Create ``tmp_input_dir/<name>`` symlinks to every path in tab_files.

    Names that already exist in tmp_input_dir (including dangling links)
    are skipped.  Raises OSError when a link cannot be created.
    """
    for key, source in tab_files.items():
        link_path = tmp_input_dir + '/' + key
        if not os.path.lexists(link_path):
            os.symlink(source, link_path)


# ---------------------------------------------------------------------------
# PROCESS RADTAGS METHODS
#
# change_outputs_procrad_name(tmp_output_dir, sample_name)
# generate_additional_archive_file(tmp_output_dir, output_archive)
# ---------------------------------------------------------------------------

def change_outputs_procrad_name(tmp_output_dir, sample_name):
    """Rename every file of tmp_output_dir: '_' -> '.', 'sample' -> sample_name.

    Side effect kept from the original implementation: when the directory
    holds at least one file, the process working directory is changed to
    tmp_output_dir.  NOTE(review): callers may rely on that -- confirm
    before removing it.
    """
    list_files = glob.glob(tmp_output_dir + '/*')
    if not list_files:
        return

    # change directory once; the renames below use names relative to it
    os.chdir(tmp_output_dir)
    for fastq_file in list_files:
        new_fastq_name = os.path.basename(
            fastq_file.replace("_", ".").replace("sample", sample_name))
        os.rename(os.path.basename(fastq_file), new_fastq_name)


def generate_additional_archive_file(tmp_output_dir, output_archive):
    """Zip every file of tmp_output_dir (by base name) into output_archive.

    Same working-directory side effect as change_outputs_procrad_name.
    The archive is closed before returning, so it is complete on disk.
    """
    list_files = glob.glob(tmp_output_dir + '/*')
    myzip = zipfile.ZipFile(output_archive, 'w')
    try:
        if list_files:
            os.chdir(tmp_output_dir)
        # add each file to the archive output, stored under its base name
        for fastq_file in list_files:
            myzip.write(os.path.basename(fastq_file))
    finally:
        myzip.close()


# ---------------------------------------------------------------------------
# DENOVOMAP METHODS
#
# check_fastq_extension_and_add(tab_files, tmp_input_dir)
# ---------------------------------------------------------------------------

def check_fastq_extension_and_add(tab_files, tmp_input_dir):
    """Append '.fasta' or '.fq' to the names that carry no FASTA/FASTQ extension.

    The first line of each such file decides: '>' means FASTA, '@' means
    FASTQ; anything else only triggers a warning on stdout.  tab_files is
    updated in place; tmp_input_dir is not used.
    """
    # iterate over a snapshot: the dict is modified inside the loop
    for key in list(tab_files):
        if key.endswith(_FASTX_EXTENSIONS):
            continue

        # get the header
        with open(tab_files[key], 'r') as myfastxfile:
            line = myfastxfile.readline().strip()

        if line.startswith('>'):
            # fasta rapid test
            tab_files[key + ".fasta"] = tab_files.pop(key)
        elif line.startswith('@'):
            # fastq rapid test
            tab_files[key + ".fq"] = tab_files.pop(key)
        else:
            print("[WARNING] : your input file "+key+" was not extension and is not recognize as a Fasta or Fastq file")


# ---------------------------------------------------------------------------
# REFMAP METHODS
# ---------------------------------------------------------------------------

def check_sam_extension_and_add(tab_files, tmp_input_dir):
    """Append '.sam' to every name of tab_files that lacks it (in place).

    tmp_input_dir is not used.
    """
    # iterate over a snapshot: the dict is modified inside the loop
    for key in list(tab_files):
        if not key.endswith(".sam"):
            tab_files[key + ".sam"] = tab_files.pop(key)


# ---------------------------------------------------------------------------
# PREPARE POPULATION MAP METHODS
#
# generate_popmap_for_denovo(tab_files, infos_file, pop_map)
# generate_popmap_for_refmap(tab_fq_files, tab_sam_files, infos_file, pop_map)
# ---------------------------------------------------------------------------

def _write_popmap(infos_file, pop_map, name_for_barcode, extensions):
    """Write ``<sample name>\\t<population index>`` lines into pop_map.

    infos_file lines look like ``<barcode>\\t<population>\\t<anything>``;
    lines that do not match (blank lines, headers) are skipped.
    Populations are numbered from 0 in order of first appearance.  The
    sample name loses its extension when it contains one of `extensions`,
    because the population map must not carry it.  Raises KeyError when a
    barcode has no entry in name_for_barcode.
    """
    # conversion from population to integer
    pop_index = {}
    with open(infos_file, 'r') as infos, open(pop_map, 'w') as output:
        for line in infos:
            parsed = _INFO_LINE.search(line)
            if parsed is None:
                continue
            barcode, population = parsed.group(1), parsed.group(2)
            index = pop_index.setdefault(population, len(pop_index))

            name = name_for_barcode[barcode]
            if any(extension in name for extension in extensions):
                name = os.path.splitext(name)[0]
            output.write(name + "\t" + str(index) + "\n")


def generate_popmap_for_denovo(tab_files, infos_file, pop_map):
    """Write the population map for a de novo analysis.

    tab_files  -- FASTQ names (keys); each must contain ``<barcode>.fq``
    infos_file -- tab separated ``barcode  population  ...`` file
    pop_map    -- path of the population map to write
    """
    # barcode -> fastq name
    fq_name_for_barcode = {}
    for key in tab_files:
        fq_name_for_barcode[_BARCODE_IN_NAME.search(key).group(1)] = key

    # the second extension test used to read an undefined
    # `sam_name_for_barcode`; both tests now look at the fastq name
    _write_popmap(infos_file, pop_map, fq_name_for_barcode, (".fq", ".fastq"))


def generate_popmap_for_refmap(tab_fq_files, tab_sam_files, infos_file, pop_map):
    """Write the population map for a reference-based analysis.

    Each SAM file is tied to a barcode through its first read: the barcode
    whose FASTQ file contains that read id owns the SAM file.

    tab_fq_files  -- {fastq name containing ``<barcode>.fq``: path}
    tab_sam_files -- {sam name: path}
    infos_file    -- tab separated ``barcode  population  ...`` file
    pop_map       -- path of the population map to write
    """
    ### Parse fastq files: barcode -> set of sequence ids ###
    seq_ids_for_barcode = {}
    for fastq_name, fastq_path in tab_fq_files.items():
        barcode = _BARCODE_IN_NAME.search(fastq_name).group(1)
        seq_ids = set()
        with open(fastq_path, 'r') as open_fastqfile:
            for line in open_fastqfile:
                matched = _FASTQ_SEQ_ID.search(line)
                if matched:
                    seq_ids.add(matched.group(1))
        seq_ids_for_barcode[barcode] = seq_ids

    ### Parse sam files: barcode -> sam name, via the first sequence id ###
    sam_name_for_barcode = {}
    for sam_name, sam_path in tab_sam_files.items():
        first_seq_id = ''
        with open(sam_path, 'r') as open_samfile:
            for line in open_samfile:
                # header lines start with '@'
                if not line.startswith("@"):
                    first_seq_id = line.split("\t")[0]
                    break

        for barcode, seq_ids in seq_ids_for_barcode.items():
            if first_seq_id in seq_ids:
                sam_name_for_barcode[barcode] = sam_name

    _write_popmap(infos_file, pop_map, sam_name_for_barcode, (".sam",))


# ---------------------------------------------------------------------------
# STACKS POPULATION
# ---------------------------------------------------------------------------

def extract_compress_files(myfile, tmp_input_dir):
    """Extract myfile into tmp_input_dir.

    A zip archive is handled with zipfile; anything else is opened with
    tarfile (which raises tarfile.ReadError for a non-tar file).
    """
    if check_zip(myfile):
        _extract_zip(myfile, tmp_input_dir)
    else:
        _extract_tar(myfile, tmp_input_dir)