Next changeset 1:ccfa8e539bdf (2015-08-24) |
Commit message:
first commit |
added:
STACKS_denovomap.py STACKS_denovomap.xml STACKS_genotypes.py STACKS_genotypes.xml STACKS_population.py STACKS_population.xml STACKS_prepare_population_map.py STACKS_prepare_population_map.xml STACKS_procrad.py STACKS_procrad.xml STACKS_refmap.py STACKS_refmap.xml STACKS_sort_read_pairs.py STACKS_sort_read_pairs.xml bwa_index.loc.sample bwa_wrapper.py bwa_wrapper.xml stacks.py tool_data_table_conf.xml.sample tool_dependencies.xml |
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_denovomap.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_denovomap.py Mon Aug 24 09:29:12 2015 +0000 |
[ |
b"@@ -0,0 +1,265 @@\n+#!/usr/bin/python\n+# -*- coding: utf-8 -*-\n+\n+import sys\n+import re\n+import os\n+import tempfile\n+import shutil\n+import subprocess\n+import glob\n+import argparse\n+from os.path import basename\n+import zipfile\n+import tarfile\n+import gzip\n+from galaxy.datatypes.checkers import *\n+from stacks import *\n+\n+\n+def __main__():\n+\n+ # arguments recuperation\n+\n+ parser = argparse.ArgumentParser()\n+ parser.add_argument('-p')\n+ parser.add_argument('-b')\n+ parser.add_argument('-r')\n+ parser.add_argument('-s')\n+ parser.add_argument('-O')\n+ parser.add_argument('-m')\n+ parser.add_argument('-P')\n+ parser.add_argument('-M')\n+ parser.add_argument('-N')\n+ parser.add_argument('-n')\n+ parser.add_argument('-t')\n+ parser.add_argument('-H')\n+ parser.add_argument('--bound_low')\n+ parser.add_argument('--bound_high')\n+ parser.add_argument('--alpha')\n+ parser.add_argument('--logfile')\n+ parser.add_argument('--compress_output')\n+ parser.add_argument('--catalogsnps')\n+ parser.add_argument('--catalogalleles')\n+ parser.add_argument('--catalogtags')\n+\n+ # additionnal outputs\n+ parser.add_argument('--total_output')\n+ parser.add_argument('--tags_output')\n+ parser.add_argument('--snps_output')\n+ parser.add_argument('--alleles_output')\n+ parser.add_argument('--matches_output')\n+\n+ options = parser.parse_args()\n+\n+ # create working directories\n+\n+ os.mkdir('inputs')\n+ os.mkdir('job_outputs')\n+ os.mkdir('galaxy_outputs')\n+\n+ cmd_line = []\n+ cmd_line.append('denovo_map.pl')\n+\n+ # if genetic map\n+\n+ if options.p:\n+\n+ # parse config files\n+\n+ tab_parent_files = galaxy_config_to_tabfiles_for_STACKS(options.p)\n+\n+ # check if zipped files are into the tab and change tab content\n+\n+ extract_compress_files_from_tabfiles(tab_parent_files, 'inputs')\n+\n+ # check files extension (important to have .fq or .fasta files)\n+\n+ check_fastq_extension_and_add(tab_parent_files, 'inputs')\n+\n+ # create symlink into the temp 
dir\n+\n+ create_symlinks_from_tabfiles(tab_parent_files, 'inputs')\n+\n+ # parse the input dir and store all file names into a tab\n+\n+ fastq_files = []\n+ for fastq_file in glob.glob('inputs/*'):\n+ # if is a file (skip repository created after a decompression)\n+ if os.path.isfile(fastq_file):\n+ fastq_files.append(fastq_file)\n+\n+ fastq_files.sort()\n+\n+ # test if fastq are paired-end\n+ if options.b == 'true':\n+ for n in range(0, len(fastq_files), 2):\n+ cmd_line.extend(['-p', fastq_files[n]])\n+ else:\n+ for myfastqfile in fastq_files:\n+ cmd_line.extend(['-p', myfastqfile])\n+\n+ # if genetic map with progeny files\n+\n+ if options.r:\n+\n+ # parse config files\n+ tab_progeny_files = galaxy_config_to_tabfiles_for_STACKS(options.r)\n+\n+ # check if zipped files are into the tab and change tab content\n+ extract_compress_files_from_tabfiles(tab_progeny_files, 'inputs')\n+\n+ # check files extension (important to have .fq or .fasta files)\n+ check_fastq_extension_and_add(tab_progeny_files, 'inputs')\n+\n+ # create symlink into the temp dir\n+ create_symlinks_from_tabfiles(tab_progeny_files, 'inputs')\n+\n+ for key in tab_progeny_files:\n+\n+ # if is a file (skip repository created after a decompression)\n+\n+ if os.path.isfile('inputs/' + key):\n+ cmd_line.extend(['-r', 'inputs/' + key])\n+\n+ # if population is checked\n+ if options.s:\n+\n+ tab_individual_files = galaxy_config_to_tabfiles_for_STACKS(options.s)\n+\n+ # check if zipped files are into the tab and change tab content\n+ extract_compress_files_from_tabfiles(tab_individual_files, 'inputs')\n+\n+ # check files extension (important to have .fq or .fasta files)\n+ check_fastq_extension_and_add(t"..b' cmd_line.extend([\'-s\', \'inputs/\' + key])\n+\n+ # create the command line\n+ cmd_line.extend([\n+ \'-S\',\n+ \'-b\',\n+ \'1\',\n+ \'-T\',\n+ \'4\',\n+ \'-o\',\n+ \'job_outputs/\'\n+ ])\n+\n+ if options.O:\n+ cmd_line.extend([\'-O\', options.O])\n+\n+ if options.m and options.m != \'-1\':\n+ 
cmd_line.extend([\'-m\', options.m])\n+\n+ if options.P and options.P != \'-1\':\n+ cmd_line.extend([\'-P\', options.P])\n+\n+ if options.M and options.M != \'-1\':\n+ cmd_line.extend([\'-M\', options.M])\n+\n+ if options.N and options.N != \'-1\':\n+ cmd_line.extend([\'-N\', options.N])\n+\n+ if options.n and options.n != \'-1\':\n+ cmd_line.extend([\'-n\', options.n])\n+\n+ if options.t and options.t == \'true\':\n+ cmd_line.append(\'-t\')\n+\n+ if options.H and options.H == \'true\':\n+ cmd_line.append(\'-H\')\n+\n+ ## SNP model \n+ if options.bound_low:\n+ cmd_line.extend([\'--bound_low\', options.bound_low])\n+ cmd_line.extend([\'--bound_high\', options.bound_high])\n+\n+ if options.alpha:\n+ cmd_line.extend([\'--alpha\', options.alpha])\n+\n+ # launch the command line\n+ print "[CMD_LINE] : "+\' \'.join(cmd_line) \n+\n+ p = subprocess.call(cmd_line)\n+\n+ # postprocesses\n+ try:\n+ shutil.move(\'job_outputs/denovo_map.log\', options.logfile)\n+ except:\n+ sys.stderr.write(\'Error in denovo_map execution; Please read the additional output (stdout)\\n\')\n+ sys.exit(1)\n+\n+ # go inside the outputs dir\n+ os.chdir(\'job_outputs\')\n+\n+ # move files\n+ for i in glob.glob(\'*\'):\n+ if re.search(\'catalog.snps.tsv$\', i):\n+ shutil.copy(i, options.catalogsnps)\n+ if re.search(\'catalog.alleles.tsv$\', i):\n+ shutil.copy(i, options.catalogalleles)\n+ if re.search(\'catalog.tags.tsv$\', i):\n+ shutil.copy(i, options.catalogtags)\n+\n+ list_files = glob.glob(\'*\')\n+\n+ # if compress output is total\n+ if options.compress_output == \'total\':\n+\n+ mytotalzipfile = zipfile.ZipFile(\'total.zip.temp\', \'w\',\n+ allowZip64=True)\n+\n+ for i in list_files:\n+ mytotalzipfile.write(os.path.basename(i))\n+\n+ # return the unique archive\n+ shutil.move(\'total.zip.temp\', options.total_output)\n+ elif options.compress_output == \'categories\':\n+\n+ # if compress output is by categories\n+ mytagszip = zipfile.ZipFile(\'tags.zip.temp\', \'w\', allowZip64=True)\n+ 
mysnpszip = zipfile.ZipFile(\'snps.zip.temp\', \'w\', allowZip64=True)\n+ myalleleszip = zipfile.ZipFile(\'alleles.zip.temp\', \'w\', allowZip64=True)\n+ mymatcheszip = zipfile.ZipFile(\'matches.zip.temp\', \'w\', allowZip64=True)\n+\n+ for i in list_files:\n+ # for each type of files\n+ if re.search("tags\\.tsv$", i) and not re.search(\'batch\', i):\n+ mytagszip.write(os.path.basename(i))\n+ os.remove(i)\n+ elif re.search("snps\\.tsv$", i) and not re.search(\'batch\', i):\n+ mysnpszip.write(os.path.basename(i))\n+ os.remove(i)\n+ elif re.search("alleles\\.tsv$", i) and not re.search(\'batch\', i):\n+ myalleleszip.write(os.path.basename(i))\n+ os.remove(i)\n+ elif re.search("matches\\.tsv$", i) and not re.search(\'batch\', i):\n+ mymatcheszip.write(os.path.basename(i))\n+ os.remove(i)\n+ else:\n+ shutil.move(os.path.basename(i), \'../galaxy_outputs\')\n+\n+ # return archives....\n+ shutil.move(\'tags.zip.temp\', options.tags_output)\n+ shutil.move(\'snps.zip.temp\', options.snps_output)\n+ shutil.move(\'alleles.zip.temp\', options.alleles_output)\n+ shutil.move(\'matches.zip.temp\', options.matches_output)\n+ else:\n+ # else no compression\n+ for i in list_files:\n+ shutil.move(os.path.basename(i), \'../galaxy_outputs\')\n+\n+\n+if __name__ == \'__main__\':\n+ __main__()\n+\n+\t\t\t\n' |
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_denovomap.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_denovomap.xml Mon Aug 24 09:29:12 2015 +0000 |
b |
b'@@ -0,0 +1,387 @@\n+<tool id="STACKSdenovomap" name="STACKS : De novo map" force_history_refresh="True">\r\n+ <description>Run the STACKS denovo_map.pl wrapper</description>\r\n+\r\n+<configfiles>\r\n+<configfile name="parent_sequences">\r\n+#if str( $options_usage.options_usage_selector ) == "genetic"\r\n+#for $input in $options_usage.parent_sequence:\r\n+${input.display_name}::${input}\r\n+#end for\r\n+#end if\r\n+</configfile>\r\n+<configfile name="progeny_sequences">\r\n+#if str( $options_usage.options_usage_selector ) == "genetic" and str( $options_usage.options_progeny.options_progeny_selector ) == "yes"\r\n+#for $input in $options_usage.options_progeny.progeny_sequence:\r\n+${input.display_name}::${input}\r\n+#end for\r\n+#end if\r\n+</configfile>\r\n+<configfile name="individual_samples">\r\n+#if str( $options_usage.options_usage_selector ) == "population"\r\n+#for $input in $options_usage.individual_sample:\r\n+${input.display_name}::${input}\r\n+#end for\r\n+#end if\r\n+</configfile>\r\n+</configfiles> \r\n+ \r\n+<requirements>\r\n+ <requirement type="package" version="1.18">stacks</requirement>\r\n+</requirements>\r\n+\r\n+<command interpreter="python">\r\n+STACKS_denovomap.py\r\n+#if str( $options_usage.options_usage_selector ) == "genetic"\r\n+-p $parent_sequences\r\n+-b $options_usage.paired\r\n+#if str( $options_usage.options_progeny.options_progeny_selector ) == "yes"\r\n+-r $progeny_sequences\r\n+#end if\r\n+#else\r\n+-s $individual_samples\r\n+#if str( $options_usage.options_popmap.popmap_selector) == "yes"\r\n+-O $options_usage.options_popmap.popmap\r\n+#end if\r\n+#end if\r\n+-m $advanced_options.minident\r\n+-P $advanced_options.minidentprogeny\r\n+-M $advanced_options.mismatchbetlociproc\r\n+-N $advanced_options.mismatchsecond\r\n+-n $advanced_options.mismatchbetlocibuild\r\n+-t $advanced_options.remove_hightly\r\n+-H $advanced_options.disable_calling\r\n+## snp_model\r\n+#if str( $snp_options.select_model.model_type) == 
"bounded"\r\n+--bound_low $snp_options.select_model.boundlow\r\n+--bound_high $snp_options.select_model.boundhigh\r\n+--alpha $snp_options.select_model.alpha\r\n+#else\r\n+--alpha $snp_options.select_model.alpha\r\n+#end if\r\n+## outputs\r\n+--catalogsnps $catalogsnps\r\n+--catalogalleles $catalogalleles\r\n+--catalogtags $catalogtags\r\n+--logfile $output\r\n+--compress_output $output_compress\r\n+##additionnal outputs\r\n+--total_output $total_output\r\n+--tags_output $tags_output\r\n+--snps_output $snps_output\r\n+--alleles_output $alleles_output\r\n+--matches_output $matches_output\r\n+\r\n+</command>\r\n+\r\n+<inputs>\r\n+\t<conditional name="options_usage">\r\n+\t\t<param name="options_usage_selector" type="select" label="Select your usage">\r\n+\t\t\t<option value="genetic" selected="true">Genetic map</option>\r\n+\t\t\t<option value="population">Population</option>\r\n+\t\t</param>\r\n+\t\t<when value="genetic">\r\n+\t\t\t<param name="parent_sequence" format="fastq,fasta,zip,tar.gz" type="data" multiple="true" label="Files containing parent sequences" help="FASTQ/FASTA/ZIP/TAR.GZ files containing parent sequences from a mapping cross" />\r\n+\t\t\t<param name="paired" type="boolean" checked="false" default="false" label="Paired-end fastq files?" 
help="be careful, all files must have a paired-end friend"/>\r\n+\t\t\t<conditional name="options_progeny">\r\n+\t\t\t<param name="options_progeny_selector" type="select" label="Use progeny files">\r\n+\t\t\t\t<option value="yes" selected="true">Yes</option>\r\n+\t\t\t\t<option value="no">No</option>\r\n+\t\t\t</param>\r\n+\t\t\t<when value="yes">\r\n+\t\t\t\t<param name="progeny_sequence" format="fastq,fasta,zip,tar.gz" type="data" multiple="true" label="Files containing progeny sequences" help="FASTQ/FASTA/ZIP/TAR.GZ files containing progeny sequences from a mapping cross" />\r\n+\t\t\t</when>\r\n+\t\t\t<when value="no">\r\n+\t\t\t</when>\r\n+\t\t\t</conditional>\r\n+\t\t</when>\r\n+\t\t<when value="population">\r\n+\t\t\t<param name="individual_sample" format="fastq,fasta,zip,tar.gz" type="data" multiple="true" label="Files containing an individual sample from a population" help="FASTQ/FASTA/ZIP/TAR.GZ files contiaining an individual sample from a population" />\r\n+\t\t\t<conditional n'..b'onfidence interval.\r\n+\tLOD Score \t\t\tLogarithm of odds score.\r\n+\tExpected Heterozygosity\t\tHeterozygosity expected under Hardy-Weinberg equilibrium.\r\n+\tExpected Homozygosity \t\tHomozygosity expected under Hardy-Weinberg equilibrium.\r\n+\tCorrected FST \t\t\tFST with either the FET p-value, or a window-size or genome size Bonferroni correction.\r\n+\tSmoothed FST \t\t\tA weighted average of FST depending on the surrounding 3s of sequence in both directions.\r\n+\tSmoothed FST P-value \t\tIf bootstrap resampling is enabled, a p-value ranking the significance of FST within this pair of populations.\r\n+\r\n+\r\n+Instructions to add the functionality of archives management in Galaxy on the `eBiogenouest HUB wiki <https://www.e-biogenouest.org/wiki/ManArchiveGalaxy>`_ .\r\n+\r\n+--------\r\n+\r\n+**Output type:**\r\n+\r\n+- Output type details::\r\n+\r\n+\tNo compression \t\t\tAll files will be added in the current history.\r\n+\tCompressed by categories\tFiles will 
be compressed by categories (snps, allele, matches and tags) into 4 zip archives. These archives and batch files will be added in the current history.\r\n+\tCompressed all outputs \t\tAll files will be compressed in an unique zip archive. Batch files will be added in the current history with the archive.\r\n+\r\n+\r\n+--------\r\n+\r\n+**Project links:**\r\n+\r\n+`STACKS website <http://creskolab.uoregon.edu/stacks/>`_ .\r\n+\r\n+`STACKS manual <http://creskolab.uoregon.edu/stacks/stacks_manual.pdf>`_ .\r\n+\r\n+`STACKS google group <https://groups.google.com/forum/#!forum/stacks-users>`_ .\r\n+\r\n+--------\r\n+\r\n+**References:**\r\n+\r\n+-J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013.\r\n+\r\n+-J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O\'Brien, Q. Yeates, and W. Cresko. The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013.\r\n+\r\n+-J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011.\r\n+\r\n+-A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. Genetics, 188:799\'808, 2011.\r\n+\r\n+-P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011.\r\n+\r\n+-K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. 
Proceedings of the National Academy of Science, 107(37):16196-200, 2010.\r\n+\r\n+--------\r\n+\r\n+**Integrated by:**\r\n+\r\n+Yvan Le Bras and Cyril Monjeaud \r\n+\r\n+GenOuest Bio-informatics Core Facility\r\n+\r\n+UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)\r\n+\r\n+support@genouest.org\r\n+\r\n+</help>\r\n+<citations>\r\n+ <citation type="doi">10.1111/mec.12354</citation>\r\n+ <citation type="doi">10.1111/mec.12330</citation>\r\n+ <citation type="doi">10.1534/g3.111.000240</citation>\r\n+ <citation type="doi">10.1534/genetics.111.127324</citation>\r\n+ <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>\r\n+ <citation type="doi">10.1073/pnas.1006538107</citation>\r\n+\r\n+ <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,\r\n+ author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},\r\n+ title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},\r\n+ booktitle = {JOBIM 2013 Proceedings},\r\n+ year = {2013},\r\n+ url = {https://www.e-biogenouest.org/resources/128},\r\n+ pages = {97-106}\r\n+ }</citation>\r\n+</citations>\r\n+</tool>\r\n+\r\n' |
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_genotypes.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_genotypes.py Mon Aug 24 09:29:12 2015 +0000 |
[ |
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Galaxy wrapper around the STACKS ``genotypes`` program.

The script unpacks a STACKS output archive into ``job_outputs/``, runs
``genotypes`` on it, copies the haplotypes table to the log output and then
returns the result files either as one zip archive or as loose files moved
into ``galaxy_outputs/``.
"""

import sys
import re
import os
import tempfile
import shutil
import subprocess
import glob
import argparse
from os.path import basename
import zipfile
import tarfile
import gzip
from galaxy.datatypes.checkers import *
from stacks import *


def _build_parser():
    """Return the argument parser for the wrapper's command-line options."""
    parser = argparse.ArgumentParser()

    # options forwarded to the genotypes program
    parser.add_argument('-P')
    parser.add_argument('-b')
    parser.add_argument('-c')
    parser.add_argument('-t')
    parser.add_argument('-o')
    parser.add_argument('-e')
    parser.add_argument('--active_advanced')
    parser.add_argument('-r')
    parser.add_argument('-m')
    parser.add_argument('-B')
    parser.add_argument('-W')
    parser.add_argument('--active_autocorrect')
    parser.add_argument('--min_hom_seqs')
    parser.add_argument('--min_het_seqs')
    parser.add_argument('--max_het_seqs')

    # multifile management
    parser.add_argument('--logfile')
    parser.add_argument('--compress_output')

    # additional outputs
    parser.add_argument('--total_output')

    return parser


def _build_command(options, workdir):
    """Return the ``genotypes`` argument list for the parsed *options*.

    *workdir* is the directory that holds the extracted STACKS files; it is
    passed to ``genotypes`` as ``-P``.
    """
    cmd_line = ['genotypes', '-b', options.b, '-P', workdir]

    if options.e:
        cmd_line.extend(['-e', options.e])
    # boolean switches arrive as the literal strings 'true' / 'false'
    if options.c == 'true':
        cmd_line.append('-c')
    if options.t:
        cmd_line.extend(['-t', options.t])
    if options.o:
        cmd_line.extend(['-o', options.o])

    # advanced options are only forwarded when the section is activated
    if options.active_advanced == 'true':
        cmd_line.extend(['-r', options.r])
        cmd_line.extend(['-m', options.m])
        if options.B:
            cmd_line.extend(['-B', options.B])
        if options.W:
            cmd_line.extend(['-W', options.W])

    # automatic corrections thresholds
    if options.active_autocorrect == 'true':
        cmd_line.extend(['--min_hom_seqs', options.min_hom_seqs])
        cmd_line.extend(['--min_het_seqs', options.min_het_seqs])
        cmd_line.extend(['--max_het_seqs', options.max_het_seqs])

    return cmd_line


def _is_genotypes_output(path):
    """Tell whether *path* is one of the files handed back to Galaxy.

    A file is kept when its base name starts with ``batch`` and is not a
    ``.tsv`` file, or ends with ``_<digits>.tsv``, or contains ``genotypes``.
    The parentheses spell out the grouping the original relied on through
    ``and``/``or`` operator precedence.
    """
    name = os.path.basename(path)
    return bool(
        (re.search(r'^batch', name) and not re.search(r'\.tsv$', name))
        or re.search(r'.*_[0-9]*\.tsv$', name)
        or re.search(r'.*genotypes.*', name)
    )


def __main__():
    """Run ``genotypes`` on a STACKS archive and collect its outputs.

    Exits with status 1 (after writing a message to stderr) when the
    haplotypes table cannot be copied to the ``--logfile`` destination.
    """
    options = _build_parser().parse_args()

    # create the working dirs and run everything from inside job_outputs
    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')
    os.chdir('job_outputs')
    workdir = os.getcwd()

    # STACKS_archive: unpack the input archive into the working dir
    extract_compress_files(options.P, workdir)

    cmd_line = _build_command(options, workdir)

    # single-argument print() behaves the same on Python 2 and Python 3;
    # flush so the command line is emitted before the child's own output
    print("[CMD]:" + ' '.join(cmd_line))
    sys.stdout.flush()
    returncode = subprocess.call(cmd_line)

    # postprocesses
    # NOTE(review): the file name is hard-coded for batch id 1; confirm
    # whether other values of -b (or -r) change the name genotypes writes.
    try:
        shutil.copy('batch_1.haplotypes_1.tsv', options.logfile)
    except Exception:
        # not a bare except: SystemExit / KeyboardInterrupt must propagate
        sys.stderr.write('Error in genotypes execution; Please read the additional output (stdout)\n')
        if returncode != 0:
            sys.stderr.write('genotypes exited with status %d\n' % returncode)
        sys.exit(1)

    # snapshot taken before the temporary archive exists, so the archive
    # can never be added to itself
    selected = [path for path in glob.glob('*') if _is_genotypes_output(path)]

    # if compress output is total: return one unique archive
    if options.compress_output == 'total':
        # the with-block closes the archive (writing its central directory)
        # before it is moved; allowZip64 matches the denovo_map wrapper
        with zipfile.ZipFile('total.zip.temp', 'w', allowZip64=True) as archive:
            for path in selected:
                archive.write(path, os.path.basename(path))

        shutil.move('total.zip.temp', options.total_output)

    # if compress output is default: return the loose files
    if options.compress_output == 'default':
        for path in selected:
            shutil.move(path, '../galaxy_outputs')


if __name__ == '__main__':
    __main__()
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_genotypes.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_genotypes.xml Mon Aug 24 09:29:12 2015 +0000 |
b |
b'@@ -0,0 +1,379 @@\n+<tool id="STACKSgenotypes" name="STACKS : genotypes" force_history_refresh="True">\r\n+ <description>Run the STACKS genotypes program</description>\r\n+\r\n+\r\n+<requirements>\r\n+ <requirement type="package" version="1.18">stacks</requirement>\r\n+</requirements>\r\n+\r\n+<command interpreter="python">\r\n+\r\n+STACKS_genotypes.py\r\n+-P $STACKS_archive\r\n+-b $batchid\r\n+-c $corrections\r\n+#if str( $options_output.options_output_selector ) == "1":\r\n+-t $options_output.map\r\n+-o $options_output.filetype\r\n+#end if\r\n+#if str( $options_enzyme.options_enzyme_selector ) == "1":\r\n+-e $options_enzyme.enzyme\r\n+#end if\r\n+--active_advanced $active_advanced\r\n+-r $advanced_options.minprogeny\r\n+-m $advanced_options.mindepth\r\n+#if str( $advanced_options.blacklistselect.advanced_blackoptions_selector) == "advanced"\r\n+-B $advanced_options.blacklistselect.blacklist\r\n+#end if\r\n+#if str( $advanced_options.whitelistselect.advanced_whiteoptions_selector) == "advanced"\r\n+-W $advanced_options.whitelistselect.whitelist\r\n+#end if\r\n+--active_autocorrect $active_autocorrect\r\n+--min_hom_seqs $options_autocorrect.hom\r\n+--min_het_seqs $options_autocorrect.het\r\n+--max_het_seqs $options_autocorrect.hetmax\r\n+--logfile $output\r\n+--compress_output $output_compress\r\n+##additionnal outputs\r\n+--total_output $total_output\r\n+\r\n+\r\n+</command>\r\n+\r\n+<inputs>\r\n+\t<param name="STACKS_archive" format="zip,tar.gz" type="data" label="Archive from STACKS pipeline regrouping all outputs" />\r\n+\t<param name="batchid" type="integer" value="1" label="Batch ID" help="Batch ID to examine when exporting from the catalog" />\r\n+\t<conditional name="options_output">\r\n+\t <param name="options_output_selector" type="select" label="Did you want to use the file type output option?">\r\n+\t\t\t<option value="1">Yes</option>\r\n+\t\t\t<option value="2" selected="true">No</option>\r\n+\t </param>\r\n+\t <when value="1">\r\n+\t\t\t<param 
name="map" type="select" format="text" label="map type" help="map type to write. \'CP\', \'DH\', \'F2\', \'BC1\', and \'GEN\' are the currently supported map types" >\r\n+\t\t\t\t<option value="CP">CP</option>\r\n+\t\t\t\t<option value="DH">DH</option>\r\n+\t\t\t\t<option value="F2">F2</option>\r\n+\t\t\t\t<option value="BC1">BC1</option>\r\n+\t\t\t\t<option value="GEN">GEN</option>\r\n+\t\t\t</param>\r\n+\t\t\t<param name="filetype" type="select" format="text" label="output file type" help="output file type to write, \'joinmap\', \'onemap\', \'rqtl\', and \'genomic\' are currently supported" >\r\n+\t\t\t\t<option value="joinmap">joinmap</option>\r\n+\t\t\t\t<option value="onemap">onemap</option>\r\n+\t\t\t\t<option value="rqtl">rqtl</option>\r\n+\t\t\t\t<option value="genomic">genomic</option>\r\n+\t\t\t</param>\r\n+\t </when>\r\n+\t <when value="2">\r\n+\t </when>\r\n+\t</conditional>\t\r\n+\t<conditional name="options_enzyme">\r\n+\t <param name="options_enzyme_selector" type="select" label="Did you want to use the genomic output option?">\r\n+\t\t\t<option value="1">Yes</option>\r\n+\t\t\t<option value="2" selected="true">No</option>\r\n+\t </param>\r\n+\t <when value="1">\r\n+\t\t\t<param name="enzyme" type="select" format="text" label="provide the restriction enzyme used" help="required if generating genomic output" >\r\n+\t\t\t\t<option value="apeKI">apeKI</option>\r\n+\t\t\t\t<option value="bamHI">bamHI</option>\r\n+\t\t\t\t<option value="claI">claI</option>\r\n+\t\t\t\t<option value="dpnII">dpnII</option>\r\n+\t\t\t\t<option value="eaeI">eaeI</option>\r\n+\t\t\t\t<option value="ecoRI">ecoRI</option>\r\n+\t\t\t\t<option value="ecoT22I">ecoT22I</option>\r\n+\t\t\t\t<option value="hindIII">hindIII</option>\r\n+\t\t\t\t<option value="mluCI">mluCI</option>\r\n+\t\t\t\t<option value="mseI">mseI</option>\r\n+\t\t\t\t<option value="mspI">mspI</option>\r\n+\t\t\t\t<option value="ndeI">ndeI</option>\r\n+\t\t\t\t<option 
value="nlaIII">nlaIII</option>\r\n+\t\t\t\t<option value="notI">notI</option>\r\n+\t\t\t\t<option value="nsiI">nsiI</option>\r\n+\t\t\t\t<option value="pstI">pstI</option>\r\n+\t\t\t\t<option value="sau3AI">sau3AI</option>\r\n+\t\t\t\t<option value="sbfI">sbfI</option>\r\n+\t\t\t\t<option value="sexAI">sexAI</option>\r\n+\t\t\t\t<option value="sgrAI">sgrAI</option>\r\n+\t\t\t\t<option value='..b'ection.\r\n+\tSmoothed FST \t\t\tA weighted average of FST depending on the surrounding 3s of sequence in both directions.\r\n+\tSmoothed FST P-value \t\tIf bootstrap resampling is enabled, a p-value ranking the significance of FST within this pair of populations.\r\n+\r\n+\r\n+Instructions to add the functionality of archives management in Galaxy on the `eBiogenouest HUB wiki <https://www.e-biogenouest.org/wiki/ManArchiveGalaxy>`_ .\r\n+\r\n+--------\r\n+\r\n+**Output type:**\r\n+\r\n+- Output type details::\r\n+\r\n+\tNo compression \t\t\tAll files will be added in the current history.\r\n+\tCompressed by categories\tFiles will be compressed by categories (snps, allele, matches and tags) into 4 zip archives. These archives and batch files will be added in the current history.\r\n+\tCompressed all outputs \t\tAll files will be compressed in an unique zip archive. Batch files will be added in the current history with the archive.\r\n+\r\n+\r\n+--------\r\n+\r\n+**Project links:**\r\n+\r\n+`STACKS website <http://creskolab.uoregon.edu/stacks/>`_ .\r\n+\r\n+`STACKS manual <http://creskolab.uoregon.edu/stacks/stacks_manual.pdf>`_ .\r\n+\r\n+`STACKS google group <https://groups.google.com/forum/#!forum/stacks-users>`_ .\r\n+\r\n+--------\r\n+\r\n+**References:**\r\n+\r\n+-J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013.\r\n+\r\n+-J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O\'Brien, Q. Yeates, and W. Cresko. 
The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013.\r\n+\r\n+-J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011.\r\n+\r\n+-A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. Genetics, 188:799\'808, 2011.\r\n+\r\n+-P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011.\r\n+\r\n+-K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. Proceedings of the National Academy of Science, 107(37):16196-200, 2010.\r\n+\r\n+--------\r\n+\r\n+**Integrated by:**\r\n+\r\n+Yvan Le Bras and Cyril Monjeaud \r\n+\r\n+GenOuest Bio-informatics Core Facility\r\n+\r\n+UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)\r\n+\r\n+support@genouest.org\r\n+\r\n+If you use this tool in Galaxy, please cite :\r\n+\r\n+`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. 
<https://www.e-biogenouest.org/resources/128>`_\r\n+\r\n+\r\n+</help>\r\n+<citations>\r\n+ <citation type="doi">10.1111/mec.12354</citation>\r\n+ <citation type="doi">10.1111/mec.12330</citation>\r\n+ <citation type="doi">10.1534/g3.111.000240</citation>\r\n+ <citation type="doi">10.1534/genetics.111.127324</citation>\r\n+ <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>\r\n+ <citation type="doi">10.1073/pnas.1006538107</citation>\r\n+\r\n+ <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,\r\n+ author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},\r\n+ title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},\r\n+ booktitle = {JOBIM 2013 Proceedings},\r\n+ year = {2013},\r\n+ url = {https://www.e-biogenouest.org/resources/128},\r\n+ pages = {97-106}\r\n+ }</citation>\r\n+</citations>\r\n+</tool>\r\n+\r\n' |
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_population.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_population.py Mon Aug 24 09:29:12 2015 +0000 |
[ |
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Galaxy wrapper for the STACKS ``populations`` program.

The script unpacks a STACKS archive into a scratch directory, builds the
``populations`` command line from the wrapper options, runs it, and then
hands the result files back to Galaxy, either one by one or grouped into
zip archives.
"""

import sys
import re
import os
import tempfile
import shutil
import subprocess
import glob
import argparse
from os.path import basename
import zipfile
import tarfile
import gzip
from galaxy.datatypes.checkers import *
from stacks import *


# Output-format switches forwarded verbatim to ``populations`` (order kept).
_FORMAT_FLAGS = ('vcf', 'genepop', 'structure', 'fasta', 'phase', 'beagle',
                 'plink', 'phylip')

# (file-name pattern, temporary archive name); the first matching rule wins.
_ARCHIVE_RULES = (
    (re.compile(r'\.markers$'), 'markers.zip.temp'),
    (re.compile(r'phase\.inp$'), 'phase.zip.temp'),
    (re.compile(r'unphased\.bgl$'), 'unphased.zip.temp'),
    (re.compile(r'fst'), 'fst.zip.temp'),
)

# Patterns selecting the un-archived files that are returned to Galaxy as-is.
_BATCH_PREFIX = re.compile(r'^batch')
_TSV_SUFFIX = re.compile(r'\.tsv$')
_NUMBERED_TSV = re.compile(r'.*_[0-9]*\.tsv$')


def _build_parser():
    """Return the argument parser describing the wrapper's command line."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-P')
    parser.add_argument('-M')
    parser.add_argument('-b')
    parser.add_argument('--vcf', action='store_true')
    parser.add_argument('--genepop', action='store_true')
    parser.add_argument('--structure', action='store_true')
    parser.add_argument('-e')
    # The Galaxy template passes ``--genomic <value>``; accept an optional
    # value in addition to the bare flag so that call no longer aborts.
    parser.add_argument('--genomic', nargs='?', const=True, default=False)
    parser.add_argument('--fasta', action='store_true')
    parser.add_argument('--phase', action='store_true')
    parser.add_argument('--beagle', action='store_true')
    parser.add_argument('--plink', action='store_true')
    parser.add_argument('--phylip', action='store_true')
    parser.add_argument('--phylip_var', action='store_true')
    parser.add_argument('--write_single_snp', action='store_true')
    parser.add_argument('-k', action='store_true')

    # advanced options
    parser.add_argument('--advanced_options_activate')
    parser.add_argument('-B')
    parser.add_argument('-W')
    parser.add_argument('-r')
    parser.add_argument('-p')
    parser.add_argument('-m')
    parser.add_argument('-a')
    parser.add_argument('-f')
    parser.add_argument('--p_value_cutoff')
    parser.add_argument('--window_size')
    parser.add_argument('--bootstrap')
    parser.add_argument('--bootstrap_reps')

    # multifile management
    parser.add_argument('--logfile')

    # outputs
    parser.add_argument('--ss')
    parser.add_argument('--s')

    # optional outputs
    parser.add_argument('--ov')
    parser.add_argument('--op')
    parser.add_argument('--ol')
    parser.add_argument('--of')
    parser.add_argument('--os')
    parser.add_argument('--oe')
    parser.add_argument('--om')
    parser.add_argument('--og')

    parser.add_argument('--unphased_output')
    parser.add_argument('--markers_output')
    parser.add_argument('--phase_output')
    parser.add_argument('--fst_output')
    return parser


def _flag_enabled(value):
    """Return True when an optional-value switch was turned on."""
    if value is True:
        return True
    if value is None or value is False:
        return False
    return str(value).strip().lower() in ('true', 'yes', 'on', '1')


def _extend_if_set(cmd_line, flag, value):
    """Append ``flag value`` to *cmd_line* unless *value* was not supplied."""
    if value is not None:
        cmd_line.extend([flag, value])


def _build_command(options):
    """Translate the parsed wrapper options into a ``populations`` argv list."""
    cmd_line = ['populations']
    cmd_line.extend(['-b', options.b, '-P', os.getcwd(), '-M', options.M])

    if options.e:
        cmd_line.extend(['-e', options.e])
        # ``--genomic`` is a switch: forward the flag itself, never the
        # parsed value (a bool in argv breaks both join() and subprocess).
        if _flag_enabled(options.genomic):
            cmd_line.append('--genomic')

    # output options
    for name in _FORMAT_FLAGS:
        if getattr(options, name):
            cmd_line.append('--' + name)
    if options.phylip_var and options.phylip:
        cmd_line.append('--phylip_var')
    if options.write_single_snp and (options.genepop or options.structure):
        cmd_line.append('--write_single_snp')

    if options.k:
        cmd_line.append('-k')
        _extend_if_set(cmd_line, '--window_size', options.window_size)

    if options.advanced_options_activate == 'true':
        if options.B:
            cmd_line.extend(['-B', options.B])
        if options.W:
            cmd_line.extend(['-W', options.W])

        for flag in ('r', 'p', 'm', 'a'):
            _extend_if_set(cmd_line, '-' + flag, getattr(options, flag))

        if options.f:
            cmd_line.extend(['-f', options.f])
            _extend_if_set(cmd_line, '--p_value_cutoff',
                           options.p_value_cutoff)
        if options.bootstrap:
            cmd_line.extend(['--bootstrap', options.bootstrap])
            _extend_if_set(cmd_line, '--bootstrap_reps',
                           options.bootstrap_reps)

    return cmd_line


def _copy_file(source, destination):
    """Copy *source* to *destination*; return False instead of raising."""
    if not destination:
        return False
    try:
        shutil.copy(source, destination)
    except (EnvironmentError, shutil.Error):
        return False
    return True


def _move_first_match(pattern, destination):
    """Move the first file matching *pattern* to *destination*.

    Returns True on success and False when nothing matches, no destination
    was given, or the move itself fails (outputs are best-effort).
    """
    matches = sorted(glob.glob(pattern))
    if not matches or not destination:
        return False
    try:
        shutil.move(matches[0], destination)
    except (EnvironmentError, shutil.Error):
        return False
    return True


def _archive_remaining_files(list_files):
    """Sort the leftover files into the four temporary zip archives.

    Files matching none of the archive rules are moved to
    ``../galaxy_outputs`` when they look like batch or numbered-sample
    outputs; everything else is left in place.
    """
    archives = [(pattern, zipfile.ZipFile(name, 'w', allowZip64=True))
                for pattern, name in _ARCHIVE_RULES]
    try:
        for entry in list_files:
            for pattern, archive in archives:
                if pattern.search(entry):
                    archive.write(entry)
                    break
            else:
                # else return original files
                base = os.path.basename(entry)
                is_batch_file = (_BATCH_PREFIX.search(base)
                                 and not _TSV_SUFFIX.search(base))
                if is_batch_file or _NUMBERED_TSV.search(base):
                    shutil.move(entry, '../galaxy_outputs')
    finally:
        # Always release the handles, even if a write or move failed.
        for _, archive in archives:
            archive.close()


def __main__():
    """Run ``populations`` for Galaxy and collect its output files.

    Exits with status 1 when the ``populations`` log file cannot be copied
    to ``--logfile``, which is how a failed run is detected.
    """
    options = _build_parser().parse_args()

    # create the working dir
    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')
    os.chdir('job_outputs')

    # STACKS_archive: unpack the input archive next to the future outputs
    extract_compress_files(options.P, os.getcwd())

    cmd_line = _build_command(options)
    print("[CMD]:" + ' '.join(cmd_line))
    subprocess.call(cmd_line)

    # postprocesses
    # The log is named after the batch id given with -b, not always "1".
    log_name = 'batch_%s.populations.log' % options.b
    if not _copy_file(log_name, options.logfile):
        sys.stderr.write('Error in population execution; Please read the additional output (stdout)\n')
        sys.exit(1)

    if not _move_first_match('*.sumstats_summary.tsv', options.ss):
        print("No sumstats summary file")
    if not _move_first_match('*.sumstats.tsv', options.s):
        print("No sumstats file")

    # move additional output files; each entry is
    # (requested?, ((glob pattern, destination), ...), message on failure)
    optional_outputs = (
        (options.vcf, (('*.vcf', options.ov),), "No VCF files"),
        (options.phylip,
         (('*.phylip', options.op), ('*.phylip.log', options.ol)),
         "No phylip file"),
        (options.fasta, (('*.fa', options.of),), "No fasta files"),
        (options.structure, (('*.structure.tsv', options.os),),
         "No structure file"),
        (options.plink,
         (('*.ped', options.oe), ('*.map', options.om)),
         "No ped and map file"),
        (options.genepop, (('*.genepop', options.og),), "No genepop file"),
    )
    for requested, moves, message in optional_outputs:
        # all() stops at the first failed move, like the original try block.
        if requested and not all(_move_first_match(pattern, destination)
                                 for pattern, destination in moves):
            print(message)

    # Snapshot the directory before the temporary archives are created so
    # they never end up inside themselves.
    list_files = glob.glob('*')
    _archive_remaining_files(list_files)

    # return archives
    shutil.move('fst.zip.temp', options.fst_output)
    if options.beagle:
        shutil.move('markers.zip.temp', options.markers_output)
        shutil.move('unphased.zip.temp', options.unphased_output)
    if options.phase:
        shutil.move('phase.zip.temp', options.phase_output)


if __name__ == '__main__':
    __main__()
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_population.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_population.xml Mon Aug 24 09:29:12 2015 +0000 |
b |
b'@@ -0,0 +1,497 @@\n+<tool id="STACKSpopulation" name="STACKS : populations" force_history_refresh="True">\r\n+ <description>Run the STACKS populations program</description>\r\n+\r\n+\r\n+<requirements>\r\n+ <requirement type="package" version="1.18">stacks</requirement>\r\n+</requirements>\r\n+\r\n+<command interpreter="python">\r\n+\r\n+STACKS_population.py\r\n+-P $STACKS_archive\r\n+-b $batchid\r\n+-M $popmap\r\n+\r\n+#if $options_kernel.kernel\r\n+ -k\r\n+ --window_size $options_kernel.window\r\n+#end if\r\n+\r\n+#if str( $options_enzyme.options_enzyme_selector ) == "1":\r\n+ -e $options_enzyme.enzyme\r\n+ --genomic $options_enzyme.genomic\r\n+#end if\r\n+\r\n+## advanced options\r\n+--advanced_options_activate $advanced_options_activate\r\n+#if $advanced_options_activate\r\n+ -r $advanced_options.minperc\r\n+ -p $advanced_options.minpop\r\n+ -m $advanced_options.mindepth\r\n+ -a $advanced_options.minminor\r\n+ #if str( $advanced_options.correction_select.correction ) != "no_corr":\r\n+ -f $advanced_options.correction_select.correction\r\n+ --p_value_cutoff $advanced_options.correction_select.pcutoff\r\n+ #end if\r\n+ #if str( $advanced_options.blacklistselect.advanced_blackoptions_selector) == "advanced"\r\n+ -B $advanced_options.blacklistselect.blacklist\r\n+ #end if\r\n+ #if str( $advanced_options.whitelistselect.advanced_whiteoptions_selector) == "advanced"\r\n+ -W $advanced_options.whitelistselect.whitelist\r\n+ #end if\r\n+ #if str( $advanced_options.bootstrapresampling.advanced_bootoptions_selector) == "advanced"\r\n+ --bootstrap $advanced_options.bootstrapresampling.bootstrap\r\n+ --bootstrap_reps $advanced_options.bootstrapresampling.bootstrapreps\r\n+ #end if\r\n+#end if\r\n+\r\n+## output files\r\n+--ss $sumstatssum\r\n+--s $sumstats\r\n+--fst_output $outfst\r\n+\r\n+## output section\r\n+#if $options_output.vcf\r\n+--vcf\r\n+--ov $outvcf \r\n+#end if\r\n+#if $options_output.phylip\r\n+--phylip\r\n+--op $outphylip\r\n+#end if\r\n+#if 
$options_output.phylip\r\n+--phylip_var\r\n+--ol $outphyliplog \r\n+#end if\r\n+#if $options_output.fasta\r\n+--fasta\r\n+--of $outfasta \r\n+#end if\r\n+#if $options_output.structure\r\n+--structure\r\n+--os $outstructure \r\n+#end if\r\n+#if $options_output.plink\r\n+--plink\r\n+--oe $outplinkped \r\n+--om=$outplinkmap\r\n+#end if\r\n+#if $options_output.phase\r\n+--phase\r\n+--phase_output $outphase \r\n+#end if\r\n+#if $options_output.beagle\r\n+--beagle\r\n+--unphased_output $outbeagle \r\n+#end if\r\n+--markers_output $outmarkers\r\n+#if $options_output.genepop\r\n+--genepop\r\n+--og=$outgenepop \r\n+#end if\r\n+#if $options_output.write_single_snp\r\n+--write_single_snp\r\n+#end if\r\n+--logfile $output\r\n+\r\n+</command>\r\n+\r\n+<inputs>\r\n+\t<param name="STACKS_archive" format="zip,tar.gz" type="data" label="Archive from STACKS pipeline regrouping all outputs" />\r\n+\t<param name="batchid" type="integer" value="1" label="Batch ID" help="Batch ID to examine when exporting from the catalog" />\r\n+\t<param name="popmap" type="data" format="tabular,txt" label="Specify a population map" help="specify a population map" />\r\n+\t<section name="options_output" title="Output options" expanded="False">\r\n+\t\t<param name="vcf" type="boolean" checked="false" default="false" label="output results in Variant Call Format (VCF)" />\r\n+\t\t<param name="genepop" type="boolean" checked="false" default="false" label="output results in GenePop Format" />\r\n+\t\t<param name="structure" type="boolean" checked="false" default="false" label="output results in Structure Format" />\r\n+\t\t<param name="fasta" type="boolean" checked="false" default="false" label="output full sequence for each allele, from each sample locus in FASTA format" />\r\n+\t\t<param name="phase" type="boolean" checked="false" default="false" label="output genotypes in PHASE/fastPHASE format" />\r\n+\t\t<param name="beagle" type="boolean" checked="false" default="false" label="output genotypes in 
Beagle format" />\r\n+\t\t<param name="plink" type="boolean" checked="false" default="false" label="output genotypes in PLINK format" />\r\n+\t\t<param name="phylip" type="boolean" checked="false" default="false" label="output '..b'ection.\r\n+\tSmoothed FST \t\t\tA weighted average of FST depending on the surrounding 3s of sequence in both directions.\r\n+\tSmoothed FST P-value \t\tIf bootstrap resampling is enabled, a p-value ranking the significance of FST within this pair of populations.\r\n+\r\n+\r\n+Instructions to add the functionality of archives management in Galaxy on the `eBiogenouest HUB wiki <https://www.e-biogenouest.org/wiki/ManArchiveGalaxy>`_ .\r\n+\r\n+--------\r\n+\r\n+**Output type:**\r\n+\r\n+- Output type details::\r\n+\r\n+\tNo compression \t\t\tAll files will be added in the current history.\r\n+\tCompressed by categories\tFiles will be compressed by categories (snps, allele, matches and tags) into 4 zip archives. These archives and batch files will be added in the current history.\r\n+\tCompressed all outputs \t\tAll files will be compressed in an unique zip archive. Batch files will be added in the current history with the archive.\r\n+\r\n+\r\n+--------\r\n+\r\n+**Project links:**\r\n+\r\n+`STACKS website <http://creskolab.uoregon.edu/stacks/>`_ .\r\n+\r\n+`STACKS manual <http://creskolab.uoregon.edu/stacks/stacks_manual.pdf>`_ .\r\n+\r\n+`STACKS google group <https://groups.google.com/forum/#!forum/stacks-users>`_ .\r\n+\r\n+--------\r\n+\r\n+**References:**\r\n+\r\n+-J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013.\r\n+\r\n+-J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O\'Brien, Q. Yeates, and W. Cresko. The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013.\r\n+\r\n+-J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. 
Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011.\r\n+\r\n+-A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. Genetics, 188:799\'808, 2011.\r\n+\r\n+-P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011.\r\n+\r\n+-K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. Proceedings of the National Academy of Science, 107(37):16196-200, 2010.\r\n+\r\n+--------\r\n+\r\n+**Integrated by:**\r\n+\r\n+Yvan Le Bras and Cyril Monjeaud \r\n+\r\n+GenOuest Bio-informatics Core Facility\r\n+\r\n+UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)\r\n+\r\n+support@genouest.org\r\n+\r\n+If you use this tool in Galaxy, please cite :\r\n+\r\n+`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. <https://www.e-biogenouest.org/resources/128>`_\r\n+\r\n+\r\n+</help>\r\n+<citations>\r\n+ <citation type="doi">10.1111/mec.12354</citation>\r\n+ <citation type="doi">10.1111/mec.12330</citation>\r\n+ <citation type="doi">10.1534/g3.111.000240</citation>\r\n+ <citation type="doi">10.1534/genetics.111.127324</citation>\r\n+ <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>\r\n+ <citation type="doi">10.1073/pnas.1006538107</citation>\r\n+\r\n+ <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,\r\n+ author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. 
and Sallou, O. and Collin, O.},\r\n+ title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},\r\n+ booktitle = {JOBIM 2013 Proceedings},\r\n+ year = {2013},\r\n+ url = {https://www.e-biogenouest.org/resources/128},\r\n+ pages = {97-106}\r\n+ }</citation>\r\n+</citations>\r\n+</tool>\r\n+\r\n' |
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_prepare_population_map.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_prepare_population_map.py Mon Aug 24 09:29:12 2015 +0000 |
b |
#!/usr/bin/env python
"""Prepare a STACKS population map for the denovo_map / ref_map wrappers.

Created by Cyril Monjeaud
Cyril.Monjeaud@irisa.fr

Last modifications : 01/10/2014

WARNING (original author's note):

The STACKS wrappers need the STACKS scripts in your $PATH.

These scripts are available after compiling the sources of STACKS :

http://creskolab.uoregon.edu/stacks/

or with the galaxy_stacks package in the Genouest toolshed (http://toolshed.genouest.org)
"""

import sys
import re
import os
import tempfile
import shutil
import subprocess
import glob
import optparse
from os.path import basename
import zipfile
import tarfile
import gzip
from galaxy.datatypes.checkers import *
from stacks import *


def __main__():
    """Build the population map file requested on the command line.

    Options:
        -f  Galaxy config file listing the FASTQ/FASTA inputs.
        -s  Galaxy config file listing the SAM inputs (ref_map target only).
        -t  file with the population information.
        -o  output population map.
        -d  directory in which the scratch directory is created.
    """
    # arguments recuperation
    parser = optparse.OptionParser()
    for flag in ("-f", "-s", "-t", "-o", "-d"):
        parser.add_option(flag)
    (options, args) = parser.parse_args()

    # create the working dir
    tmp_dir = tempfile.mkdtemp(dir=options.d)
    print(tmp_dir)

    try:
        # parse config files
        tab_fq_files = galaxy_config_to_tabfiles_for_STACKS(options.f)

        # check if zipped files are into the tab and change tab content
        extract_compress_files_from_tabfiles(tab_fq_files, tmp_dir)

        if not options.s:
            # generate population map for denovo map
            generate_popmap_for_denovo(tab_fq_files, options.t, options.o)
        else:
            # parse config files
            tab_sam_files = galaxy_config_to_tabfiles_for_STACKS(options.s)
            extract_compress_files_from_tabfiles(tab_sam_files, tmp_dir)
            generate_popmap_for_refmap(tab_fq_files, tab_sam_files,
                                       options.t, options.o)
    finally:
        # Clean up the scratch directory even when a helper raised; a failed
        # cleanup must not hide the error that is already propagating.
        shutil.rmtree(tmp_dir, ignore_errors=True)


if __name__ == "__main__":
    __main__()
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_prepare_population_map.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_prepare_population_map.xml Mon Aug 24 09:29:12 2015 +0000 |
b |
@@ -0,0 +1,168 @@ +<tool id="STACKSpreparepopmap" name="STACKS : Prepare population map file " > + <description>for STACKS denovomap and refmap</description> + +<configfiles> +<configfile name="fastq_files"> +#for $input in $fastq_file: +${input.display_name}::${input} +#end for +</configfile> +<configfile name="sam_files"> +#if str( $options_target.options_target_selector ) == "refmap": +#for $input in $options_target.sam_file: +${input.display_name}::${input} +#end for +#end if +</configfile> +</configfiles> + +<command interpreter="python"> +STACKS_prepare_population_map.py +-f $fastq_files +#if str( $options_target.options_target_selector ) == "refmap": +-s $sam_files +#end if +-t $info_file +-o $output +-d $__new_file_path__ +</command> + +<inputs> +<conditional name="options_target"> + <param name="options_target_selector" type="select" label="Select your target"> + <option value="denovo" selected="true">STACKS De Novo map</option> + <option value="refmap">STACKS Reference map</option> + </param> + <when value="denovo"> + </when> + <when value="refmap"> + <param name="sam_file" format="sam,zip,tar.gz" type="data" multiple="true" label="SAM files generated by your alignment" help="SAM/ZIP/TAR.GZ files." /> + </when> + +</conditional> + <param name="fastq_file" format="fastq,fasta,zip,tar.gz" type="data" multiple="true" label="Fastq files generated by STACKS : Process radtags tool" help="FASTQ/FASTA/ZIP/TAR.GZ files." /> + <param name="info_file" format="tabular,txt" type="data" label="File with population information" help="File looks like : barcode TAB population " /> + + +</inputs> +<outputs> + + <data format="tabular" name="output" label="population_map.txt with ${tool.name} on ${on_string}" /> + +</outputs> +<help> + +.. class:: infomark + +**What it does** + +This program will prepare a population map dataset from a 2 columns file containing relation between barcode and population. 
+ +-------- + +**Created by:** + +Stacks was developed by Julian Catchen with contributions from Angel Amores, Paul Hohenlohe, and Bill Cresko + +-------- + +**Example:** + +Input files: + +- FASTQ, FASTA, zip, tar.gz + + +- File with population information: + +This file must have exactly 2 columns, separated by a tab, the first with barcode, second with population name or ID :: + + CGATA pop1 + CGGCG pop1 + GAAGC pop1 + GAGAT pop1 + CGATA pop2 + CGGCG pop2 + GAAGC pop2 + GAGAT pop2 + + +Output file: + +- Population map:: + + indv_01 1 + indv_02 1 + indv_03 1 + indv_04 2 + indv_05 2 + indv_06 2 + +WARNING : the file name in the population map output may be different from the history file name. Don't worry about this, it's safe. + + +Instructions to add the functionality of archives management in Galaxy on the `eBiogenouest HUB wiki <https://www.e-biogenouest.org/wiki/ManArchiveGalaxy>`_ . + +-------- + +**Project links:** + +`STACKS website <http://creskolab.uoregon.edu/stacks/>`_ . + +`STACKS manual <http://creskolab.uoregon.edu/stacks/stacks_manual.pdf>`_ . + +`STACKS google group <https://groups.google.com/forum/#!forum/stacks-users>`_ . + +-------- + +**References:** + +-J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013. + +-J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O'Brien, Q. Yeates, and W. Cresko. The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013. + +-J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011. + +-A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. 
Genetics, 188:799-808, 2011. + +-P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011. + +-K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. Proceedings of the National Academy of Sciences, 107(37):16196-200, 2010. + +-------- + +**Integrated by:** + +Yvan Le Bras and Cyril Monjeaud + +GenOuest Bio-informatics Core Facility + +UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France) + +support@genouest.org + +If you use this tool in Galaxy, please cite : + +`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. <https://www.e-biogenouest.org/resources/128>`_ + + +</help> +<citations> + <citation type="doi">10.1111/mec.12354</citation> + <citation type="doi">10.1111/mec.12330</citation> + <citation type="doi">10.1534/g3.111.000240</citation> + <citation type="doi">10.1534/genetics.111.127324</citation> + <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation> + <citation type="doi">10.1073/pnas.1006538107</citation> + + <citation type="bibtex">@INPROCEEDINGS{JOBIM2013, + author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.}, + title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France}, + booktitle = {JOBIM 2013 Proceedings}, + year = {2013}, + url = {https://www.e-biogenouest.org/resources/128}, + pages = {97-106} + }</citation> +</citations> +</tool> + |
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_procrad.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_procrad.py Mon Aug 24 09:29:12 2015 +0000 |
[ |
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Galaxy wrapper for the STACKS ``process_radtags`` program.

The script links the selected read files into an ``inputs`` directory,
builds and runs the ``process_radtags`` command line, then renames and
returns the demultiplexed files (individually and/or as an archive).
"""

import sys
import os
import re
import tempfile
import subprocess
import glob
import shutil
import argparse
from os.path import basename
import zipfile
import tarfile
import gzip
from stacks import *


# Barcode-layout switches forwarded verbatim to ``process_radtags``.
_BARCODE_LAYOUT_FLAGS = ('inline_null', 'index_null', 'inline_inline',
                         'index_index', 'inline_index', 'index_inline')


def _build_parser():
    """Return the argument parser describing the wrapper's command line."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_type')
    parser.add_argument('--input_enzyme')
    parser.add_argument('--input_single')
    parser.add_argument('--input_paired1')
    parser.add_argument('--input_paired2')
    parser.add_argument('--inputype')
    parser.add_argument('--sample_name')
    parser.add_argument('--barcode')
    parser.add_argument('--output_choice')
    parser.add_argument('--output_archive')
    parser.add_argument('--enzyme1')
    parser.add_argument('--enzyme2')
    parser.add_argument('--outype')
    parser.add_argument('--qualitenc')
    parser.add_argument('-D', action='store_true')
    parser.add_argument('-t')
    parser.add_argument('-q', action='store_true')
    parser.add_argument('--activate_advanced_options')
    parser.add_argument('-r', action='store_true')
    parser.add_argument('-w', default='0.15')
    parser.add_argument('-s', default='10')
    parser.add_argument('-c', action='store_true')
    for name in _BARCODE_LAYOUT_FLAGS:
        parser.add_argument('--' + name, action='store_true')
    parser.add_argument('--logfile')
    # Optional destination for the discarded-reads file. The post-processing
    # step reads this option, so it has to exist on the parser; it stays
    # unset (and the step is skipped) when the caller does not pass it.
    parser.add_argument('--discard_file')
    return parser


def _link_inputs(options, cmd_line):
    """Symlink the read files into ``inputs`` and flag paired-end mode."""
    if options.input_type == 'single':
        # parse the config file to extract filenames and filepaths
        tab_files = galaxy_config_to_tabfiles(options.input_single)
        create_symlinks_from_tabfiles(tab_files, 'inputs')
    else:
        tab_files_paired1 = galaxy_config_to_tabfiles(options.input_paired1)
        tab_files_paired2 = galaxy_config_to_tabfiles(options.input_paired2)

        create_symlinks_from_tabfiles(tab_files_paired1, 'inputs')
        create_symlinks_from_tabfiles(tab_files_paired2, 'inputs')

        # tell process_radtags that the input directory holds paired files
        cmd_line.append('-P')


def _build_command(options):
    """Create the ``process_radtags`` argv list and link the input files."""
    cmd_line = ['process_radtags']
    cmd_line.extend(['-p', 'inputs'])
    cmd_line.extend(['-i', options.inputype])
    cmd_line.extend(['-b', options.barcode])

    _link_inputs(options, cmd_line)

    # test nb enzyme
    if options.input_enzyme == '1':
        cmd_line.extend(['-e', options.enzyme1])
    if options.input_enzyme == '2':
        # double digest: long options take exactly two leading dashes
        cmd_line.extend(['--renz_1', options.enzyme1,
                         '--renz_2', options.enzyme2])

    # quality
    cmd_line.extend(['-E', options.qualitenc])

    # outputs
    cmd_line.extend(['-o', 'job_outputs/'])
    cmd_line.extend(['-y', options.outype])

    # test capture discards
    if options.D:
        cmd_line.append('-D')

    # optional options
    if options.activate_advanced_options == "true":
        if options.q:
            cmd_line.append('-q')
        if options.r:
            cmd_line.append('-r')

        cmd_line.extend(['-w', options.w, '-s', options.s])

        if options.c:
            cmd_line.append('-c')
        # '-1' is the wrapper's "do not truncate" sentinel; also skip the
        # option when it was not supplied at all.
        if options.t is not None and options.t != '-1':
            cmd_line.extend(['-t', options.t])
        for name in _BARCODE_LAYOUT_FLAGS:
            if getattr(options, name):
                cmd_line.append('--' + name)

    return cmd_line


def __main__():
    """Run ``process_radtags`` for Galaxy and collect its output files.

    Exits with status 1 when the ``process_radtags`` log file cannot be
    moved to ``--logfile``, which is how a failed run is detected.
    """
    options = _build_parser().parse_args()

    # create the working dir
    os.mkdir('inputs')
    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')

    cmd_line = _build_command(options)
    print('[CMD_LINE] : ' + ' '.join(cmd_line))
    subprocess.call(cmd_line)

    # postprocesses
    try:
        if not options.logfile:
            raise IOError('no --logfile destination given')
        shutil.move('job_outputs/process_radtags.log', options.logfile)
    except (EnvironmentError, shutil.Error):
        sys.stderr.write('Error in process_radtags execution; Please read the additional output (stdout)\n')
        sys.exit(1)

    if options.discard_file:
        discards = sorted(glob.glob('job_outputs/*.discards'))
        if discards:
            shutil.move(discards[0], options.discard_file)
        else:
            print('No discards file')

    # manage outputs names
    change_outputs_procrad_name(os.getcwd() + '/job_outputs', options.sample_name)

    # generate additional output archive file
    if options.output_choice != '1':
        generate_additional_archive_file(os.getcwd() + '/job_outputs', options.output_archive)

    # if user has not chosen the only zip archive
    if options.output_choice != '3':
        for produced in glob.glob('job_outputs/*'):
            shutil.move(produced, 'galaxy_outputs')


if __name__ == '__main__':
    __main__()
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_procrad.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_procrad.xml Mon Aug 24 09:29:12 2015 +0000 |
b |
b'@@ -0,0 +1,382 @@\n+<?xml version="1.0"?>\n+<tool id="STACKSprocrad" name="STACKS : Process radtags" force_history_refresh="True" version="1.1.0">\n+<description>Run the STACKS cleaning script</description>\n+<configfiles>\n+<configfile name="input_single">\n+#if str( $options_type.options_type_selector ) == "single":\n+#for $input in $options_type.inputs_single:\n+${input.display_name}::${input}\n+#end for\n+#end if\n+</configfile>\n+<configfile name="input_paired1">\n+#if str( $options_type.options_type_selector ) == "paired":\n+#for $input in $options_type.inputs_paired1:\n+${input.display_name}::${input}\n+#end for\n+#end if\n+</configfile>\n+<configfile name="input_paired2">\n+#if str( $options_type.options_type_selector ) == "paired":\n+#for $input in $options_type.inputs_paired2:\n+${input.display_name}::${input}\n+#end for\n+#end if\n+</configfile>\n+</configfiles> \n+<requirements>\n+ <requirement type="package" version="1.18">stacks</requirement>\n+ </requirements>\n+<command interpreter="python">\n+\n+STACKS_procrad.py\n+ --input_type $options_type.options_type_selector\n+ #if str( $options_type.options_type_selector ) == "single":\n+ --input_single $input_single\n+ #else\n+ --input_paired1 $input_paired1\n+ --input_paired2 $input_paired2\n+ #end if\n+ --inputype $inputype\n+ --barcode $barcode\n+ --sample_name $sample_name\n+ --output_choice $options_output_infos_selector\n+ #if str( $options_output_infos_selector ) != "1":\n+ --output_archive $output_archive\n+ #end if\n+ --input_enzyme $options_enzyme.options_enzyme_selector\n+ --enzyme1 $options_enzyme.enzyme\n+ #if str( $options_enzyme.options_enzyme_selector ) == "2":\n+ --enzyme2 $options_enzyme.enzyme2\n+ #end if\n+ --outype $outype\n+ --qualitenc $options_quality.qualitenc\n+ #if $capture:\n+ -D\n+ #end if\n+ --activate_advanced_options $activate_advanced_options\n+ -t $options_advanced.truncate\n+ #if $options_advanced.discard:\n+ -q\n+ #end if\n+ #if $options_advanced.rescue:\n+ -r\n+ #end 
if\n+ -w $options_advanced.sliding\n+ -s $options_advanced.score\n+ #if $options_advanced.remove:\n+ -c\n+ #end if\n+ #if $options_advanced.inline:\n+ --inline_null\n+ #end if\n+ #if $options_advanced.index:\n+ --index_null\n+ #end if\n+ #if $options_advanced.inlinein:\n+ --inline_inline\n+ #end if\n+ #if $options_advanced.indexind:\n+ --index_index\n+ #end if\n+ #if $options_advanced.inlineind:\n+ --inline_index\n+ #end if\n+ #if $options_advanced.indexin:\n+ --index_inline\n+ #end if\n+ --logfile $output\n+\n+</command>\n+\n+<inputs>\n+\t\n+\t<conditional name="options_type">\n+\t <param name="options_type_selector" type="select" label="Single-end or paired-end reads files">\n+\t\t\t<option value="single" selected="True">Single-end files</option>\n+\t\t\t<option value="paired">Paired-end files</option>\n+\t </param>\n+\t <when value="single">\n+\t\t\t<param name="inputs_single" format="fastq,fastq.gz" type="data" multiple="true" label="singles-end reads infile(s)" help="input files" />\n+\t </when>\n+\t <when value="paired">\n+\t\t\t<param name="inputs_paired1" format="fastq,fastq.gz" type="data" multiple="true" label="paired-end reads infile(s) 1" help="Files must have this syntax : name_R1_001.fastq" />\n+\t\t\t<param name="inputs_paired2" format="fastq,fastq.gz" type="data" multiple="true" label="paired-end reads infile(s) 2" help="Files must have this syntax : name_R2_001.fastq" />\n+\t </when>\n+\t</conditional>\n+\t<param name="inputype" type="select" format="text" label="Inputs format">\n+\t\t<option value="fastq" selected="True">fastq</option>\n+\t\t<option value="gzfastq">fastq.gz</option>\n+\t\t<option value="bustard">Illumina BUSTARD</option>\n+\t</param>\n+\t<param name="barcode" type="data" format="tabular,txt" label="Barcode file" help="Barcode file" />\n+\n+\t<param name="sample_name" type="text" value="sample" label="Sample name" help="Precise the s'..b'le-digested \n+data.\n+filter adapter sequence while allowing for sequencing error in the 
adapter pattern.\n+process individual files or whole directories of files.\n+directly read gzipped data\n+filter reads based on Illumina\'s Chastity filter\n+\n+--------\n+\n+**Help**\n+\n+Input files:\n+\n+- FASTQ, FASTA, zip, tar.gz\n+\n+- Barcode File Format\n+\n+The barcode file is a very simple format : one barcode per line.\n+\n+\tCGATA\n+\tCGGCG\n+\tGAAGC\n+\tGAGAT\n+\tCGATA\n+\tCGGCG\n+\tGAAGC\n+\tGAGAT\n+\n+Combinatorial barcodes are specified, one per column, separated by a tab::\n+\n+\tCGATA\tACGTA\n+\tCGGCG\tCGTA\n+\tGAAGC\tCGTA\n+\tGAGAT\tCGTA\n+\tCGATA\tAGCA \n+\tCGGCG\tAGCA\n+\tGAAGC\tAGCA\n+\tGAGAT\tAGCA\n+\n+\n+Instructions to add the functionality of archives management in Galaxy on the `eBiogenouest HUB wiki <https://www.e-biogenouest.org/wiki/ManArchiveGalaxy>`_ .\n+\n+--------\n+\n+\n+**Created by:**\n+\n+Stacks was developed by Julian Catchen with contributions from Angel Amores, Paul Hohenlohe, and Bill Cresko\n+\n+--------\n+\n+**Project links:**\n+\n+`STACKS website <http://creskolab.uoregon.edu/stacks/>`_ .\n+\n+`STACKS manual <http://creskolab.uoregon.edu/stacks/stacks_manual.pdf>`_ .\n+\n+`STACKS google group <https://groups.google.com/forum/#!forum/stacks-users>`_ .\n+\n+--------\n+\n+**References:**\n+\n+-J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013.\n+\n+-J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O\'Brien, Q. Yeates, and W. Cresko. The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013.\n+\n+-J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011.\n+\n+-A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. 
Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. Genetics, 188:799\'808, 2011.\n+\n+-P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011.\n+\n+-K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. Proceedings of the National Academy of Science, 107(37):16196-200, 2010.\n+\n+--------\n+\n+**Integrated by:**\n+\n+Yvan Le Bras and Cyril Monjeaud\n+\n+GenOuest Bio-informatics Core Facility\n+\n+UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)\n+\n+support@genouest.org\n+\n+If you use this tool in Galaxy, please cite :\n+\n+`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. <https://www.e-biogenouest.org/resources/128>`_\n+\n+\n+\n+</help>\n+<citations>\n+ <citation type="doi">10.1111/mec.12354</citation>\n+ <citation type="doi">10.1111/mec.12330</citation>\n+ <citation type="doi">10.1534/g3.111.000240</citation>\n+ <citation type="doi">10.1534/genetics.111.127324</citation>\n+ <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>\n+ <citation type="doi">10.1073/pnas.1006538107</citation>\n+\n+ <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,\n+ author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. 
and Collin, O.},\n+ title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},\n+ booktitle = {JOBIM 2013 Proceedings},\n+ year = {2013},\n+ url = {https://www.e-biogenouest.org/resources/128},\n+ pages = {97-106}\n+ }</citation>\n+</citations>\n+</tool>\n+\n' |
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_refmap.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_refmap.py Mon Aug 24 09:29:12 2015 +0000 |
[ |
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Galaxy wrapper script around the STACKS ``ref_map.pl`` pipeline.

The script stages the selected input datasets in ``inputs/``, runs
``ref_map.pl`` with its results written to ``job_outputs/`` and then hands
the log, the catalog files and the remaining results back to Galaxy, either
as plain files moved to ``galaxy_outputs/`` or packed into zip archives.
"""

import sys
import re
import os
import tempfile
import shutil
import subprocess
import glob
import optparse
from os.path import basename
import zipfile
import tarfile
import gzip
from galaxy.datatypes.checkers import *
from stacks import *


# File-name suffixes of the per-sample result files that get their own
# archive when the output is compressed "by categories".
_CATEGORY_SUFFIXES = ('tags.tsv', 'snps.tsv', 'alleles.tsv', 'matches.tsv')


def _stage_inputs(config_file, flag, files_only):
    """Stage the datasets of one Galaxy config file and build their flags.

    :param config_file: path of the Galaxy config file listing the datasets.
    :param flag: ``ref_map.pl`` option to emit for every staged entry
        (``-p``, ``-r`` or ``-s``).
    :param files_only: when true, entries that are not regular files under
        ``inputs/`` are skipped (directories created by a decompression).
    :returns: list of command line tokens, ``[flag, path, flag, path, ...]``.
    """
    # parse config files
    tab_files = galaxy_config_to_tabfiles_for_STACKS(config_file)

    # check if zipped files are into the tab and change tab content
    extract_compress_files_from_tabfiles(tab_files, 'inputs')

    # check files extension (important to have .sam files)
    check_sam_extension_and_add(tab_files, 'inputs')

    # create symlink into the temp dir
    create_symlinks_from_tabfiles(tab_files, 'inputs')

    arguments = []
    for key in tab_files:
        staged = 'inputs/' + key
        if files_only and not os.path.isfile(staged):
            continue
        # The path is normalised for every flag (previously only for -p).
        arguments.extend([flag, os.path.normpath(staged)])
    return arguments


def _sample_category(filename):
    """Return the category suffix of a per-sample result file, or ``None``.

    Files whose name contains ``batch`` never belong to a category.
    """
    if 'batch' in filename:
        return None
    for suffix in _CATEGORY_SUFFIXES:
        if filename.endswith(suffix):
            return suffix
    return None


def _zip_files(archive_name, filenames):
    """Write *filenames* (relative to the current directory) into a new zip.

    The archive is always closed before returning: ``ZipFile`` only writes
    its central directory on ``close()``, so the file must not be moved or
    copied while it is still open.
    """
    archive = zipfile.ZipFile(archive_name, 'w', allowZip64=True)
    try:
        for name in filenames:
            archive.write(os.path.basename(name))
    finally:
        archive.close()


def __main__():
    """Parse the wrapper options, run ``ref_map.pl`` and collect its outputs."""

    # arguments recuperation

    parser = optparse.OptionParser()
    for option in ('-p', '-r', '-s', '-O', '-n', '-m',
                   '--bound_low', '--bound_high', '--alpha',
                   '--logfile', '--compress_output',
                   '--catalogsnps', '--catalogalleles', '--catalogtags',
                   # additional outputs
                   '--total_output', '--tags_output', '--snps_output',
                   '--alleles_output', '--matches_output'):
        parser.add_option(option)
    (options, args) = parser.parse_args()

    # create working directories

    os.mkdir('inputs')
    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')

    cmd_line = ['ref_map.pl']

    # if genetic map: parent files
    if options.p:
        cmd_line.extend(_stage_inputs(options.p, '-p', True))

    # if genetic map with progeny files
    if options.r:
        cmd_line.extend(_stage_inputs(options.r, '-r', True))

    # individual files of a population
    # NOTE(review): unlike -p and -r, the original code did not skip
    # non-file entries here; that behaviour is kept - confirm it is intended.
    if options.s:
        cmd_line.extend(_stage_inputs(options.s, '-s', False))

    # create the options command line

    cmd_line.extend([
        '-S',
        '-b', '1',
        '-T', '4',
        '-o', 'job_outputs',
    ])

    # -n and -m are always supplied by the tool definition; a missing value
    # is skipped so that it cannot break the ' '.join() below.
    for flag, value in (('-n', options.n), ('-m', options.m)):
        if value is not None:
            cmd_line.extend([flag, value])

    for flag, value in (('-O', options.O),
                        ('--bound_low', options.bound_low),
                        ('--bound_high', options.bound_high),
                        ('--alpha', options.alpha)):
        if value:
            cmd_line.extend([flag, value])

    # execute job

    print('[COMMAND LINE]' + ' '.join(cmd_line))

    p = subprocess.Popen(cmd_line, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)

    (stdoutput, stderror) = p.communicate()

    print(stdoutput)
    print(stderror)

    # postprocesses

    try:
        shutil.move('job_outputs/ref_map.log', options.logfile)
    except Exception:
        # Best effort: a missing log is reported, the outputs that do exist
        # are still collected below.
        sys.stderr.write('Error in ref_map execution; Please read the additional output (stdout)\n')

    # go inside the outputs dir

    os.chdir('job_outputs')

    # copy the catalog files to their dedicated Galaxy outputs

    catalog_outputs = (
        ('catalog.snps.tsv', options.catalogsnps),
        ('catalog.alleles.tsv', options.catalogalleles),
        ('catalog.tags.tsv', options.catalogtags),
    )
    for name in glob.glob('*'):
        for suffix, destination in catalog_outputs:
            if name.endswith(suffix):
                shutil.copy(name, destination)

    # every result file; listed before any temporary archive is created so
    # that the archives never contain themselves

    list_files = glob.glob('*')

    if options.compress_output == 'total':

        # one archive with everything

        _zip_files('total.zip.temp', list_files)
        shutil.move('total.zip.temp', options.total_output)

    elif options.compress_output == 'categories':

        # one archive per category of per-sample files

        categories = (
            ('tags.tsv', 'tags.zip.temp', options.tags_output),
            ('snps.tsv', 'snps.zip.temp', options.snps_output),
            ('alleles.tsv', 'alleles.zip.temp', options.alleles_output),
            ('matches.tsv', 'matches.zip.temp', options.matches_output),
        )

        archives = {}
        try:
            for suffix, temp_name, _destination in categories:
                archives[suffix] = zipfile.ZipFile(temp_name, 'w',
                                                   allowZip64=True)

            for name in list_files:
                suffix = _sample_category(name)
                if suffix is None:
                    # batch/catalog files and anything else stay plain files
                    shutil.move(os.path.basename(name), '../galaxy_outputs')
                else:
                    archives[suffix].write(os.path.basename(name))
                    os.remove(name)
        finally:
            # close before moving, otherwise the archives are incomplete
            for archive in archives.values():
                archive.close()

        # return archives

        for _suffix, temp_name, destination in categories:
            shutil.move(temp_name, destination)

    else:

        # no compression

        for name in list_files:
            shutil.move(os.path.basename(name), '../galaxy_outputs')


if __name__ == '__main__':
    __main__()
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_refmap.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_refmap.xml Mon Aug 24 09:29:12 2015 +0000 |
b |
b'@@ -0,0 +1,347 @@\n+<tool id="STACKSrefmap" name="STACKS : Reference map" force_history_refresh="True">\r\n+ <description>Run the STACKS ref_map.pl wrapper</description>\r\n+\r\n+<configfiles>\r\n+<configfile name="parent_sequences">\r\n+#if str( $options_usage.options_usage_selector ) == "genetic"\r\n+#for $input in $options_usage.parent_sequence:\r\n+${input.display_name}::${input}\r\n+#end for\r\n+#end if\r\n+</configfile>\r\n+<configfile name="progeny_sequences">\r\n+#if str( $options_usage.options_usage_selector ) == "genetic" and str( $options_usage.options_progeny.options_progeny_selector ) == "yes"\r\n+#for $input in $options_usage.options_progeny.progeny_sequence:\r\n+${input.display_name}::${input}\r\n+#end for\r\n+#end if\r\n+</configfile>\r\n+<configfile name="individual_samples">\r\n+#if str( $options_usage.options_usage_selector ) == "population"\r\n+#for $input in $options_usage.individual_sample:\r\n+${input.display_name}::${input}\r\n+#end for\r\n+#end if\r\n+</configfile>\r\n+</configfiles> \r\n+<requirements>\r\n+ <requirement type="package" version="1.18">stacks</requirement>\r\n+</requirements>\r\n+\r\n+<command interpreter="python">\r\n+STACKS_refmap.py\r\n+#if str( $options_usage.options_usage_selector ) == "genetic"\r\n+\t-p $parent_sequences\r\n+\t#if str( $options_usage.options_progeny.options_progeny_selector ) == "yes"\r\n+\t\t-r $progeny_sequences\r\n+\t#end if\r\n+#else\r\n+\t-s $individual_samples\r\n+\t#if str( $options_usage.options_popmap.popmap_selector) == "yes"\r\n+\t\t-O $options_usage.options_popmap.popmap\r\n+\t#end if\r\n+#end if\r\n+-n $mismatchbetlocibuild\r\n+-m $mincov\r\n+--bound_low $snp_options.boundlow\r\n+--bound_high $snp_options.boundhigh\r\n+--alpha $snp_options.alpha\r\n+--catalogsnps $catalogsnps\r\n+--catalogalleles $catalogalleles\r\n+--catalogtags $catalogtags\r\n+--logfile $output\r\n+--compress_output $output_compress\r\n+##additionnal outputs\r\n+--total_output $total_output\r\n+--tags_output 
$tags_output\r\n+--snps_output $snps_output\r\n+--alleles_output $alleles_output\r\n+--matches_output $matches_output\r\n+\r\n+</command>\r\n+\r\n+<inputs>\r\n+\t<conditional name="options_usage">\r\n+\t\t<param name="options_usage_selector" type="select" label="Select your usage">\r\n+\t\t\t<option value="genetic" selected="true">Genetic map</option>\r\n+\t\t\t<option value="population">Population</option>\r\n+\t\t</param>\r\n+\t\t<when value="genetic">\r\n+\t\t\t<param name="parent_sequence" format="sam,zip,tar.gz" type="data" multiple="true" label="Files containing parent sequences" help="SAM/ZIP/TAR.GZ files" />\r\n+\t\t\t<conditional name="options_progeny">\r\n+\t\t\t<param name="options_progeny_selector" type="select" label="Use progeny files">\r\n+\t\t\t\t<option value="yes" selected="true">Yes</option>\r\n+\t\t\t\t<option value="no">No</option>\r\n+\t\t\t</param>\r\n+\t\t\t<when value="yes">\r\n+\t\t\t\t<param name="progeny_sequence" format="sam,zip,tar.gz" type="data" multiple="true" label="Files containing progeny sequences" help="SAM/ZIP/TAR.GZ files containing progeny sequences from a mapping cross" />\r\n+\t\t\t</when>\r\n+\t\t\t<when value="no">\r\n+\t\t\t</when>\r\n+\t\t\t</conditional>\r\n+\r\n+\t\t</when>\r\n+\t\t<when value="population">\r\n+\t\t\t<param name="individual_sample" format="sam,zip,tar.gz" type="data" multiple="true" label="Files containing an individual sample from a population" help="SAM/ZIP/TAR.GZ files." />\r\n+\t\t\t<conditional name="options_popmap">\r\n+\t\t\t\t<param name="popmap_selector" type="select" label="Analyzing one or more populations?" 
>\r\n+\t\t\t\t\t<option value="no" selected="true">No</option>\t\t\r\n+\t\t\t\t\t<option value="yes">Yes</option>\r\n+\t\t\t\t</param>\r\n+\t\t\t\t<when value="no"></when>\r\n+\t\t\t\t<when value="yes">\r\n+\t\t\t\t\t<param name="popmap" type="data" format="tabular,txt" label="Specify a population map" help="If analyzing one or more populations, specify a population map" />\r\n+\t\t\t\t</when>\r\n+\t\t\t</conditional>\r\n+\t\t</when>\r\n+\t</conditional>\r\n+\t\r\n+\t<param name="mismatchbetlocibuild" type="integer" value="0" label="specify the number of mismatches allowed between loci when building the catalog" />\r\n+\t<param name="mincov" type="integer" value="1" label="Minimum depth of coverage" hel'..b'ct Test.\r\n+\tOdds Ratio \t\t\tFisher\'s Exact Test odds ratio\r\n+\tCI High \t\t\tFisher\'s Exact Test confidence interval.\r\n+\tCI Low \t\t\t\tFisher\'s Exact Test confidence interval.\r\n+\tLOD Score \t\t\tLogarithm of odds score.\r\n+\tExpected Heterozygosity\t\tHeterozygosity expected under Hardy-Weinberg equilibrium.\r\n+\tExpected Homozygosity \t\tHomozygosity expected under Hardy-Weinberg equilibrium.\r\n+\tCorrected FST \t\t\tFST with either the FET p-value, or a window-size or genome size Bonferroni correction.\r\n+\tSmoothed FST \t\t\tA weighted average of FST depending on the surrounding 3s of sequence in both directions.\r\n+\tSmoothed FST P-value \t\tIf bootstrap resampling is enabled, a p-value ranking the significance of FST within this pair of populations.\r\n+\r\n+\r\n+Instructions to add the functionality of archives management in Galaxy on the `eBiogenouest HUB wiki <https://www.e-biogenouest.org/wiki/ManArchiveGalaxy>`_ .\r\n+\r\n+\r\n+--------\r\n+\r\n+**Project links:**\r\n+\r\n+`STACKS website <http://creskolab.uoregon.edu/stacks/>`_ .\r\n+\r\n+`STACKS manual <http://creskolab.uoregon.edu/stacks/stacks_manual.pdf>`_ .\r\n+\r\n+`STACKS google group <https://groups.google.com/forum/#!forum/stacks-users>`_ 
.\r\n+\r\n+--------\r\n+\r\n+**References:**\r\n+\r\n+-J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013.\r\n+\r\n+-J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O\'Brien, Q. Yeates, and W. Cresko. The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013.\r\n+\r\n+-J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011.\r\n+\r\n+-A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. Genetics, 188:799\'808, 2011.\r\n+\r\n+-P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011.\r\n+\r\n+-K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. Proceedings of the National Academy of Science, 107(37):16196-200, 2010.\r\n+\r\n+--------\r\n+\r\n+**Integrated by:**\r\n+\r\n+Yvan Le Bras and Cyril Monjeaud \r\n+\r\n+GenOuest Bio-informatics Core Facility\r\n+\r\n+UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)\r\n+\r\n+support@genouest.org\r\n+\r\n+If you use this tool in Galaxy, please cite :\r\n+\r\n+`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. 
<https://www.e-biogenouest.org/resources/128>`_\r\n+\r\n+\r\n+</help>\r\n+<citations>\r\n+ <citation type="doi">10.1111/mec.12354</citation>\r\n+ <citation type="doi">10.1111/mec.12330</citation>\r\n+ <citation type="doi">10.1534/g3.111.000240</citation>\r\n+ <citation type="doi">10.1534/genetics.111.127324</citation>\r\n+ <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>\r\n+ <citation type="doi">10.1073/pnas.1006538107</citation>\r\n+\r\n+ <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,\r\n+ author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},\r\n+ title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},\r\n+ booktitle = {JOBIM 2013 Proceedings},\r\n+ year = {2013},\r\n+ url = {https://www.e-biogenouest.org/resources/128},\r\n+ pages = {97-106}\r\n+ }</citation>\r\n+</citations>\r\n+</tool>\r\n+\r\n' |
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_sort_read_pairs.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_sort_read_pairs.py Mon Aug 24 09:29:12 2015 +0000 |
[ |
#!/usr/bin/env python
"""

Created by Yvan Le Bras
yvan.le_bras@irisa.fr

Last modifications : 02/17/2014

WARNING :

STACKS_sort_read_pairs.py needs:

- STACKS scripts in your $PATH

These scripts are available after compiling the sources of STACKS :

http://creskolab.uoregon.edu/stacks/

or with the galaxy_stacks package in the Genouest toolshed


"""

import sys
import re
import os
import tempfile
import shutil
import subprocess
import glob
import optparse
from os.path import basename
import zipfile
import tarfile
import gzip
from galaxy.datatypes.checkers import *
from stacks import *


def __main__():
    """Run ``sort_read_pairs.pl`` then ``exec_velvet.pl`` and return collated.fa.

    Options read from the command line:

    * ``-a``: archive of STACKS pipeline outputs, extracted to ``stacks_inputs``
    * ``-e``: archive of raw reads, extracted to ``samples_inputs``
    * ``-b``: optional value forwarded to ``sort_read_pairs.pl -w``
    * ``-c``: optional value forwarded to ``sort_read_pairs.pl -r``
    * ``-d``: optional value forwarded to ``exec_velvet.pl -M``
    * ``-o``: destination of the resulting ``collated.fa``

    Exits with status 1 when ``assembly_outputs/collated.fa`` cannot be
    copied to the ``-o`` destination.
    """

    # create the working dirs
    for directory in ("sort_read_outputs", "assembly_outputs",
                      "samples_inputs", "stacks_inputs"):
        os.mkdir(directory)

    # arguments recuperation
    parser = optparse.OptionParser()
    for option in ("-a", "-e", "-b", "-c", "-d", "-o"):
        parser.add_option(option)
    (options, args) = parser.parse_args()

    # STACKS_archive: extract it into the stacks input dir
    extract_compress_files(options.a, os.path.join(os.getcwd(), "stacks_inputs"))

    # samples_archive: extract it into the samples input dir
    extract_compress_files(options.e, os.path.join(os.getcwd(), "samples_inputs"))

    # create the sort_read_pairs command input line
    cmd_line1 = ["sort_read_pairs.pl",
                 "-p", "stacks_inputs",
                 "-s", "samples_inputs",
                 "-o", "sort_read_outputs"]

    if options.b:
        cmd_line1.extend(["-w", options.b])
    if options.c:
        cmd_line1.extend(["-r", options.c])

    # exec command line 1
    p1 = subprocess.Popen(cmd_line1)
    p1.communicate()

    # remove empty files from the output dir before the assembly step
    for fasta_file in glob.glob("sort_read_outputs/*"):
        if os.stat(fasta_file).st_size == 0:
            print("File " + fasta_file + " is empty")
            os.remove(fasta_file)

    # create the exec_velvet command input line
    cmd_line2 = ["exec_velvet.pl",
                 "-s", "sort_read_outputs",
                 "-o", "assembly_outputs",
                 "-c"]

    if options.d:
        cmd_line2.extend(["-M", options.d])

    # launch the command line 2
    p2 = subprocess.Popen(cmd_line2)
    p2.communicate()

    # get collated.fa file
    try:
        shutil.copy("assembly_outputs/collated.fa", options.o)
    except Exception:
        # Exception (not a bare except) so that Ctrl-C / SystemExit are not
        # reported as a missing result.
        print("No result file")
        sys.exit(1)


if __name__ == "__main__":
    __main__()
b |
diff -r 000000000000 -r d6ba40f6c824 STACKS_sort_read_pairs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/STACKS_sort_read_pairs.xml Mon Aug 24 09:29:12 2015 +0000 |
b |
<tool id="STACKSassembleperead" name="STACKS : assemble read pairs by locus" version="1.1.1">
    <description>Run the STACKS sort_read_pairs.pl and exec_velvet.pl wrappers</description>

    <requirements>
        <requirement type="package" version="1.18">stacks</requirement>
        <requirement type="package" version="1.2.10">velvet</requirement>
    </requirements>

    <!-- Parameters declared inside a <conditional> are referenced through the
         conditional name (e.g. $options_whitelist.whitelist). -->
    <command interpreter="python">

STACKS_sort_read_pairs.py
-a $STACKS_archive
-e $samples_archive
#if str( $options_whitelist.whitelist_selector) == "yes"
-b $options_whitelist.whitelist
#end if
#if str( $options_filter.reads_selector) == "yes"
-c $options_filter.threshold
#end if
#if str( $options_filter2.length_selector) == "yes"
-d $options_filter2.threshold2
#end if
-o $output


    </command>

    <inputs>
        <param name="STACKS_archive" format="zip,tar.gz" type="data" label="Archive from STACKS pipeline regrouping all outputs" />

        <param name="samples_archive" format="zip,fastq.gz,tar.gz,tar.bz2" type="data" label="Archive with raw reads used to execute previous STACKS pipeline" />

        <!-- optional whitelist, passed to the wrapper with -b -->
        <conditional name="options_whitelist">
            <param name="whitelist_selector" type="select" label="Have you got a whitelist?" >
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no"></when>
            <when value="yes">
                <param name="whitelist" format="txt,tabular" type="data" label="Whitelist file containing loci that we want to assemble: those that have SNPs" />
            </when>
        </conditional>

        <!-- optional minimum number of reads by locus, passed to the wrapper with -c -->
        <conditional name="options_filter">
            <param name="reads_selector" type="select" label="Specify a threshold for the minimum number of reads by locus?" >
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no"></when>
            <when value="yes">
                <param name="threshold" type="integer" value="10" label="Minimum number of reads by locus"/>
            </when>
        </conditional>

        <!-- optional minimum contig length, passed to the wrapper with -d -->
        <conditional name="options_filter2">
            <param name="length_selector" type="select" label="Specify a minimum length for assembled contigs?" >
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no"></when>
            <when value="yes">
                <param name="threshold2" type="integer" value="200" label="Minimum length for assembled contigs"/>
            </when>
        </conditional>

    </inputs>
    <outputs>
        <data format="fasta" name="output" label="collated.fa : ${tool.name} on ${on_string}" />
    </outputs>
    <stdio>
        <exit_code range="1" level="fatal" description="Error" />
    </stdio>
    <help>

.. class:: infomark

**What it does**

This program will run each of the Stacks sort_read_pairs.pl and exec_velvet.pl utilities to assemble pair-end reads from STACKS pipeline results

--------

**Created by:**

Stacks was developed by Julian Catchen with contributions from Angel Amores, Paul Hohenlohe, and Bill Cresko

--------

**Example:**

Input file:

Output archives of STACKS : Reference map or STACKS : De novo map, in zip or tar.gz format


Output file:

A collated.fa file containing assembled contigs for each locus


Instructions to add the functionality of archives management in Galaxy on the `eBiogenouest HUB wiki <https://www.e-biogenouest.org/wiki/ManArchiveGalaxy>`_ .

--------


**Project links:**

`STACKS website <http://creskolab.uoregon.edu/stacks/>`_ .

`STACKS manual <http://creskolab.uoregon.edu/stacks/stacks_manual.pdf>`_ .

`STACKS google group <https://groups.google.com/forum/#!forum/stacks-users>`_ .

--------

**References:**

-J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013.

-J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O'Brien, Q. Yeates, and W. Cresko. The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013.

-J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011.

-A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. Genetics, 188:799-808, 2011.

-P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011.

-K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. Proceedings of the National Academy of Science, 107(37):16196-200, 2010.

--------

**Integrated by:**

Yvan Le Bras and Cyril Monjeaud

GenOuest Bio-informatics Core Facility

UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)

support@genouest.org

If you use this tool in Galaxy, please cite :

`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. <https://www.e-biogenouest.org/resources/128>`_


    </help>
    <citations>
        <citation type="doi">10.1111/mec.12354</citation>
        <citation type="doi">10.1111/mec.12330</citation>
        <citation type="doi">10.1534/g3.111.000240</citation>
        <citation type="doi">10.1534/genetics.111.127324</citation>
        <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>
        <citation type="doi">10.1073/pnas.1006538107</citation>

        <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,
        author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},
        title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},
        booktitle = {JOBIM 2013 Proceedings},
        year = {2013},
        url = {https://www.e-biogenouest.org/resources/128},
        pages = {97-106}
        }</citation>
    </citations>
</tool>
b |
diff -r 000000000000 -r d6ba40f6c824 bwa_index.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bwa_index.loc.sample Mon Aug 24 09:29:12 2015 +0000 |
b |
@@ -0,0 +1,38 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of BWA indexed sequences data files. You will need +#to create these data files and then create a bwa_index.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The bwa_index.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, for example, if you had phiX indexed stored in +#/depot/data2/galaxy/phiX/base/, +#then the bwa_index.loc entry would look like this: +# +#phiX174 phiX phiX Pretty /depot/data2/galaxy/phiX/base/phiX.fa +# +#and your /depot/data2/galaxy/phiX/base/ directory +#would contain phiX.fa.* files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 phiX.fa.amb +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 phiX.fa.ann +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 phiX.fa.bwt +#...etc... +# +#Your bwa_index.loc file should include an entry per line for each +#index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. For example: +# +#phiX174 phiX phiX174 /depot/data2/galaxy/phiX/base/phiX.fa +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/hg18/base/hg18canon.fa +#hg18full hg18 hg18 Full /depot/data2/galaxy/hg18/base/hg18full.fa +#/orig/path/hg19.fa hg19 hg19 /depot/data2/galaxy/hg19/base/hg19.fa +#...etc... +# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. +# |
b |
diff -r 000000000000 -r d6ba40f6c824 bwa_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bwa_wrapper.py Mon Aug 24 09:29:12 2015 +0000 |
[ |
b'@@ -0,0 +1,358 @@\n+#!/usr/bin/env python\n+\n+"""\n+Runs BWA on single-end or paired-end data.\n+Produces a SAM file containing the mappings.\n+Works with BWA version 0.5.9.\n+\n+usage: bwa_wrapper.py [options]\n+\n+See below for options\n+"""\n+\n+import optparse, os, shutil, subprocess, sys, tempfile\n+import glob \n+import gzip, zipfile, tarfile\n+\n+def stop_err( msg ):\n+ sys.stderr.write( \'%s\\n\' % msg )\n+ sys.exit()\n+\n+def check_is_double_encoded( fastq ):\n+ # check that first read is bases, not one base followed by numbers\n+ bases = [ \'A\', \'C\', \'G\', \'T\', \'a\', \'c\', \'g\', \'t\', \'N\' ]\n+ nums = [ \'0\', \'1\', \'2\', \'3\' ]\n+ for line in file( fastq, \'rb\'):\n+ if not line.strip() or line.startswith( \'@\' ):\n+ continue\n+ if len( [ b for b in line.strip() if b in nums ] ) > 0:\n+ return False\n+ elif line.strip()[0] in bases and len( [ b for b in line.strip() if b in bases ] ) == len( line.strip() ):\n+ return True\n+ else:\n+ raise Exception, \'First line in first read does not appear to be a valid FASTQ read in either base-space or color-space\'\n+ raise Exception, \'There is no non-comment and non-blank line in your FASTQ file\'\n+\n+def __main__():\n+ #Parse Command Line\n+ parser = optparse.OptionParser()\n+ parser.add_option( \'-t\', \'--threads\', dest=\'threads\', help=\'The number of threads to use\' )\n+ parser.add_option( \'-c\', \'--color-space\', dest=\'color_space\', action=\'store_true\', help=\'If the input files are SOLiD format\' )\n+ parser.add_option( \'-r\', \'--ref\', dest=\'ref\', help=\'The reference genome to use or index\' )\n+ parser.add_option( \'-f\', \'--input1\', dest=\'fastq\', help=\'The (forward) fastq file to use for the mapping\' )\n+ parser.add_option( \'-u\', \'--output\', dest=\'output\', help=\'The file to save the output (SAM format)\' )\n+ parser.add_option( \'-p\', \'--params\', dest=\'params\', help=\'Parameter setting to use (pre_set or full)\' )\n+ parser.add_option( \'-s\', 
\'--fileSource\', dest=\'fileSource\', help=\'Whether to use a previously indexed reference sequence or one form history (indexed or history)\' )\n+ parser.add_option( \'-n\', \'--maxEditDist\', dest=\'maxEditDist\', help=\'Maximum edit distance if integer\' )\n+ parser.add_option( \'-m\', \'--fracMissingAligns\', dest=\'fracMissingAligns\', help=\'Fraction of missing alignments given 2% uniform base error rate if fraction\' )\n+ parser.add_option( \'-o\', \'--maxGapOpens\', dest=\'maxGapOpens\', help=\'Maximum number of gap opens\' )\n+ parser.add_option( \'-e\', \'--maxGapExtens\', dest=\'maxGapExtens\', help=\'Maximum number of gap extensions\' )\n+ parser.add_option( \'-d\', \'--disallowLongDel\', dest=\'disallowLongDel\', help=\'Disallow a long deletion within specified bps\' )\n+ parser.add_option( \'-i\', \'--disallowIndel\', dest=\'disallowIndel\', help=\'Disallow indel within specified bps\' )\n+ parser.add_option( \'-l\', \'--seed\', dest=\'seed\', help=\'Take the first specified subsequences\' )\n+ parser.add_option( \'-k\', \'--maxEditDistSeed\', dest=\'maxEditDistSeed\', help=\'Maximum edit distance to the seed\' )\n+ parser.add_option( \'-M\', \'--mismatchPenalty\', dest=\'mismatchPenalty\', help=\'Mismatch penalty\' )\n+ parser.add_option( \'-O\', \'--gapOpenPenalty\', dest=\'gapOpenPenalty\', help=\'Gap open penalty\' )\n+ parser.add_option( \'-E\', \'--gapExtensPenalty\', dest=\'gapExtensPenalty\', help=\'Gap extension penalty\' )\n+ parser.add_option( \'-R\', \'--suboptAlign\', dest=\'suboptAlign\', default=None, help=\'Proceed with suboptimal alignments even if the top hit is a repeat\' )\n+ parser.add_option( \'-N\', \'--noIterSearch\', dest=\'noIterSearch\', help=\'Disable iterative search\' )\n+ parser.add_option( \'-T\', \'--outputTopN\', dest=\'outputTopN\', help=\'Maximum number of alignments to output in the XA tag for reads paired properly\' )\n+ parser.add_option( \'\', \'--outputTopNDisc\', dest=\'outputTopNDisc\', help=\'Maximum number 
of alignments to output in the XA tag for disconcordant read pairs (excluding singletons)\' )\n+ parser.add_option( \'-S'..b'wa samse %s %s %s %s >> %s\' % ( gen_alignment_cmds, ref_file_name, tmp_align_out_name, fastq, sam_output_file )\n+\t # perform alignments\n+\t buffsize = 1048576\n+\t try:\n+\t\t# need to nest try-except in try-finally to handle 2.4\n+\t\ttry:\n+\t\t # align\n+\t\t try:\n+\t\t\ttmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name\n+\t\t\ttmp_stderr = open( tmp, \'wb\' )\n+\t\t\tproc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )\n+\t\t\treturncode = proc.wait()\n+\t\t\ttmp_stderr.close()\n+\t\t\t# get stderr, allowing for case where it\'s very large\n+\t\t\ttmp_stderr = open( tmp, \'rb\' )\n+\t\t\tstderr = \'\'\n+\t\t\ttry:\n+\t\t\t while True:\n+\t\t\t\tstderr += tmp_stderr.read( buffsize )\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\n+\t\t\t\t break\n+\t\t\texcept OverflowError:\n+\t\t\t pass\n+\t\t\ttmp_stderr.close()\n+\t\t\tif returncode != 0:\n+\t\t\t raise Exception, stderr\n+\t\t except Exception, e:\n+\t\t\traise Exception, \'Error aligning sequence. 
\' + str( e )\n+\t\t # and again if paired data\n+\t\t try:\n+\t\t\tif cmd2b:\n+\t\t\t tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name\n+\t\t\t tmp_stderr = open( tmp, \'wb\' )\n+\t\t\t proc = subprocess.Popen( args=cmd2b, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )\n+\t\t\t returncode = proc.wait()\n+\t\t\t tmp_stderr.close()\n+\t\t\t # get stderr, allowing for case where it\'s very large\n+\t\t\t tmp_stderr = open( tmp, \'rb\' )\n+\t\t\t stderr = \'\'\n+\t\t\t try:\n+\t\t\t\twhile True:\n+\t\t\t\t stderr += tmp_stderr.read( buffsize )\n+\t\t\t\t if not stderr or len( stderr ) % buffsize != 0:\n+\t\t\t\t\tbreak\n+\t\t\t except OverflowError:\n+\t\t\t\tpass\n+\t\t\t tmp_stderr.close()\n+\t\t\t if returncode != 0:\n+\t\t\t\traise Exception, stderr\n+\t\t except Exception, e:\n+\t\t\traise Exception, \'Error aligning second sequence. \' + str( e )\n+\t\t # generate align\n+\t\t try:\n+\t\t\ttmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name\n+\t\t\ttmp_stderr = open( tmp, \'wb\' )\n+\t\t\tproc = subprocess.Popen( args=cmd3, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )\n+\t\t\treturncode = proc.wait()\n+\t\t\ttmp_stderr.close()\n+\t\t\t# get stderr, allowing for case where it\'s very large\n+\t\t\ttmp_stderr = open( tmp, \'rb\' )\n+\t\t\tstderr = \'\'\n+\t\t\ttry:\n+\t\t\t while True:\n+\t\t\t\tstderr += tmp_stderr.read( buffsize )\n+\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\n+\t\t\t\t break\n+\t\t\texcept OverflowError:\n+\t\t\t pass\n+\t\t\ttmp_stderr.close()\n+\t\t\tif returncode != 0:\n+\t\t\t raise Exception, stderr\n+\t\t except Exception, e:\n+\t\t\traise Exception, \'Error generating alignments. 
\' + str( e ) \n+\t\t # remove header if necessary\n+\t\t if options.suppressHeader == \'true\':\n+\t\t\ttmp_out = tempfile.NamedTemporaryFile( dir=tmp_dir)\n+\t\t\ttmp_out_name = tmp_out.name\n+\t\t\ttmp_out.close()\n+\t\t\ttry:\n+\t\t\t shutil.move( sam_output_file, tmp_out_name )\n+\t\t\texcept Exception, e:\n+\t\t\t raise Exception, \'Error moving output file before removing headers. \' + str( e )\n+\t\t\tfout = file( sam_output_file, \'w\' )\n+\t\t\tfor line in file( tmp_out.name, \'r\' ):\n+\t\t\t if not ( line.startswith( \'@HD\' ) or line.startswith( \'@SQ\' ) or line.startswith( \'@RG\' ) or line.startswith( \'@PG\' ) or line.startswith( \'@CO\' ) ):\n+\t\t\t\tfout.write( line )\n+\t\t\tfout.close()\n+\t\t # check that there are results in the output file\n+\t\t if os.path.getsize( sam_output_file ) > 0:\n+\t\t\tsys.stdout.write( \'BWA run on single-end data\')\n+\t\t else:\n+\t\t\traise Exception, \'The output file is empty. You may simply have no matches, or there may be an error with your input file or settings.\'\n+\t\texcept Exception, e:\n+\t\t stop_err( \'The alignment failed.\\n\' + str( e ) )\n+\t finally:\n+\t\t# clean up temp dir\n+\t\tif os.path.exists( tmp_index_dir ):\n+\t\t shutil.rmtree( tmp_index_dir )\n+\t\tif os.path.exists( tmp_dir ):\n+\t\t shutil.rmtree( tmp_dir )\t\n+\n+ # put all in an archive\n+ mytotalzipfile=zipfile.ZipFile(options.output, \'w\', allowZip64=True)\n+ os.chdir(tmp_output_dir)\n+ for samfile in glob.glob(tmp_output_dir+\'/*\'):\n+\tmytotalzipfile.write(os.path.basename(samfile))\n+\n+\n+if __name__=="__main__": __main__()\n' |
b |
diff -r 000000000000 -r d6ba40f6c824 bwa_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bwa_wrapper.xml Mon Aug 24 09:29:12 2015 +0000 |
[ |
b'@@ -0,0 +1,339 @@\n+<tool id="bwa_wrapper_stacks" name="Map with BWA for STACKS" version="1.2.3">\n+ <description>from zip file with fastqsanger files</description>\n+ <requirements>\n+ <requirement type="package" version="0.6.2">bwa</requirement>\n+ </requirements>\n+ <description></description>\n+ <parallelism method="basic"></parallelism>\n+ <command interpreter="python">\n+ bwa_wrapper.py \n+ --threads="4"\n+\n+ #if $input1.ext == "fastqillumina":\n+ --illumina1.3\n+ #end if\n+\n+ ## reference source\n+ --fileSource="${genomeSource.refGenomeSource}"\n+ #if $genomeSource.refGenomeSource == "history":\n+ ##build index on the fly\n+ --ref="${genomeSource.ownFile}"\n+ --dbkey="${dbkey}"\n+ #else:\n+ ##use precomputed indexes\n+ --ref="${genomeSource.indices.fields.path}"\n+ --do_not_build_index\n+ #end if\n+\n+ ## input file(s)\n+ --input1="${paired.input1}"\n+ \n+ ## output file\n+ --output="${output}"\n+\n+ ## run parameters\n+ --params="${params.source_select}"\n+ #if $params.source_select != "pre_set":\n+ --maxEditDist="${params.maxEditDist}"\n+ --fracMissingAligns="${params.fracMissingAligns}"\n+ --maxGapOpens="${params.maxGapOpens}"\n+ --maxGapExtens="${params.maxGapExtens}"\n+ --disallowLongDel="${params.disallowLongDel}"\n+ --disallowIndel="${params.disallowIndel}"\n+ --seed="${params.seed}"\n+ --maxEditDistSeed="${params.maxEditDistSeed}"\n+ --mismatchPenalty="${params.mismatchPenalty}"\n+ --gapOpenPenalty="${params.gapOpenPenalty}"\n+ --gapExtensPenalty="${params.gapExtensPenalty}"\n+ --suboptAlign="${params.suboptAlign}"\n+ --noIterSearch="${params.noIterSearch}"\n+ --outputTopN="${params.outputTopN}"\n+ --outputTopNDisc="${params.outputTopNDisc}"\n+ --maxInsertSize="${params.maxInsertSize}"\n+ --maxOccurPairing="${params.maxOccurPairing}"\n+ #if $params.readGroup.specReadGroup == "yes"\n+ --rgid="${params.readGroup.rgid}"\n+ --rgcn="${params.readGroup.rgcn}"\n+ --rgds="${params.readGroup.rgds}"\n+ --rgdt="${params.readGroup.rgdt}"\n+ 
--rgfo="${params.readGroup.rgfo}"\n+ --rgks="${params.readGroup.rgks}"\n+ --rglb="${params.readGroup.rglb}"\n+ --rgpg="${params.readGroup.rgpg}"\n+ --rgpi="${params.readGroup.rgpi}"\n+ --rgpl="${params.readGroup.rgpl}"\n+ --rgpu="${params.readGroup.rgpu}"\n+ --rgsm="${params.readGroup.rgsm}"\n+ #end if\n+ #end if\n+\n+ ## suppress output SAM header\n+ --suppressHeader="${suppressHeader}"\n+ </command>\n+ <inputs>\n+ <conditional name="genomeSource">\n+ <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">\n+ <option value="indexed">Use a built-in index</option>\n+ <option value="history">Use one from the history</option>\n+ </param>\n+ <when value="indexed">\n+ <param name="indices" type="select" label="Select a reference genome">\n+ <options from_data_table="bwa_indexes">\n+ <filter type="sort_by" column="2" />\n+ <validator type="no_options" message="No indexes are available" />\n+ </options>\n+ </param>\n+ </when>\n+ <when value="history">\n+ <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />\n+ </when>\n+ </conditional>\n+ <conditional name="paired">\n+ <param name="sPaired" type="select" label="Is this library mate-paired?">\n+ <option value="single">Single-end</option>\n+ </param>\n+ <when value="single">\n+ <param name="input1" type="data" format="zip" label="Zip file" help="Zip file with several FASTQ with either Sanger-scaled quality values (fastqsanger) or Illumina-scaled quality values (fastqillum'..b'hit is\n+ unique. Using this option has no effect on accuracy for single-end\n+ reads. It is mainly designed for improving the alignment accuracy of\n+ paired-end reads. However, the pairing procedure will be slowed down,\n+ especially for very short reads (~32bp).\n+ -N Disable iterative search. All hits with no more than maxDiff\n+ differences will be found. 
This mode is much slower than the default.\n+\n+For **samse**::\n+\n+ -n INT Maximum number of alignments to output in the XA tag for reads paired\n+ properly. If a read has more than INT hits, the XA tag will not be\n+ written. [3]\n+ -r STR Specify the read group in a format like \'@RG\\tID:foo\\tSM:bar\' [null]\n+\n+For **sampe**::\n+\n+ -a INT Maximum insert size for a read pair to be considered as being mapped\n+ properly. Since version 0.4.5, this option is only used when there\n+ are not enough good alignment to infer the distribution of insert\n+ sizes. [500]\n+ -n INT Maximum number of alignments to output in the XA tag for reads paired\n+ properly. If a read has more than INT hits, the XA tag will not be\n+ written. [3]\n+ -N INT Maximum number of alignments to output in the XA tag for disconcordant\n+ read pairs (excluding singletons). If a read has more than INT hits,\n+ the XA tag will not be written. [10]\n+ -o INT Maximum occurrences of a read for pairing. A read with more\n+ occurrences will be treated as a single-end read. Reducing this\n+ parameter helps faster pairing. [100000]\n+ -r STR Specify the read group in a format like \'@RG\\tID:foo\\tSM:bar\' [null]\n+\n+For specifying the read group in **samse** or **sampe**, use the following::\n+\n+ @RG Read group. Unordered multiple @RG lines are allowed. \n+ ID Read group identi\xef\xac\x81er. Each @RG line must have a unique ID. The value of\n+ ID is used in the RG tags of alignment records. Must be unique among all\n+ read groups in header section. Read group IDs may be modi\xef\xac\x81ed when\n+ merging SAM \xef\xac\x81les in order to handle collisions. \n+ CN Name of sequencing center producing the read. \n+ DS Description. \n+ DT Date the run was produced (ISO8601 date or date/time). \n+ FO Flow order. The array of nucleotide bases that correspond to the\n+ nucleotides used for each \xef\xac\x82ow of each read. 
Multi-base \xef\xac\x82ows are encoded\n+ in IUPAC format, and non-nucleotide \xef\xac\x82ows by various other characters.\n+ Format : /\\*|[ACMGRSVTWYHKDBN]+/ \n+ KS The array of nucleotide bases that correspond to the key sequence of each read. \n+ LB Library. \n+ PG Programs used for processing the read group. \n+ PI Predicted median insert size. \n+ PL Platform/technology used to produce the reads. Valid values : CAPILLARY,\n+ LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO. \n+ PU Platform unit (e.g. \xef\xac\x82owcell-barcode.lane for Illumina or slide for\n+ SOLiD). Unique identi\xef\xac\x81er. \n+ SM Sample. Use pool name where a pool is being sequenced. \n+\n+ </help>\n+ <citations>\n+ <citation type="doi">10.1111/mec.12354</citation>\n+ <citation type="doi">10.1111/mec.12330</citation>\n+ <citation type="doi">10.1534/g3.111.000240</citation>\n+ <citation type="doi">10.1534/genetics.111.127324</citation>\n+ <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>\n+ <citation type="doi">10.1073/pnas.1006538107</citation>\n+\n+ <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,\n+ author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},\n+ title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},\n+ booktitle = {JOBIM 2013 Proceedings},\n+ year = {2013},\n+ url = {https://www.e-biogenouest.org/resources/128},\n+ pages = {97-106}\n+ }</citation>\n+</citations>\n+</tool>\n+\n+\n' |
b |
diff -r 000000000000 -r d6ba40f6c824 stacks.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/stacks.py Mon Aug 24 09:29:12 2015 +0000 |
[ |
b'@@ -0,0 +1,367 @@\n+"""\n+\n+STACKS METHODS FOR GALAXY\n+\n+Created by Cyril Monjeaud & Yvan Le Bras\n+Cyril.Monjeaud@irisa.fr\n+yvan.le_bras@irisa.fr\n+\n+Last modifications : 01/22/2014\n+\n+\n+"""\n+\n+import os, sys, re, shutil\n+import glob \n+import collections\n+import gzip, zipfile, tarfile\n+import subprocess\n+from galaxy.datatypes.checkers import *\n+\n+\n+"""\n+\n+STACKS COMMON METHODS\n+\n+galaxy_config_to_tabfiles(input_config)\n+galaxy_config_to_tabfiles_for_STACKS(input_config)\n+extract_compress_files_from_tabfiles(tab_files, tmp_input_dir)\n+create_symlinks_from_tabfiles(tab_files, tmp_input_dir)\n+\n+"""\n+def galaxy_config_to_tabfiles(input_config):\n+\n+\ttab_files=collections.OrderedDict()\n+\tfor line in open(input_config, "r").readlines():\n+\t\tif line.strip() != \'\':\n+\t\t\textract=line.strip().split("::")\n+\t\t\ttab_files[extract[0].replace(" (", ".").replace(" ", ".").replace(")", "").replace(":", ".").replace("/", ".")]=extract[1]\n+\n+\t# tabfiles[name]-> path\n+\treturn tab_files\n+\n+\n+def galaxy_config_to_tabfiles_for_STACKS(input_config):\n+\n+\ttab_files=collections.OrderedDict()\n+\tfor line in open(input_config, "r").readlines():\n+\t\tif line.strip() != \'\':\n+\t\t\textract=line.strip().split("::")\n+\t\t\tparse_name=re.search("^STACKS.*\\((.*\\.[ATCG]*\\.fq)\\)$", extract[0])\n+\t\t\t# rename galaxy name in a short name\n+\t\t\tif parse_name:\t\t\t\t\n+\t\t\t\textract[0]=parse_name.groups(1)[0]\n+\n+\t\t\ttab_files[extract[0].replace(" (", ".").replace(" ", ".").replace(")", "").replace(":", ".").replace("/", ".")]=extract[1]\n+\t\t\t\n+\t# tabfiles[name]-> path\n+\treturn tab_files\n+\n+\n+def extract_compress_files_from_tabfiles(tab_files, tmp_input_dir):\n+\t\n+\t# for each file\n+\tfor key in tab_files.keys():\n+\t\t#test if is zip file\n+\t\tif (check_zip( tab_files[key] )):\n+\n+\t\t\t# extract all files names and added it in the tab\n+\t\t\tmyarchive = zipfile.ZipFile(tab_files[key], \'r\')\n+\t\t\tfor i in 
myarchive.namelist():\n+\t\t\t\ttab_files[i]=tmp_input_dir+"/"+i\n+\n+\t\t\t# extract all files\n+\t\t\tmyarchive.extractall(tmp_input_dir)\n+\n+\t\t\t#remove compress file from the tab\n+\t\t\tdel tab_files[key]\n+\n+\t\t#test if is tar.gz file\n+\t\telse:\n+\t\t\tif tarfile.is_tarfile( tab_files[key] ) and check_gzip( tab_files[key] ):\n+\t\t\t\t# extract all files names and added it in the tab\n+\t\t\t\tmygzfile = tarfile.open(tab_files[key], \'r\')\n+\t\t\t\n+\t\t\t\tfor i in mygzfile.getnames():\n+\t\t\t\t\ttab_files[i]=tmp_input_dir+"/"+i\n+\t\t\n+\t\t\t\t# extract all files\n+\t\t\t\tmygzfile.extractall(tmp_input_dir)\n+\n+\t\t\t\t#remove compress file from the tab\n+\t\t\t\tdel tab_files[key]\n+\n+\n+\n+def create_symlinks_from_tabfiles(tab_files, tmp_input_dir):\n+\n+\tfor key in tab_files.keys():\n+\t\t#print "file single: "+key+" -> "+tab_files[key]\n+\t\t#create a sym_link in our temp dir\n+\t\tif not os.path.exists(tmp_input_dir+\'/\'+key):\n+\t\t\tos.symlink(tab_files[key], tmp_input_dir+\'/\'+key)\n+\n+\n+"""\n+\n+PROCESS RADTAGS METHODS\n+\n+generate_additional_file(tmp_output_dir, output_archive)\n+\n+"""\n+\n+def change_outputs_procrad_name(tmp_output_dir, sample_name):\n+\n+\tlist_files = glob.glob(tmp_output_dir+\'/*\')\n+\n+\tfor file in list_files:\n+\t\t# change sample name\n+\t\tnew_file_name=os.path.basename(file.replace("_",".").replace("sample", sample_name))\n+\n+\t\t# transform .fa -> .fasta or .fq->.fastq\n+\t\tif os.path.splitext(new_file_name)[1] == ".fa":\n+\t\t\tnew_file_name = os.path.splitext(new_file_name)[0]+\'.fasta\'\n+\t\telse:\n+\t\t\tnew_file_name = os.path.splitext(new_file_name)[0]+\'.fastq\'\n+\n+\t\tshutil.move(tmp_output_dir+\'/\'+os.path.basename(file), tmp_output_dir+\'/\'+new_file_name)\n+\n+\n+def generate_additional_archive_file(tmp_output_dir, output_archive):\n+\n+\tlist_files = glob.glob(tmp_output_dir+\'/*\')\n+\n+ myzip=zipfile.ZipFile("archive.zip.temp", \'w\', allowZip64=True)\n+\n+\t# for each fastq 
file\n+\tfor fastq_file in list_files:\n+\t\t# add file to the archive output\n+\t\tmyzip.write(fastq_file, os.path.basename(fastq_file))\n+\n+\tshutil.move("archive.zip.temp", output_archive)\n+\n+\n+"""\n+\n+DENOVOMAP METHODS\n+\n+check_fastq_extension_and_add(tab_files, tmp_input_dir)\n+\n+"""\n+\n+def check_fastq_extension_and_add(tab_files,'..b'trip())\n+\n+\t\tif not parse_line:\n+\t\t\tprint "[WARNING] Wrong input infos file structure : "+line\n+\t\telse:\n+\t\t\tbarcode=parse_line.groups(1)[0]\n+\t\t\tpopulation_name=parse_line.groups(1)[1]\n+\n+\t\t\t# if its the first meet with the population\n+\t\t\tif population_name not in pop_to_int:\n+\t\t\t\tpop_to_int.append(population_name)\t\t\n+\n+\t\t\t# manage ext if present, because the population map file should not have the ext\n+\t\t\tif re.search("(\\.fq$|\\.fastq$)", fq_name_for_barcode[barcode]):\n+\t\t\t\tfqfile=os.path.splitext(fq_name_for_barcode[barcode])[0]\n+\t\t\telse:\n+\t\t\t\tfqfile=fq_name_for_barcode[barcode]\n+\n+\n+\t\t\t# write in the file\n+\t\t\tmy_output_file.write(fqfile+"\\t"+str(pop_to_int.index(population_name))+"\\n")\n+\n+\t# close files\n+\tmy_output_file.close()\n+\tmy_open_info_file.close()\n+\n+\n+\n+\n+def generate_popmap_for_refmap(tab_fq_files, tab_sam_files, infos_file, pop_map):\n+\n+\t# initiate the dict : barcode -> tab[seq]\n+\tseq_id_for_barcode={}\n+\n+\t# initiate the dict : barcode -> sam_name\n+\tsam_name_for_barcode={}\n+\n+\t### Parse fastqfiles ###\n+\t# insert my barcode into a tab with sequences ID associated\n+\tfor fastq_file in tab_fq_files.keys():\n+\t\tsingle_barcode=re.search("([ATCG]*)(\\.fq|\\.fastq)", fastq_file).groups(0)[0]\n+ \n+ \t# open the fasq file\n+ \topen_fastqfile=open(tab_fq_files[fastq_file], \'r\')\n+\t\t\n+\t\t# for each line, get the seq ID\n+\t\ttab_seq_id=[]\n+\t\tfor line in open_fastqfile:\n+\t\t my_match_seqID=re.search("^@([A-Z0-9]+\\.[0-9]+)\\s.*", line)\n+\t\t if my_match_seqID: \n+\t\t 
tab_seq_id.append(my_match_seqID.groups(0)[0])\n+\n+\t\t# push in a dict the tab of seqID for the current barcode\n+\t\tseq_id_for_barcode[single_barcode]=tab_seq_id\n+\n+\n+\t### Parse samfiles and get the first seq id ###\n+\tfor sam_file in tab_sam_files.keys():\n+\n+\t\t# open the sam file\n+ \topen_samfile=open(tab_sam_files[sam_file], \'r\')\n+\t\t\n+\t\t# get the first seq id\n+\t\tfirst_seq_id=\'\'\n+\t\tfor line in open_samfile:\n+\t\t\tif not re.search("^@", line):\n+\t\t\t\tfirst_seq_id=line.split("\\t")[0]\n+\t\t\t\tbreak\n+\n+\n+\t\t# map with seq_id_for_barcode structure\n+\t\tfor barcode in seq_id_for_barcode:\n+\t\t\tfor seq in seq_id_for_barcode[barcode]:\n+\t\t\t\tif seq == first_seq_id:\n+\t\t\t\t\t#print "sam -> "+sam_file+" seq -> "+first_seq_id+" barcode -> "+barcode\n+\t\t\t\t\tsam_name_for_barcode[barcode]=sam_file\n+\t\t\t\t\tbreak\n+\n+\t# open the infos file and output file\n+\tmy_open_info_file=open(infos_file, \'r\')\n+\tmy_output_file=open(pop_map, \'w\')\n+\n+\t# conversion tab for population to integer\n+\tpop_to_int=[]\n+\n+\t# write infos into the final output\n+\tfor line in my_open_info_file:\n+\t\tparse_line=re.search("(^[ATCG]+)\\t(.*)", line)\n+\n+\t\tif not parse_line:\n+\t\t\tprint "[WARNING] Wrong input infos file structure : "+line\n+\t\telse:\n+\n+\t\t\t# if its the first meet with the population\n+\t\t\tif parse_line.groups(1)[1] not in pop_to_int:\n+\t\t\t\tpop_to_int.append(parse_line.groups(1)[1])\t\t\n+\n+\t\t\t# manage ext if present, because the population map file should not have the ext\n+\t\t\tif re.search("\\.sam", sam_name_for_barcode[parse_line.groups(1)[0]]):\n+\t\t\t\tsamfile=os.path.splitext(sam_name_for_barcode[parse_line.groups(1)[0]])[0]\n+\t\t\telse:\n+\t\t\t\tsamfile=sam_name_for_barcode[parse_line.groups(1)[0]]\n+\n+\t\t\t# write in the file\n+\t\t\tmy_output_file.write(samfile+"\\t"+str(pop_to_int.index(parse_line.groups(1)[1]))+"\\n")\n+\n+\t# close 
files\n+\tmy_output_file.close()\n+\tmy_open_info_file.close()\n+\n+\n+"""\n+\n+STACKS POPULATION\n+\n+\n+"""\n+\n+\n+def extract_compress_files(myfile, tmp_input_dir):\n+\n+ #test if is zip file\n+ if (check_zip( myfile )):\n+\n+ # extract all files names and added it in the tab\n+ myarchive = zipfile.ZipFile(myfile, \'r\')\n+\n+ # extract all files\n+ myarchive.extractall(tmp_input_dir)\n+\n+\n+ #test if is tar.gz file\n+ else:\n+ # extract all files names and added it in the tab\n+ mygzfile = tarfile.open(myfile, \'r\')\n+\n+ # extract all files\n+ mygzfile.extractall(tmp_input_dir)\n+\n+\n' |
b |
diff -r 000000000000 -r d6ba40f6c824 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Mon Aug 24 09:29:12 2015 +0000 |
b |
@@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of indexes in the BWA mapper format --> + <table name="bwa_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/bwa_index.loc" /> + </table> +</tables> |
b |
diff -r 000000000000 -r d6ba40f6c824 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Aug 24 09:29:12 2015 +0000 |
b |
@@ -0,0 +1,13 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="stacks" version="1.18"> + <repository name="package_stacks_1_18" changeset_revision="6bb02bb29819" owner="cmonjeau" toolshed="https://toolshed.g2.bx.psu.edu/" /> + </package> + <package name="bwa" version="0.6.2"> + <repository name="package_bwa_0_6_2" changeset_revision="0778635a84ba" owner="devteam" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu/" /> + </package> + <package name="velvet" version="1.2.10"> + <repository name="package_velvet_1_2_10" changeset_revision="1c500c3e7fdf" owner="rjullien" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu/" /> + </package> +</tool_dependency> + |