Mercurial > repos > scisjnu123 > ngsap_vc
changeset 2:2c7824a8d764 draft
Uploaded
author | scisjnu123 |
---|---|
date | Thu, 12 Sep 2019 06:50:21 -0400 |
parents | 0052747b878f |
children | 0d10255b5434 |
files | GATK/gatk/analyze_covariates.xml GATK/gatk/base_recalibrator.xml GATK/gatk/combine_gvcfs.xml GATK/gatk/combine_variants.xml GATK/gatk/gatk.xml GATK/gatk/gatk_macros.xml GATK/gatk/generation/gatk.xsl GATK/gatk/generation/gatk.xsldb.xml GATK/gatk/genotype_gvcfs.xml GATK/gatk/haplotype_caller.xml GATK/gatk/indel_realigner.xml GATK/gatk/print_reads.xml GATK/gatk/realigner_target_creator.xml GATK/gatk/tool-data/destinations.py GATK/gatk/tool-data/picard_index.loc.sample GATK/gatk/tool_data_table_conf.xml.sample GATK/gatk/tool_dependencies.xml GATK/package_picard_1_135/tool_dependencies.xml GATK/package_r_for_gatk_3_4_0/tool_dependencies.xml ebola.len ebolamutant.fasta |
diffstat | 21 files changed, 1289 insertions(+), 276 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/gatk/analyze_covariates.xml Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,37 @@ +<macros> + <xml name="AnalyzeCovariatesParameters" tokens="tag"> + + <!-- BQSR in main config --> + + <param name="afterReportFile" type="data" format="tabular" optional="true" label="file containing the BQSR second-pass report file" help="-after,‑‑afterReportFile &lt;afterReportFile&gt;" /> + + <param name="beforeReportFile" type="data" format="tabular" optional="true" label="file containing the BQSR first-pass report file" help="-before,‑‑beforeReportFile &lt;beforeReportFile&gt;" /> + + </xml> + + <xml name="AnalyzeCovariatesOutput"> + <data format="pdf" name="ac_plotsReportFile" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (PDF Recalibration Report)"> + <yield /> + </data> + </xml> + + <template name="AnalyzeCovariatesPreprocessing"> +<![CDATA[ +]]> + </template> + + <template name="AnalyzeCovariatesOptions"> +<![CDATA[ + --plotsReportFile ${ac_plotsReportFile} + ## an unset optional dataset stringifies to "None" (which is truthy), so compare against it explicitly + #if str($analysis_type.afterReportFile) != 'None' + --afterReportFile $analysis_type.afterReportFile + #end if + #if str($analysis_type.beforeReportFile) != 'None' + --beforeReportFile $analysis_type.beforeReportFile + #end if +]]> + </template> +</macros> + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/gatk/base_recalibrator.xml Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,38 @@ +<macros> + <xml name="BaseRecalibratorParameters" tokens="tag"> + + <expand macro="macro_bam_input" tag="@TAG@" /> + + <param name="knownSites" type="data" format="vcf,bcf,bed,pileup,tabular,table" label="Database of known Sites (ROD files; e.g. VCF format)" multiple="true" title="A database of known polymorphic sites to skip over in the recalibration algorithm" help="-knownSites,‑‑knownSites &lt;knownSites&gt;" /> + + </xml> + + <xml name="BaseRecalibratorOutput"> + <data format="tabular" name="br_table" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (Table)"> + <yield /> + </data> + </xml> + + <template name="BaseRecalibratorPreprocessing"> +<![CDATA[ + @token_bam_input_pre@ + #for $i, $variant in enumerate($analysis_type.knownSites): + ln -s -f ${variant} variant_${i}.vcf && + #end for +]]> + </template> + + <template name="BaseRecalibratorOptions"> +<![CDATA[ + --out ${br_table} + + @token_bam_input@ + + #for $i, $variant in enumerate($analysis_type.knownSites): + --knownSites variant_${i}.vcf + #end for +]]> + </template> +</macros> + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/gatk/combine_gvcfs.xml Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,44 @@ +<macros> + <xml name="CombineGVCFsParameters" tokens="tag"> + + <expand macro="macro_gvcf_input" tag="@TAG@" /> + + <expand macro="macro_optional_parameters"> + + <param name="breakBandsAtMultiplesOf" type="integer" value="0" label="If > 0, reference bands will be broken up at genomic positions that are multiples of this number" help="-breakBandsAtMultiplesOf,‑‑breakBandsAtMultiplesOf &lt;breakBandsAtMultiplesOf&gt;" /> + + </expand> + + </xml> + + <xml name="CombineGVCFsOutput"> + <data format="vcf" name="cg_output_vcf" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (VCF)"> + <yield /> + </data> + </xml> + + <template name="CombineGVCFsPreprocessing"> +<![CDATA[ + @token_gvcf_input_pre@ +]]> + </template> + + <template name="CombineGVCFsOptions"> +<![CDATA[ + --out ${cg_output_vcf} + + @token_gvcf_input@ + ## cast the parameter explicitly before the numeric comparison instead of comparing the wrapper object + #set $optionals = $analysis_type.optional_parameters + #if $optionals.optional_parameters_enabled + #if int(str($optionals.breakBandsAtMultiplesOf)) > 0 + --breakBandsAtMultiplesOf $optionals.breakBandsAtMultiplesOf + #end if + #end if +]]> + </template> + + +</macros> + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/gatk/combine_variants.xml Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,99 @@ +<macros> + <xml name="CombineVariantsParameters" tokens="tag"> + + <expand macro="macro_vcf_input" tag="@TAG@" /> + + <expand macro="macro_optional_parameters"> + + <param name="filteredRecordsMergeType" type="select" label="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields" help="-filteredRecordsMergeType,‑‑filteredrecordsmergetype &lt;filteredrecordsmergetype&gt;"> + <option value="">No Selection</option> + <option value="KEEP_IF_ANY_UNFILTERED">KEEP_IF_ANY_UNFILTERED</option> + <option value="KEEP_IF_ALL_UNFILTERED">KEEP_IF_ALL_UNFILTERED</option> + <option value="KEEP_UNCONDITIONAL">KEEP_UNCONDITIONAL</option> + </param> + + <param name="genotypeMergeOptions" type="select" label="Determines how we should merge genotype records for samples shared across the ROD files" help="-genotypeMergeOptions,‑‑genotypemergeoption &lt;genotypemergeoption&gt;"> + <option value="">No Selection</option> + <option value="UNIQUIFY">UNIQUIFY</option> + <option value="PRIORITIZE">PRIORITIZE</option> + <option value="UNSORTED">UNSORTED</option> + <option value="REQUIRE_UNIQUE">REQUIRE_UNIQUE</option> + </param> + + <param name="minimumN" type="integer" value="1" optional="true" label="Combine variants and output site only if the variant is present in at least N input files" help="-minN,‑‑minimumN &lt;minimumN&gt;" /> + + <param name="rod_priority_list" type="text" value="" optional="true" label="A comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted" help="-priority,‑‑rod_priority_list &lt;rod_priority_list&gt;" /> + + <param name="setKey" type="text" value="" optional="true" label="Key used in the INFO key=value tag emitted describing which set the combined VCF record came from" help="-setKey,‑‑setKey &lt;setKey&gt;" /> + + <param 
name="assumeIdenticalSamples" type="boolean" truevalue="--assumeIdenticalSamples" falsevalue="" label="If true, assume input VCFs have identical sample sets and disjoint calls" help="-assumeIdenticalSamples,‑‑assumeIdenticalSamples" /> + + <param name="excludeNonVariants" type="boolean" truevalue="--excludeNonVariants" falsevalue="" label="Don't include loci found to be non-variant after the combining procedure" help="-env,‑‑excludeNonVariants" /> + + <param name="filteredAreUncalled" type="boolean" truevalue="--filteredAreUncalled" falsevalue="" label="If true, then filtered VCFs are treated as uncalled, so that filtered set annotations don't appear in the combined VCF" help="-filteredAreUncalled,‑‑filteredAreUncalled" /> + + <param name="mergeInfoWithMaxAC" type="boolean" truevalue="--mergeInfoWithMaxAC" falsevalue="" label="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records." 
help="-mergeInfoWithMaxAC,‑‑mergeInfoWithMaxAC" /> + + <param name="minimalVCF" type="boolean" truevalue="--minimalVCF" falsevalue="" label="If true, then the output VCF will contain no INFO or genotype FORMAT fields" help="-minimalVCF,‑‑minimalVCF" /> + + <param name="printComplexMerges" type="boolean" truevalue="--printComplexMerges" falsevalue="" label="Print out interesting sites requiring complex compatibility merging" help="-printComplexMerges,‑‑printComplexMerges" /> + + <param name="suppressCommandLineHeader" type="boolean" truevalue="--suppressCommandLineHeader" falsevalue="" label="If true, do not output the header containing the command line used" help="-suppressCommandLineHeader,‑‑suppressCommandLineHeader" /> + + </expand> + + </xml> + + <xml name="CombineVariantsOutput"> + <data format="vcf" name="cv_output_vcf" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (VCF)"> + <yield /> + </data> + </xml> + + <template name="CombineVariantsPreprocessing"> +<![CDATA[ + @token_vcf_input_pre@ +]]> + </template> + + <template name="CombineVariantsOptions"> +<![CDATA[ + --out ${cv_output_vcf} + + @token_vcf_input@ + + #set $optionals = $analysis_type.optional_parameters + #if $optionals.optional_parameters_enabled + ## long option names follow the help text of the matching params; empty or default values are not passed on + #if $optionals.filteredRecordsMergeType + --filteredrecordsmergetype $optionals.filteredRecordsMergeType + #end if + #if $optionals.genotypeMergeOptions + --genotypemergeoption $optionals.genotypeMergeOptions + #end if + #if str($optionals.minimumN).strip() not in ('', 'None', '1') + --minimumN $optionals.minimumN + #end if + #if $optionals.rod_priority_list + --rod_priority_list $optionals.rod_priority_list + #end if + #if str($optionals.setKey).strip() + --setKey '$optionals.setKey' + #end if + + $optionals.assumeIdenticalSamples + $optionals.excludeNonVariants + $optionals.filteredAreUncalled + $optionals.mergeInfoWithMaxAC + $optionals.minimalVCF + $optionals.printComplexMerges + $optionals.suppressCommandLineHeader + + #end if +]]> + </template> + + +</macros> + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/gatk/gatk.xml Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,179 @@ +<?xml version="1.0" encoding="utf-8"?> +<tool id="gatk" name="GATK" version="@VERSION@.d9"> + <description>tool collection Version @VERSION@</description> + <macros> + <import>gatk_macros.xml</import> + <import>realigner_target_creator.xml</import> + <import>indel_realigner.xml</import> + <import>base_recalibrator.xml</import> + <import>analyze_covariates.xml</import> + <import>print_reads.xml</import> + <import>haplotype_caller.xml</import> + <import>genotype_gvcfs.xml</import> + <import>combine_gvcfs.xml</import> + <import>combine_variants.xml</import> + </macros> + <expand macro="requirements"/> + <stdio> + <regex match="^INFO" level="log"/> + <regex match="^WARN" level="warning"/> + <regex match="Using .* implementation of PairHMM" level="warning"/> + <regex match="There is insufficient memory for the Java Runtime Environment to continue" level="fatal"/> + <regex match="^##### ERROR" level="fatal"/> + <exit_code range="1:" level="fatal"/> + </stdio> + <command><![CDATA[ + ############################ + ## import analysis specific preprocessings by using cheetahs internal searchList + ## if not defined, ignore + ############################ + #if $analysis_type.analysis_type_selector + "Preprocessing" in vars()['SL'][2] + #set $analysisPreprocessing = vars()['SL'][2][$analysis_type.analysis_type_selector + "Preprocessing"] + #include source=$analysisPreprocessing + #end if + + ############################ + ## GATK tool unspecific options + ############################ + @GATK_EXEC@ + + --analysis_type ${analysis_type.analysis_type_selector} + --reference_sequence ${ref_file.fields.path} + + --log_to_file ${output_log} + + #if $cond_intervals.cond_intervals_enabled + #for $interval in $cond_intervals.intervals: + --intervals ${interval.L} + #end for + #end if + + #if $cond_BQSR.cond_BQSR_enabled + --BQSR $cond_BQSR.BQSR + #end if + + 
############################ + ## import analysis specific options by using cheetahs internal searchList + ## if not defined throw raw python error until better idea + ############################ + #if $analysis_type.analysis_type_selector + "Options" in vars()['SL'][2] + #set $analysisOptions = vars()['SL'][2][$analysis_type.analysis_type_selector + "Options"] + #include source=$analysisOptions + #else + #set $analysisOptions = vars()['SL'][2][$analysis_type.analysis_type_selector + "Options"] + #end if + + ############################ + ## only put ERROR or FATAL log messages into stderr + ## but keep full log for printing into log file + ############################ + 2>&1 | awk '\$1 != "INFO" && \$1 != "WARN"' >&2 +]]></command> + <inputs> + <param name="ref_file" type="select" label="Using reference genome" help="-R,‑‑reference_sequence &lt;reference_sequence&gt;"> + <options from_data_table="picard_indexes"/> + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> + </param> + <conditional name="cond_intervals"> + <param name="cond_intervals_enabled" type="boolean" label="Select interval subset to operate on?"/> + <when value="true"> + <repeat name="intervals" title="genomic interval over which to operate" help="-L,‑‑intervals &lt;intervals&gt;"> + <param name="L" type="text" value=""/> + </repeat> + </when> + <when value="false"/> + </conditional> + <conditional name="cond_BQSR"> + <param name="cond_BQSR_enabled" type="boolean" label="Select covariates for on-the-fly recalibration?"/> + <when value="true"> + <param name="BQSR" type="data" format="tabular" label="Input covariates table file for on-the-fly base quality score recalibration" help="-BQSR,‑‑BQSR &lt;BQSR&gt; intended primarily for use with BaseRecalibrator and PrintReads"/> + </when> + <when value="false"/> + </conditional> + <conditional name="cond_threads"> + <param name="cond_threads_enabled" type="boolean" 
label="Set computational options (cpu, mem)?"/> + <when value="true"> + <param name="nt" type="integer" value="1" label="Number of data threads to allocate to this analysis" help="make sure, the option is available for the chosen tool"/> + <param name="nct" type="integer" value="1" label="Number of CPU threads to allocate per data thread" help="make sure, the option is available for the chosen tool"/> + <param name="mem" type="integer" value="0" label="Overwrite Memory in MB (0 = don't overwrite)" help="Overwrites all other defaults and might lead to crash the run. States mem per data thread"/> + </when> + <when value="false"/> + </conditional> + <conditional name="analysis_type"> + <param name="analysis_type_selector" type="select" label="Analysis Type"> + <option value="RealignerTargetCreator">RealignerTargetCreator</option> + <option value="IndelRealigner">IndelRealigner</option> + <option value="BaseRecalibrator">BaseRecalibrator</option> + <option value="AnalyzeCovariates">AnalyzeCovariates</option> + <option value="PrintReads">PrintReads</option> + <option value="HaplotypeCaller">HaplotypeCaller</option> + <option value="GenotypeGVCFs">GenotypeGVCFs</option> + <option value="CombineGVCFs">CombineGVCFs</option> + <option value="CombineVariants">CombineVariants</option> + </param> + <when value="RealignerTargetCreator"> + <expand macro="RealignerTargetCreatorParameters" tag="rtc"/> + </when> + <when value="IndelRealigner"> + <expand macro="IndelRealignerParameters" tag="ir"/> + </when> + <when value="BaseRecalibrator"> + <expand macro="BaseRecalibratorParameters" tag="br"/> + </when> + <when value="AnalyzeCovariates"> + <expand macro="AnalyzeCovariatesParameters" tag="ac"/> + </when> + <when value="PrintReads"> + <expand macro="PrintReadsParameters" tag="pr"/> + </when> + <when value="HaplotypeCaller"> + <expand macro="HaplotypeCallerParameters" tag="hc"/> + </when> + <when value="GenotypeGVCFs"> + <expand macro="GenotypeGVCFsParameters" tag="gg"/> + </when> + 
<when value="CombineGVCFs"> + <expand macro="CombineGVCFsParameters" tag="cg"/> + </when> + <when value="CombineVariants"> + <expand macro="CombineVariantsParameters" tag="cv"/> + </when> + </conditional> + </inputs> + <outputs> + <expand macro="RealignerTargetCreatorOutput" tag="rtc"> + <filter>analysis_type['analysis_type_selector'] == 'RealignerTargetCreator'</filter> + </expand> + <expand macro="IndelRealignerOutput" tag="ir"> + <filter>analysis_type['analysis_type_selector'] == 'IndelRealigner'</filter> + </expand> + <expand macro="BaseRecalibratorOutput" tag="br"> + <filter>analysis_type['analysis_type_selector'] == 'BaseRecalibrator'</filter> + </expand> + <expand macro="AnalyzeCovariatesOutput" tag="ac"> + <filter>analysis_type['analysis_type_selector'] == 'AnalyzeCovariates'</filter> + </expand> + <expand macro="PrintReadsOutput" tag="pr"> + <filter>analysis_type['analysis_type_selector'] == 'PrintReads'</filter> + </expand> + <expand macro="HaplotypeCallerOutput" tag="hc"> + <filter>analysis_type['analysis_type_selector'] == 'HaplotypeCaller'</filter> + </expand> + <expand macro="GenotypeGVCFsOutput" tag="gg"> + <filter>analysis_type['analysis_type_selector'] == 'GenotypeGVCFs'</filter> + </expand> + <expand macro="CombineGVCFsOutput" tag="cg"> + <filter>analysis_type['analysis_type_selector'] == 'CombineGVCFs'</filter> + </expand> + <expand macro="CombineVariantsOutput" tag="cv"> + <filter>analysis_type['analysis_type_selector'] == 'CombineVariants'</filter> + </expand> + <data format="txt" name="output_log" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (log)"/> + </outputs> + <expand macro="macro_tests"/> + <citations> + <citation type="doi">10.1101/gr.107524.110</citation> + <citation type="doi">10.1038/ng.806</citation> + <citation type="doi">10.1002/0471250953.bi1110s43</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/gatk/gatk_macros.xml Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,167 @@ +<macros> + + <xml name="requirements"> + <requirements> + <requirement type="package">gatk</requirement> + <requirement type="set_environment">GATK_PATH</requirement> + <requirement type="set_environment">GATK_SITE_OPTIONS</requirement> + <requirement type="package" version="3.1.2.1">package_r_for_gatk_3_4_0</requirement> + </requirements> + </xml> + + <xml name="version_command"> + <version_command><![CDATA[ @GATK_EXEC@ --help|grep '^The Genome' ]]></version_command> + </xml> + + <token name="@VERSION@">3.4-0</token> + <token name="@OUTPUT_NAME_PREFIX@">${tool.name} - ${analysis_type.analysis_type_selector}</token> + <token name="@GATK_EXEC@"> +<![CDATA[ + ## shell variables are backslash-escaped so that Cheetah leaves them for the shell to expand + #if $cond_threads.cond_threads_enabled: + #if int($cond_threads.nct) > 1: + THREAD_STRING="-nct $cond_threads.nct" && + #end if + #if int($cond_threads.nt) > 1: + THREAD_STRING="\${THREAD_STRING:-} -nt $cond_threads.nt" && + #end if + #if int($cond_threads.mem) > 0: + GATK_MEM=$cond_threads.mem && + #end if + #end if + java -Xmx\${GATK_MEM:-\${SLURM_MEM_PER_NODE:-4096}}M -jar "\$GATK_PATH/GenomeAnalysisTK.jar" \${THREAD_STRING:-} +]]> + </token> + + <xml name="macro_vcf_input" tokens="tag"> + <param name="input" type="data" format="vcf" multiple="true" label="Variant files (VCF format)" help="-V, ‑‑variant"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." 
/> + </param> + </xml> + <token name="@token_vcf_input_pre@" tokens="tag"> +<![CDATA[ + ############################ + ## create links to VCF input files with correct extensions + ############################ + #for $i, $variant in enumerate($analysis_type.input): + ln -s -f ${variant} variant_${i}.vcf && + #end for +]]> + </token> + <token name="@token_vcf_input@"> +<![CDATA[ + #for $i, $variant in enumerate($analysis_type.input): + --variant variant_${i}.vcf + #end for + @token_reference_input@ +]]> + </token> + + + <xml name="macro_gvcf_input" tokens="tag"> + <param name="input" type="data" format="vcf" multiple="true" label="Variant files (gVCF format)" help="-V, ‑‑variant"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> + </param> + </xml> + <token name="@token_gvcf_input_pre@" tokens="tag"> +<![CDATA[ + ############################ + ## create links to gVCF input files with correct extensions + ############################ + #for $i, $variant in enumerate($analysis_type.input): + ln -s -f ${variant} variant_${i}.g.vcf && + #end for +]]> + </token> + <token name="@token_gvcf_input@"> +<![CDATA[ + #for $i, $variant in enumerate($analysis_type.input): + --variant variant_${i}.g.vcf + #end for + @token_reference_input@ +]]> + </token> + + <xml name="macro_bam_input"> + <conditional name="cond_bam_input"> + <param name="all_in_one" type="boolean" value="false" label="Input all BAM files in a single command" /> + <when value="true"> + <param name="input" type="data" format="bam" multiple="true" label="Input file containing sequence data (BAM)" help="-I, ‑‑input_file"> + <validator type="unspecified_build"/> + <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently 
available for the specified build."/> + </param> + </when> + <when value="false"> + <param name="input" type="data" format="bam" label="Input file containing sequence data (BAM)" help="-I, ‑‑input_file"> + <validator type="unspecified_build"/> + <validator type="dataset_metadata_in_data_table" table_name="picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build."/> + </param> + </when> + </conditional> + </xml> + <token name="@token_bam_input_pre@"> +<![CDATA[ + ############################ + ## create links to bam input files with correct extensions + ############################ + #if $analysis_type.cond_bam_input.all_in_one + #for $i, $bam in enumerate($analysis_type.cond_bam_input.input): + ln -s -f ${bam} input_${i}.bam && + ln -s -f ${bam.metadata.bam_index} input_${i}.bam.bai && + #end for + #else + ln -s -f ${analysis_type.cond_bam_input.input} input.bam && + ln -s -f ${analysis_type.cond_bam_input.input.metadata.bam_index} input.bam.bai && + #end if +]]> + </token> + <token name="@token_bam_input@"> +<![CDATA[ + #if $analysis_type.cond_bam_input.all_in_one + #for $i, $bam in enumerate($analysis_type.cond_bam_input.input): + --input_file input_${i}.bam + #end for + #else + --input_file input.bam + #end if + @token_reference_input@ +]]> + </token> + + <token name="@token_reference_input@"> +<![CDATA[ +]]> + </token> + <xml name="macro_input" tokens="tag"> + <yield /> + </xml> + + <xml name="macro_optional_parameters"> + <conditional name="optional_parameters"> + <param name="optional_parameters_enabled" type="boolean" label="Configure Optional Parameters" /> + <when value="true"> + <yield /> + </when> + <when value="false" /> + </conditional> + </xml> + + <xml name="macro_advanced_parameters"> + <conditional name="advanced_parameters"> + <param name="advanced_parameters_enabled" type="boolean" label="Configure Advanced Parameters" /> + <when value="true"> + <yield /> + </when> + 
<when value="false" /> + </conditional> + </xml> + + <xml name="macro_tests"> + <tests> + + </tests> + </xml> + +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/gatk/generation/gatk.xsl Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,162 @@ +<?xml version="1.0" encoding="UTF-8"?> +<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:output + method="xml" + encoding="utf-8" + indent="yes" + cdata-section-elements="script style" /> + +<xsl:template match="/"> + +<tool id="gatk" name="GATK" version="@VERSION@.d9"> + <description>tool collection Version @VERSION@</description> + + <macros> + <import>gatk_macros.xml</import> + <xsl:for-each select="analyses/analysis"> + <import><xsl:value-of select="macro_file" /></import> + </xsl:for-each> + </macros> + + <expand macro="requirements" /> + + <stdio> + <regex match="^INFO" level="log" /> + <regex match="^WARN" level="warning" /> + <regex match="Using .* implementation of PairHMM" level="warning" /> + <regex match="There is insufficient memory for the Java Runtime Environment to continue" level="fatal" /> + <regex match="^##### ERROR" level="fatal" /> + <exit_code range="1:" level="fatal"/> + </stdio> + + <command> +<xsl:text disable-output-escaping="yes"><![CDATA[ + ############################ + ## import analysis specific preprocessings by using cheetahs internal searchList + ## if not defined, ignore + ############################ + #if $analysis_type.analysis_type_selector + "Preprocessing" in vars()['SL'][2] + #set $analysisPreprocessing = vars()['SL'][2][$analysis_type.analysis_type_selector + "Preprocessing"] + #include source=$analysisPreprocessing + #end if + + ############################ + ## GATK tool unspecific options + ############################ + @GATK_EXEC@ + + --analysis_type ${analysis_type.analysis_type_selector} + --reference_sequence ${ref_file.fields.path} + + --log_to_file ${output_log} + + #if $cond_intervals.cond_intervals_enabled + #for $interval in $cond_intervals.intervals: + --intervals ${interval.L} + #end for + #end if + + #if $cond_BQSR.cond_BQSR_enabled + --BQSR 
$cond_BQSR.BQSR + #end if + + ############################ + ## import analysis specific options by using cheetahs internal searchList + ## if not defined throw raw python error until better idea + ############################ + #if $analysis_type.analysis_type_selector + "Options" in vars()['SL'][2] + #set $analysisOptions = vars()['SL'][2][$analysis_type.analysis_type_selector + "Options"] + #include source=$analysisOptions + #else + #set $analysisOptions = vars()['SL'][2][$analysis_type.analysis_type_selector + "Options"] + #end if + + ############################ + ## only put ERROR or FATAL log messages into stderr + ## but keep full log for printing into log file + ############################ + 2>&1 | awk '\$1 != "INFO" && \$1 != "WARN"' >&2 +]]></xsl:text> + </command> + + <inputs> + + <param name="ref_file" type="select" label="Using reference genome" help="-R,‑‑reference_sequence &lt;reference_sequence&gt;" > + <options from_data_table="picard_indexes"> + <!--filter type="data_meta" key="dbkey" ref="@TAG@_input" column="dbkey" /--> + </options> + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> + </param> + + <conditional name="cond_intervals"> + <param name="cond_intervals_enabled" type="boolean" label="Select interval subset to operate on?" /> + <when value="true"> + <repeat name="intervals" title="genomic interval over which to operate" help="-L,‑‑intervals &lt;intervals&gt;"> + <param name="L" type="text" value="" /> + </repeat> + </when> + <when value="false" /> + </conditional> + + <conditional name="cond_BQSR"> + <param name="cond_BQSR_enabled" type="boolean" label="Select covariates for on-the-fly recalibration?" 
/> + <when value="true"> + <param name="BQSR" type="data" format="tabular" label="Input covariates table file for on-the-fly base quality score recalibration" help="-BQSR,‑‑BQSR &lt;BQSR&gt; intended primarily for use with BaseRecalibrator and PrintReads" /> + </when> + <when value="false" /> + </conditional> + + <conditional name="cond_threads"> + <param name="cond_threads_enabled" type="boolean" label="Set computational options (cpu, mem)?" /> + <when value="true"> + <param name="nt" type="integer" value="1" label="Number of data threads to allocate to this analysis" help="make sure, the option is available for the chosen tool" /> + <param name="nct" type="integer" value="1" label="Number of CPU threads to allocate per data thread" help="make sure, the option is available for the chosen tool" /> + <param name="mem" type="integer" value="0" label="Overwrite Memory in MB (0 = don't overwrite)" help="Overwrites all other defaults and might lead to crash the run. States mem per data thread" /> + </when> + <when value="false" /> + </conditional> + + <conditional name="analysis_type"> + <param name="analysis_type_selector" type="select" label="Analysis Type"> + <xsl:for-each select="analyses/analysis"> + <option value="{name}"><xsl:value-of select="name" /></option> + </xsl:for-each> + </param> + <xsl:for-each select="analyses/analysis"> + <when value="{name}"> + <!--xsl:choose> + <xsl:when test="input_type = 'bam'"> + <expand macro="macro_bam_input" tag="{tag}" /> + </xsl:when> + <xsl:when test="input_type = 'gvcf'"> + <expand macro="macro_gvcf_input" tag="{tag}" /> + </xsl:when> + </xsl:choose--> + <expand macro="{name}Parameters" tag="{tag}" /> + </when> + </xsl:for-each> + </conditional> + </inputs> + + <outputs> + <xsl:for-each select="analyses/analysis"> + <expand macro="{name}Output" tag="{tag}"> + <filter>analysis_type['analysis_type_selector'] == '<xsl:value-of select="name" />'</filter> + </expand> + </xsl:for-each> + <data format="txt" name="output_log" 
label="${{tool.name}} - ${{analysis_type.analysis_type_selector}} on ${{on_string}} (log)" /> + </outputs> + + <expand macro="macro_tests" /> + + <citations> + <citation type="doi">10.1101/gr.107524.110</citation> + <citation type="doi">10.1038/ng.806</citation> + <citation type="doi">10.1002/0471250953.bi1110s43</citation> + </citations> +</tool> + +</xsl:template> +</xsl:stylesheet> + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/gatk/generation/gatk.xsldb.xml Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,57 @@ +<?xml version="1.0" encoding="UTF-8"?> +<analyses> + <analysis> + <name>RealignerTargetCreator</name> + <input_type>bam</input_type> + <tag>rtc</tag> + <macro_file>realigner_target_creator.xml</macro_file> + </analysis> + <analysis> + <name>IndelRealigner</name> + <input_type>bam</input_type> + <tag>ir</tag> + <macro_file>indel_realigner.xml</macro_file> + </analysis> + <analysis> + <name>BaseRecalibrator</name> + <input_type>bam</input_type> + <tag>br</tag> + <macro_file>base_recalibrator.xml</macro_file> + </analysis> + <analysis> + <name>AnalyzeCovariates</name> + <input_type>bam</input_type> + <tag>ac</tag> + <macro_file>analyze_covariates.xml</macro_file> + </analysis> + <analysis> + <name>PrintReads</name> + <input_type>bam</input_type> + <tag>pr</tag> + <macro_file>print_reads.xml</macro_file> + </analysis> + <analysis> + <name>HaplotypeCaller</name> + <input_type>bam</input_type> + <tag>hc</tag> + <macro_file>haplotype_caller.xml</macro_file> + </analysis> + <analysis> + <name>GenotypeGVCFs</name> + <input_type>gvcf</input_type> + <tag>gg</tag> + <macro_file>genotype_gvcfs.xml</macro_file> + </analysis> + <analysis> + <name>CombineGVCFs</name> + <input_type>gvcf</input_type> + <tag>cg</tag> + <macro_file>combine_gvcfs.xml</macro_file> + </analysis> + <analysis> + <name>CombineVariants</name> + <input_type>vcf</input_type> + <tag>cv</tag> + <macro_file>combine_variants.xml</macro_file> + </analysis> +</analyses>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/gatk/genotype_gvcfs.xml Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,43 @@ +<macros> + <xml name="GenotypeGVCFsParameters" tokens="tag"> + + <expand macro="macro_gvcf_input" tag="@TAG@" /> + + <expand macro="macro_optional_parameters"> + + + <param name="sample_ploidy" type="integer" value="2" label="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)" help="-ploidy,‑‑sample_ploidy &lt;sample_ploidy&gt;" /> + + </expand> + + </xml> + + <xml name="GenotypeGVCFsOutput"> + <data format="vcf" name="gg_output_gvcf" from_work_dir="output.g.vcf" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (gVCF)"> + <yield /> + </data> + </xml> + + <template name="GenotypeGVCFsPreprocessing"> +<![CDATA[ + @token_gvcf_input_pre@ +]]> + </template> + + <template name="GenotypeGVCFsOptions"> +<![CDATA[ + --out output.g.vcf + + @token_gvcf_input@ + + #set $optionals = $analysis_type.optional_parameters + #if $optionals.optional_parameters_enabled + --sample_ploidy $optionals.sample_ploidy + #end if +]]> + </template> + + +</macros> + +
<!-- GATK/gatk/haplotype_caller.xml
     Galaxy macros and command-line templates for the GATK HaplotypeCaller analysis type. -->
<macros>
    <!-- Parameters shown when the GVCF usage is selected. -->
    <xml name="HaplotypeCallerGVCF">
        <param name="emitRefConfidence" type="select" optional="true" label="Mode for emitting reference confidence scores" help="-ERC,‑‑emitRefConfidence &lt;emitRefConfidence&gt;">
            <option value="GVCF">GVCF (Reference model emitted with condensed non-variant blocks)</option>
        </param>
    </xml>

    <!-- Input form: BAM input, usage selector and the optional ploidy setting. -->
    <xml name="HaplotypeCallerParameters" tokens="tag">

        <expand macro="macro_bam_input" tag="@TAG@" />

        <conditional name="cond_usage">
            <param name="cond_usage_selector" type="select" label="Select usage">
                <option value="GVCF">Single-sample all-sites calling on DNAseq (GVCF mode)</option>
            </param>
            <when value="GVCF">
                <expand macro="HaplotypeCallerGVCF" />
            </when>
        </conditional>

        <expand macro="macro_optional_parameters">

            <param name="sample_ploidy" type="integer" value="2" label="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy)" help="-ploidy,‑‑sample_ploidy &lt;sample_ploidy&gt;" />

        </expand>

    </xml>

    <!-- Output dataset, collected from output.g.vcf in the job working directory. -->
    <xml name="HaplotypeCallerOutput">
        <data format="vcf" name="hc_output_gvcf" from_work_dir="output.g.vcf" label="${tool.name} on ${on_string} (gVCF)">
            <yield />
        </data>
    </xml>

    <!-- Shell commands run before GATK is invoked (input staging). -->
    <template name="HaplotypeCallerPreprocessing">
<![CDATA[
        @token_bam_input_pre@
]]>
    </template>

    <!-- GATK command-line options for this analysis type.
         The reference-confidence mode is driven by the usage selector. -->
    <template name="HaplotypeCallerOptions">
<![CDATA[
        --out output.g.vcf

        @token_bam_input@

        #set $optionals = $analysis_type.optional_parameters
        #if $optionals.optional_parameters_enabled
            --sample_ploidy $optionals.sample_ploidy
        #end if

        #set $usage_selector = $analysis_type.cond_usage.cond_usage_selector
        #set $usage = $analysis_type.cond_usage

        #if str($usage_selector) == 'GVCF'
            --emitRefConfidence "GVCF"
        #end if
]]>
    </template>

</macros>
<!-- GATK/gatk/indel_realigner.xml
     Galaxy macros and command-line templates for the GATK IndelRealigner analysis type. -->
<macros>
    <!-- Input form: BAM input, the target intervals file, and the optional / advanced settings. -->
    <xml name="IndelRealignerParameters" tokens="tag">

        <expand macro="macro_bam_input" tag="@TAG@" />

        <param name="targetIntervals" type="data" format="gatk_interval" label="Intervals file output from RealignerTargetCreator" help="-targetIntervals,--targetIntervals &lt;targetIntervals&gt;" />

        <expand macro="macro_optional_parameters">
            <repeat name="knownAlleles" title="Input VCF file(s) with known indels" help="-known,‑‑knownAlleles &lt;knownAlleles&gt;">
                <param name="knownAllele" type="data" format="vcf" label="Variant file (VCF format)" />
            </repeat>

            <param name="consensusDeterminationModel" type="select" label="Determines how to compute the possible alternate consenses" help="-model,‑‑consensusDeterminationModel &lt;consensusDeterminationModel&gt;">
                <option value="USE_READS">USE_READS - Additionally uses indels already present in the original alignments of the reads</option>
                <option value="KNOWNS_ONLY">KNOWNS_ONLY - Uses only indels from a provided ROD of known indels</option>
                <option value="USE_SW">USE_SW - Additionally uses 'Smith-Waterman' to generate alternate consenses</option>
            </param>
            <param name="LODThresholdForCleaning" type="float" value="5.0" label="LOD threshold above which the cleaner will clean" help="-LOD,‑‑LODThresholdForCleaning &lt;LODThresholdForCleaning&gt;" />
            <!-- Disabled parameter, kept for reference (double hyphens are not allowed inside XML comments, hence the non-breaking hyphens):
                 param name="nWayOut" type="float" value="5.0" label="Generate one output file for each input (-I) bam file (not compatible with -output)" help="-nWayOut,‑‑nWayOut &lt;nWayOut&gt;" / -->
        </expand>


        <expand macro="macro_advanced_parameters">
            <param name="entropyThreshold" type="float" value="0.15" label="Percentage of mismatches at a locus to be considered having high entropy (0.0 &lt; entropy &lt;= 1.0)" help="-entropy,‑‑entropyThreshold &lt;entropyThreshold&gt;" />

            <param name="maxConsensuses" type="integer" value="30" label="Max alternate consensuses to try (necessary to improve performance in deep coverage)" help="-maxConsensuses,‑‑maxConsensuses &lt;maxConsensuses&gt;" />

            <param name="maxIsizeForMovement" type="integer" value="3000" label="maximum insert size of read pairs that we attempt to realign" help="-maxIsize,‑‑maxIsizeForMovement &lt;maxIsizeForMovement&gt;" />

            <param name="maxPositionalMoveAllowed" type="integer" value="200" label="Maximum positional move in basepairs that a read can be adjusted during realignment" help="-maxPosMove,‑‑maxPositionalMoveAllowed &lt;maxPositionalMoveAllowed&gt;" />

            <param name="maxReadsForConsensuses" type="integer" value="120" label="Max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)" help="-greedy,‑‑maxReadsForConsensuses &lt;maxReadsForConsensuses&gt;" />

            <param name="maxReadsForRealignment" type="integer" value="20000" label="Max reads allowed at an interval for realignment" help="-maxReads,‑‑maxReadsForRealignment &lt;maxReadsForRealignment&gt;" />

            <param name="maxReadsInMemory" type="integer" value="150000" label="max reads allowed to be kept in memory at a time by the SAMFileWriter" help="-maxInMemory,‑‑maxReadsInMemory &lt;maxReadsInMemory&gt;" />

            <param name="noOriginalAlignmentTags" type="boolean" truevalue="--noOriginalAlignmentTags" falsevalue="" label="Don't output the original cigar or alignment start tags for each realigned read in the output bam" help="-noTags,‑‑noOriginalAlignmentTags" />
        </expand>

    </xml>

    <!-- Output dataset: the realigned BAM written directly by GATK. -->
    <xml name="IndelRealignerOutput">
        <data format="bam" name="ir_output_bam" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (BAM)">
            <yield />
        </data>
    </xml>

    <!-- Shell commands run before GATK: symlink the intervals file and every
         known-indel VCF to fixed names in the working directory. -->
    <template name="IndelRealignerPreprocessing">
<![CDATA[
        @token_bam_input_pre@

        ln -s -f ${analysis_type.targetIntervals} target.intervals &&

        #if $analysis_type.optional_parameters.optional_parameters_enabled
            #for $i, $knownAllele in enumerate($analysis_type.optional_parameters.knownAlleles):
                ln -s -f ${knownAllele.knownAllele} knownAllele_${i}.vcf &&
            #end for
        #end if
]]>
    </template>

    <!-- GATK command-line options. Parameters declared inside the optional /
         advanced sections are read from the matching section of $analysis_type. -->
    <template name="IndelRealignerOptions">
<![CDATA[
        --out ${ir_output_bam}

        @token_bam_input@
        --targetIntervals target.intervals

        #if $analysis_type.optional_parameters.optional_parameters_enabled
            #for $i, $knownAllele in enumerate($analysis_type.optional_parameters.knownAlleles):
                --knownAlleles knownAllele_${i}.vcf
            #end for
            --consensusDeterminationModel ${analysis_type.optional_parameters.consensusDeterminationModel}
            --LODThresholdForCleaning ${analysis_type.optional_parameters.LODThresholdForCleaning}
        #end if

        #if $analysis_type.advanced_parameters.advanced_parameters_enabled
            --entropyThreshold ${analysis_type.advanced_parameters.entropyThreshold}
            --maxConsensuses ${analysis_type.advanced_parameters.maxConsensuses}
            --maxIsizeForMovement ${analysis_type.advanced_parameters.maxIsizeForMovement}
            --maxPositionalMoveAllowed ${analysis_type.advanced_parameters.maxPositionalMoveAllowed}
            --maxReadsForConsensuses ${analysis_type.advanced_parameters.maxReadsForConsensuses}
            --maxReadsForRealignment ${analysis_type.advanced_parameters.maxReadsForRealignment}
            --maxReadsInMemory ${analysis_type.advanced_parameters.maxReadsInMemory}
            ${analysis_type.advanced_parameters.noOriginalAlignmentTags}
        #end if
]]>
    </template>
</macros>
<!-- GATK/gatk/print_reads.xml
     Galaxy macros and command-line templates for the GATK PrintReads analysis type. -->
<macros>
    <!-- Input form: BAM input plus the optional read-filtering settings. -->
    <xml name="PrintReadsParameters" tokens="tag">

        <expand macro="macro_bam_input" tag="@TAG@" />

        <!-- BQSR in main config -->

        <expand macro="macro_optional_parameters">

            <param name="number" type="integer" value="" optional="true" label="Print the first n reads from the file, discarding the rest" help="-n,‑‑number &lt;number&gt;" />

            <param name="platform" type="text" value="" optional="true" label="Exclude all reads with this platform from the output" help="-platform,‑‑platform &lt;platform&gt;" />

            <param name="readGroup" type="text" value="" optional="true" label="Exclude all reads with this read group from the output" help="-readGroup,‑‑readGroup &lt;readGroup&gt;" />

            <param name="sample_file" type="data" format="txt" optional="true" label="File containing a list of samples (one per line). Can be specified multiple times" help="-sf,‑‑sample_file &lt;sample_file&gt;" />

            <repeat name="sample_names" title="Sample names to be included in the analysis" help="-sn,‑‑sample_name &lt;sample_name&gt;">
                <!-- param elements take a label attribute, not title -->
                <param name="sample_name" type="text" value="" label="Sample name to be included in the analysis" />
            </repeat>

            <!-- A flag: must be a boolean for truevalue / falsevalue to take effect. -->
            <param name="simplify" type="boolean" truevalue="-s" falsevalue="" label="Erase all extra attributes in the read but keep the read group information" help="-s,‑‑simplify" />

        </expand>

    </xml>

    <!-- Output dataset: the BAM written directly by GATK. -->
    <xml name="PrintReadsOutput">
        <data format="bam" name="pr_output_bam" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (BAM)">
            <yield />
        </data>
    </xml>

    <!-- Shell commands run before GATK is invoked (input staging). -->
    <template name="PrintReadsPreprocessing">
<![CDATA[
        @token_bam_input_pre@
]]>
    </template>

    <!-- GATK command-line options. The number option is optional and may be
         empty, so it is checked for emptiness before being converted to int.
         Each repeated sample name is passed with the sample_name option. -->
    <template name="PrintReadsOptions">
<![CDATA[
        --out ${pr_output_bam}

        @token_bam_input@

        #set $optionals = $analysis_type.optional_parameters
        #if $optionals.optional_parameters_enabled
            #if str($optionals.number).strip() != '' and int(str($optionals.number)) > 0
                --number $optionals.number
            #end if
            #if str($optionals.platform)
                --platform $optionals.platform
            #end if
            #if str($optionals.readGroup)
                --readGroup $optionals.readGroup
            #end if
            #if $optionals.sample_file
                --sample_file $optionals.sample_file
            #end if
            #if $optionals.sample_names
                #for $sample in $optionals.sample_names:
                    --sample_name ${sample.sample_name}
                #end for
            #end if
            $optionals.simplify
        #end if
]]>
    </template>
</macros>
<!-- GATK/gatk/realigner_target_creator.xml
     Galaxy macros and command-line templates for the GATK RealignerTargetCreator analysis type. -->
<macros>
    <!-- Input form: BAM input plus the optional interval-detection settings and known-indel VCFs. -->
    <xml name="RealignerTargetCreatorParameters" tokens="tag">

        <expand macro="macro_bam_input" tag="@TAG@" />

        <expand macro="macro_optional_parameters">
            <param name="maxIntervalSize" type="integer" value="500" label="maximum interval size; any intervals larger than this value will be dropped" help="-maxInterval,‑‑maxIntervalSize &lt;maxIntervalSize&gt;" />
            <param name="minReadsAtLocus" type="integer" value="4" label="minimum reads at a locus to enable using the entropy calculation" help="-minReads,‑‑minReadsAtLocus &lt;minReadsAtLocus&gt;" />
            <param name="windowSize" type="integer" value="10" label="window size for calculating entropy or SNP clusters" help="-window,‑‑windowSize &lt;windowSize&gt;" />

            <param name="mismatchFraction" type="float" value="0.0" label="fraction of base qualities needing to mismatch for a position to have high entropy" help="-mismatch,‑‑mismatchFraction &lt;mismatchFraction&gt;" />
            <repeat name="rod_bindings" title="Input VCF files with known indels" help="-known,‑‑known &lt;known&gt;">
                <param name="input_rod" type="data" format="vcf" label="Variant file (VCF format)" />
            </repeat>
        </expand>

    </xml>

    <!-- Output dataset: the intervals file written directly by GATK. -->
    <xml name="RealignerTargetCreatorOutput">
        <data format="gatk_interval" name="rtc_output_intervals" label="${tool.name} - ${analysis_type.analysis_type_selector} on ${on_string} (GATK intervals)">
            <yield />
        </data>
    </xml>

    <!-- Shell commands run before GATK: symlink every known-indel VCF to a
         fixed, indexed name in the working directory. -->
    <template name="RealignerTargetCreatorPreprocessing">
<![CDATA[
        @token_bam_input_pre@

        #set $optionals = $analysis_type.optional_parameters
        #if $optionals.optional_parameters_enabled
            #for $i, $rod_binding in enumerate($optionals.rod_bindings):
                ln -s -f ${rod_binding.input_rod} rod_binding_${i}.vcf &&
            #end for
        #end if
]]>
    </template>

    <!-- GATK command-line options; the known-indel files are referenced by the
         symlink names created in the preprocessing template. -->
    <template name="RealignerTargetCreatorOptions">
<![CDATA[
        --out ${rtc_output_intervals}

        @token_bam_input@

        #set $optionals = $analysis_type.optional_parameters
        #if $optionals.optional_parameters_enabled
            --maxIntervalSize ${optionals.maxIntervalSize}
            --minReadsAtLocus ${optionals.minReadsAtLocus}
            --windowSize ${optionals.windowSize}
            --mismatchFraction ${optionals.mismatchFraction}

            #for $i, $rod_binding in enumerate($optionals.rod_bindings):
                --known rod_binding_${i}.vcf
            #end for
        #end if
]]>
    </template>
</macros>
# GATK/gatk/tool-data/destinations.py
"""Dynamic Galaxy job rule that builds a SLURM destination for GATK jobs.

The rule reads the job's ``cond_threads`` parameter and turns its ``nt``,
``nct`` and ``mem`` values into a SLURM native specification.
"""
from galaxy.jobs import JobDestination
import os
import sys
import json
import logging

try:
    from cStringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

log = logging.getLogger( __name__ )


def _is_nested(value):
    """Return True when ``dump`` should recurse into ``value``.

    Strings are excluded explicitly: on Python 3 they expose ``__iter__`` but
    must still be printed inline, as they were on Python 2.
    """
    return hasattr(value, '__iter__') and not isinstance(value, (str, bytes))


def dump(obj, nested_level=0, output=sys.stdout):
    """Write an indented, human-readable rendering of ``obj`` to ``output``.

    :param obj: value to render; dicts and lists are expanded recursively,
        anything else is written with ``%s`` formatting.
    :param nested_level: current indentation depth (one space per level).
    :param output: file-like object with a ``write`` method.
    """
    spacing = ' '
    indent = nested_level * spacing
    child_indent = (nested_level + 1) * spacing

    if isinstance(obj, dict):
        output.write('%s{\n' % indent)
        for k, v in obj.items():
            if _is_nested(v):
                output.write('%s%s:\n' % (child_indent, k))
                dump(v, nested_level + 1, output)
            else:
                output.write('%s%s: %s\n' % (child_indent, k, v))
        output.write('%s}\n' % indent)
    elif isinstance(obj, list):
        output.write('%s[\n' % indent)
        for v in obj:
            if _is_nested(v):
                dump(v, nested_level + 1, output)
            else:
                output.write('%s%s\n' % (child_indent, v))
        output.write('%s]\n' % indent)
    else:
        output.write('%s%s\n' % (indent, obj))


def dynamic_slurm_cluster_gatk(job, tool_id):
    """Return the SLURM ``JobDestination`` for a GATK job.

    :param job: Galaxy job; its ``input_datasets``, ``input_library_datasets``
        and ``parameters`` collections are read.
    :param tool_id: id of the tool being run (unused by this rule).
    :returns: a plain ``slurm`` destination when the job has no
        ``cond_threads`` parameter, otherwise one carrying a
        ``nativeSpecification`` with node / task / memory requests.
    """
    # Collect inputs and JSON-decoded parameters into a single lookup table.
    inp_data = dict( ( da.name, da.dataset ) for da in job.input_datasets )
    inp_data.update( ( da.name, da.dataset ) for da in job.input_library_datasets )
    inp_data.update( ( da.name, json.loads(da.value) ) for da in job.parameters )

    # Only render the (potentially large) dump when it will actually be logged.
    if log.isEnabledFor(logging.DEBUG):
        out = StringIO()
        dump(inp_data, 1, out)
        log.debug(out.getvalue())

    if 'cond_threads' not in inp_data:
        return JobDestination(runner="slurm")

    nativeSpecs = '--nodes=1 --ntasks=1'

    # runner doesn't allow to specify --cpus-per-task
    # thus the mem calculation gets messy with more than 1 node
    # --> translate nt ==> nodes, nct ==> ntasks
    threads = inp_data['cond_threads']
    if threads['cond_threads_enabled'] == "True":
        nNodes = int(threads['nt'])
        nCPU = int(threads['nct'])
        nMEM = int(threads['mem'])
        if nMEM > 0:
            nativeSpecs = '--nodes=%d --ntasks=%d --mem=%d' % (nNodes, nCPU*nNodes, nMEM)
        else:
            nativeSpecs = '--nodes=%d --ntasks=%d' % (nNodes, nCPU*nNodes)

    return JobDestination(runner="slurm", params={"nativeSpecification": nativeSpecs})
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/gatk/tool-data/picard_index.loc.sample Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,26 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Picard dict and associated files. You will need +#to create these data files and then create a picard_index.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The picard_index.loc +#file has this format (longer white space is the TAB character): +# +#<unique_build_id> <dbkey> <display_name> <fasta_file_path> +# +#So, for example, if you had hg18 indexed and stored in +#/depot/data2/galaxy/picard/hg18/, +#then the picard_index.loc entry would look like this: +# +#hg18 hg18 hg18 Pretty /depot/data2/galaxy/picard/hg18/hg18.fa +# +#and your /depot/data2/galaxy/picard/hg18/ directory +#would contain the following three files: +#hg18.fa +#hg18.dict +#hg18.fa.fai +# +#The dictionary file for each reference (ex. hg18.dict) must be +#created via Picard (http://picard.sourceforge.net). Note that +#the dict file does not have the .fa extension although the +#path listed in the loc file does include it. +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/gatk/tool_data_table_conf.xml.sample Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,7 @@ +<tables> + <!-- Location of Picard dict files valid for GATK --> + <table name="picard_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/picard_index.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/gatk/tool_dependencies.xml Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,19 @@ +<?xml version="1.0"?> +<tool_dependency> + <set_environment version="1.0"> + <environment_variable action="set_to" name="GATK_PATH">/mnt/galaxy/tools/GATK/3.4-0</environment_variable> + </set_environment> + <!-- + Use GATK_SITE_OPTIONS to set additional parameters that should be inserted in every GATK call. + The intended use case was to prohibit GATK to collect and send data. + For example: + + -et "NO_ET" -K "/data/gatk_key_file" ##ET no phone home + --> + <set_environment version="1.0"> + <environment_variable action="set_to" name="GATK_SITE_OPTIONS"> </environment_variable> + </set_environment> + <package name="package_r_for_gatk_3_4_0" version="3.1.2.1"> + <repository changeset_revision="49c62e9b71ad" name="package_r_for_gatk_3_4_0" owner="avowinkel" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/package_picard_1_135/tool_dependencies.xml Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,22 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="picard" version="1.135"> + <install version="1.0"> + <actions_group> + <actions architecture="x86_64" os="linux"> + <action type="download_by_url">https://github.com/broadinstitute/picard/releases/download/1.135/picard-tools-1.135.zip</action> + <action type="move_directory_files"> + <source_directory>.</source_directory> + <destination_directory>$INSTALL_DIR</destination_directory> + </action> + </actions> + <action type="set_environment"> + <environment_variable name="JAVA_JAR_PATH" action="set_to">$INSTALL_DIR</environment_variable> + </action> + </actions_group> + </install> + <readme> +This picard package dependency is retrieved directly from https://github.com/broadinstitute/picard/releases + </readme> + </package> +</tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GATK/package_r_for_gatk_3_4_0/tool_dependencies.xml Thu Sep 12 06:50:21 2019 -0400 @@ -0,0 +1,48 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="R" version="3.1.2"> + <repository changeset_revision="9f2fddb9d6e2" name="package_r_3_1_2" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="package_r_for_gatk_3_4_0" version="3.1.2.1"> + <install version="1.0"> + <actions> + <action type="setup_r_environment"> + + <repository changeset_revision="9f2fddb9d6e2" name="package_r_3_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu"> + <package name="R" version="3.1.2" /> + </repository> + <package>https://github.com/cran/stringi/archive/0.5-5.tar.gz</package> + <package>https://github.com/cran/magrittr/archive/1.5.tar.gz</package> + <package>https://github.com/cran/stringr/archive/1.0.0.tar.gz</package> + <package>https://github.com/cran/RColorBrewer/archive/1.1-2.tar.gz</package> + <package>https://github.com/cran/dichromat/archive/2.0-0.tar.gz</package> + <package>https://github.com/cran/colorspace/archive/1.2-6.tar.gz</package> + <package>https://github.com/cran/munsell/archive/0.4.2.tar.gz</package> + <package>https://github.com/cran/labeling/archive/0.3.tar.gz</package> + <package>https://github.com/cran/Rcpp/archive/0.11.6.tar.gz</package> + <package>https://github.com/cran/digest/archive/0.6.8.tar.gz</package> + <package>https://github.com/cran/gtable/archive/0.1.2.tar.gz</package> + <package>https://github.com/cran/bitops/archive/1.0-6.tar.gz</package> + <package>https://github.com/cran/caTools/archive/1.17.1.tar.gz</package> + <package>https://github.com/cran/gtools/archive/3.5.0.tar.gz</package> + <package>https://github.com/cran/gdata/archive/2.17.0.tar.gz</package> + <package>https://github.com/cran/gsalib/archive/2.1.tar.gz</package> + <package>https://github.com/cran/gplots/archive/2.17.0.tar.gz</package> + 
<package>https://github.com/cran/plyr/archive/1.8.3.tar.gz</package> + <package>https://github.com/cran/reshape/archive/0.8.5.tar.gz</package> + <package>https://github.com/cran/reshape2/archive/1.4.1.tar.gz</package> + <package>https://github.com/cran/scales/archive/0.2.5.tar.gz</package> + <package>https://github.com/cran/proto/archive/0.3-10.tar.gz</package> + <package>https://github.com/cran/MASS/archive/7.3-43.tar.gz</package> + <package>https://github.com/cran/ggplot2/archive/1.0.1.tar.gz</package> + </action> + </actions> + </install> + <readme> + ggplot2 is a plotting system for R, based on the grammar of graphics, which tries to take the good parts of base and lattice graphics and none of the bad parts. + It takes care of many of the fiddly details that make plotting a hassle (like drawing legends) as well as providing a powerful model of graphics that makes it easy to produce complex multi-layered graphics. + + http://ggplot2.org/ + </readme> + </package> +</tool_dependency>
--- a/ebola.len Sun Mar 04 00:49:57 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -1 Chr1 18959 - -
--- a/ebolamutant.fasta Sun Mar 04 00:49:57 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,273 +0,0 @@ ->Chr1 -CGGACACACAAAAAGAAAGAAGAATTTTTAGGATCTTTTGTGTGCGAATAACTATGAGGAAGATTAATAA -TTTTCCTCTCATTGAAATTTATATCGGAATTTAAATTGAAATTGTTACTGTAATCACACCTGGTTTGTTT -CAGAGCCACATCACAAAGATAGAGAACAACCTAGGTCTCCGAAGGGAGCAAGGGCATCAGTGTGCTCAGT -TGAAAATCCCTTGTCAACACCTAGGTCTTATCACATCACAAGTTCCACCTCAGACTCTGCAGGGTGATCC -AACAACCTTAATAGAAACATTATTGTTAAAGGACAGCATTAGTTCACAGTCAAACAAGCAAGATTGAGAA -TTAACCTTGGTTTTGAACTTGAACACTTAGGGGATTGAAGATTCAACAACCCTAAAGCTTGGGGTAAAAC -ATTGGAAATAGTTAAAAGACAAATTGCTCGGAATCACAAAATTCCGAGTATGGATTCTCGTCCTCAGAAA -ATCTGGATGGCGCCGAGTCTCACTGAATCTGACATGGATTACCACAAGATCTTGACAGCAGGTCTGTCCG -TTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCAGTGTATCAAGTAAACAATCTTGAAGAAATTTGCCA -ACTTATCATACAGGCCTTTGAAGCAGGTGTTGATTTTCAAGAGAGTGCGGACAGTTTCCTTCTCATGCTT -TGTCTTCATCATGCGTACCAGGGAGATTACAAACTTTTCTTGGAAAGTGGCGCAGTCAAGTATTTGGAAG -GGCACGGGTTCCGTTTTGAAGTCAAGAAGCGTGATGGAGTGAAGCGCCTTGAGGAATTGCTGCCAGCAGT -ATCTAGTGGAAAAAACATTAAGAGAACACTTGCTGCCATGCCGGAAGAGGAGACAACTGAAGCTAATGCC -GGTCAGTTTCTCTCCTTTGCAAGTCTATTCCTTCCGAAATTGGTAGTAGGAGAAAAGGCTTGCCTTGAGA -AGGTTCAAAGGCAAATTCAAGTACATGCAGAGCAAGGACTGATACAATATCCAACAGCTTGGCAATCAGT -AGGACACATGATGGTGATTTTCCGTTTGATGCGAACAAATTTTCTGATCAAATTTCTCCTAATACACCAA -GGGATGCACATGGTTGCCGGGCATGATGCCAACGATGCTGTGATTTCAAATTCAGTGGCTCAAGCTCGTT -TTTCAGGCTTATTGATTGTCAAAACAGTACTTGATCATATCCTACAAAAGACAGAACGAGGAGTTCGTCT -CCATCCTCTTGCAAGGACCGCCAAGGTAAAAAATGAGGTGAACTCCTTTAAGGCTGCACTCAGCTCCCTG -GCCAAGCATGGAGAGTATGCTCCTTTCGCCCGACTTTTGAACCTTTCTGGAGTAAATAATCTTGAGCATG -GTCTTTTCCCTCAACTATCGGCAATTGCACTCGGAGTCGCCACAGCACACGGGAGTACCCTCGCAGGAGT -AAATGTTGGAGAACAGTATCAACAACTCAGAGAGGCTGCCACTGAGGCTGAGAAGCAACTCCAACAATAT -GCAGAGTCTCGCGAACTTGACCATCTTGGACTTGATGATCAGGAAAAGAAAATTCTTATGAACTTCCATC -AGAAAAAGAACGAAATCAGCTTCCAGCAAACAAACGCTATGGTAACTCTAAGAAAAGAGCGCCTGGCCAA -GCTGACAGAAGCTATCACTGCTGCGTCACTGCCCAAAACAAGTGGACATTACGATGATGATGACGACATT -CCCTTTCCAGGACCCATCAATGATGACGACAATCCTGGCCATCAAGATGATGATCCGACTGACTCACAGG 
-ATACGACCATTCCCGATGTGGTGGTTGATCCCGATGATGGAAGCTACGGCGAATACCAGAGTTACTCGGA -AAACGGCATGAATGCACCAGATGACTTGGTCCTATTCGATCTAGACGAGGACGACGAGGACACTAAGCCA -GTGCCTAATAGATCGACCAAGGGTGGACAACAGAAGAACAGTCAAAAGGGCCAGCATATAGAGGGCAGAC -AGACACAATCCAGGCCAATTCAAAATGTCCCAGGCCCTCACAGAACAATCCACCACGCCAGTGCGCCACT -CACGGACAATGACAGAAGAAATGAACCCTCCGGCTCAACCAGCCCTCGCATGCTGACACCAATTAACGAA -GAGGCAGACCCACTGGACGATGCCGACGACGAGACGTCTAGCCTTCCGCCCTTGGAGTCAGATGATGAAG -AGCAGGACAGGGACGGAACTTCCAACCGCACACCCACTGTCGCCCCACCGGCTCCCGTATACAGAGATCA -CTCTGAAAAGAAAGAACTCCCGCAAGACGAGCAACAAGATCAGGACCACACTCAAGAGGCCAGGAACCAG -GACAGTGACAACACCCAGTCAGAACACTCTTTTGAGGAGATGTATCGCCACATTCTAAGATCACAGGGGC -CATTTGATGCTGTTTTGTATTATCATATGATGAAGGATGAGCCTGTAGTTTTCAGTACCAGTGATGGCAA -AGAGTACACGTATCCAGACTCCCTTGAAGAGGAATATCCACCATGGCTCACTGAAAAAGAGGCTATGAAT -GAAGAGAATAGATTTGTTACATTGGATGGTCAACAATTTTATTGGCCGGTGATGAATCACAAGAATAAAT -TCATGGCAATCCTGCAACATCATCAGTGAATGAGCATGGAACAATGGGATGATTCAACCGACAAATAGCT -AACATTAAGTAGTCAAGGAACGAAAACAGGAAGAATTTTTGATGTCTAAGGTGTGAATTATTATCACAAT -AAAAGTGATTCTTATTTTTGAATTTAAAGCTAGCTTATTATTACTAGCCGTTTTTCAAAGTTCAATTTGA -GTCTTAATGCAAATAGGCGTTAAGCCACAGTTATAGCCATAATTGTAACTCAATATTCTAACTAGCGATT -TATCTAAATTAAATTACATTATGCTTTTATAACTTACCTACTAGCCTGCCCAACATTTACACGATCGTTT -TATAATTAAGAAAAAACTAATGATGAAGATTAAAACCTTCATCATCCTTACGTCAATTGAATTCTCTAGC -ACTCGAAGCTTATTGTCTTCAATGTAAAAGAAAAGCTGGTCTAACAAGATGACAACTAGAACAAAGGGCA -GGGGCCATACTGCGGCCACGACTCAAAACGACAGAATGCCAGGCCCTGAGCTTTCGGGCTGGATCTCTGA -GCAGCTAATGACCGGAAGAATTCCTGTAAGCGACATCTTCTGTGATATTGAGAACAATCCAGGATTATGC -TACGCATCCCAAATGCAACAAACGAAGCCAAACCCGAAGACGCGCAACAGTCAAACCCAAACGGACCCAA -TTTGCAATCATAGTTTTGAGGAGGTAGTACAAACATTGGCTTCATTGGCTACTGTTGTGCAACAACAAAC -CATCGCATCAGAATCATTAGAACAACGCATTACGAGTCTTGAGAATGGTCTAAAGCCAGTTTATGATATG -GCAAAAACAATCTCCTCATTGAACAGGGTTTGTGCTGAGATGGTTGCAAAATATGATCTTCTGGTGATGA -CAACCGGTCGGGCAACAGCAACCGCTGCGGCAACTGAGGCTTATTGGGCCGAACATGGTCAACCACCACC -TGGACCATCACTTTATGAAGAAAGTGCGATTCGGGGTAAGATTGAATCTAGAGATGAGACCGTCCCTCAA 
-AGTGTTAGGGAGGCATTCAACAATCTAAACAGTACCACTTCACTAACTGAGGAAAATTTTGGGAAACCTG -ACATTTCGGCAAAGGATTTGAGAAACATTATGTATGATCACTTGCCTGGTTTTGGAACTGCTTTCCACCA -ATTAGTACAAGTGATTTGTAAATTGGGAAAAGATAGCAACTCATTGGACATCATTCATGCTGAGTTCCAG -GCCAGCCTGGCTGAAGGAGACTCTCCTCAATGTGCCCTAATTCAAATTACAAAAAGAGTTCCAATCTTCC -AAGATGCTGCTCCACCTGTCATCCACATCCGCTCTCGAGGTGACATTCCCCGAGCTTGCCAGAAAAGCTT -GCGTCCAGTCCCACCATCGCCCAAGATTGATCGAGGTTGGGTATGTGTTTTTCAGCTTCAAGATGGTAAA -ACACTTGGACTCAAAATTTGAGCCAATCTCCCTTCCCTCCGAAAGAGGCGAATAATAGCAGAGGCTTCAA -CTGCTGAACTATAGGGTACGTTACATTAATGATACACTTGTGAGTATCAGCCCTGGATAATATAAGTCAA -TTAAACGACCAAGATAAAATTGTTCATATCTCGCTAGCAGCTTAAAATATAAATGTAATAGGAGCTATAT -CTCTGACAGTATTATAATCAATTGTTATTAAGTAACCCAAACCAAAAGTGATGAAGATTAAGAAAAACCT -ACCTCGGCTGAGAGAGTGTTTTTTCATTAACCTTCATCTTGTAAACGTTGAGCAAAATTGTTAAAAATAT -GAGGCGGGTTATATTGCCTACTGCTCCTCCTGAATATATGGAGGCCATATACCCTGTCAGGTCAAATTCA -ACAATTGCTAGAGGTGGCAACAGCAATACAGGCTTCCTGACACCGGAGTCAGTCAATGGGGACACTCCAT -CGAATCCACTCAGGCCAATTGCCGATGACACCATCGACCATGCCAGCCACACACCAGGCAGTGTGTCATC -AGCATTCATCCTTGAAGCTATGGTGAATGTCATATCGGGCCCCAAAGTGCTAATGAAGCAAATTCCAATT -TGGCTTCCTCTAGGTGTCGCTGATCAAAAGACCTACAGCTTTGACTCAACTACGGCCGCCATCATGCTTG -CTTCATACACTATCACCCATTTCGGCAAGGCAACCAATCCACTTGTCAGAGTCAATCGGCTGGGTCCTGG -AATCCCGGATCATCCCCTCAGGCTCCTGCGAATTGGAAACCAGGCTTTCCTCCAGGAGTTCGTTCTTCCG -CCAGTCCAACTACCCCAGTATTTCACCTTTGATTTGACAGCACTCAAACTGATCACCCAACCACTGCCTG -CTGCAACATGGACCGATGACACTCCAACAGGATCAAATGGAGCGTTGCGTCCAGGAATTTCATTTCATCC -AAAACTTCGCCCCATTCTTTTACCCAACAAAAGTGGGAAGAAGGGGAACAGTGCCGATCTAACATCTCCG -GAGAAAATCCAAGCAATAATGACTTCACTCCAGGACTTTAAGATCGTTCCAATTGATCCAACCAAAAATA -TCATGGGAATCGAAGTGCCAGAAACTCTGGTCCACAAGCTGACCGGTAAGAAGGTGACTTCTAAAAATGG -ACAACCAATCATCCCTGTTCTTTTGCCAAAGTACATTGGGTTGGACCCGGTGGCTCCAGGAGACCTCACC -ATGGTAATCACACAGGATTGTGACACGTGTCATTCTCCTGCAAGTCTTCCAGCTGTGATTGAGAAGTAAT -TGCAATAATTGACTCAGATCCAGTTTTATAGAATCTTCTCAGGGATAGTGATAACATCTATTTAGTAATC -CGTCCATTAGAGGAGACACTTTTAATTGATCAATATACTAAAGGTGCTTTACACCATTGTCTTTTTTCTC 
-TCCTAAATGTAGAACTTAACAAAAGACTCATAATATACTTGTTTTTAAAGGATTGATTGATGAAAGATCA -TAACTAATAACATTACAAATAATCCTACTATAATCAATACGGTGATTCAAATGTTAATCTTTCTCATTGC -ACATACTTTTTGCCCTTATCCTCAAATTGCCTGCATGCTTACATCTGAGGATAGCCAGTGTGACTTGGAT -TGGAAATGTGGAGAAAAAATCGGGACCCATTTCTAGGTTGTTCACAATCCAAGTACAGACATTGCCCTTC -TAATTAAGAAAAAATCGGCGATGAAGATTAAGCCGACAGTGAGCGTAATCTTCATCTCTCTTAGATTATT -TGTTTTCCAGAGTAGGGGTCGTCAGGTCCTTTTCAATCGTGTAACCAAAATAAACTCCACTAGAAGGATA -TTGTGGGGCAACAACACAATGGGCGTTACAGGAATATTGCAGTTACCTCGTGATCGATTCAAGAGGACAT -CATTCTTTCTTTGGGTAATTATCCTTTTCCAAAGAACATTTTCCATCCCACTTGGAGTCATCCACAATAG -CACATTACAGGTTAGTGATGTCGACAAACTAGTTTGTCGTGACAAACTGTCATCCACAAATCAATTGAGA -TCAGTTGGACTGAATCTCGAAGGGAATGGAGTGGCAACTGACGTGCCATCTGCAACTAAAAGATGGGGCT -TCAGGTCCGGTGTCCCACCAAAGGTGGTCAATTATGAAGCTGGTGAATGGGCTGAAAACTGCTACAATCT -TGAAATCAAAAAACCTGACGGGAGTGAGTGTCTACCAGCAGCGCCAGACGGGATTCGGGGCTTCCCCCGG -TGCCGGTATGTGCACAAAGTATCAGGAACGGGACCGTGTGCCGGAGACTTTGCCTTCCATAAAGAGGGTG -CTTTCTTCCTGTATGATCGACTTGCTTCCACAGTTATCTACCGAGGAACGACTTTCGCTGAAGGTGTCGT -TGCATTTCTGATACTGCCCCAAGCTAAGAAGGACTTCTTCAGCTCACACCCCTTGAGAGAGCCGGTCAAT -GCAACGGAGGACCCGTCTAGTGGCTACTATTCTACCACAATTAGATATCAGGCTACCGGTTTTGGAACCA -ATGAGACAGAGTACTTGTTCGAGGTTGACAATTTGACCTACGTCCAACTTGAATCAAGATTCACACCACA -GTTTCTGCTCCAGCTGAATGAGACAATATATACAAGTGGGAAAAGGAGCAATACCACGGGAAAACTAATT -TGGAAGGTCAACCCCGAAATTGATACAACAATCGGGGAGTGGGCCTTCTGGGAAACTAAAAAAACCTCAC -TAGAAAAATTCGCAGTGAAGAGTTGTCTTTCACAGTTGTATCAAACGGAGCCAAAAACATCAGTGGTCAG -AGTCCGGCGCGAACTTCTTCCGACCCAGGGACCAACACAACAACTGAAGACCACAAAATCATGGCTTCAG -AAAATTCCTCTGCAATGGTTCAAGTGCACAGTCAAGGAAGGGAAGCTGCAGTGTCGCATCTAACAACCCT -TGCCACAATCTCCACGAGTCCCCAATCCCTCACAACCAAACCAGGTCCGGACAACAGCACCCATAATACA -CCCGTGTATAAACTTGACATCTCTGAGGCAACTCAAGTTGAACAACATCACCGCAGAACAGACAACGACA -GCACAGCCTCCGACACTCCCTCTGCCACGACCGCAGCCGGACCCCCAAAAGCAGAGAACACCAACACGAG -CAAGAGCACTGACTTCCTGGACCCCGCCACCACAACAAGTCCCCAAAACCACAGCGAGACCGCTGGCAAC -AACAACACTCATCACCAAGATACCGGAGAAGAGAGTGCCAGCAGCGGGAAGCTAGGCTTAATTACCAATA 
-CTATTGCTGGAGTCGCAGGACTGATCACAGGCGGGAGAAGAACTCGAAGAGAAGCAATTGTCAATGCTCA -ACCCAAATGCAACCCTAATTTACATTACTGGACTACTCAGGATGAAGGTGCTGCAATCGGACTGGCCTGG -ATACCATATTTCGGGCCAGCAGCCGAGGGAATTTACATAGAGGGGCTAATGCACAATCAAGATGGTTTAA -TCTGTGGGTTGAGACAGCTGGCCAACGAGACGACTCAAGCTCTTCAACTGTTCCTGAGAGCCACAACTGA -GCTACGCACCTTTTCAATCCTCAACCGTAAGGCAATTGATTTCTTGCTGCAGCGATGGGGCGGCACATGC -CACATTCTGGGACCGGACTGCTGTATCGAACCACATGATTGGACCAAGAACATAACAGACAAAATTGATC -AGATTATTCATGATTTTGTTGATAAAACCCTTCCGGACCAGGGGGACAATGACAATTGGTGGACAGGATG -GAGACAATGGATACCGGCAGGTATTGGAGTTACAGGCGTTATAATTGCAGTTATCGCTTTATTCTGTATA -TGCAAATTTGTCTTTTAGTTTTTCTTCAGATTGCTTCATGGAAAAGCTCAGCCTCAAATCAATGAAACCA -GGATTTAATTATATGGATTACTTGAATCTAAGATTACTTGACAAATGATAATATAATACACTGGAGCTTT -AAACATAGCCAATGTGATTCTAACTCCTTTAAACTCACAGTTAATCATAAACAAGGTTTGACATCAATCT -AGTTATCTCTTTGAGAATGATAAACTTGATGAAGATTAAGAAAAAGGTAATCTTTCGATTATCTTTAATC -TTCATCCTTGATTCTACAATCATGACAGTTGTCTTTAGTGACAAGGGAAAGAAGCCTTTTTATTAAGTTG -TAATAATCAGATCTGCGAACCGGTAGAGTTTAGTTGCAACCTAACACACATAAAGCATTGGTCAAAAAGT -CAATAGAAATTTAAACAGTGAGTGGAGACAACTTTTAAATGGAAGCTTCATATGAGAGAGGACGCCCACG -AGCTGCCAGACAGCATTCAAGGGATGGACACGACCACCATGTTCGAGCACGATCATCATCCAGAGAGAAT -TATCGAGGTGAGTACCGTCAATCAAGGAGCGCCTCACAAGTGCGCGTTCCTACTGTATTTCATAAGAAGA -GAGTTGAACCATTAACAGTTCCTCCAGCACCTAAAGACATATGTCCGACCTTGAAAAAAGGATTTTTGTG -TGACAGTAGTTTTTGCAAAAAAGATCACCAGTTGGAGAGTTTAACTGATAGGGAATTACTCCTACTAATC -GCCCGTAAGACTTGTGGATCAGTAGAACAACAATTAAATATAACTGCACCCAAGGACTCGCGCTTAGCAA -ATCCAACGGCTGATGATTTCCAGCAAGAGGAAGGTCCAAAAATTACCTTGTTGACACTGATCAAGACGGC -AGAACACTGGGCGAGACAAGACATCAGAACCATAGAGGATTCAAAATTAAGAGCATTGTTGACTCTATGT -GCTGTGATGACGAGGAAATTCTCAAAATCCCAGCTGAGTCTTTTATGTGAGACACACCTAAGGCGCGAGG -GGCTTGGGCAAGATCAGGCAGAACCCGTTCTCGAAGTATATCAACGATTACACAGTGATAAAGGAGGCAG -TTTTGAAGCTGCACTATGGCAACAATGGGACCGACAATCCCTAATTATGTTTATCACTGCATTCTTGAAT -ATTGCTCTCCAGTTACCGTGTGAAAGTTCTGCTGTCGTTGTTTCAGGGTTAAGAACATTGGTTCCTCAAT -CAGATAATGAGGAAGCTTCAACCAACCCGGGGACATGCTCATGGTCTGATGAGGGTACCCCTTAATAAGG 
-CTGACTAAAACACTATATAACCTTCTACTTGATCACAATACTCCGTATACCTATCATCATATATTTAATC -AAGACGATATCCTTTAAAACTTATTCAGTACTATAATCACTCTCGTTTCAAATTAATAAGATGTGCATGA -TTGCCCTAATATATGAAGAGGTATGATACAACCCTAACAGTGATCAAAGAAAATCATAATCTCGTATCGC -TCGTAATATAACCTGCCAAGCATACCTCTTGCACAAAGTGATTCTTGTACACAAATAATGTTTTACTCTA -CAGGAGGTAGCAACGATCCATCCCATCAAAAAATAAGTATTTCATGACTTACTAATGATCTCTTAAAATA -TTAAGAAAAACTGACGGAACATAAATTCTTTATGCTTCAAGCTGTGGAGGAGGTGTTTGGTATTGGCTAT -TGTTATATTACAATCAATAACAAGCTTGTAAAAATATTGTTCTTGTTTCAAGAGGTAGATTGTGACCGGA -AATGCTAAACTAATGATGAAGATTAATGCGGAGGTCTGATAAGAATAAACCTTATTATTCAGATTAGGCC -CCAAGAGGCATTCTTCATCTCCTTTTAGCAAAGTACTATTTCAGGGTAGTCCAATTAGTGGCACGTCTTT -TAGCTGTATATCAGTCGCCCCTGAGATACGCCACAAAAGTGTCTCTAAGCTAAATTGGTCTGTACACATC -CCATACATTGTATTAGGGGCAATAATATCTAATTGAACTTAGCCGTTTAAAATTTAGTGCATAAATCTGG -GCTAACACCACCAGGTCAACTCCATTGGCTGAAAAGAAGCTTACCTACAACGAACATCACTTTGAGCGCC -CTCACAATTAAAAAATAGGAACGTCGTTCCAACAATCGAGCGCAAGGTTTCAAGGTTGAACTGAGAGTGT -CTAGACAACAAAATATTGATACTCCAGACACCAAGCAAGACCTGAGAAAAAACCATGGCTAAAGCTACGG -GACGATACAATCTAATATCGCCCAAAAAGGACCTGGAGAAAGGGGTTGTCTTAAGCGACCTCTGTAACTT -CTTAGTTAGCCAAACTATTCAGGGGTGGAAGGTTTATTGGGCTGGTATTGAGTTTGATGTGACTCACAAA -GGAATGGCCCTATTGCATAGACTGAAAACTAATGACTTTGCCCCTGCATGGTCAATGACAAGGAATCTCT -TTCCTCATTTATTTCAAAATCCGAATTCCACAATTGAATCACCGCTGTGGGCATTGAGAGTCATCCTTGC -AGCAGGGATACAGGACCAGCTGATTGACCAGTCTTTGATTGAACCCTTAGCAGGAGCCCTTGGTCTGATC -TCTGATTGGCTGCTAACAACCAACACTAACCATTTCAACATGCGAACACAACGTGTCAAGGAACAATTGA -GCCTAAAAATGCTGTCGTTGATTCGATCCAATATTCTCAAGTTTATTAACAAATTGGATGCTCTACATGT -CGTGAACTACAACGGATTGTTGAGCAGTATTGAAATTGGAACTCAAAATCATACAATCATCATAACTCGA -ACTAACATGGGTTTTCTGGTGGAGCTCCAAGAACCCGACAAATCGGCAATGAACCGCATGAAGCCTGGGC -CGGCGAAATTTTCCCTCCTTCATGAGTCCACACTGAAAGCATTTACACAAGGATCCTCGACACGAATGCA -AAGTTTGATTCTTGAATTTAATAGCTCTCTTGCTATCTAACTAAGGTAGAATACTTCATATTGAGCTAAC -TCATATATGCTGACTCAATAGTTATCTTGACATCTCTGCTTTCATAATCAGATATATAAGCATAATAAAT -AAATACTCATATTTCTTGATAATTTGTTTAACCACAGATAAATCCTCACTGTAAGCCAGCTTCCAAGTTG 
-ACACCCTTACAAAAACCAGGACTCAGAATCCCTCAAACAAGAGATTCCAAGACAACATCATAGAATTGCT -TTATTATATGAATAAGCATTTTATCACCAGAAATCCTATATACTAAATGGTTAATTGTAACTGAACCCGC -AGGTCACATGTGTTAGGTTTCACAGATTCTATATATTACTAACTCTATACTCGTAATTAACATTAGATAA -GTAGATTAAGAAAAAAGCCTGAGGAAGATTAAGAAAAACTGCTTATTGGGTCTTTCCGTGTTTTAGATGA -AGCAGTTGAAATTCTTCCTCTTGATATTAAATGGCTACACAACATACCCAATACCCAGACGCTAGGTTAT -CATCACCAATTGTATTGGACCAATGTGACCTAGTCACTAGAGCTTGCGGGTTATATTCATCATACTCCCT -TAATCCGCAACTACGCAACTGTAAACTCCCGAAACATATCTACCGTTTGAAATACGATGTAACTGTTACC -AAGTTCTTGAGTGATGTACCAGTGGCGACATTGCCCATAGATTTCATAGTCCCAGTTCTTCTCAAGGCAC -TGTCAGGCAATGGATTCTGTCCTGTTGAGCCGCGGTGCCAACAGTTCTTAGATGAAATCATTAAGTACAC -AATGCAAGATGCTCTCTTCTTGAAATATTATCTCAAAAATGTGGGTGCTCAAGAAGACTGTGTTGATGAA -CACTTTCAAGAGAAAATCTTATCTTCAATTCAGGGCAATGAATTTTTACATCAAATGTTTTTCTGGTATG -ATCTGGCTATTTTAACTCGAAGGGGTAGATTAAATCGAGGAAACTCTAGATCAACATGGTTTGTTCATGA -TGATTTAATAGACATCTTAGGCTATGGGGACTATGTTTTTTGGAAGATCCCAATTTCAATGTTACCACTG -AACACACAAGGAATCCCCCATGCTGCTATGGACTGGTATCAGGCATCAGTATTCAAAGAAGCGGTTCAAG -GGCATACACACATTGTTTCTGTTTCTACTGCCGACGTCTTGATAATGTGCAAAGATTTAATTACATGTCG -ATTCAACACAACTCTAATCTCAAAAATAGCAGAGATTGAGGATCCAGTTTGTTCTGATTATCCCAATTTT -AAGATTGTGTCTATGCTTTACCAGAGCGGAGATTACTTACTCTCCATATTAGGGTCTGATGGGTATAAAA -TTATTAAGTTCCTCGAACCATTGTGCTTGGCCAAAATTCAATTATGCTCAAAGTACACTGAGAGGAAGGG -CCGATTCTTAACACAAATGCATTTAGCTGTAAATCACACCCTAGAAGAAATTACAGAAATGCGTGCACTA -AAGCCTTCACAGGCTCAAAAGATCCGTGAATTCCATAGAACATTGATAAGGCTGGAGATGACGCCACAAC -AACTTTGTGAGCTATTTTCCATTCAAAAACACTGGGGGCATCCTGTGCTACATAGTGAAACAGCAATCCA -AAAAGTTAAAAAACATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTT -TTTAAATATAGTATTGCCAAACATTATTTTGATAGTCAAGGATCTTGGTACAGTGTTACTTCAGATAGGA -ATCTAACACCGGGTCTTAATTCTTATATCAAAAGAAATCAATTCCCTCCGTTGCCAATGATTAAAGAACT -ACTATGGGAATTTTACCACCTTGACCACCCTCCACTTTTCTCAACCAAAATTATTAGTGACTTAAGTATT -TTTATAAAAGACAGAGCTACCGCAGTAGAAAGGACATGCTGGGATGCAGTATTCGAGCCTAATGTTCTAG -GATATAATCCACCTCACAAATTTAGTACTAAACGTGTACCGGAACAATTTTTAGAGCAAGAAAACTTTTC 
-TATTGAGAATGTTCTTTCCTACGCACAAAAACTCGAGTATCTACTACCACAATATCGGAACTTTTCTTTC -TCATTGAAAGAGAAAGAGTTGAATGTAGGTAGAACCTTCGGAAAATTGCCTTATCCGACTCGCAATGTTC -AAACACTTTGTGAAGCTCTGTTAGCTGATGGTCTTGCTAAAGCATTTCCTAGCAATATGATGGTAGTTAC -GGAACGTGAGCAAAAAGAAAGCTTATTGCATCAAGCATCATGGCACCACACAAGTGATGATTTTGGTGAA -CATGCCACAGTTAGAGGGAGTAGCTTTGTAACTGATTTAGAGAAATACAATCTTGCATTTAGATATGAGT -TTACAGCACCTTTTATAGAATATTGCAACCGTTGCTATGGTGTTAAGAATGTTTTTAATTGGATGCATTA -TACAATCCCACAGTGTTATATGCATGTCAGTGATTATTATAATCCACCACATAACCTCACACTGGAGAAT -CGAGACAACCCCCCCGAAGGGCCTAGTTCATACAGGGGTCATATGGGAGGGATTGAAGGACTGCAACAAA -AACTCTGGACAAGTATTTCATGTGCTCAAATTTCTTTAGTTGAAATTAAGACTGGTTTTAAGTTACGCTC -AGCTGTGATGGGTGACAATCAGTGCATTACTGTTTTATCAGTCTTCCCCTTAGAGACTGACGCAGACGAG -CAGGAACAGAGCGCCGAAGACAATGCAGCGAGGGTGGCCGCCAGCCTAGCAAAAGTTACAAGTGCCTGTG -GAATCTTTTTAAAACCTGATGAAACATTTGTACATTCAGGTTTTATCTATTTTGGAAAAAAACAATATTT -GAATGGGGTCCAATTGCCTCAGTCCCTTAAAACGGCTACAAGAATGGCACCATTGTCTGATGCAATTTTT -GATGATCTTCAAGGGACCCTGGCTAGTATAGGCACTGCTTTTGAGCGATCCATCTCTGAGACACGACATA -TCTTTCCTTGCAGGATAACCGCAGCTTTCCATACGTTTTTTTCGGTGAGAATCTTGCAATATCATCATCT -CGGGTTCAATAAAGGTTTTGACCTTGGACAGTTAACACTCGGCAAACCTCTGGATTTCGGAACAATATCA -TTGGCACTAGCGGTACCGCAGGTGCTTGGAGGGTTATCCTTCTTGAATCCTGAGAAATGTTTCTACCGGA -ATCTAGGAGATCCAGTTACCTCAGGCTTATTCCAGTTAAAAACTTATCTCCGAATGATTGAGATGGATGA -TTTATTCTTACCTTTAATTGCGAAGAACCCTGGGAACTGCACTGCCATTGACTTTGTGCTAAATCCTAGC -GGATTAAATGTCCCTGGGTCGCAAGACTTAACTTCATTTCTGCGCCAGATTGTACGCAGGACCATCACCC -TAAGTGCGAAAAACAAACTTATTAATACCTTATTTCATGCGTCAGCTGACTTCGAAGACGAAATGGTTTG -TAAATGGCTATTATCATCAACTCCTGTTATGAGTCGTTTTGCGGCCGATATCTTTTCACGCACGCCGAGC -GGGAAGCGATTGCAAATTCTAGGATACCTGGAAGGAACACGCACATTATTAGCCTCTAAGATCATCAACA -ATAATACAGAGACACCGGTTTTGGACAGACTGAGGAAAATAACATTGCAAAGGTGGAGCCTATGGTTTAG -TTATCTTGATCATTGTGATAATATCCTGGCGGAGGCTTTAACCCAAATAACTTGCACAGTTGATTTAGCA -CAGATTCTGAGGGAATATTCATGGGCTCATATTTTAGAGGGAAGACCTCTTATTGGAGCCACACTCCCAT -GTATGATTGAGCAATTCAAAGTGTTTTGGCTGAAACCCTACGAACAATGTCCGCAGTGTTCAAATGCAAA 
-GCAACCAGGTGGGAAACCATTCGTGTCAGTGGCAGTCAAGAAACATATTGTTAGTGCATGGCCGAACGCA -TCCCGAATAAGCTGGACTATCGGGGATGGAATCCCATACATTGGATCAAGGACAGAAGATAAGATAGGAC -AACCTGCTATTAAACCAAAATGTCCTTCCGCAGCCTTAAGAGAGGCCATTGAATTGGCGTCCCGTTTAAC -ATGGGTAACTCAAGGCAGTTCGAACAGTGACTTGCTAATAAAACCATTTTTGGAAGCACGAGTAAATTTA -AGTGTTCAAGAAATACTTCAAATGACCCCTTCACATTACTCAGGAAATATTGTTCACAGGTACAACGATC -AATACAGTCCTCATTCTTTCATGGCCAATCGTATGAGTAATTCAGCAACGCGATTGATTGTTTCTACAAA -CACTTTAGGTGAGTTTTCAGGAGGTGGCCAGTCTGCACGCGACAGCAATATTATTTTCCAGAATGTTATA -AATTATGCAGTTGCACTGTTCGATATTAAATTTAGAAACACTGAGGCTACAGATATCCAATATAATCGTG -CTCACCTTCATCTAACTAAGTGTTGCACCCGGGAAGTACCAGCTCAGTATTTAACATACACATCTACATT -GGATTTAGATTTAACAAGATACCGAGAAAACGAATTGATTTATGACAGTAATCCTCTAAAAGGAGGACTC -AATTGCAATATCTCATTCGATAATCCATTTTTCCAAGGTAAACGGCTGAACATTATAGAAGATGATCTTA -TTCGACTGCCTCACTTATCTGGATGGGAGCTAGCCAAGACCATCATGCAATCAATTATTTCAGATAGCAA -CAATTCATCTACAGACCCAATTAGCAGTGGAGAAACAAGATCATTCACTACCCATTTCTTAACTTATCCC -AAGATAGGACTTCTGTACAGTTTTGGGGCCTTTGTAAGTTATTATCTTGGCAATACAATTCTTCGGACTA -AGAAATTAACACTTGACAATTTTTTATATTACTTAACTACTCAAATTCATAATCTACCACATCGCTCATT -GCGAATACTTAAGCCAACATTCAAACATGCAAGCGTTATGTCACGGTTAATGAGTATTGATCCTCATTTT -TCTATTTACATAGGCGGTGCTGCAGGTGACAGAGGACTCTCAGATGCGGCCAGGTTATTTTTGAGAACGT -CCATTTCATCTTTTCTTACATTTGTAAAAGAATGGATAATTAATCGCGGAACAATTGTCCCTTTATGGAT -AGTATATCCGCTAGAGGGTCAAAACCCAACACCTGTGAATAATTTTCTCTATCAGATCGTAGAACTGCTG -GTGCATGATTCATCAAGACAACAGGCTTTTAAAACTACCATAAGTGATCATGTACATCCTCACGACAATC -TTGTTTACACATGTAAGAGTACAGCCAGCAATTTCTTCCATGCATCATTGGCGTACTGGAGGAGCAGACA -CAGAAACAGCAACCGAAAATACTTGGCAAGAGACTCTTCAACTGGATCAAGCACAAACAACAGTGATGGT -CATATTGAGAGAAGTCAAGAACAAACCACCAGAGATCCACATGATGGCACTGAACGGAATCTAGTCCTAC -AAATGAGCCATGAAATAAAAAGAACGACAATTCCACAAGAAAACACGCACCAGGGTCCGTCGTTCCAGTC -CTTTCTAAGTGACTCTGCTTGTGGTACAGCAAATCCAAAACTAAATTTCGATCGATCGAGACACAATGTG -AAATTTCAGGATCATAACTCGGCATCCAAGAGGGAAGGTCATCAAATAATCTCACACCGTCTAGTCCTAC -CTTTCTTTACATTATCTCAAGGGACACGCCAATTAACGTCATCCAATGAGTCACAAACCCAAGACGAGAT 
-ATCAAAGTACTTACGGCAATTGAGATCCGTCATTGATACCACAGTTTATTGTAGATTTACCGGTATAGTC -TCGTCCATGCATTACAAACTTGATGAGGTCCTTTGGGAAATAGAGAGTTTCAAGTCGGCTGTGACGCTAG -CAGAGGGAGAAGGTGCTGGTGCCTTACTATTGATTCAGAAATACCAAGTTAAGACCTTATTTTTCAACAC -GCTAGCTACTGAGTCCAGTATAGAGTCAGAAATAGTATCAGGAATGACTACTCCTAGGATGCTTCTACCT -GTTATGTCAAAATTCCATAATGACCAAATTGAGATTATTCTTAACAACTCAGCAAGCCAAATAACAGACA -TAACAAATCCTACTTGGTTTAAAGACCAAAGAGCAAGGCTACCTAAGCAAGTCGAGGTTATAACCATGGA -TGCAGAGACAACAGAGAATATAAACAGATCGAAATTGTACGAAGCTGTATATAAATTGATCTTACACCAT -ATTGATCCTAGCGTATTGAAAGCAGTGGTCCTTAAAGTCTTTCTAAGTGATACTGAGGGTATGTTATGGC -TAAATGATAATTTAGCCCCGTTTTTTGCCACTGGTTATTTAATTAAGCCAATAACGTCAAGTGCTAGATC -TAGTGAGTGGTATCTTTGTCTGACGAACTTCTTATCAACTACACGTAAGATGCCACACCAAAACCATCTC -AGTTGTAAACAGGTAATACTTACGGCATTGCAACTGCAAATTCAACGAAGCCCATACTGGCTAAGTCATT -TAACTCAGTATGCTGACTGTGAGTTACATTTAAGTTATATCCGCCTTGGTTTTCCATCATTAGAGAAAGT -ACTATACCACAGGTATAACCTCGTCGATTCAAAAAGAGGTCCACTAGTCTCTATCACTCAGCACTTAGCA -CATCTTAGAGCAGAGATTCGAGAATTAACTAATGATTATAATCAACAGCGACAAAGTCGGACTCAAACAT -ATCACTTTATTCGTACTGCAAAAGGACGAATCACAAAACTAGTCAATGATTATTTAAAATTCTTTCTTAT -TGTGCAAGCATTAAAACATAATGGGACATGGCAAGCTGAGTTTAAGAAATTACCAGAGTTGATTAGTGTG -TGCAATAGGTTCTACCATATTAGAGATTGCAATTGTGAAGAACGTTTCTTAGTTCAAACCTTATATTTAC -ATAGAATGCAGGATTCTGAAGTTAAGCTTATCGAAAGGCTGACAGGGCTTCTGAGTTTATTTCCGGATGG -TCTCTACAGGTTTGATTGAATTACCGTGCATAGTATCCTGATACTTGCAAAGGTTGGTTATTAACATACA -GATTATAAAAAACTCATAAATTGCTCTCATACATCATATTGATCTAATCTCAATAAACAACTATTTAAAT -AACGAAAGGAGTCCCTATATTATATACTATATTTAGCCTCTCTCCCTGCGTGATAATCAAAAAATTCACA -ATGCAGCATGTGTGACATATTACTGCCGCAATGAATTTAACGCAACATAATAAACTCTGCACTCTTTATA -ATTAAGCTTTAACGAAAGGTCTGGGCTCATATTGTTATTGATATAATAATGTTGTATCAATATCCTGTCA -GATGGAATAGTGTTTTGGTTGATAACACAACTTCTTAAAACAAAATTGATCTTTAAGATTAAGTTTTTTA -TAATTATCATTACTTTAATTTGTCGTTTTAAAAACGGTGATAGCCTTAATCTTTGTGTAAAATAAGAGAT -TAGGTGTAATAACCTTAACATTTTTGTCTAGTAAGCTACTATTTCATACAGAATGATAAAATTAAAAGAA -AAGGCAGGACTGTAAAATCAGAAATACCTTCTTTACAATATAGCAGACTAGATAATAATCTTCGTGTTAA 
-TGATAATTAAGACATTGACCACGCTCATCAGAAGGCTCGCCAGAATAAACGTTGCAAAAAGGATTCCTGG -AAAAATGGTCGCACACAAAAATTTAAAAATAAATCTATTTCTTCTTTTTTGTGTGTCCA -