Mercurial > repos > matnguyen > pathogist
changeset 2:baf1e89b42eb draft
Added config builder
author | matnguyen |
---|---|
date | Tue, 05 Mar 2019 02:36:40 -0500 |
parents | c1b6f5fbbcad |
children | 56f1b7f06222 |
files | galaxy/tools/concatenator/concatenator galaxy/tools/concatenator/concatenator.xml galaxy/tools/config_builder/config_builder galaxy/tools/config_builder/config_builder.xml galaxy/tools/visualization/visualization.xml |
diffstat | 5 files changed, 512 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/galaxy/tools/concatenator/concatenator Fri Feb 01 16:56:03 2019 -0500 +++ b/galaxy/tools/concatenator/concatenator Tue Mar 05 02:36:40 2019 -0500 @@ -6,9 +6,18 @@ parser = ap.ArgumentParser(prog='concatenater', conflict_handler='resolve', description="Concatenates all accessions and their associated paths") -input = parser.add_argument_group('Input', '') -input.add_argument('-n', '--name', nargs='+', required=True, help="Sample name (accessions)") -input.add_argument('-i', '--input', nargs='+', required=True, help="Paths to calls") +subparser = parser.add_subparsers(dest='subcommand') + +variant_parser = subparser.add_parser(name='variant', help="Concatenate variant call files") +variant_parser.add_argument('-n', '--name', nargs='+', required=True, help="Sample name (accessions)") +variant_parser.add_argument('-i', '--input', nargs='+', required=True, help="Paths to calls") + +cluster_parser = subparser.add_parser('cluster') +cluster_parser.add_argument('--mlst', metavar="MLST", type=str, help="MLST clusters/distance matrix") +cluster_parser.add_argument('--snp', metavar="SNP", type=str, help="SNP clusters/distance matrix") +cluster_parser.add_argument('--cnv', metavar="CNV", type=str, help="CNV clusters/distance matrix") +cluster_parser.add_argument('--spoligo', metavar="SPOLIGO", type=str, help="Spoligotyping clusters/distance matrix") +# cluster_parser.add_argument('--kwip', metavar="KWIP", type=str, help="Kwip clusters/distance matrix") if len(sys.argv) == 1: parser.print_usage() @@ -17,5 +26,13 @@ args = parser.parse_args() output = open('paths.txt', 'w') -for index,path in enumerate(args.input): - output.write("%s=%s\n" % (args.name[index], path)) +if args.subcommand == 'variant': + for index,path in enumerate(args.input): + output.write("%s=%s\n" % (args.name[index], path)) +elif args.subcommand == 'cluster': + variant_types = [(args.mlst, 'MLST'), (args.snp, 'SNP'), (args.cnv, 'CNV'), (args.spoligo, 'spoligotyping')] + for variant in variant_types: + 
if variant[0] == None: + continue + else: + output.write("%s=%s\n" % (variant[1], variant[0]))
--- a/galaxy/tools/concatenator/concatenator.xml Fri Feb 01 16:56:03 2019 -0500 +++ b/galaxy/tools/concatenator/concatenator.xml Tue Mar 05 02:36:40 2019 -0500 @@ -5,12 +5,70 @@ </requirements> <command><![CDATA[ - $__tool_directory__/concatenator -n #for $path in $paths# ${path.element_identifier} #end for# - -i #for $path in $paths# $path #end for# + $__tool_directory__/concatenator + + #if $input_type.input_type_selector == "variant" + variant + -n #for $path in $input_type.paths# ${path.element_identifier} #end for# + -i #for $path in $input_type.paths# $path #end for# + #elif $input_type.input_type_selector == "clustering" + cluster + #if $input_type.mlst.mlst_checkbox + --mlst $input_type.mlst.mlst_path + #end if + #if $input_type.snp.snp_checkbox + --snp $input_type.snp.snp_path + #end if + #if $input_type.cnv.cnv_checkbox + --cnv $input_type.cnv.cnv_path + #end if + #if $input_type.spoligo.spoligo_checkbox + --spoligo $input_type.spoligo.spoligo_path + #end if + #end if ]]></command> + <inputs> - <param name="paths" type="data" label="Collection of calls" help="" optional="False" multiple="True"/> + <conditional name="input_type"> + <param name="input_type_selector" type="select" label="Concatenation of variant calls, or clustering files"> + <option value="variant">Variant Call Files</option> + <option value="clustering">Clustering/Distance Matrix Files</option> + </param> + <when value="variant"> + <param name="paths" type="data" label="Collection of calls" help="" optional="False" multiple="True"/> + </when> + <when value="clustering"> + <conditional name="mlst"> + <param name="mlst_checkbox" type="boolean" label="Include MLST"/> + <when value="true"> + <param name="mlst_path" type="data" label="MLST Clustering/Distance Matrix file" + format="txt,tabular"/> + </when> + </conditional> + <conditional name="snp"> + <param name="snp_checkbox" type="boolean" label="Include SNP"/> + <when value="true"> + <param name="snp_path" type="data" label="SNP 
Clustering/Distance Matrix file" + format="txt,tabular"/> + </when> + </conditional> + <conditional name="cnv"> + <param name="cnv_checkbox" type="boolean" label="Include CNV"/> + <when value="true"> + <param name="cnv_path" type="data" label="CNV Clustering/Distance Matrix file" + format="txt,tabular"/> + </when> + </conditional> + <conditional name="spoligo"> + <param name="spoligo_checkbox" type="boolean" label="Include Spoligotyping"/> + <when value="true"> + <param name="spoligo_path" type="data" label="Spoligotyping Clustering/Distance Matrix file" + format="txt,tabular"/> + </when> + </conditional> + </when> + </conditional> </inputs> <outputs>
#!/usr/bin/env python
"""Populate a blank PathOGiST config file from command-line options.

Source path: galaxy/tools/config_builder/config_builder

The script reads the YAML template given as the first positional argument,
fills in the top-level sections it finds (``temp``, ``run``, ``genotyping``
and ``clustering``) from the command-line options, and writes the result to
``config.yaml`` in the current working directory.
"""

import sys
import argparse as ap

# Name of the file the populated configuration is written to.
OUTPUT_CONFIG = "config.yaml"

# Genotyping tools that can be switched on in the ``run`` section. Each name is
# both the command-line flag (``--<name>``) and the key inside ``run``.
_TOOLS = ('snippy', 'kwip', 'prince', 'spotyping', 'mentalist')

# (key used in the config file, suffix used in the option names) for every
# datatype that has a ``--fine_<suffix>`` flag and a ``--<suffix>_thresh``
# option. The order is the order in which ``fine_clusterings`` is filled.
_DATATYPES = (
    ('SNP', 'snp'),
    ('MLST', 'mlst'),
    ('kWIP', 'kwip'),
    ('spoligotyping', 'spoligo'),
    ('CNV', 'cnv'),
)


def build_parser():
    """Return the argument parser describing the config_builder CLI.

    NOTE(review): no option declares a ``type``, so every value is stored in
    the config as the string that was typed on the command line -- confirm
    that the consumer of the config accepts string-valued numbers.
    """
    parser = ap.ArgumentParser(prog='config_builder', conflict_handler='resolve',
                               description="Builds the config file for PathOGiST")

    parser.add_argument('yaml', help="Blank config file")
    parser.add_argument('forward', help="File to list of forward reads")
    parser.add_argument('reverse', help="File to list of reverse reads")
    parser.add_argument('output', help="Output prefix for final consensus clustering and visualization")

    tools = parser.add_argument_group('Tools')
    tools.add_argument('--snippy', action='store_true', help="Run Snippy")
    tools.add_argument('--kwip', action='store_true', help="Run Kwip")
    tools.add_argument('--prince', action='store_true', help="Run Prince")
    tools.add_argument('--spotyping', action='store_true', help="Run Spotyping")
    tools.add_argument('--mentalist', action='store_true', help="Run Mentalist")

    mentalist = parser.add_argument_group('Mentalist')
    mentalist.add_argument('--local_file', action='store_true', help="Use local database")
    mentalist.add_argument('--build_db', action='store_true', help="Build a database")
    mentalist.add_argument('--download_pubmlst', action='store_true', help="Download pubMLST scheme")
    mentalist.add_argument('--download_cgmlst', action='store_true', help="Download cgMLST scheme")
    mentalist.add_argument('--download_enterobase', action='store_true', help="Download Enterobase scheme")

    mentalist.add_argument('--local_db', metavar="STRING", help="Location of database")
    mentalist.add_argument('--kmer', metavar="INT", help="Kmer size")
    mentalist.add_argument('--fasta_files', metavar="STRING", help="Location of FASTA files for MLST scheme")
    mentalist.add_argument('--profile', metavar="STRING", help="Profile file for known genotypes")
    mentalist.add_argument('--scheme', metavar="STRING", help="Species name or scheme ID")
    mentalist.add_argument('--entero_scheme', metavar="STRING",
                           help="(S)almonella, (Y)ersinia, or (E)scherichia/Shigella")
    mentalist.add_argument('--entero_type', metavar="STRING",
                           help="'cg' or 'wg' for cgMLST or wgMLST, respectively.")
    mentalist.add_argument('--mutation_threshold', metavar="INT",
                           help="Maximum number of mutations when looking for novel "
                                "alleles")
    mentalist.add_argument('--kt', metavar="INT",
                           help="Minimum number of times a kmer is seen to be considered present "
                                "in the sample")

    kwip = parser.add_argument_group('Kwip')
    kwip.add_argument('--N', metavar="INT", help="Number of tables")
    kwip.add_argument('--x', metavar="INT", help="Maximum table size")
    kwip.add_argument('--ksize', metavar="INT", help="kmer size to use")
    kwip.add_argument('--unique_kmers', metavar="INT", help="Approximate number of unique kmers in the input set")

    snippy = parser.add_argument_group('Snippy')
    snippy.add_argument('--reference', metavar="STRING",
                        help="Reference genome. Supports FASTA, GenBank, EMBL (not GFF)")
    snippy.add_argument('--mapqual', metavar="INT", help="Minimum read mapping quality to consider")
    snippy.add_argument('--basequal', metavar="INT", help="Minimum base quality to consider")
    snippy.add_argument('--mincov', metavar="INT", help="Minimum coverage of variant site")
    snippy.add_argument('--minfrac', metavar="FLOAT", help="Minimum proportion for variant evidence")

    clustering = parser.add_argument_group('Clustering')
    clustering.add_argument('--fine_snp', action='store_true', help="Use SNP as a fine datatype")
    clustering.add_argument('--fine_mlst', action='store_true', help="Use MLST as a fine datatype")
    clustering.add_argument('--fine_kwip', action='store_true', help="Use Kwip as a fine datatype")
    clustering.add_argument('--fine_spoligo', action='store_true', help="Use Spoligotyping as a fine datatype")
    clustering.add_argument('--fine_cnv', action='store_true', help="Use CNV as a fine datatype")
    clustering.add_argument('--snp_thresh', metavar="INT", help="Threshold value for SNP")
    clustering.add_argument('--mlst_thresh', metavar="INT", help="Threshold value for MLST")
    clustering.add_argument('--kwip_thresh', metavar="INT", help="Threshold value for Kwip")
    clustering.add_argument('--spoligo_thresh', metavar="INT", help="Threshold value for Spoligotyping")
    clustering.add_argument('--cnv_thresh', metavar="INT", help="Threshold value for CNV")
    clustering.add_argument('--less_constraints', action='store_true', help="Use less constraints when clustering")
    clustering.add_argument('--method', metavar="STRING", help="Use `C4` or `ILP` as clustering method")
    clustering.add_argument('--no_presolve', action='store_true', help="Do not perform presolving")
    clustering.add_argument('--visualize', action='store_true', help="Visualize clusters")

    return parser


def _populate_run(run, args):
    """Mark every tool requested on the command line as enabled (``1``)."""
    for tool in _TOOLS:
        if getattr(args, tool):
            run[tool] = 1


def _populate_mentalist(mentalist, args):
    """Fill the MentaLiST section: one database mode plus the call options."""
    # Only the first database mode that was requested is filled in.
    if args.local_file:
        mentalist['local_file']['database'] = args.local_db
    elif args.build_db:
        options = mentalist['build_db']['options']
        options['k'] = args.kmer
        options['fasta_files'] = args.fasta_files
        options['profile'] = args.profile
    elif args.download_pubmlst:
        options = mentalist['download_pubmlst']['options']
        options['k'] = args.kmer
        options['scheme'] = args.scheme
    elif args.download_cgmlst:
        options = mentalist['download_cgmlst']['options']
        options['k'] = args.kmer
        options['scheme'] = args.scheme
    elif args.download_enterobase:
        options = mentalist['download_enterobase']['options']
        options['k'] = args.kmer
        options['scheme'] = args.entero_scheme
        options['type'] = args.entero_type

    # The nested sections are only looked up when there is a value to store,
    # so a template without them still works when the options are omitted.
    if args.mutation_threshold is not None:
        mentalist['call']['options']['mutation_threshold'] = args.mutation_threshold
    if args.kt is not None:
        mentalist['call']['options']['kt'] = args.kt


def _populate_genotyping(genotyping, args):
    """Fill the read locations and the options of every enabled tool."""
    genotyping['input_reads']['forward_reads'] = args.forward
    genotyping['input_reads']['reverse_reads'] = args.reverse

    if args.mentalist:
        _populate_mentalist(genotyping['mentalist'], args)

    if args.kwip:
        khmer_values = (
            ('N', args.N),
            ('x', args.x),
            ('ksize', args.ksize),
            # The config key uses a dash where the option name has an underscore.
            ('unique-kmers', args.unique_kmers),
        )
        for key, value in khmer_values:
            if value is not None:
                genotyping['kwip']['khmer_options'][key] = value

    if args.snippy:
        options = genotyping['snippy']['options']
        # The reference is always written when Snippy is enabled, even if it
        # was not supplied (it is then stored as a null value).
        options['reference'] = args.reference
        for key in ('mapqual', 'basequal', 'mincov', 'minfrac'):
            value = getattr(args, key)
            if value is not None:
                options[key] = value


def _populate_clustering(clustering, args):
    """Fill the output prefix, fine datatypes, thresholds and solver flags."""
    clustering['output_prefix'] = args.output

    clustering['fine_clusterings'] = [
        label for label, suffix in _DATATYPES
        if getattr(args, 'fine_' + suffix)
    ]

    # Each datatype receives its *own* threshold option.
    for label, suffix in _DATATYPES:
        threshold = getattr(args, suffix + '_thresh')
        if threshold is not None:
            clustering['thresholds'][label] = threshold

    if args.less_constraints:
        clustering['all_constraints'] = False
    if args.method is not None:
        clustering['method'] = args.method
    if args.no_presolve:
        clustering['presolve'] = False
    if args.visualize:
        clustering['visualize'] = True


def populate_config(config, args):
    """Update ``config`` in place from the parsed options and return it.

    ``config`` is the mapping loaded from the blank template and ``args`` the
    namespace produced by :func:`build_parser`. Only top-level sections that
    are present in ``config`` are touched; ``threads`` is deliberately left
    unchanged.
    """
    if 'temp' in config:
        config['temp'] = 'tmp_dir'
    if 'run' in config:
        _populate_run(config['run'], args)
    if 'genotyping' in config:
        _populate_genotyping(config['genotyping'], args)
    if 'clustering' in config:
        _populate_clustering(config['clustering'], args)
    return config


def main(argv=None):
    """Parse ``argv`` (default: ``sys.argv[1:]``) and write ``config.yaml``."""
    # Imported here so the helpers above stay importable without PyYAML.
    import yaml

    if argv is None:
        argv = sys.argv[1:]

    parser = build_parser()
    if not argv:
        parser.print_usage()
        sys.exit(1)

    args = parser.parse_args(argv)

    with open(args.yaml) as handle:
        # safe_load: the template is plain data, and a bare yaml.load() without
        # a Loader is rejected by current PyYAML releases.
        config = yaml.safe_load(handle)

    if not isinstance(config, dict):
        parser.error("%s does not contain a YAML mapping" % args.yaml)

    populate_config(config, args)

    with open(OUTPUT_CONFIG, 'w') as handle:
        yaml.dump(config, handle)


if __name__ == "__main__":
    main()
<!-- galaxy/tools/config_builder/config_builder.xml -->
<tool id="config_builder" name="PathOGiST - Config Builder" version="1.0.0">
    <description>: Creates and populates the config file to run PathOGiST</description>
    <requirements>
        <requirement type="package" version="0.2.3">pathogist</requirement>
    </requirements>

    <command><![CDATA[
        ## First let PathOGiST write a blank template, then fill it in. The two
        ## commands are chained explicitly so they do not run together as one.
        PATHOGIST all config.yaml --new_config
        &&
        ## config_builder requires four positionals: the template, the lists of
        ## forward and reverse reads, and the clustering output prefix.
        $__tool_directory__/config_builder
            config.yaml
            '$forward'
            '$reverse'
            '$output_prefix'

        #if str($snippy.snippy_checkbox) == "true"
            --snippy
            --reference '$snippy.reference'
            #if str($snippy.snippy_adv.snippy_adv_checkbox) == "true"
                --mapqual $snippy.snippy_adv.mapqual
                --basequal $snippy.snippy_adv.basequal
                --mincov $snippy.snippy_adv.mincov
                --minfrac $snippy.snippy_adv.minfrac
            #end if
            $snippy.snp_fine
            --snp_thresh $snippy.snp_threshold
        #end if

        #if str($mentalist.mentalist_checkbox) == "true"
            --mentalist
            #if $mentalist.db_loc.db_selector == "local_file"
                --local_file
                --local_db '$mentalist.db_loc.mlst_database'
            #elif $mentalist.db_loc.db_selector == "build_db"
                --build_db
                --kmer $mentalist.db_loc.k
                --fasta_files '$mentalist.db_loc.fasta_files'
                --profile '$mentalist.db_loc.profile'
            #elif $mentalist.db_loc.db_selector == "download_pubmlst"
                --download_pubmlst
                --kmer $mentalist.db_loc.k
                --scheme '$mentalist.db_loc.scheme'
            #elif $mentalist.db_loc.db_selector == "download_cgmlst"
                --download_cgmlst
                --kmer $mentalist.db_loc.k
                --scheme '$mentalist.db_loc.scheme'
            #elif $mentalist.db_loc.db_selector == "download_enterobase"
                --download_enterobase
                --kmer $mentalist.db_loc.k
                --entero_scheme '$mentalist.db_loc.scheme'
                --entero_type '$mentalist.db_loc.type'
            #end if
            #if str($mentalist.mentalist_adv.mentalist_adv_checkbox) == "true"
                --mutation_threshold $mentalist.mentalist_adv.mutation_thresh
                --kt $mentalist.mentalist_adv.kt
            #end if
            $mentalist.mlst_fine
            --mlst_thresh $mentalist.mlst_threshold
        #end if

        #if str($prince.prince_checkbox) == "true"
            --prince
            $prince.cnv_fine
            --cnv_thresh $prince.cnv_threshold
        #end if

        #if str($kwip.kwip_checkbox) == "true"
            --kwip
            #if str($kwip.kwip_adv.kwip_adv_checkbox) == "true"
                --N $kwip.kwip_adv.N
                --x $kwip.kwip_adv.x
                --ksize $kwip.kwip_adv.ksize
                --unique_kmers $kwip.kwip_adv.unique_kmers
            #end if
            $kwip.kwip_fine
            --kwip_thresh $kwip.kwip_threshold
        #end if

        #if str($spotyping.spotyping_checkbox) == "true"
            --spotyping
            $spotyping.spoligo_fine
            --spoligo_thresh $spotyping.spoligo_threshold
        #end if

        #if str($adv_clustering.adv_clustering_checkbox) == "true"
            $adv_clustering.all_constraints
            --method '$adv_clustering.method'
            $adv_clustering.presolve
            $adv_clustering.visualize
        #end if
    ]]></command>


    <inputs>
        <!--<conditional name="task">-->
            <!--<param name="task_selector" type="select" label="Create Blank Config or Populate Config File">-->
                <!--<option value="create">Create Blank Config File</option>-->
                <!--<option value="populate">Populate Config File</option>-->
            <!--</param>-->
            <!--<when value="create">-->

            <!--</when>-->
            <!--<when value="populate">-->

        <!-- Required positionals of config_builder. -->
        <param name="forward" type="data" format="txt" label="File listing the forward reads"/>
        <param name="reverse" type="data" format="txt" label="File listing the reverse reads"/>
        <param name="output_prefix" type="text" value="pathogist"
               label="Output prefix for final consensus clustering and visualization"/>

        <conditional name="snippy">
            <param name="snippy_checkbox" type="boolean" label="Run Snippy"/>
            <when value="true">
                <param name="reference" type="data" format="fasta,fa,fasta.gz,fa.gz" label="Reference Genome"/>
                <conditional name="snippy_adv">
                    <param name="snippy_adv_checkbox" type="boolean" label="Advanced Snippy Parameters"/>
                    <when value="true">
                        <param name="mapqual" type="integer" value="60" min="0"
                               label="Minimum read mapping quality to consider"/>
                        <param name="basequal" type="integer" value="20" min="0"
                               label="Minimum base quality to consider"/>
                        <param name="mincov" type="integer" value="10" min="0"
                               label="Minimum coverage of variant site"/>
                        <param name="minfrac" type="float" value="0.9" min="0"
                               label="Minimum proportion of variant evidence"/>
                    </when>
                    <when value="false"/>
                </conditional>
                <!-- truevalue is the flag config_builder expects; unchecked emits nothing. -->
                <param name="snp_fine" type="boolean" truevalue="--fine_snp" falsevalue=""
                       label="Consider SNP as a fine datatype"/>
                <param name="snp_threshold" type="integer" value="2500" min="0"
                       label="Correlation clustering threshold for SNP"/>
            </when>
            <when value="false"/>
        </conditional>

        <conditional name="mentalist">
            <param name="mentalist_checkbox" type="boolean" label="Run MentaLiST"/>
            <when value="true">
                <conditional name="db_loc">
                    <param name="db_selector" type="select" label="Option for obtaining MLST database">
                        <option value="local_file">Local File</option>
                        <option value="build_db">Build DB</option>
                        <option value="download_pubmlst">Download PubMLST Scheme</option>
                        <option value="download_cgmlst">Download cgMLST Scheme</option>
                        <option value="download_enterobase">Download Enterobase Scheme</option>
                    </param>
                    <when value="local_file">
                        <param name="mlst_database" format="db" type="data" label="MLST database file"/>
                    </when>
                    <when value="build_db">
                        <param name="k" type="integer" value="31" min="1" label="kmer size"/>
                        <param name="fasta_files" type="data" format="txt,tabular" label="List of FASTA files"/>
                        <param name="profile" type="data" format="txt" label="Profile file for known genotypes"/>
                    </when>
                    <when value="download_pubmlst">
                        <param name="k" type="integer" value="31" min="1" label="kmer size"/>
                        <param name="scheme" type="text" label="Species name or scheme ID"/>
                    </when>
                    <when value="download_cgmlst">
                        <param name="k" type="integer" value="31" min="1" label="kmer size"/>
                        <param name="scheme" type="text" label="Species name or scheme ID"/>
                    </when>
                    <when value="download_enterobase">
                        <param name="k" type="integer" value="31" min="1" label="kmer size"/>
                        <param name="scheme" type="text" label="(S)almonella, (Y)ersinia, or (E)scherichia/Shigella"/>
                        <param name="type" type="text" label="'cg' or 'wg' for cgMLST or wgMLST, respectively"/>
                    </when>
                </conditional>

                <conditional name="mentalist_adv">
                    <param name="mentalist_adv_checkbox" type="boolean" label="Advanced MentaLiST Parameters"/>
                    <when value="true">
                        <param name="mutation_thresh" type="integer" min="1" value="6"
                               label="Maximum number of mutations when looking for novel alleles"/>
                        <param name="kt" type="integer" value="10" min="1"
                               label="Minimum number of times a kmer is seen to be considered present in the sample"/>
                    </when>
                    <when value="false"/>
                </conditional>
                <param name="mlst_fine" type="boolean" truevalue="--fine_mlst" falsevalue=""
                       label="Consider MLST as a fine datatype"/>
                <param name="mlst_threshold" type="integer" value="300" min="0"
                       label="Correlation clustering threshold for MLST"/>
            </when>
            <when value="false"/>
        </conditional>

        <conditional name="prince">
            <param name="prince_checkbox" type="boolean" label="Run PRINCE"/>
            <when value="true">
                <param name="cnv_fine" type="boolean" truevalue="--fine_cnv" falsevalue=""
                       label="Consider CNV as a fine datatype"/>
                <param name="cnv_threshold" type="integer" value="100" min="0"
                       label="Correlation clustering threshold for CNV"/>
            </when>
            <when value="false"/>
        </conditional>

        <conditional name="kwip">
            <param name="kwip_checkbox" type="boolean" label="Run kWIP"/>
            <when value="true">
                <conditional name="kwip_adv">
                    <param name="kwip_adv_checkbox" type="boolean" label="Advanced kWIP Parameters"/>
                    <when value="true">
                        <param name="N" type="integer" value="1" min="1" label="Number of tables"/>
                        <param name="x" type="float" value="1e9" min="1" label="Maximum table size"/>
                        <param name="ksize" type="integer" value="31" min="1" label="kmer size"/>
                        <param name="unique_kmers" type="integer" value="0" min="0"
                               label="Approximate number of unique kmers in the input set"/>
                    </when>
                    <when value="false"/>
                </conditional>
                <param name="kwip_fine" type="boolean" truevalue="--fine_kwip" falsevalue=""
                       label="Consider kWIP as a fine datatype"/>
                <param name="kwip_threshold" type="float" value="0.4" min="0"
                       label="Correlation clustering threshold for kWIP"/>
            </when>
            <when value="false"/>
        </conditional>

        <conditional name="spotyping">
            <param name="spotyping_checkbox" type="boolean" label="Run SpoTyping"/>
            <when value="true">
                <param name="spoligo_fine" type="boolean" truevalue="--fine_spoligo" falsevalue=""
                       label="Consider spoligotype as a fine datatype"/>
                <param name="spoligo_threshold" type="integer" value="8" min="0"
                       label="Correlation clustering threshold for Spoligotyping"/>
            </when>
            <when value="false"/>
        </conditional>

        <conditional name="adv_clustering">
            <param name="adv_clustering_checkbox" type="boolean"
                   label="Advanced settings for correlation and consensus clustering"/>
            <when value="true">
                <param name="all_constraints" type="boolean" checked="true" falsevalue="--less_constraints"
                       truevalue="" label="Use all constraints when performing clustering"/>
                <param name="method" type="text" value="C4" size="3"
                       label="`C4` or `ILP` method for clustering algorithm"/>
                <param name="presolve" type="boolean" checked="true" falsevalue="--no_presolve" truevalue=""
                       label="Perform presolving for clustering"/>
                <param name="visualize" type="boolean" checked="false" falsevalue="" truevalue="--visualize"
                       label="Visualize clusters"/>
            </when>
            <when value="false"/>
        </conditional>
            <!--</when>-->
        <!--</conditional>-->
    </inputs>

    <outputs>
        <data name="output" label="PathOGiST Config File" format="yaml" from_work_dir="config.yaml"/>
    </outputs>

    <help>
Creates a blank PathOGiST configuration file and fills it in with the tools,
tool options and clustering settings selected in the form. The populated
config.yaml is returned as the output dataset.
    </help>

    <citations>

    </citations>
</tool>
--- a/galaxy/tools/visualization/visualization.xml Fri Feb 01 16:56:03 2019 -0500 +++ b/galaxy/tools/visualization/visualization.xml Tue Mar 05 02:36:40 2019 -0500 @@ -5,7 +5,7 @@ </requirements> <command><![CDATA[ - PATHOGIST vis $distance $sample $pdf + PATHOGIST visualize $distance $sample $pdf ]]></command> <inputs>