Repository 'pathogist'
hg clone https://toolshed.g2.bx.psu.edu/repos/matnguyen/pathogist

Changeset 2:baf1e89b42eb (2019-03-05)
Previous changeset: 1:c1b6f5fbbcad (2019-02-01); next changeset: 3:56f1b7f06222 (2019-03-05)
Commit message:
Added config builder
modified:
galaxy/tools/concatenator/concatenator
galaxy/tools/concatenator/concatenator.xml
galaxy/tools/visualization/visualization.xml
added:
galaxy/tools/config_builder/config_builder
galaxy/tools/config_builder/config_builder.xml
b
diff -r c1b6f5fbbcad -r baf1e89b42eb galaxy/tools/concatenator/concatenator
--- a/galaxy/tools/concatenator/concatenator Fri Feb 01 16:56:03 2019 -0500
+++ b/galaxy/tools/concatenator/concatenator Tue Mar 05 02:36:40 2019 -0500
[
@@ -6,9 +6,18 @@
 parser = ap.ArgumentParser(prog='concatenater', conflict_handler='resolve',
                            description="Concatenates all accessions and their associated paths")
 
-input = parser.add_argument_group('Input', '')
-input.add_argument('-n', '--name', nargs='+', required=True, help="Sample name (accessions)")
-input.add_argument('-i', '--input', nargs='+', required=True, help="Paths to calls")
+subparser = parser.add_subparsers(dest='subcommand')
+
+variant_parser = subparser.add_parser(name='variant', help="Concatenate variant call files")
+variant_parser.add_argument('-n', '--name', nargs='+', required=True, help="Sample name (accessions)")
+variant_parser.add_argument('-i', '--input', nargs='+', required=True, help="Paths to calls")
+
+cluster_parser = subparser.add_parser('cluster')
+cluster_parser.add_argument('--mlst', metavar="MLST", type=str, help="MLST clusters/distance matrix")
+cluster_parser.add_argument('--snp', metavar="SNP", type=str, help="SNP clusters/distance matrix")
+cluster_parser.add_argument('--cnv', metavar="CNV", type=str, help="CNV clusters/distance matrix")
+cluster_parser.add_argument('--spoligo', metavar="SPOLIGO", type=str, help="Spoligotyping clusters/distance matrix")
+# cluster_parser.add_argument('--kwip', metavar="KWIP", type=str, help="Kwip clusters/distance matrix")
 
 if len(sys.argv) == 1:
     parser.print_usage()
@@ -17,5 +26,13 @@
 args = parser.parse_args()
 output = open('paths.txt', 'w')
 
-for index,path in enumerate(args.input):
-    output.write("%s=%s\n" % (args.name[index], path))
+if args.subcommand == 'variant':
+    for index,path in enumerate(args.input):
+        output.write("%s=%s\n" % (args.name[index], path))
+elif args.subcommand == 'cluster':
+    variant_types = [(args.mlst, 'MLST'), (args.snp, 'SNP'), (args.cnv, 'CNV'), (args.spoligo, 'spoligotyping')]
+    for variant in variant_types:
+        if variant[0] == None:
+            continue
+        else:
+            output.write("%s=%s\n" % (variant[1], variant[0]))
b
diff -r c1b6f5fbbcad -r baf1e89b42eb galaxy/tools/concatenator/concatenator.xml
--- a/galaxy/tools/concatenator/concatenator.xml Fri Feb 01 16:56:03 2019 -0500
+++ b/galaxy/tools/concatenator/concatenator.xml Tue Mar 05 02:36:40 2019 -0500
[
@@ -5,12 +5,70 @@
     </requirements>
 
     <command><![CDATA[
-        $__tool_directory__/concatenator -n #for $path in $paths# ${path.element_identifier} #end for#
-        -i #for $path in $paths# $path #end for#
+        $__tool_directory__/concatenator
+
+        #if $input_type.input_type_selector == "variant"
+            variant
+            -n #for $path in $input_type.paths# ${path.element_identifier} #end for#
+            -i #for $path in $input_type.paths# $path #end for#
+        #elif $input_type.input_type_selector == "clustering"
+            cluster
+            #if $input_type.mlst.mlst_checkbox
+                --mlst $input_type.mlst.mlst_path
+            #end if
+            #if $input_type.snp.snp_checkbox
+                --snp $input_type.snp.snp_path
+            #end if
+            #if $input_type.cnv.cnv_checkbox
+                --cnv $input_type.cnv.cnv_path
+            #end if
+            #if $input_type.spoligo.spoligo_checkbox
+                --spoligo $input_type.spoligo.spoligo_path
+            #end if
+         #end if
     ]]></command>
 
+
     <inputs>
-        <param name="paths" type="data" label="Collection of calls" help="" optional="False" multiple="True"/>
+        <conditional name="input_type">
+            <param name="input_type_selector" type="select" label="Concatenation of variant calls, or clustering files">
+                <option value="variant">Variant Call Files</option>
+                <option value="clustering">Clustering/Distance Matrix Files</option>
+            </param>
+            <when value="variant">
+                <param name="paths" type="data" label="Collection of calls" help="" optional="False" multiple="True"/>
+            </when>
+            <when value="clustering">
+                <conditional name="mlst">
+                    <param name="mlst_checkbox" type="boolean" label="Include MLST"/>
+                    <when value="true">
+                        <param name="mlst_path" type="data" label="MLST Clustering/Distance Matrix file"
+                               format="txt,tabular"/>
+                    </when>
+                </conditional>
+                <conditional name="snp">
+                    <param name="snp_checkbox" type="boolean" label="Include SNP"/>
+                    <when value="true">
+                        <param name="snp_path" type="data" label="SNP Clustering/Distance Matrix file"
+                               format="txt,tabular"/>
+                    </when>
+                </conditional>
+                <conditional name="cnv">
+                    <param name="cnv_checkbox" type="boolean" label="Include CNV"/>
+                    <when value="true">
+                        <param name="cnv_path" type="data" label="CNV Clustering/Distance Matrix file"
+                               format="txt,tabular"/>
+                    </when>
+                </conditional>
+                <conditional name="spoligo">
+                    <param name="spoligo_checkbox" type="boolean" label="Include Spoligotyping"/>
+                    <when value="true">
+                        <param name="spoligo_path" type="data" label="Spoligotyping Clustering/Distance Matrix file"
+                               format="txt,tabular"/>
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
     </inputs>
 
     <outputs>
b
diff -r c1b6f5fbbcad -r baf1e89b42eb galaxy/tools/config_builder/config_builder
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy/tools/config_builder/config_builder Tue Mar 05 02:36:40 2019 -0500
[
b'@@ -0,0 +1,185 @@\n+#!/usr/bin/env python\n+\n+import sys\n+import yaml\n+import argparse as ap\n+\n+parser = ap.ArgumentParser(prog=\'config_builder\', conflict_handler=\'resolve\',\n+                           description="Builds the config file for PathOGiST")\n+\n+parser.add_argument(\'yaml\', help="Blank config file")\n+parser.add_argument(\'forward\', help="File to list of forward reads")\n+parser.add_argument(\'reverse\', help="File to list of reverse reads")\n+parser.add_argument(\'output\', help="Output prefix for final consensus clustering and visualization")\n+\n+tools = parser.add_argument_group(\'Tools\')\n+tools.add_argument(\'--snippy\', action=\'store_true\', help="Run Snippy")\n+tools.add_argument(\'--kwip\', action=\'store_true\', help="Run Kwip")\n+tools.add_argument(\'--prince\', action=\'store_true\', help="Run Prince")\n+tools.add_argument(\'--spotyping\', action=\'store_true\', help="Run Spotyping")\n+tools.add_argument(\'--mentalist\', action=\'store_true\', help="Run Mentalist")\n+\n+mentalist = parser.add_argument_group(\'Mentalist\')\n+mentalist.add_argument(\'--local_file\', action=\'store_true\', help="Use local database")\n+mentalist.add_argument(\'--build_db\', action=\'store_true\', help="Build a database")\n+mentalist.add_argument(\'--download_pubmlst\', action=\'store_true\', help="Download pubMLST scheme")\n+mentalist.add_argument(\'--download_cgmlst\', action=\'store_true\', help="Download cgMLST scheme")\n+mentalist.add_argument(\'--download_enterobase\', action=\'store_true\', help="Download Enterobase scheme")\n+\n+mentalist.add_argument(\'--local_db\', metavar="STRING", help="Location of database")\n+mentalist.add_argument(\'--kmer\', metavar="INT", help="Kmer size")\n+mentalist.add_argument(\'--fasta_files\', metavar="STRING", help="Location of FASTA files for MLST scheme")\n+mentalist.add_argument(\'--profile\', metavar="STRING", help="Profile file for known genotypes")\n+mentalist.add_argument(\'--scheme\', 
metavar="STRING", help="Species name or scheme ID")\n+mentalist.add_argument(\'--entero_scheme\', metavar="STRING", help="(S)almonella, (Y)ersinia, or (E)scherichia/Shigella")\n+mentalist.add_argument(\'--entero_type\', metavar="STRING", help="\'cg\' or \'wg\' for cgMLST or wgMLST, respectively.")\n+mentalist.add_argument(\'--mutation_threshold\', metavar="INT", help="Maximum number of mutations when looking for novel "\n+                                                                   "alleles")\n+mentalist.add_argument(\'--kt\', metavar="INT", help="Minimum number of times a kmer is seen to be considered present "\n+                                                   "in the sample")\n+\n+kwip = parser.add_argument_group(\'Kwip\')\n+kwip.add_argument(\'--N\', metavar="INT", help="Number of tables")\n+kwip.add_argument(\'--x\', metavar="INT", help="Maximum table size")\n+kwip.add_argument(\'--ksize\', metavar="INT", help="kmer size to use")\n+kwip.add_argument(\'--unique_kmers\', metavar="INT", help="Approximate number of unique kmers in the input set")\n+\n+snippy = parser.add_argument_group(\'Snippy\')\n+snippy.add_argument(\'--reference\', metavar="STRING", help="Reference genome. 
Supports FASTA, GenBank, EMBL (not GFF)")\n+snippy.add_argument(\'--mapqual\', metavar="INT", help="Minimum read mapping quality to consider")\n+snippy.add_argument(\'--basequal\', metavar="INT", help="Minimum base quality to consider")\n+snippy.add_argument(\'--mincov\', metavar="INT", help="Minimum coverage of variant site")\n+snippy.add_argument(\'--minfrac\', metavar="FLOAT", help="Minimum proportion for variant evidence")\n+\n+clustering = parser.add_argument_group(\'Clustering\')\n+clustering.add_argument(\'--fine_snp\', action=\'store_true\', help="Use SNP as a fine datatype")\n+clustering.add_argument(\'--fine_mlst\', action=\'store_true\', help="Use MLST as a fine datatype")\n+clustering.add_argument(\'--fine_kwip\', action=\'store_true\', help="Use Kwip as a fine datatype")\n+clustering.add_argument(\'--fine_spoligo\', action=\'store_true\', help="Use Spoligotyping as a fine datatype")\n+clustering.add_argument(\'--fine_cnv\', action=\'store_true\', help="Use CNV as '..b'ard_reads\'] = args.forward\n+        list_doc[line][\'input_reads\'][\'reverse_reads\'] = args.reverse\n+\n+        if args.mentalist:\n+            if args.local_file:\n+                list_doc[line][\'mentalist\'][\'local_file\'][\'database\'] = args.local_db\n+            elif args.build_db:\n+                list_doc[line][\'mentalist\'][\'build_db\'][\'options\'][\'k\'] = args.kmer\n+                list_doc[line][\'mentalist\'][\'build_db\'][\'options\'][\'fasta_files\'] = args.fasta_files\n+                list_doc[line][\'mentalist\'][\'build_db\'][\'options\'][\'profile\'] = args.profile\n+            elif args.download_pubmlst:\n+                list_doc[line][\'mentalist\'][\'download_pubmlst\'][\'options\'][\'k\'] = args.kmer\n+                list_doc[line][\'mentalist\'][\'download_pubmlst\'][\'options\'][\'scheme\'] = args.scheme\n+            elif args.download_cgmlst:\n+                list_doc[line][\'mentalist\'][\'download_cgmlst\'][\'options\'][\'k\'] = args.kmer\n+ 
               list_doc[line][\'mentalist\'][\'download_cgmlst\'][\'options\'][\'scheme\'] = args.scheme\n+            elif args.download_enterobase:\n+                list_doc[line][\'mentalist\'][\'download_enterobase\'][\'options\'][\'k\'] = args.kmer\n+                list_doc[line][\'mentalist\'][\'download_enterobase\'][\'options\'][\'scheme\'] = args.entero_scheme\n+                list_doc[line][\'mentalist\'][\'download_enterobase\'][\'options\'][\'type\'] = args.entero_type\n+\n+            if args.mutation_threshold:\n+                list_doc[line][\'mentalist\'][\'call\'][\'options\'][\'mutation_threshold\'] = args.mutation_threshold\n+            if args.kt:\n+                list_doc[line][\'mentalist\'][\'call\'][\'options\'][\'kt\'] = args.kt\n+\n+        if args.kwip:\n+            if args.N:\n+                list_doc[line][\'kwip\'][\'khmer_options\'][\'N\'] = args.N\n+            if args.x:\n+                list_doc[line][\'kwip\'][\'khmer_options\'][\'x\'] = args.x\n+            if args.ksize:\n+                list_doc[line][\'kwip\'][\'khmer_options\'][\'ksize\'] = args.ksize\n+            if args.unique_kmers:\n+                list_doc[line][\'kwip\'][\'khmer_options\'][\'unique-kmers\'] = args.unique_kmers\n+\n+        if args.snippy:\n+            list_doc[line][\'snippy\'][\'options\'][\'reference\'] = args.reference\n+            if args.mapqual:\n+                list_doc[line][\'snippy\'][\'options\'][\'mapqual\'] = args.mapqual\n+            if args.basequal:\n+                list_doc[line][\'snippy\'][\'options\'][\'basequal\'] = args.basequal\n+            if args.mincov:\n+                list_doc[line][\'snippy\'][\'options\'][\'mincov\'] = args.mincov\n+            if args.minfrac:\n+                list_doc[line][\'snippy\'][\'options\'][\'minfrac\'] = args.minfrac\n+\n+    if line == \'clustering\':\n+        list_doc[line][\'output_prefix\'] = args.output\n+\n+        fine = []\n+        if args.fine_snp:\n+            
fine.append(\'SNP\')\n+        if args.fine_mlst:\n+            fine.append(\'MLST\')\n+        if args.fine_kwip:\n+            fine.append(\'kWIP\')\n+        if args.fine_spoligo:\n+            fine.append(\'spoligotyping\')\n+        if args.fine_cnv:\n+            fine.append(\'CNV\')\n+        list_doc[line][\'fine_clusterings\'] = fine\n+\n+        if args.snp_thresh:\n+            list_doc[line][\'thresholds\'][\'SNP\'] = args.snp_thresh\n+        if args.mlst_thresh:\n+            list_doc[line][\'thresholds\'][\'MLST\'] = args.snp_thresh\n+        if args.kwip_thresh:\n+            list_doc[line][\'thresholds\'][\'kWIP\'] = args.snp_thresh\n+        if args.spoligo_thresh:\n+            list_doc[line][\'thresholds\'][\'spoligotyping\'] = args.snp_thresh\n+        if args.cnv_thresh:\n+            list_doc[line][\'thresholds\'][\'CNV\'] = args.snp_thresh\n+\n+        if args.less_constraints:\n+            list_doc[line][\'all_constraints\'] = False\n+        if args.method:\n+            list_doc[line][\'method\'] = args.method\n+        if args.no_presolve:\n+            list_doc[line][\'presolve\'] = False\n+        if args.visualize:\n+            list_doc[line][\'visualize\'] = True\n+\n+with open("config.yaml", \'w\') as f:\n+    yaml.dump(list_doc, f)\n+\n'
b
diff -r c1b6f5fbbcad -r baf1e89b42eb galaxy/tools/config_builder/config_builder.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy/tools/config_builder/config_builder.xml Tue Mar 05 02:36:40 2019 -0500
[
b'@@ -0,0 +1,243 @@\n+<tool id="config_builder" name="PathOGiST - Config Builder" version="1.0.0">\n+    <description>: Creates and populates the config file to run PathOGiST</description>\n+    <requirements>\n+        <requirement type="package" version="0.2.3">pathogist</requirement>\n+    </requirements>\n+\n+    <command><![CDATA[\n+        PATHOGIST all config.yaml --new_config\n+\n+        $__tool_directory__/config_builder\n+\n+        #if snippy.snippy_checkbox\n+            --snippy\n+            --reference $snippy.reference\n+            #if snippy.snippy_adv.snippy_adv_checkbox\n+                --mapqual $snippy.mapqual\n+                --basequal $snippy.basequal\n+                --mincov $snippy.mincov\n+                --minfrac $snippy.minfrac\n+            #endif\n+            $snippy.snp_fine\n+            --snp_thresh $snippy.snp_threshold\n+        #endif\n+\n+        #if mentalist.mentalist_checkbox\n+            --mentalist\n+            #if mentalist.db_loc.db_selector == "local_file"\n+                --local_file\n+                --local_db $mentalist.mlst_database\n+            #endif\n+            #if mentalist.db_loc.db_selector == "build_db"\n+                --build_db\n+                --kmer $mentalist.k\n+                --fasta_files $mentalist.fasta_files\n+                --profile $mentalist.profile\n+            #endif\n+            #if mentalist.db_loc.db_selector == "download_pubmlst"\n+                --download_pubmlst\n+                --kmer $mentalist.k\n+                --scheme $mentalist.scheme\n+            #endif\n+            #if mentalist.db_loc.db_selector == "download_cgmlst"\n+                --download_cgmlst\n+                --kmer $mentalist.k\n+                --scheme $mentalist.scheme\n+            #endif\n+            #if mentalist.db_loc.db_selector == "download_enterobase"\n+                --download_enterobase $mentalist.k\n+                --entero_scheme $mentalist.scheme\n+                
--entero_type $mentalist.type\n+            #endif\n+            #if mentalist.mentalist_adv.mentalist_adv_checkbox\n+                --mutation_threshold $mentalist.mutation_thresh\n+                --kt $mentalist.kt\n+            #endif\n+            $mentalist.mlst_fine\n+            --mlst_thresh $mentalist.mlst_threshold\n+        #endif\n+\n+        #if prince.prince_checkbox\n+            --prince\n+            $prince.cnv_fine\n+            --cnv_thresh $prince.cnv_threshold\n+        #endif\n+\n+        #if kwip.kwip_checkbox\n+            --kwip\n+            #if kwip.kwip_adv.kwip_adv_checkbox\n+                --N $kwip.N\n+                --x $kwip.x\n+                --ksize $kwip.ksize\n+                --unique_kmers $kwip.unique_kmers\n+            #endif\n+            $kwip.kwip_fine\n+            --kwip_thresh $kwip.kwip_threshold\n+        #endif\n+\n+        #if spotyping.spotyping_checkbox\n+            --spotyping\n+            $spotyping.spoligo_fine\n+            --spoligo_thresh $spotyping.spoligo_threshold\n+        #endif\n+\n+        #if adv_clustering.adv_clustering_checkbox\n+            $adv_clustering.all_constraints\n+            --method $adv_clustering.method\n+            $adv_clustering.presolve\n+            $adv_clustering.visualize\n+        #endif\n+    ]]></command>\n+\n+\n+    <inputs>\n+        <!--<conditional name="task">-->\n+            <!--<param name="task_selector" type="select" label="Create Blank Config or Populate Config File">-->\n+                <!--<option value="create">Create Blank Config File</option>-->\n+                <!--<option value="populate">Populate Config File</option>-->\n+            <!--</param>-->\n+            <!--<when value="create">-->\n+\n+            <!--</when>-->\n+            <!--<when value="populate">-->\n+        <conditional name="snippy">\n+            <param name="snippy_checkbox" type="boolean" label="Run Snippy"/>\n+            <when value="true">\n+                <param 
name="reference" type="data" format="fasta,fa,fasta.gz,fa.gz" label="Reference Genome"/>\n+                <con'..b'"Maximum number of\n+                        mutations when looking for novel alleles"/>\n+                        <param name="kt" type="integer" value="10" min="1" label="Minimum number of times a\n+                        kmer is seen to be considered present in the sample"/>\n+                    </when>\n+                </conditional>\n+                <param name="mlst_fine" type="boolean" label="Consider MLST as a fine datatype"/>\n+                <param name="mlst_threshold" type="integer" value="300" min="0"\n+                       label="Correlation clustering threshold for MLST"/>\n+            </when>\n+        </conditional>\n+\n+        <conditional name="prince">\n+            <param name="prince_checkbox" type="boolean" label="Run PRINCE"/>\n+            <when value="true">\n+                <param name="cnv_fine" type="boolean" label="Consider CNV as a fine datatype"/>\n+                <param name="cnv_threshold" type="integer" value="100" min="0"\n+                       label="Correlation clustering threshold for CNV"/>\n+            </when>\n+        </conditional>\n+\n+        <conditional name="kwip">\n+            <param name="kwip_checkbox" type="boolean" label="Run kWIP"/>\n+            <when value="true">\n+                <conditional name="kwip_adv">\n+                    <param name="kwip_adv_checkbox" type="boolean" label="Advance kWIP Parameters"/>\n+                    <when value="true">\n+                        <param name="N" type="integer" value="1" min="1" label="Number of tables"/>\n+                        <param name="x" type="float" value="1e9" min="1" label="Maximum table size"/>\n+                        <param name="ksize" type="integer" value="31" min="1" label="kmer size"/>\n+                        <param name="unique_kmers" type="integer" value="0" min="0" label="Approximate number\n+                
        of unique kmers in the input set"/>\n+                    </when>\n+                </conditional>\n+                <param name="kwip_fine" type="boolean" label="Consider kWIP as a fine datatype"/>\n+                <param name="kwip_threshold" type="float" value="0.4" min="0"\n+                       label="Correlation clustering threshold for kWIP"/>\n+            </when>\n+        </conditional>\n+\n+        <conditional name="spotyping">\n+            <param name="spotyping_checkbox" type="boolean" label="Run SpoTyping"/>\n+            <when value="true">\n+                <param name="spoligo_fine" type="boolean" label="Consider spoligotype as a fine datatype"/>\n+                <param name="spoligo_threshold" type="integer" value="8" min="0"\n+                       label="Correlation clustering threshold for Spoligotyping"/>\n+            </when>\n+        </conditional>\n+\n+        <conditional name="adv_clustering">\n+            <param name="adv_clustering_checkbox" type="boolean"\n+                   label="Advanced settings for correlation and consensus clustering"/>\n+            <when value="true">\n+                <param name="all_constraints" type="boolean" checked="true" falsevalue="--less_constraints"\n+                       truevalue="" label="Use all constraints when performing clustering"/>\n+                <param name="method" type="text" value="C4" size="3"\n+                       label="`C4` or `ILP` method for clustering algorithm"/>\n+                <param name="presolve" type="boolean" checked="true" falsevalue="--no_presolve" truevalue=""\n+                       label="Perform presolving for clustering"/>\n+                <param name="visualize" type="boolean" checked="false" falsevalue="" truevalue="--visualize"\n+                       label="Visualize clusters"/>\n+            </when>\n+        </conditional>\n+            <!--</when>-->\n+        <!--</conditional>-->\n+    </inputs>\n+\n+    <outputs>\n+        
<data name="output" label="PathOGiST Config File" format="yaml" from_work_dir="config.yaml"/>\n+    </outputs>\n+\n+    <help>\n+\n+    </help>\n+\n+    <citations>\n+\n+    </citations>\n+</tool>\n'
b
diff -r c1b6f5fbbcad -r baf1e89b42eb galaxy/tools/visualization/visualization.xml
--- a/galaxy/tools/visualization/visualization.xml Fri Feb 01 16:56:03 2019 -0500
+++ b/galaxy/tools/visualization/visualization.xml Tue Mar 05 02:36:40 2019 -0500
[
@@ -5,7 +5,7 @@
     </requirements>
 
     <command><![CDATA[
-        PATHOGIST vis $distance $sample $pdf
+        PATHOGIST visualize $distance $sample $pdf
     ]]></command>
 
     <inputs>