Repository revision
8:77a1e60fd1de

Repository 'gemini_query'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/gemini_query

GEMINI query tool metadata
Miscellaneous
GEMINI query
Querying the GEMINI database
gemini_query
toolshed.g2.bx.psu.edu/repos/iuc/gemini_query/gemini_query/0.20.1+galaxy2
0.20.1+galaxy2
gemini --version
True
Version lineage of this tool (guids ordered most recent to oldest)
toolshed.g2.bx.psu.edu/repos/iuc/gemini_query/gemini_query/0.20.1+galaxy2 (this tool)
toolshed.g2.bx.psu.edu/repos/iuc/gemini_query/gemini_query/0.20.1+galaxy1
toolshed.g2.bx.psu.edu/repos/iuc/gemini_query/gemini_query/0.20.1
toolshed.g2.bx.psu.edu/repos/iuc/gemini_query/gemini_query/0.18.1.1
toolshed.g2.bx.psu.edu/repos/iuc/gemini_query/gemini_query/0.18.1.0
gemini_query
Requirements (dependencies defined in the <requirements> tag set)
name version type
gemini 0.20.1 package
Additional information about this tool
gemini query
            ${query.oformat.report.header}
            ${query.oformat.report.dgidb}

            #for $i in $query.filter_by_genotype:
                #set $multiline_sql_expr = str($i.gt_filter)
                #set $cmdln_param = "--gt-filter"
                
        #set $sql_expr = str($multiline_sql_expr).strip()
        #if str($sql_expr):
            #set $sql_expr = $sql_expr.replace('\r\n', '\n')
            #set $sql_expr = $sql_expr.replace('\r', '\n')
            #set $sql_expr = $sql_expr.replace('\\\n', ' ')
            $cmdln_param '$sql_expr'
        #end if
    
            #end for

            #for $i in $query.filter_by_sample:
                $i.family_wise
                #if int($i.min_kindreds) > 0:
                    --min-kindreds ${i.min_kindreds}
                #end if
                ${i.in}
                #set $multiline_sql_expr = str($i.sample_filter)
                #set $cmdln_param = "--sample-filter"
                
        #set $sql_expr = str($multiline_sql_expr).strip()
        #if str($sql_expr):
            #set $sql_expr = $sql_expr.replace('\r\n', '\n')
            #set $sql_expr = $sql_expr.replace('\r', '\n')
            #set $sql_expr = $sql_expr.replace('\\\n', ' ')
            $cmdln_param '$sql_expr'
        #end if
    
            #end for

            #if str($query.oformat.report.format) == 'with_samples':
                #set $sample_delim = str($query.oformat.report.sample_delim) or ','
                --show-samples --sample-delim '$sample_delim'
            #elif str($query.oformat.report.format) == 'with_samples_flattened':
                --show-samples --format sampledetail
            #elif str($query.oformat.report.format) == 'with_families':
                #set $sample_delim = str($query.oformat.report.sample_delim) or ','
                --show-families --sample-delim '$sample_delim'
            #elif str($query.oformat.report.format) == 'carrier_summary':
                --carrier-summary-by-phenotype
                #if str($query.oformat.report.phenotype).strip():
                    '${query.oformat.report.phenotype}'
                #else:
                    affected
                #end if
            #elif str($query.oformat.report.format) != 'maf':
                --format ${query.oformat.report.format}
            #end if

            #if str($query.interface) == 'basic':
                ## build the SQL query string from its components
                #if str($query.oformat.report.format) in ('vcf', 'tped'):
                    #set $cols = "*"
                #elif str($query.oformat.report.format) == 'maf':
                    #if str($query.oformat.report.tumor_sample_name):
                        #set $gt_string = 'gt_alt_depths.{0}, gt_ref_depths.{0}, gts.{0}'.format(str($query.oformat.report.tumor_sample_name))
                        #if str($query.oformat.report.normal_sample_name):
                            #set $gt_string = $gt_string + ', gt_alt_depths.{0}, gt_ref_depths.{0}, gts.{0}'.format(str($query.oformat.report.normal_sample_name))
                        #end if
                    #else:
                        #set $gt_string = '(gt_alt_depths).(*), (gt_ref_depths).(*), (gts).(*)'
                    #end if
                    #if str($query.oformat.report.mutation_status.status_select) == 'custom':
                        ## Need to quote the user-specified mutation status for the SQL query
                        #set $mutation_status = '"%s"' % str($query.oformat.report.mutation_status.status_custom)
                    #elif str($query.oformat.report.mutation_status.status_select) == 'expression':
                        ## For custom expressions, it is up to the user to ensure valid syntax
                        #set $mutation_status = str($query.oformat.report.mutation_status.status_expression)
                    #else:
                        ## The user selected a fixed value from the list, but
                        ## it still needs quoting.
                        #set $mutation_status = '"%s"' % str($query.oformat.report.mutation_status.status_select)
                    #end if
                    #set $cols = 'ifnull(g1.gene, "unknown") AS Hugo_Symbol, ifnull(ifnull(g2.entrez_id, g1.entrez_id), "") AS Entrez_Gene_Id, "" AS Center, "37" AS NCBI_Build, replace(v.chrom, "chr", "") AS Chromosome, v.start + 1 AS Start_Position, v.end AS End_Position, "" as Strand, v.impact_so AS Variant_Classification, ifnull(nullif(v.type, "indel"), v.sub_type) AS Variant_Type, v.ref AS Reference_Allele, "${tumor_seq_allele1}" AS Tumor_Seq_Allele1, "${tumor_seq_allele2}" AS Tumor_Seq_Allele2, ifnull(v.rs_ids, ifnull(nullif(ifnull(nullif(v.in_omim = 0 AND v.cosmic_ids IS NULL AND v.max_aaf_all = -1, 1), "novel"), 0), "")) AS dbSNP_RS, "" AS dbSNP_Val_Status, printf("%s", "' + str($query.oformat.report.tumor_sample_id) + '") AS Tumor_Sample_Barcode, printf("%s", "' + str($query.oformat.report.norm_sample_id) + '") AS Matched_Norm_Sample_Barcode, "${match_norm_seq_allele1}" AS Match_Norm_Seq_Allele1, "${match_norm_seq_allele2}" AS Match_Norm_Seq_Allele2, "" AS Tumor_Validation_Allele1, "" AS Tumor_Validation_Allele2, "" AS Match_Norm_Validation_Allele1, "" AS Match_Norm_Validation_Allele2, "" AS Verification_Status, "" AS Validation_Status, ' + $mutation_status + ' AS Mutation_Status, "" AS Sequencing_Phase, "" AS Sequence_Source, "" AS Validation_Method, "" AS Score, "" AS BAM_File, "" AS Sequencer, ifnull(nullif(v.aa_change, ""), "p.=") AS HGVSp_Short, "${t_alt_count}" AS t_alt_count, "${t_ref_count}" AS t_ref_count, "${n_alt_count}" AS n_alt_count, "${n_ref_count}" AS n_ref_count, v.alt, ' + $gt_string
                #else:
                    #set $report = $query.oformat.report.report
                    
        #if str($report.report_selector) == 'full':
            #set cols = "*"
        #else:
            #if $report.columns and str($report.columns) != '':
                #set $cols = str($report.columns)
            #else
                #set $cols = ''
            #end if
            #if str($report.extra_cols).strip():
                #if $cols:
                    #set $cols = $cols + ', ' + str($report.extra_cols)
                #else:
                    #set $cols = str($report.extra_cols)
                #end if
            #end if
            #if not $cols:
                #set $cols = "variant_id, gene"
            #end if
        #end if
    
                #end if
                #set $q = "SELECT %s FROM variants" % $cols
                #if str($query.oformat.report.format) == 'maf':
                    #set $q = $q + ' v LEFT JOIN (SELECT DISTINCT gene, is_hgnc, hgnc_id, entrez_id, chrom FROM gene_detailed) g1 ON v.gene = g1.gene AND v.chrom = g1.chrom LEFT JOIN (SELECT DISTINCT gene, is_hgnc, hgnc_id, entrez_id, transcript, chrom, ensembl_gene_id FROM gene_detailed) g2 ON g1.gene = g2.gene AND (v.transcript = g2.transcript OR v.transcript=g2.ensembl_gene_id)'
                #end if

                #set $where_clause_elements = []
                #if str($query.filter).strip():
                    #silent $where_clause_elements.append(str($query.filter).strip())
                #end if

                #set $regions = $query.regions
                
        #set $region_elements = []
        #for $r in $regions:
            ## The actual chromosome name needs to be single-quoted
            ## in SQL, so we need to quote the single quotes like the
            ## sanitize_query macro would if the whole was a parameter.
            #set $r_elements = ["chrom = '\"'\"'%s'\"'\"'" % str($r.chrom).strip()]
            #if str($r.start).strip():
                #silent $r_elements.append("start >= %d" % int($r.start))
            #end if
            #if str($r.stop).strip():
                #silent $r_elements.append("end <= %d" % int($r.stop))
            #end if
            #silent $region_elements.append("(%s)" % " AND ".join($r_elements))
        #end for
    
    
                #if $region_elements:
                    #silent $where_clause_elements.append(" OR ".join($region_elements))
                #end if
                #if $where_clause_elements:
                    #set $q = $q + " WHERE " + " AND ".join($where_clause_elements)
                #end if
                #if str($query.oformat.report.format) == 'maf':
                    #set $q = $q + " GROUP BY v.variant_id"
                #end if
                #if str($query.oformat.report.order_by).strip():
                    #set $q = $q + " ORDER BY " + str($query.oformat.report.order_by).strip() + str($query.oformat.report.sort_order)
                #end if
            #else
                ## The user entered the SQL query string directly.
                #set $q = str($query.q)
            #end if

            #set $multiline_sql_expr = $q
            #set $cmdln_param = "-q"
            
        #set $sql_expr = str($multiline_sql_expr).strip()
        #if str($sql_expr):
            #set $sql_expr = $sql_expr.replace('\r\n', '\n')
            #set $sql_expr = $sql_expr.replace('\r', '\n')
            #set $sql_expr = $sql_expr.replace('\\\n', ' ')
            $cmdln_param '$sql_expr'
        #end if
    

            '$infile'
            #if str($query.oformat.report.format) == 'maf':
                > temp.txt && python '$__tool_directory__/gemini_mafify.py' temp.txt '${query.oformat.report.tumor_sample_name}' '${query.oformat.report.normal_sample_name}'
            #end if
            > '$outfile'

    
None
False
Functional tests
name inputs outputs required files
Test-1 infile: gemini_load_result1.db
query|q: select chrom,start from variants limit 10
query|interface: advanced
name: value
gemini_load_result1.db
value
Test-2 infile: gemini_load_result1.db
query|oformat|report|tumor_sample_id: test
query|oformat|report|format: maf
query|interface: basic
name: value
gemini_load_result1.db
value
Test-3 infile: gemini_load_result1.db
query|regions_0|chrom: chr1
query|regions_0|start: 0
query|interface: basic
name: value
gemini_load_result1.db
value