diff variant_select.xml @ 6:35c00763cb5c draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gatk2 commit cf399638ebca4250bcc15f468238a9964de97b33
author iuc
date Mon, 04 Jun 2018 05:38:15 -0400
parents f244b8209eb8
children
line wrap: on
line diff
--- a/variant_select.xml	Mon Aug 25 17:44:53 2014 -0400
+++ b/variant_select.xml	Mon Jun 04 05:38:15 2018 -0400
@@ -1,9 +1,10 @@
-<tool id="gatk2_variant_select" name="Select Variants" version="@VERSION@.0">
+<tool id="gatk2_variant_select" name="Select Variants" version="@VERSION@.2">
   <description>from VCF files</description>
-  <expand macro="requirements" />
   <macros>
     <import>gatk2_macros.xml</import>
   </macros>
+  <expand macro="requirements" />
+  <expand macro="version_command" />
   <command interpreter="python">
     #from binascii import hexlify
 
@@ -17,7 +18,7 @@
 
     @THREADS@
     -o "${output_vcf}"
-    
+
     #if $reference_source.reference_source_selector != "history":
         -R "${reference_source.ref_file.fields.path}"
     #end if
@@ -29,74 +30,72 @@
     #if $input_discordance:
         --discordance "${input_discordance}"
     #end if
-    
+
     #for $exclude_sample_name in $exclude_sample_name_repeat:
         --exclude_sample_name "${exclude_sample_name.exclude_sample_name}"
     #end for
-    
+
     ${exclude_filtered}
-    
+
     #for $sample_name in $sample_name_repeat:
         --sample_name "${sample_name.sample_name}"
     #end for
     '
-    
+
     #for $select_expressions in $select_expressions_repeat:
         #set $select_expression = "--select_expressions '%s'" % ( str( $select_expressions.select_expressions ) )
         -o '${ hexlify( $select_expression ) }'
     #end for
-    
+
     ##start tool specific options
     #if str( $analysis_param_type.analysis_param_type_selector ) == 'advanced':
         -p '
-          #for $exclude_sample_file in $analysis_param_type.exclude_sample_file_repeat:
-              --exclude_sample_file "${exclude_sample_file.exclude_sample_file}"
+          #for $esf in $analysis_param_type.exclude_sample_file:
+              --exclude_sample_file "${esf}"
           #end for
-          
-          #for $sample_file in $analysis_param_type.sample_file_repeat:
-              --sample_file "${ample_file.sample_file}"
+
+          #for $sf in $analysis_param_type.sample_file:
+              --sample_file "${sf}"
           #end for
-          
+
           #if $analysis_param_type.input_keep_ids:
               --keepIDs "${analysis_param_type.input_keep_ids}"
           #end if
-          
+
           ${analysis_param_type.keep_original_AC}
-          
+
           ${analysis_param_type.mendelian_violation}
-          
+
           --mendelianViolationQualThreshold "${analysis_param_type.mendelian_violation_qual_threshold}"
-          
+
           --remove_fraction_genotypes "${analysis_param_type.remove_fraction_genotypes}"
-          
+
           --restrictAllelesTo "${analysis_param_type.restrict_alleles_to}"
-          
+
           #if str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_fraction':
               --select_random_fraction "${analysis_param_type.select_random_type.select_random_fraction}"
           #elif str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_number':
               --select_random_number "${analysis_param_type.select_random_type.select_random_number}"
           #end if
-          
+
           #if $analysis_param_type.select_type_to_include:
               #for $type_to_include in str( $analysis_param_type.select_type_to_include ).split( ',' ):
                   --selectTypeToInclude "${type_to_include}"
               #end for
           #end if
-          
+
           ${analysis_param_type.exclude_non_variants}
         '
-        
+
         #for $sample_expressions in $analysis_param_type.sample_expressions_repeat:
             #set $sample_expression = "--sample_expressions '%s'" % ( str( $sample_expressions.sample_expressions ) )
             -o '${ hexlify( $sample_expression ) }'
         #end for
-        
+
     #end if
     ##end tool specific options
-    
+
     #include source=$standard_gatk_options#
-    
-    
   </command>
   <inputs>
     <conditional name="reference_source">
@@ -115,7 +114,7 @@
         <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &amp;lt;reference_sequence&amp;gt;" />
       </when>
     </conditional>
-    
+
     <repeat name="select_expressions_repeat" title="Criteria to use when selecting the data" help="-select,--select_expressions &amp;lt;select_expressions&amp;gt;">
         <param name="select_expressions" type="text" label="JEXL expression">
             <sanitizer>
@@ -126,49 +125,44 @@
             </sanitizer>
         </param>
     </repeat>
-    
+
     <param name="input_concordance" type="data" format="vcf" label="Output variants that were also called in this comparison track" optional="True" help="-conc,--concordance &amp;lt;concordance&amp;gt;"/>
     <param name="input_discordance" type="data" format="vcf" label="Output variants that were not called in this comparison track" optional="True" help="-disc,--discordance &amp;lt;discordance&amp;gt;"/>
-    
+
     <repeat name="sample_name_repeat" title="Include Samples by name" help="-sn,--sample_name &amp;lt;sample_name&amp;gt;">
         <param name="sample_name" type="text" label="Include genotypes from this sample"/>
     </repeat>
-    
+
     <repeat name="exclude_sample_name_repeat" title="Exclude Samples by name" help="-xl_sn,--exclude_sample_name &amp;lt;exclude_sample_name&amp;gt;">
         <param name="exclude_sample_name" type="text" label="Exclude genotypes from this sample"/>
     </repeat>
-    
+
     <param name="exclude_filtered" type="boolean" truevalue="--excludeFiltered" falsevalue="" label="Don't include filtered loci in the analysis" help="-ef,--excludeFiltered" />
-    
+
     <expand macro="gatk_param_type_conditional" />
-    
-    
+
     <expand macro="analysis_type_conditional">
-        
-        <repeat name="exclude_sample_file_repeat" title="Exclude Samples by file" help="-xl_sf,--exclude_sample_file &amp;lt;exclude_sample_file&amp;gt;">
-            <param name="exclude_sample_file" type="data" format="txt" label="File containing a list of samples (one per line) to exclude"/>
-        </repeat>
-        
-        <repeat name="sample_file_repeat" title="Samples by file" help="-sf,--sample_file &amp;lt;sample_file&amp;gt;">
-            <param name="sample_file" type="data" format="txt" label="File containing a list of samples (one per line) to include" />
-        </repeat>
-        
-        <param name="input_keep_ids" type="data" format="text" label="Only emit sites whose ID is found in this file" optional="True" help="-IDs,--keepIDs &amp;lt;keepIDs&amp;gt;"/>
-        
+
+        <param name="exclude_sample_file" type="data" format="txt" multiple="True" label="Exclude Samples by file" help="File containing a list of samples (one per line) to exclude (-xl_sf,--exclude_sample_file &amp;lt;exclude_sample_file&amp;gt;)"/>
+
+        <param name="sample_file" type="data" format="txt" multiple="True" label="Samples by file"  help="File containing a list of samples (one per line) to include (-sf,--sample_file &amp;lt;sample_file&amp;gt;)"/>
+
+        <param name="input_keep_ids" type="data" format="txt" label="Only emit sites whose ID is found in this file" optional="True" help="-IDs,--keepIDs &amp;lt;keepIDs&amp;gt;"/>
+
         <param name="keep_original_AC" type="boolean" truevalue="--keepOriginalAC" falsevalue="" label="Don't update the AC, AF, or AN values in the INFO field after selecting" help="-keepOriginalAC,--keepOriginalAC" />
-        
+
         <param name="mendelian_violation" type="boolean" truevalue="--mendelianViolation" falsevalue="" label="output mendelian violation sites only" help="-mv,--mendelianViolation" />
-        
+
         <param name="mendelian_violation_qual_threshold" type="float" label="Minimum genotype QUAL score for each trio member required to accept a site as a mendelian violation" value="0" help="-mvq,--mendelianViolationQualThreshold &amp;lt;mendelianViolationQualThreshold&amp;gt;" />
-        
+
         <param name="remove_fraction_genotypes" type="float" label="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall" value="0" min="0" max="1" help="-fractionGenotypes,--remove_fraction_genotypes &amp;lt;remove_fraction_genotypes&amp;gt;" />
-        
+
         <param name="restrict_alleles_to" type="select" label="Select only variants of a particular allelicity" help="-restrictAllelesTo,--restrictAllelesTo &amp;lt;restrictAllelesTo&amp;gt;">
             <option value="ALL" selected="True">ALL</option>
             <option value="MULTIALLELIC">MULTIALLELIC</option>
             <option value="BIALLELIC">BIALLELIC</option>
         </param>
-        
+
         <repeat name="sample_expressions_repeat" title="Regular expression to select many samples from the ROD tracks provided" help="-se,--sample_expressions &amp;lt;sample_expressions&amp;gt;">
             <param name="sample_expressions" type="text" label="Regular expression">
                 <sanitizer>
@@ -179,7 +173,7 @@
                 </sanitizer>
             </param>
         </repeat>
-        
+
         <conditional name="select_random_type">
           <param name="select_random_type_selector" type="select" label="Select a random subset of variants">
             <option value="select_all" selected="True">Use all variants</option>
@@ -196,9 +190,9 @@
             <param name="select_random_number" type="integer" value="0" label="Count" help="-number,--select_random_number &amp;lt;select_random_number&amp;gt;" />
           </when>
         </conditional>
-        
+
         <param name="exclude_non_variants" type="boolean" truevalue="--excludeNonVariants" falsevalue="" label="Don't include loci found to be non-variant after the subsetting procedure" help="-env,--excludeNonVariants" />
-        
+
         <param name="select_type_to_include" type="select" label="Select only a certain type of variants from the input file" multiple="True" display="checkboxes" help="-selectType,--selectTypeToInclude &amp;lt;selectTypeToInclude&amp;gt;">
             <option value="INDEL">INDEL</option>
             <option value="SNP">SNP</option>
@@ -208,7 +202,7 @@
             <option value="NO_VARIATION">NO_VARIATION</option>
         </param>
     </expand>
-    
+
   </inputs>
   <outputs>
     <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string} (Variant File)" />
@@ -227,14 +221,14 @@
           <param name="sample_name_repeat" value="0" />
           <param name="gatk_param_type_selector" value="basic" />
           <param name="analysis_param_type_selector" value="basic" />
-          <output name="output_vcf" file="gatk/gatk_variant_select/gatk_variant_select_out_1.vcf" lines_diff="4" /> 
+          <output name="output_vcf" file="gatk/gatk_variant_select/gatk_variant_select_out_1.vcf" lines_diff="4" />
           <output name="output_log" file="gatk/gatk_variant_select/gatk_variant_select_out_1.log.contains" compare="contains" />
       </test>
   </tests>
   <help>
 **What it does**
 
-Often, a VCF containing many samples and/or variants will need to be subset in order to facilitate certain analyses (e.g. comparing and contrasting cases vs. controls; extracting variant or non-variant loci that meet certain requirements, displaying just a few samples in a browser like IGV, etc.). SelectVariants can be used for this purpose. Given a single VCF file, one or more samples can be extracted from the file (based on a complete sample name or a pattern match). Variants can be further selected by specifying criteria for inclusion, i.e. "DP &gt; 1000" (depth of coverage greater than 1000x), "AF &lt; 0.25" (sites with allele frequency less than 0.25). These JEXL expressions are documented in the `Using JEXL expressions section &lt;http://gatkforums.broadinstitute.org/discussion/1255/what-are-jexl-expressions-and-how-can-i-use-them-with-the-gatk&gt;`_. One can optionally include concordance or discordance tracks for use in selecting overlapping variants. 
+Often, a VCF containing many samples and/or variants will need to be subset in order to facilitate certain analyses (e.g. comparing and contrasting cases vs. controls; extracting variant or non-variant loci that meet certain requirements, displaying just a few samples in a browser like IGV, etc.). SelectVariants can be used for this purpose. Given a single VCF file, one or more samples can be extracted from the file (based on a complete sample name or a pattern match). Variants can be further selected by specifying criteria for inclusion, i.e. "DP &gt; 1000" (depth of coverage greater than 1000x), "AF &lt; 0.25" (sites with allele frequency less than 0.25). These JEXL expressions are documented in the `Using JEXL expressions section &lt;http://gatkforums.broadinstitute.org/discussion/1255/what-are-jexl-expressions-and-how-can-i-use-them-with-the-gatk&gt;`_. One can optionally include concordance or discordance tracks for use in selecting overlapping variants.
 
 For more information on using the SelectVariants module, see this `tool specific page &lt;http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_variantutils_SelectVariants.html&gt;`_.