changeset 3:67964b619af8 draft

Uploaded
author petr-novak
date Wed, 22 Apr 2020 02:29:47 -0400
parents 968f0867acc5
children d89d4fa8a17b
files repex_full_clustering.xml repex_tarean.xml tool_dependencies.xml
diffstat 3 files changed, 43 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/repex_full_clustering.xml	Mon Feb 03 02:34:46 2020 -0500
+++ b/repex_full_clustering.xml	Wed Apr 22 02:29:47 2020 -0400
@@ -1,4 +1,4 @@
-<tool id="repeatexplorer2" name="RepeatExplorer2 clustering: " version="2.3.7" >
+<tool id="repeatexplorer2" name="RepeatExplorer2 clustering: " version="2.3.8" >
     <stdio>
       <regex match="lastdb: can't open file: NEAR" source="stderr" level="fatal" description="Version of last is too old, use ver 956 or higher\n" />
       <regex match="Traceback" source="stderr" level="fatal" description="Unknown error" />
@@ -12,7 +12,7 @@
      <requirement type="package">imagemagick</requirement>
      <requirement type="package">mafft</requirement>
      <requirement type="package">blast</requirement>
-     <requirement type="package">diamond</requirement>
+     <requirement type="package" version="0.9.29" >diamond</requirement>
      <requirement type="package">blast-legacy</requirement>
      <requirement type="package">r-igraph</requirement>
      <requirement type="package">r-data.tree</requirement>
@@ -30,14 +30,14 @@
      <requirement type="package">r-rsqlite</requirement>
      <requirement type="package">r-rserve</requirement>
      <requirement type="package">bioconductor-biostrings</requirement>
-     <requirement type="package" version="2.3.7">repex_tarean</requirement>
+     <requirement type="package" version="2.3.8">repex_tarean</requirement>
      <requirement type="set_environment">REPEX</requirement>
      <requirement type="set_environment">REPEX_VERSION</requirement>
      <requirement type="package" version="0.9.1" >pyrserve</requirement>
    </requirements>
     <command >
       export PYTHONHASHSEED=0;
-      \${REPEX}/seqclust --sample ${sample} --output_dir=tarean_output --logfile=${log} --cleanup $paired --taxon $taxon
+      \${REPEX}/seqclust --sample ${read_sampling.sample} --output_dir=tarean_output --logfile=${log} --cleanup $paired --taxon $taxon
 
       #if $advanced_options.advanced:
       --mincl $advanced_options.size_threshold $advanced_options.keep_names $advanced_options.automatic_filtering  -D $advanced_options.blastx.options_blastx
@@ -82,7 +82,18 @@
 	       help="Input file must contain FASTA-formatted NGS reads. Illumina paired-end reads are recommended."/>
   <param name="paired" type="boolean" truevalue="--paired" falsevalue="" checked="True" label="Paired-end reads" help="If paired-end reads are used, left- and right-hand reads must be interlaced and all pairs must be complete. Example of the correct format is provided in the help below." />
  
-	<param name="sample" label="Sample size" type="integer" value="500000" min="10000"/>
+  <conditional name="read_sampling">
+    <param name="do_sampling" type="boolean" truevalue="true" falsevalue="false" checked="True" label="Use maximum processable sequence reads" />
+    <when value="true">
+      <!-- pass -->
+      <param name="sample" label="Sample size" hidden="True" type="integer" value="0" help="Number of sequence reads for clustering"/>
+    </when>
+    <when value="false">
+      <param name="sample" label="Sample size" type="integer" value="500000" min="10000" help="Number of reads for clustering"/>
+    </when>
+  </conditional>
+
+
   <param name="taxon" label="Select taxon and protein domain database version (REXdb)" type="select" help="Reference database of transposable element protein domains - REXdb - is used for annotation of repeats">
     <option value="VIRIDIPLANTAE3.0" selected="true">Viridiplantae version 3.0 </option>
     <option value="VIRIDIPLANTAE2.2" selected="true">Viridiplantae version 2.2</option>
@@ -117,11 +128,12 @@
 
       <conditional name="options">
         <param name="options" type="select" label="Similarity search options" help="Different similarity search parameters are used depending on the input data to adjust for differences in read length and error rate">
-          <option value="ILLUMINA" selected="true">Illumina reads, read length 100nt or more </option>
-          <option value="ILLUMINA_SHORT" selected="false">Illumina reads, shorter than 100nt (Do not use reads shorter than 50nt!) </option>
-          <option value="ILLUMINA_DUST_OFF" selected="false">Illumina reads, no masking of low complexity repeats  </option>
+          <option value="ILLUMINA" selected="true">Illumina reads, optimized for read length 100 nt or more (search parameters: mgblast, min PID 90, -W18) </option>
+          <option value="ILLUMINA_DUST_OFF" selected="false">Illumina reads, no masking of low complexity repeats (search parameters: mgblast, min PID 90, -W18, -F F) </option>
+          <option value="ILLUMINA_SENSITIVE_MGBLAST" selected="false">Illumina reads, sensitive search (search parameters: mgblast, min PID 80, -W8); slow, experimental feature!</option>
+          <option value="ILLUMINA_SENSITIVE_BLASTPLUS" selected="false">Illumina reads, more sensitive search (search parameters: blastn, min PID 80, -W6); extremely slow, experimental feature!</option>
           <option value="OXFORD_NANOPORE" selected="false">
-            Pseudo short reads simulated from Oxford Nanopore data (experimental feature)
+            Pseudo short reads simulated from Oxford Nanopore data, experimental feature!
           </option>
         </param>
       </conditional>
--- a/repex_tarean.xml	Mon Feb 03 02:34:46 2020 -0500
+++ b/repex_tarean.xml	Wed Apr 22 02:29:47 2020 -0400
@@ -1,4 +1,4 @@
-<tool id="tarean" name="Tandem Repeat Analyzer"  version="2.3.7" >
+<tool id="tarean" name="Tandem Repeat Analyzer"  version="2.3.8" >
     <stdio>
       <regex match="Traceback" source="stderr" level="fatal" description="Unknown error" />
       <regex match="error" source="stderr" level="fatal" description="Unknown error" />
@@ -10,7 +10,7 @@
       <requirement type="package">imagemagick</requirement>
       <requirement type="package">mafft</requirement>
       <requirement type="package">blast</requirement>
-      <requirement type="package">diamond</requirement>
+      <requirement type="package" version="0.9.29">diamond</requirement>
       <requirement type="package">blast-legacy</requirement>
       <requirement type="package">r-igraph</requirement>
       <requirement type="package">r-data.tree</requirement>
@@ -28,14 +28,14 @@
       <requirement type="package">r-rsqlite</requirement>
       <requirement type="package">r-rserve</requirement>
       <requirement type="package">bioconductor-biostrings</requirement>
-      <requirement type="package" version="2.3.7">repex_tarean</requirement>
+      <requirement type="package" version="2.3.8">repex_tarean</requirement>
       <requirement type="set_environment">REPEX</requirement>
       <requirement type="set_environment">REPEX_VERSION</requirement>
       <requirement type="package" version="0.9.1">pyrserve</requirement>
     </requirements>
   <command detect_errors="exit_code">
     export PYTHONHASHSEED=0;
-    \${REPEX}/seqclust --paired --sample ${sample} --output_dir=tarean_output --logfile=${log} --cleanup --tarean_mode
+    \${REPEX}/seqclust --paired --sample ${read_sampling.sample} --output_dir=tarean_output --logfile=${log} --cleanup --tarean_mode
     #if $advanced_options.advanced:
       --mincl $advanced_options.size_threshold $advanced_options.keep_names $advanced_options.automatic_filtering -M $advanced_options.merging
       #if $advanced_options.custom_library.options_custom_library :
@@ -75,7 +75,18 @@
   <inputs>
 	  <param name="FastaFile" label="Paired-end Illumina reads" type="data" format="fasta"
 	         help="Input file must contain FASTA-formatted interlaced read pairs from paired-end sequencing. All pairs must be complete. Example of the input data format is provided in the help below."/>
-	  <param name="sample" label="Sample size" type="integer" value="500000" min="10000"/>
+
+	  <conditional name="read_sampling">
+      <param name="do_sampling" type="boolean" truevalue="true" falsevalue="false" checked="True" label="Use maximum processable sequence reads" />
+      <when value="true">
+        <!-- pass -->
+        <param name="sample" label="Sample size" hidden="True" type="integer" value="0" help="Number of sequence reads for clustering"/>
+      </when>
+      <when value="false">
+            <param name="sample" label="Sample size" type="integer" value="500000" min="10000" help="Number of reads for clustering"/>
+      </when>
+    </conditional>
+
 
     <conditional name="advanced_options">
       <param name="advanced" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Advanced options" />
@@ -98,9 +109,10 @@
         <param name="keep_names" label="Keep original read names" type="boolean" truevalue="--keep_names" falsevalue="" checked="false" help="By default, reads are renamed using integers. Use this option if you want to keep original names."/>
          <conditional name="options">
            <param name="options" type="select" label="Similarity search options" help="Different similarity search parameters are used depending on the input data to adjust search to differences in read length and error rate">
-             <option value="ILLUMINA" selected="true">Illumina reads, read length 100nt or more </option>
-             <option value="ILLUMINA_SHORT" selected="false">Illumina reads, shorter than 100nt (Do not use reads shorter than 50nt!) </option>
-             <option value="ILLUMINA_DUST_OFF" selected="false">Illumina reads, no masking of low complexity repeats  </option>
+             <option value="ILLUMINA" selected="true">Illumina reads, optimized for read length 100 nt or more (parameters: mgblast, min PID 90, -W18) </option>
+             <option value="ILLUMINA_DUST_OFF" selected="false">Illumina reads, no masking of low complexity repeats (parameters: mgblast, min PID 90, -W18, -F F) </option>
+             <option value="ILLUMINA_SENSITIVE_MGBLAST" selected="false">Illumina reads, sensitive search (parameters: mgblast, min PID 80, -W8); slow, experimental feature!</option>
+             <option value="ILLUMINA_SENSITIVE_BLASTPLUS" selected="false">Illumina reads, more sensitive search (parameters: blastn, min PID 80, -W6); extremely slow, experimental feature!</option>
            </param>
          </conditional>
       </when>
--- a/tool_dependencies.xml	Mon Feb 03 02:34:46 2020 -0500
+++ b/tool_dependencies.xml	Wed Apr 22 02:29:47 2020 -0400
@@ -1,7 +1,7 @@
 <?xml version="1.0" ?>
 <tool_dependency>
-    <package name="repex_tarean" version="2.3.7">
-        <repository changeset_revision="73dd574fb04f" name="package_repex_tarean_1_0" owner="petr-novak" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu"/>
+    <package name="repex_tarean" version="2.3.8">
+        <repository changeset_revision="db7bc058aeca" name="package_repex_tarean_1_0" owner="petr-novak" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu"/>
         <readme>
       prepare repex database and scripts
     </readme>