view discosnp_RAD.xml @ 3:2af5a5ca9c87 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/colibread commit 0ed02865f048ad7471ea79509c0e23162bc29166"
author iuc
date Mon, 17 Aug 2020 10:37:37 -0400
parents 9ee3bf979757
children 923df36f6592
line wrap: on
line source

<?xml version='1.0' encoding='utf-8'?>
<tool profile="16.04" id="discosnp_rad" name="DiscoSnpRAD" version="@DISCOSNP_VERSION@">
    <!-- Short tagline shown next to the tool name. -->
    <description>discovering polymorphism from raw unassembled RADSeq NGS reads.</description>
    <!-- macros.xml supplies the macros and @TOKENS@ referenced throughout this wrapper
         (it is a separate file, so their definitions are not visible here). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expands the "discosnp_requirements" macro; presumably the package requirements
         of the wrapper (confirm in macros.xml). -->
    <expand macro="discosnp_requirements" />
    <command><![CDATA[
        ## ------------------------------------------------------------------
        ## Command template (Cheetah). Steps:
        ##   1. stage the read sets according to the selected input type
        ##   2. locate the directory that contains short_read_connector.sh
        ##   3. run run_discoSnpRad.sh with the user supplied options
        ## The @...@ tokens are substituted before Cheetah runs; they are
        ## expected to come from macros.xml (not visible in this file).
        ## ------------------------------------------------------------------

        ## single read sets only
        #if str( $input_type_options.input_type) == "single"
            @discosnp_single_reads@
        ## paired read sets only
        #else if str( $input_type_options.input_type) == "paired"
            @discosnp_paired_reads@
        ## mix of single and paired read sets: stage both
        #else
            @discosnp_single_reads@
            @discosnp_paired_reads@
        #end if

        ## Resolve the directory holding short_read_connector.sh; it is handed
        ## to run_discoSnpRad.sh through -S below.
        ## "\$" is escaped so that the shell, not Cheetah, performs the
        ## substitution. Both substitutions are quoted so that an installation
        ## path containing whitespace is not word-split.
        CONNECTOR_PATH="\$(which short_read_connector.sh)" &&
        CONNECTOR_DIR="\$(dirname "\$CONNECTOR_PATH")" &&

        run_discoSnpRad.sh
        @discosnp_inputs@
        -S "\$CONNECTOR_DIR"
        --max_size_cluster ${max_size_cluster}
        -a ${a}
        ## boolean parameter: renders as "--high_precision" or as an empty string
        ${high_precision}
        --max_missing ${max_missing}
        --min_rank ${min_rank}

        ## use the slots granted by Galaxy, falling back to 1 when unset
        --max_threads \${GALAXY_SLOTS:-1}

  ]]></command>

  <inputs>

      <!-- Read-set selection and any other shared inputs come from the
           "discosnp_inputs" macro (defined outside this file). -->
      <expand macro="discosnp_inputs" />

      <!-- Boolean flag: emits the flag text when checked, nothing otherwise. -->
      <param argument="--high_precision" type="boolean" checked="false" truevalue="--high_precision" falsevalue="" label="Lower recall / higher precision mode." help="With this parameter no symmetrical crossroads may be traversed during bubble detection (by default up to 5 symmetrical crossroads may be traversed during bubble detection)." />

      <param argument="-a" type="integer" label="Maximal size of ambiguity of INDELs" value="20" help="INDELS whose ambiguity is higher than this value are not output"/>

      <param argument="--max_size_cluster" type="integer" label="Discards cluster of variants containing more than this number of variants." value="150"/>

      <!-- The value is a proportion, so the form restricts it to the closed
           interval [0, 1] instead of accepting any float. -->
      <param argument="--max_missing" type="float" min="0" max="1" label="Remove variants with more than this proportion of missing values" value="0.95" help="Default 0.95, removes variants detected in 5% and less populations"/>

      <!-- NOTE(review): the rank threshold is presumably also limited to [0, 1];
           no bounds are enforced until that is confirmed against the DiscoSnp
           documentation. -->
      <param argument="--min_rank" type="float" label="Remove variants whose rank is smaller than this threshold" value="0.4"/>

      <!-- Remaining shared options come from the "discosnp_options" macro
           (defined outside this file). -->
      <expand macro="discosnp_options" />

  </inputs>

  <!-- Two result datasets, each collected from the job working directory through
       the file-name pattern given in from_work_dir. -->
  <outputs>
      <!-- Clustered variants (VCF). -->
      <data format="vcf"
            name="vcf"
            label="VCF with ${tool.name} on $on_string"
            from_work_dir="discoRad_*_clustered.vcf"/>
      <!-- Raw sequences (FASTA). -->
      <data format="fasta"
            name="fasta"
            label="Fasta with ${tool.name} on $on_string"
            from_work_dir="discoRad_*_raw.fa"/>
  </outputs>

  <tests>
      <!-- Single read-set run on the discosnpRAD/loci_reads fixture. Checks the
           FASTA output against a reference file, spot-checks the VCF content and
           verifies that the three filter options reach the command line. -->
      <test>
          <conditional name="input_type_options">
              <param name="input_type" value="single"/>
              <param name="list_reads" ftype="fasta" value="discosnpRAD/loci_reads"/>
          </conditional>

          <!-- shared options (declared outside this file) -->
          <param name="k" value="31"/>
          <param name="b" value="2"/>
          <param name="D" value="0"/>
          <param name="P" value="4"/>

          <!-- options declared by this wrapper -->
          <param name="min_rank" value="0.0"/>
          <param name="max_missing" value="0.95"/>
          <param name="max_size_cluster" value="50"/>

          <output name="fasta" file="discosnpRAD/fasta.fa"/>
          <output name="vcf">
              <assert_contents>
                  <has_n_lines n="31"/>
                  <has_text text="0/1:84:604,15,604:42,42:0,0"/>
                  <has_text text="Ty=SNP;Rk=0.0;UL=23;UR=39;CL=23;CR=39;Genome=.;Sd=.;Cluster=0;ClSize=2"/>
              </assert_contents>
          </output>

          <assert_command>
              <has_text text="--min_rank 0.0"/>
              <has_text text="--max_missing 0.95"/>
              <has_text text="--max_size_cluster 50"/>
          </assert_command>
      </test>
  </tests>

  <help><![CDATA[

**Description**

The discoSnp software is designed to discover Single Nucleotide Polymorphisms (SNPs) from one or more raw read sets obtained with Next Generation Sequencers (NGS).

DiscoSnpRad uses options specific to RAD-Seq: branching strategy, kind of extensions, abundance threshold, and kind of bubbles to be found.

Moreover, it clusters variants per locus by calling the ``discoRAD_finalization.sh`` pipeline. Cluster information is reported in the final VCF file.

Note that, since release 2.0.6 of DiscoSnp++, the tool also detects close SNPs and indels.

@discosnp_help@


  ]]></help>
    <!-- Wraps the tool-specific citation in the shared "citations" macro; both
         macros are defined outside this file. -->
    <expand macro="citations">
        <expand macro="discosnp_citation" />
    </expand>
</tool>