Mercurial > repos > iuc > semibin_train
view train.xml @ 2:8b77643cf479 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit 13abac83068b126399ec415141007a48c2efaa84
author | iuc |
---|---|
date | Fri, 10 Nov 2023 20:49:30 +0000 |
parents | 43df7a4c33a2 |
children |
line wrap: on
line source
<tool id="semibin_train" name="SemiBin: Train" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper around `SemiBin2 train_semi`.
        Tokens (@...@) and <expand> macros are imported from macros.xml.
    -->
    <description>the semi-supervised deep learning model</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <expand macro="version"/>
    <command detect_errors="exit_code"><![CDATA[
#import re

## Stage the contigs under local file names, decompressing gzipped datasets.
#if $mode.select == 'single'
    #if $mode.input_fasta.ext.endswith(".gz")
gunzip -c '$mode.input_fasta' > 'contigs.fasta' &&
    #else
ln -s '$mode.input_fasta' 'contigs.fasta' &&
    #end if
#else
    #for $e in $mode.input_fasta
        ## Every character that is not whitespace, a word character, '-' or '.' becomes '_'.
        #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($e.element_identifier))
        #if $e.ext.endswith(".gz")
gunzip -c '$e' > '${identifier}.fasta' &&
        #else
ln -s '$e' '${identifier}.fasta' &&
        #end if
    #end for
#end if

SemiBin2 train_semi
#if $mode.select == 'single'
    --input-fasta 'contigs.fasta'
    --data '$mode.data'
    --data-split '$mode.data_split'
    --cannot-link '$mode.cannot_link'
#else
    --train-from-many
    #for $e in $mode.input_fasta
        ## Must be the same sanitized name that the staging loop above created.
        #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($e.element_identifier))
        --input-fasta '${identifier}.fasta'
    #end for
    #for $e in $mode.data
        --data '$e'
    #end for
    #for $e in $mode.data_split
        --data-split '$e'
    #end for
    #for $e in $mode.cannot_link
        --cannot-link '$e'
    #end for
#end if
    --output 'output'
    --threads \${GALAXY_SLOTS:-1}
    --epoches $epoches
    --batch-size $batch_size
    --random-seed $random_seed
#if $min_len.method == 'min-len'
    --min-len $min_len.min_len
#else if $min_len.method == 'ratio'
    --ratio $min_len.ratio
#end if
    --orf-finder '$orf_finder'
    ]]></command>
    <inputs>
        <conditional name="mode">
            <!-- "select" is a wrapper-internal switch, not a SemiBin2 option, hence name= rather than argument= -->
            <param name="select" type="select" label="Mode to train the models">
                <option value="single" selected="true">From one sample</option>
                <option value="several">From multiple samples (train model across several samples can get better pre-trained model for single-sample binning)</option>
            </param>
            <when value="single">
                <param argument="--input-fasta" type="data" format="fasta,fasta.gz" label="Contig sequences"/>
                <param argument="--data" type="data" format="csv" label="Train data"/>
                <param argument="--data-split" type="data" format="csv" label="Split train data"/>
                <param argument="--cannot-link" type="data" format="txt" label="Cannot-link constraints"/>
            </when>
            <when value="several">
                <param argument="--input-fasta" type="data" multiple="true" format="fasta,fasta.gz" label="Contig sequences"/>
                <param argument="--data" type="data" format="csv" multiple="true" label="Train data"/>
                <param argument="--data-split" type="data" format="csv" multiple="true" label="Split train data"/>
                <param argument="--cannot-link" type="data" format="txt" multiple="true" label="Cannot-link constraints"/>
            </when>
        </conditional>
        <expand macro="min_len"/>
        <expand macro="orf-finder"/>
        <expand macro="random-seed"/>
        <expand macro="epoches"/>
        <expand macro="batch-size"/>
    </inputs>
    <outputs>
        <expand macro="train_output"/>
    </outputs>
    <tests>
        <!-- Single-sample training with an absolute minimum contig length -->
        <test expect_num_outputs="1">
            <conditional name="mode">
                <param name="select" value="single"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
                <param name="data" ftype="csv" value="data.csv"/>
                <param name="data_split" ftype="csv" value="data_split.csv"/>
                <param name="cannot_link" ftype="txt" value="cannot.txt"/>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="min-len"/>
                <param name="min_len" value="2500"/>
            </conditional>
            <param name="orf_finder" value="prodigal"/>
            <param name="random_seed" value="0"/>
            <param name="epoches" value="1"/>
            <param name="batch_size" value="2048"/>
            <output name="model" ftype="h5">
                <assert_contents>
                    <has_size value="3119000" delta="2000"/>
                </assert_contents>
            </output>
        </test>
        <!-- Multi-sample training (train-from-many) with a ratio-based minimum length -->
        <test expect_num_outputs="1">
            <conditional name="mode">
                <param name="select" value="several"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta,input_single_2.fasta,input_single_3.fasta"/>
                <param name="data" ftype="csv" value="data.csv,data.csv,data.csv"/>
                <param name="data_split" ftype="csv" value="data_split.csv,data_split.csv,data_split.csv"/>
                <param name="cannot_link" ftype="txt" value="cannot.txt,cannot.txt,cannot.txt"/>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="ratio"/>
                <param name="ratio" value="0.05"/>
            </conditional>
            <param name="orf_finder" value="prodigal"/>
            <param name="random_seed" value="0"/>
            <param name="epoches" value="20"/>
            <param name="batch_size" value="2048"/>
            <output name="model" ftype="h5">
                <assert_contents>
                    <has_size value="3119000" delta="2000"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

This tool trains the semi-supervised deep learning model

Inputs
======

@HELP_INPUT_FASTA@
@HELP_CANNOT@
@HELP_DATA@

Outputs
=======

@HELP_MODEL@
    ]]></help>
    <expand macro="citations"/>
</tool>