Mercurial > repos > iuc > semibin_train
diff train.xml @ 0:08cc1cd58f38 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
author | iuc |
---|---|
date | Fri, 14 Oct 2022 21:42:14 +0000 |
parents | |
children | 43df7a4c33a2 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/train.xml Fri Oct 14 21:42:14 2022 +0000 @@ -0,0 +1,154 @@ +<tool id="semibin_train" name="SemiBin: Train" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description> + the semi-supervised deep learning model + </description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="biotools"/> + <expand macro="requirements"/> + <expand macro="version"/> + <command detect_errors="exit_code"><![CDATA[ +#import re +#if $mode.select == 'single' + #if $mode.input_fasta.ext.endswith(".gz") +gunzip -c '$mode.input_fasta' > 'contigs.fasta' && + #else +ln -s '$mode.input_fasta' 'contigs.fasta' && + #end if +#else + #for $e in $mode.input_fasta + #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($e.element_identifier)) + #if $e.ext.endswith(".gz") +gunzip -c '$e' > '${identifier}.fasta' && + #else +ln -s '$e' '${identifier}.fasta' && + #end if + #end for +#end if +SemiBin train + --mode '$mode.select' +#if $mode.select == 'single' + --input-fasta 'contigs.fasta' + --data '$mode.data' + --data-split '$mode.data_split' + --cannot-link '$mode.cannot_link' +#else + #for $e in $mode.input_fasta + --input-fasta '${e.element_identifier}.fasta' + #end for + #for $e in $mode.data + --data '$e' + #end for + #for $e in $mode.data_split + --data-split '$e' + #end for + #for $e in $mode.cannot_link + --cannot-link '$e' + #end for +#end if + --output 'output' + --threads \${GALAXY_SLOTS:-1} + --epoches $epoches + --batch-size $batch_size + --random-seed $random_seed +#if $min_len.method == 'min-len' + --min-len $min_len.min_len +#else if $min_len.method == 'ratio' + --ratio $min_len.ratio +#end if + --orf-finder '$orf_finder' + ]]></command> + <inputs> + <conditional name="mode"> + <param argument="select" type="select" label="Mode to train the models"> + <option value="single" selected="true">From one sample</option> + <option value="several">From multiple samples (train model across several samples can get better pre-trained model for single-sample binning)</option> + </param> + <when value="single"> + <param argument="--input-fasta" type="data" format="fasta,fasta.gz" label="Contig sequences"/> + <param argument="--data" type="data" format="csv" label="Train data"/> + <param argument="--data-split" type="data" format="csv" label="Split train data"/> + <param argument="--cannot-link" type="data" format="txt" label="Cannot-link constraints"/> + </when> + <when value="several"> + <param argument="--input-fasta" type="data" multiple="true" format="fasta,fasta.gz" label="Contig sequences"/> + <param argument="--data" type="data" format="csv" multiple="true" label="Train data"/> + <param argument="--data-split" type="data" format="csv" multiple="true" label="Split train data"/> + <param argument="--cannot-link" type="data" format="txt" multiple="true" label="Cannot-link constraints"/> + </when> + </conditional> + <expand macro="min_len"/> + <expand macro="orf-finder"/> + <expand macro="random-seed"/> + <expand macro="epoches"/> + <expand macro="batch-size"/> + </inputs> + <outputs> + <expand macro="train_output"/> + </outputs> + <tests> + <test expect_num_outputs="1"> + <conditional name="mode"> + <param name="select" value="single"/> + <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> + <param name="data" ftype="csv" value="data.csv"/> + <param name="data_split" ftype="csv" value="data_split.csv"/> + <param name="cannot_link" ftype="txt" value="cannot.txt"/> + </conditional> + <conditional name="min_len"> + <param name="method" value="min-len"/> + <param name="min_len" value="2500" /> + </conditional> + <param name="orf_finder" value="prodigal"/> + <param name="random-seed" value="0"/> + <param name="epoches" value="1"/> + <param name="batch_size" value="2048"/> + <output name="model" ftype="h5"> + <assert_contents> + <has_size value="3119000" delta="1000" /> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <conditional name="mode"> + <param name="select" value="several"/> + <param name="input_fasta" ftype="fasta" value="input_single.fasta,input_single_2.fasta,input_single_3.fasta"/> + <param name="data" ftype="csv" value="data.csv,data.csv,data.csv"/> + <param name="data_split" ftype="csv" value="data_split.csv,data_split.csv,data_split.csv"/> + <param name="cannot_link" ftype="txt" value="cannot.txt,cannot.txt,cannot.txt"/> + </conditional> + <conditional name="min_len"> + <param name="method" value="ratio"/> + <param name="ratio" value="0.05" /> + </conditional> + <param name="orf_finder" value="prodigal"/> + <param name="random-seed" value="0"/> + <param name="epoches" value="20"/> + <param name="batch_size" value="2048"/> + <output name="model" ftype="h5"> + <assert_contents> + <has_size value="3119000" delta="1000" /> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +@HELP_HEADER@ + +This tool trains the semi-supervised deep learning model + +Inputs +====== + +@HELP_INPUT_FASTA@ +@HELP_CANNOT@ +@HELP_DATA@ + +Outputs +======= + +@HELP_MODEL@ + ]]></help> + <expand macro="citations"/> +</tool> \ No newline at end of file