Mercurial > repos > iuc > semibin_train
diff train.xml @ 6:3215ab8a94de draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/semibin commit 2c08a2e49a2844efe92340c5a9e9c8323e4a33d6
| author | iuc |
|---|---|
| date | Tue, 28 Oct 2025 08:20:57 +0000 |
| parents | 2344bc30a326 |
| children |
line wrap: on
line diff
--- a/train.xml Sun Aug 10 11:40:20 2025 +0000 +++ b/train.xml Tue Oct 28 08:20:57 2025 +0000 @@ -11,40 +11,37 @@ <command detect_errors="exit_code"><![CDATA[ #import re #if $mode.select == 'single' - #if $mode.input_fasta.ext.endswith(".gz") -gunzip -c '$mode.input_fasta' > 'contigs.fasta' && - #else -ln -s '$mode.input_fasta' 'contigs.fasta' && - #end if +ln -s '$mode.input_fasta' 'contigs.$input_fasta.ext' && #else #for $e in $mode.input_fasta - #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($e.element_identifier)) - #if $e.ext.endswith(".gz") -gunzip -c '$e' > '${identifier}.fasta' && - #else -ln -s '$e' '${identifier}.fasta' && - #end if + #set $identifier = re.sub('[^\s\w\-]', '_', str($e.element_identifier)) +ln -s '$e' '${identifier}.$e.ext' && #end for #end if SemiBin2 train_semi #if $mode.select == 'single' - --input-fasta 'contigs.fasta' + --input-fasta 'contigs.$input_fasta.ext' --data '$mode.data' --data-split '$mode.data_split' --cannot-link '$mode.cannot_link' #else --train-from-many + --input-fasta #for $e in $mode.input_fasta - --input-fasta '${e.element_identifier}.fasta' - #end for - #for $e in $mode.data - --data '$e' + #set $identifier = re.sub('[^\s\w\-]', '_', str($e.element_identifier)) + '${identifier}.$e.ext' #end for - #for $e in $mode.data_split - --data-split '$e' + --data + #for $e in $mode.data + '$e' #end for + --data-split + #for $e in $mode.data_split + '$e' + #end for + --cannot-link #for $e in $mode.cannot_link - --cannot-link '$e' + '$e' #end for #end if --output 'output' @@ -66,13 +63,13 @@ <option value="several">From multiple samples (train model across several samples can get better pre-trained model for single-sample binning)</option> </param> <when value="single"> - <param argument="--input-fasta" type="data" format="fasta,fasta.gz" label="Contig sequences"/> + <param argument="--input-fasta" type="data" format="fasta,fasta.gz,fasta.bz2" label="Contig sequences"/> <param argument="--data" type="data" format="csv" label="Train data"/> <param argument="--data-split" type="data" format="csv" label="Split train data"/> <param argument="--cannot-link" type="data" format="txt" label="Cannot-link constraints"/> </when> <when value="several"> - <param argument="--input-fasta" type="data" multiple="true" format="fasta,fasta.gz" label="Contig sequences"/> + <param argument="--input-fasta" type="data" multiple="true" format="fasta,fasta.gz,fasta.bz2" label="Contig sequences"/> <param argument="--data" type="data" format="csv" multiple="true" label="Train data"/> <param argument="--data-split" type="data" format="csv" multiple="true" label="Split train data"/> <param argument="--cannot-link" type="data" format="txt" multiple="true" label="Cannot-link constraints"/> @@ -112,6 +109,50 @@ </test> <test expect_num_outputs="1"> <conditional name="mode"> + <param name="select" value="single"/> + <param name="input_fasta" ftype="fasta.bz2" value="input_single.fasta.bz2"/> + <param name="data" ftype="csv" value="data.csv"/> + <param name="data_split" ftype="csv" value="data_split.csv"/> + <param name="cannot_link" ftype="txt" value="cannot.txt"/> + </conditional> + <conditional name="min_len"> + <param name="method" value="min-len"/> + <param name="min_len" value="2500" /> + </conditional> + <param name="orf_finder" value="prodigal"/> + <param name="random_seed" value="0"/> + <param name="epoches" value="1"/> + <param name="batch_size" value="2048"/> + <output name="model" ftype="h5"> + <assert_contents> + <has_size value="3119000" delta="2000" /> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <conditional name="mode"> + <param name="select" value="single"/> + <param name="input_fasta" ftype="fasta.gz" value="input_single.fasta.gz"/> + <param name="data" ftype="csv" value="data.csv"/> + <param name="data_split" ftype="csv" value="data_split.csv"/> + <param name="cannot_link" ftype="txt" value="cannot.txt"/> + </conditional> + <conditional name="min_len"> + <param name="method" value="min-len"/> + <param name="min_len" value="2500" /> + </conditional> + <param name="orf_finder" value="prodigal"/> + <param name="random_seed" value="0"/> + <param name="epoches" value="1"/> + <param name="batch_size" value="2048"/> + <output name="model" ftype="h5"> + <assert_contents> + <has_size value="3119000" delta="2000" /> + </assert_contents> + </output> + </test> + <test expect_num_outputs="1"> + <conditional name="mode"> <param name="select" value="several"/> <param name="input_fasta" ftype="fasta" value="input_single.fasta,input_single_2.fasta,input_single_3.fasta"/> <param name="data" ftype="csv" value="data.csv,data.csv,data.csv"/>
