Mercurial > repos > iuc > colabfold_alphafold

<tool id="colabfold_alphafold" name="Colabfold Alphafold" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <description>Predict protein structures with Colabfold</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
    ## Stage the MSA tarball: extract it into input_data without its leading path component.
    mkdir input_data &&
    tar -xmf '$input' --strip-components 1 -C input_data &&
    mkdir output &&
    colabfold_batch
    ## Optional numeric settings: each flag is emitted only when a value was supplied.
    ## Every guard uses the same string comparison so all optional fields are handled alike.
    #if str($advanced.num_recycles) != "":
        --num-recycle $advanced.num_recycles
    #end if
    #if str($advanced.recycle_early_stop_tolerance) != "":
        --recycle-early-stop-tolerance $advanced.recycle_early_stop_tolerance
    #end if
    #if str($advanced.num_ensemble) != "":
        --num-ensemble $advanced.num_ensemble
    #end if
    #if str($advanced.random_seed) != "":
        --random-seed $advanced.random_seed
    #end if
    #if str($advanced.num_seeds) != "":
        --num-seeds $advanced.num_seeds
    #end if
    #if str($advanced.num_models) != "":
        --num-models $advanced.num_models
    #end if
    ## Boolean parameters render as their flag (truevalue) or as an empty string (falsevalue).
    $advanced.use_dropout
    --max-msa $advanced.max_msa
    ## Relaxation flags are only passed when the AMBER conditional is set to "yes".
    #if $advanced.amber.use_amber == "yes":
        --amber
        --num-relax $advanced.amber.num_relaxed
    #end if
    $output_options.save_all
    $output_options.save_recycles
    $output_options.save_single_representations
    $output_options.save_pair_representations
    --jobname-prefix "galaxy"
    ## Positional arguments: input directory, then result directory.
    input_data
    output
    ## Sort results into one directory per file type; the outputs section
    ## discovers its collections from these directories.
    && cd output
    && mv *.a3m output.a3m
    && mkdir png_out
    && mkdir json_out
    && mkdir pdb_out
    && mv ./*.png png_out
    && mv ./*.json json_out
    && mv ./*.pdb pdb_out
    ## config.json is pulled back out so it is not left in the JSON collection directory.
    && mv json_out/config.json .
    ## Pickle and npy directories are only created when the matching save options are enabled.
    #if $output_options.save_all:
        && mkdir pickle_out
        && mv ./*.pickle pickle_out
    #end if
    #if $output_options.save_pair_representations or $output_options.save_single_representations:
        && mkdir npy_out
        && mv ./*.npy npy_out
    #end if
    ]]></command>
    <inputs>
        <!-- Tarball from the ColabFold MSA step; the command extracts it into the input_data directory. -->
        <param name="input" type="data" format="colab.tar" label="Tar file output from colabfold MSA tool"/>
        <section name="advanced" title="Advanced options">
            <!-- name stays num_recycles (used by the command template and tests); argument shows the flag the command actually emits. -->
            <param name="num_recycles" argument="--num-recycle" label="How many recycles to run?" type="integer" optional="true" help="Number of prediction recycles. Increasing recycles can improve the prediction quality but slows down the prediction."/>
            <param argument="--recycle-early-stop-tolerance" type="float" optional="true" min="0.0" max="1.0" help="Specify convergence criteria. Run recycles until the distance between recycles is within the given tolerance value."/>
            <param argument="--num-ensemble" label="Number of ensembles" type="integer" min="1" optional="true" help="Number of ensembles. The trunk of the network is run multiple times with different random choices for the MSA cluster centers. This can result in a better prediction at the cost of longer runtime."/>
            <param argument="--random-seed" label="Set seed" type="integer" min="0" optional="true"/>
            <param argument="--num-seeds" label="Number of seeds" type="integer" min="0" optional="true" help="Number of seeds to try iterated based on random seed"/>
            <!-- Optional like its siblings: the command only passes the flag when a value is supplied, and no default value is declared. -->
            <param argument="--num-models" label="Number of models to use for structure prediction" type="integer" min="1" max="5" optional="true" help="Reducing the number of models speeds up the prediction but results in lower quality"/>
            <param name="max_msa" label="Max msa" type="select" help="Defines the ratio of max-seq to max-extra-seq for one run. Enable dropouts and increase the number of seeds to sample predictions from uncertainty of the model. Decrease to increase uncertainty">
                <!-- <option value="auto">auto</option> -->
                <option value="512:1024">512:1024</option>
                <option value="256:512">256:512</option>
                <option value="64:128">64:128</option>
                <option value="32:64">32:64</option>
                <option value="16:32">16:32</option>
            </param>
            <param argument="--use-dropout" label="Use dropouts" type="boolean" truevalue="--use-dropout" falsevalue="" help="Activate dropouts during inference to sample from the uncertainty of the models."/>
            <!-- The relaxation count is only shown, and only passed on the command line, when AMBER is enabled. -->
            <conditional name="amber">
                <param name="use_amber" label="Use AMBER" type="select" help="Use AMBER force field for structure refinement and side chain optimization">
                    <option value="yes">Use AMBER</option>
                    <option value="no">Don't use AMBER</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <!-- name stays num_relaxed (used by the command template and tests); argument shows the flag the command actually emits. -->
                    <param name="num_relaxed" argument="--num-relax" label="How many top-ranked structures to relax using AMBER?" type="integer" min="0" value="0" help="Increased values may increase runtime"/>
                </when>
            </conditional>
        </section>
        <!-- Add for second version of tool for batch jobs -->
        <!-- <param name="stop_at" label="Stop score" type="float" min="0.0" optional="true" help="Compute models until pLDDT (single chain) or pTM-score (multimer) > threshold is reached. This speeds up prediction by running less models for easier queries."/> -->
        <!-- Each boolean renders as its flag when checked; save_all and the two representation options also gate optional output collections. -->
        <section name="output_options" title="Output Options">
            <param argument="--save-all" type="boolean" label="Save raw outputs from model to a pickle file" truevalue="--save-all" falsevalue=""/>
            <param argument="--save-recycles" type="boolean" label="Save all intermediate predictions at each recycle iteration" truevalue="--save-recycles" falsevalue=""/>
            <param argument="--save-single-representations" type="boolean" label="Save the single representation embeddings of all models." truevalue="--save-single-representations" falsevalue=""/>
            <param argument="--save-pair-representations" type="boolean" label="Save the pair representation embeddings of all models." truevalue="--save-pair-representations" falsevalue=""/>
        </section>
    </inputs>
    <outputs>
        <!-- One list collection per result type; each is discovered from the directory the command sorts that file type into. -->
        <collection name="png_files"
                    type="list"
                    format="png"
                    label="${tool.name} on ${on_string}: Figures">
            <discover_datasets directory="output/png_out" pattern="__name_and_ext__" format="png"/>
        </collection>
        <collection name="json_files"
                    type="list"
                    format="json"
                    label="${tool.name} on ${on_string}: JSON predictions">
            <discover_datasets directory="output/json_out" pattern="__name_and_ext__" format="json"/>
        </collection>
        <collection name="pdb"
                    type="list"
                    format="pdb"
                    label="${tool.name} on ${on_string}: PDB predictions">
            <discover_datasets directory="output/pdb_out" pattern="__name_and_ext__" format="pdb"/>
        </collection>
        <!-- Only created when raw model outputs were requested. -->
        <collection name="pickle"
                    type="list"
                    format="pickle"
                    label="${tool.name} on ${on_string}: Pickle file outputs">
            <filter>output_options['save_all']</filter>
            <discover_datasets directory="output/pickle_out" pattern="__name_and_ext__" format="pickle"/>
        </collection>
        <!-- Only created when single or pair representation embeddings were requested. -->
        <collection name="npy"
                    type="list"
                    format="npy"
                    label="${tool.name} on ${on_string}: Numpy embeddings">
            <filter>output_options['save_single_representations'] or output_options['save_pair_representations']</filter>
            <discover_datasets directory="output/npy_out" pattern="__name_and_ext__" format="npy"/>
        </collection>
        <!-- The command renames the a3m file in the result directory to output.a3m. -->
        <data name="a3m_out"
              format="a3m"
              from_work_dir="output/output.a3m"
              label="${tool.name} on ${on_string}: a3m file"/>
    </outputs>
    <tests>
        <!-- Command-line rendering test: every option is set and only the generated command is asserted.
             The job itself is expected to exit with code 1. NOTE(review): presumably because the prediction
             cannot actually run in the test environment; confirm the reason. -->
        <test expect_failure="true" expect_exit_code="1">
            <param name="input" value="input.tar"/>
            <section name="advanced">
                <param name="num_recycles" value="4"/>
                <param name="recycle_early_stop_tolerance" value="0.4"/>
                <param name="num_ensemble" value="1"/>
                <param name="random_seed" value="43"/>
                <param name="num_seeds" value="2"/>
                <param name="num_models" value="2"/>
                <param name="max_msa" value="64:128"/>
                <!-- Booleans are set with true/false; the flag text comes from each parameter's truevalue. -->
                <param name="use_dropout" value="true"/>
                <conditional name="amber">
                    <param name="use_amber" value="yes"/>
                    <param name="num_relaxed" value="0"/>
                </conditional>
            </section>
            <section name="output_options">
                <param name="save_all" value="true"/>
                <param name="save_recycles" value="true"/>
                <param name="save_single_representations" value="true"/>
                <param name="save_pair_representations" value="true"/>
            </section>
            <assert_command>
                <has_text text="colabfold_batch --num-recycle 4 --recycle-early-stop-tolerance 0.4 --num-ensemble 1 --random-seed 43"/>
                <has_text text="--num-seeds 2 --num-models 2 --use-dropout --max-msa 64:128 --amber --num-relax 0"/>
                <has_text text="--save-all --save-recycles --save-single-representations --save-pair-representations"/>
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
        Run the structure prediction (folding) step on the output of the ColabFold MSA tool.
    ]]></help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Sun, 24 Mar 2024 22:10:50 +0000
parents
children	5f7a01f8ea28