diff colabfold_alphafold.xml @ 0:66cd8f416122 draft

planemo upload for repository https://github.com/sokrypton/ColabFold commit a95dcf30e8d801e477726fb997cbc55261731879
author iuc
date Sun, 24 Mar 2024 22:10:50 +0000
parents
children 5f7a01f8ea28
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/colabfold_alphafold.xml	Sun Mar 24 22:10:50 2024 +0000
@@ -0,0 +1,156 @@
+<tool id="colabfold_alphafold" name="Colabfold Alphafold" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
+    <description>Predict protein structures with Colabfold</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="biotools"/>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[ 
+    #import os
+    mkdir input_data &&
+    tar -xmf '$input' --strip-components 1 -C input_data &&
+    mkdir output &&
+    colabfold_batch
+    #if str($advanced.num_recycles)!="":
+        --num-recycle $advanced.num_recycles
+    #end if
+    #if str($advanced.recycle_early_stop_tolerance)!="":
+        --recycle-early-stop-tolerance $advanced.recycle_early_stop_tolerance
+    #end if
+    #if $advanced.num_ensemble:
+        --num-ensemble $advanced.num_ensemble 
+    #end if
+    #if str($advanced.random_seed)!="":
+        --random-seed $advanced.random_seed 
+    #end if
+    #if str($advanced.num_seeds)!="":
+        --num-seeds $advanced.num_seeds 
+    #end if    
+    #if $advanced.num_models:
+        --num-models $advanced.num_models
+    #end if
+    $advanced.use_dropout
+    --max-msa $advanced.max_msa
+    #if $advanced.amber.use_amber == "yes":
+        --amber
+        --num-relax $advanced.amber.num_relaxed
+    #end if
+    $output_options.save_all
+    $output_options.save_recycles
+    $output_options.save_single_representations
+    $output_options.save_pair_representations
+    --jobname-prefix "galaxy"
+    input_data
+    output
+    && cd output
+    && mv *.a3m output.a3m
+    && mkdir png_out
+    && mkdir json_out
+    && mkdir pdb_out
+    && mv ./*.png png_out
+    && mv ./*.json json_out
+    && mv ./*.pdb pdb_out
+    && mv json_out/config.json .
+    #if  $output_options.save_all:
+        && mkdir pickle_out    
+        && mv ./*.pickle pickle_out
+    #end if
+    #if  $output_options.save_pair_representations or $output_options.save_single_representations:
+        && mkdir npy_out    
+        && mv ./*.npy npy_out
+    #end if
+
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="colab.tar" label="Tar file output from colabfold MSA tool"/>
+        <section name="advanced" title="Advanced options">
+            <param argument="--num-recycles" label="How many recycles to run?" type="integer" optional="true" help="Number of prediction recycles. Increasing recycles can improve the prediction quality but slows down the prediction."/>
+            <param argument="--recycle-early-stop-tolerance" type="float" optional="true" min="0.0" max="1.0" help="Specify convergence criteria. Run recycles until the distance between recycles is within the given tolerance value."/>
+            <param argument='--num-ensemble' label="Number of ensembles" type="integer" min="1" optional="true" help="Number of ensembles. The trunk of the network is run multiple times with different random choices for the MSA cluster centers. This can result in a better prediction at the cost of longer runtime."/>
+            <param argument="--random-seed" label="Set seed" type="integer" min="0" optional="true"/>
+            <param argument="--num-seeds" label="Number of seeds" type="integer" min="0" optional="true" help="Number of seeds to try iterated based on random seed"/>
+            <param argument="--num-models" label="Number of models to use for structure prediction" type="integer" min="1" max="5" help="Reducing the number of models speeds up the prediction but results in lower quality"/>
+            <param name="max_msa" label="Max msa" type="select" help="Defines the ratio of max-seq to max-extra-seq for one run. Enable dropouts and increase the number of seeds to sample predictions from uncertainty of the model. Decrease to increase uncertainity">
+                <!-- <option value="auto">auto</option> -->
+                <option value="512:1024">512:1024</option>
+                <option value="256:512">256:512</option>
+                <option value="64:128">64:128</option>
+                <option value="32:64">32:64</option>
+                <option value="16:32">16:32</option>
+            </param>
+            <param argument="--use-dropout" label="Use dropouts" type="boolean" truevalue="--use-dropout" falsevalue="" help="Activate dropouts during inference to sample from the uncertainty of the models."/>
+            <conditional name="amber">
+                <param name="use_amber" label="Use AMBER" type="select" help="Use AMBER force field for structure refinement and side chain optimization">
+                    <option value="yes">Use AMBER</option>
+                    <option value="no">Don't use AMBER</option>
+                </param>
+                <when value="no"/>
+                <when value="yes">
+                    <param argument="--num-relaxed" label="How many top-ranked structures to relax using AMBER?" type="integer" min="0" value="0" help="Increased values may increase runtime"/>
+                </when>
+            </conditional>
+        </section>
+        <!-- Add for second version of tool for batch jobs -->
+        <!-- <param name="stop_at" label="Stop score" type="float" min="0.0" optional="true" help="Compute models until pLDDT (single chain) or pTM-score (multimer) > threshold is reached. This speeds up prediction by running less models for easier queries."/> -->
+        <section name="output_options" title="Output Options">
+            <param argument="--save-all" type="boolean" label="Save raw outputs from model to a pickle file" truevalue="--save-all" falsevalue=""/>
+            <param argument="--save-recycles" type="boolean" label="Save all intermediate predictions at each recycle iteration" truevalue="--save-recycles" falsevalue=""/>
+            <param argument="--save-single-representations" type="boolean" label="Save the single representation embeddings of all models." truevalue="--save-single-representations" falsevalue=""/>
+            <param argument="--save-pair-representations" type="boolean" label="Save the pair representation embeddings of all models." truevalue="--save-pair-representations" falsevalue=""/>
+        </section>
+    </inputs>
+    <outputs>
+        <collection name="png_files" type="list" format="png" label="${tool.name} on ${on_string}: Figures">
+            <discover_datasets format="png" pattern="__name_and_ext__" directory="output/png_out"/>
+        </collection>
+        <collection name="json_files" type="list" format="json" label="${tool.name} on ${on_string}: JSON predictions">
+            <discover_datasets format="json" pattern="__name_and_ext__" directory="output/json_out"/>
+        </collection>
+        <collection name="pdb" type="list" format="pdb" label="${tool.name} on ${on_string}: PDB predictions">
+            <discover_datasets format="pdb" pattern="__name_and_ext__" directory="output/pdb_out"/>
+        </collection>
+        <collection name="pickle" type="list" format="pickle" label="${tool.name} on ${on_string}: Pickle file outputs">
+            <discover_datasets format="pickle" pattern="__name_and_ext__" directory="output/pickle_out"/>
+            <filter>output_options['save_all']</filter>
+        </collection>
+        <collection name="npy" type="list" format="npy" label="${tool.name} on ${on_string}: Numpy embeddings">
+            <discover_datasets format="npy" pattern="__name_and_ext__" directory="output/npy_out"/>
+            <filter>output_options['save_single_representations'] or output_options['save_pair_representations']</filter>
+        </collection>
+        <data name="a3m_out" format="a3m" from_work_dir="output/output.a3m" label="${tool.name} on ${on_string}: a3m file"/>
+    </outputs>
+    <tests>
+        <test expect_failure="true" expect_exit_code="1">
+            <param name="input" value="input.tar"/>
+            <section name="advanced">
+                <param name="num_recycles" value="4"/>
+                <param name="recycle_early_stop_tolerance" value="0.4"/>
+                <param name="num_ensemble" value="1"/>
+                <param name="random_seed" value="43"/>
+                <param name="num_seeds" value="2"/>
+                <param name="num_models" value="2"/>
+                <param name="max_msa" value="64:128"/>
+                <param name="use_dropout" value="--use-dropout"/>
+                <conditional name="amber">
+                    <param name="use_amber" value="yes"/>
+                    <param name="num_relaxed" value="0"/>
+                </conditional>
+            </section>
+            <section name="output_options">
+                <param name="save_all" value="--save-all"/>
+                <param name="save_recycles" value="--save-recycles"/>
+                <param name="save_single_representations" value="--save-single-representations"/>
+                <param name="save_pair_representations" value="--save-pair-representations"/>
+            </section>
+            <assert_command>
+                <has_text text="colabfold_batch --num-recycle 4 --recycle-early-stop-tolerance 0.4 --num-ensemble 1 --random-seed 43"/>
+                <has_text text="--num-seeds 2 --num-models 2 --use-dropout --max-msa 64:128 --amber --num-relax 0"/>
+                <has_text text="--save-all --save-recycles --save-single-representations --save-pair-representations"/>
+            </assert_command>
+        </test>
+    </tests>
+    <help><![CDATA[
+        Generate run a folding step on the output of the colabfold MSA run
+    ]]></help>
+    <expand macro="citations"/>
+</tool>