view linear_genome_plot.xml @ 2:b36bdd17d99a draft

planemo upload commit 668d52b2fe1b64d0a8cf11fff57a991fdeb85b85-dirty
author cpt
date Fri, 28 Jun 2024 02:21:43 +0000
parents e923c686ead9
children b79e98299a78
line wrap: on
line source

<tool id="edu.tamu.cpt.genome_viz.linear_genome_plot" name="Linear Genome Plot" version="1.0">
    <!-- Short blurb displayed alongside the tool name. -->
    <description>Linear Genome Plot</description>
    <!-- Pull in shared macro definitions; the "requirements" macro expanded
         below is presumably defined in macros.xml (not visible here). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        Cheetah template that builds the command line for linear_genome_plot.py.

        Always passed: the GenBank input, plot width, the exclusion / labeling
        lists, the label order, the title, and the two output paths.
        label_algo and label_box are booleans whose truevalue is the flag
        itself, so they render as either the flag or nothing.
        Conditionally passed: zoom bounds, multi-line settings, and one
        feature_id / gene_id colour pair per "add_colors" repeat entry.
    -->
    <command detect_errors="aggressive"><![CDATA[
python '$__tool_directory__/linear_genome_plot.py'
'$input_file'
--plot_width '$plot_width'
--common_features_excluded  "$common_features_excluded"
--features_excluded "$features_excluded"
--common_ignore_feature_labels "$common_ignore_feature_labels"
--ignored_feature_labels "$ignored_feature_labels"
--common_ignore_product_labels "$common_ignore_product_labels"
--ignore_labeling "$ignore_labeling"
--feature_label_order "$feature_label_order"
--title "$title"
$label_algo
$label_box
#if $selectregion.custom_region:
    ## start_zoom / end_zoom are optional integers: a blank field renders as an
    ## empty string, so only emit a flag when a value was actually entered
    ## (otherwise the script would receive e.g. --sz '').
    #if str($selectregion.start_zoom) != "":
        --sz '$selectregion.start_zoom'
    #end if
    #if str($selectregion.end_zoom) != "":
        --ez '$selectregion.end_zoom'
    #end if
#end if
#if $multiline.multi_line:
    --multiline
    ## nucl_per_line is optional as well; skip the flag when it is blank.
    ## NOTE(review): this relies on the script having a usable default for
    ## nucl_per_line in multiline mode - confirm against linear_genome_plot.py.
    #if str($multiline.nucl_per_line) != "":
        --nucl_per_line '$multiline.nucl_per_line'
    #end if
#end if
#for $feature_color in $feature_colors:
    #if $feature_color.feature_color_selector.feature_color_options == "add_colors":
        --feature_id #for $feat_id in $feature_color.feature_color_selector.feature_id:
            "${feat_id}" #end for
        --feature_id_color #for $feat_col in $feature_color.feature_color_selector.feature_id_color:
            "${feat_col}" #end for
    #end if
#end for
#for $gene_color in $gene_colors:
    #if $gene_color.gene_color_selector.gene_color_options == "add_colors":
        --gene_id #for $gene_id in $gene_color.gene_color_selector.gene_id:
            "${gene_id}" #end for
        --gene_id_color #for $gene_col in $gene_color.gene_color_selector.gene_id_color:
            "${gene_col}" #end for
    #end if
#end for
--file_stats '$file_stats'
--out_img '$out_img'
    ]]></command>
    <inputs>
        <!-- Core inputs: the annotated genome and overall plot geometry. -->
        <param label="Annotated Genome File (Gbk)" name="input_file" type="data" format="genbank"/>
        <param label="Plot Width" name="plot_width" type="integer" value="10" help="Width of the plot. Increase for larger genomes."/>
        <!-- Boolean whose truevalue is the literal CLI flag; unchecked renders as nothing. -->
        <param label="Box Label" name="label_box" type="boolean" checked="true" help="Select 'no' to have no label box around feature labels" truevalue="--label_box" falsevalue=""/>
        <!-- Optional zoom window; the bounds are only shown when the checkbox is set. -->
        <conditional name="selectregion">
            <param label="Select Custom Region" name="custom_region" type="boolean" checked="false" help="Plot a specific region of the genome"/>
            <when value="true">
                <param name="start_zoom" label="Start Zoom" type="integer" help="start zoom" optional="true"/>
                <param name="end_zoom" label="End Zoom" type="integer" help="end zoom" optional="true"/>
            </when>
            <when value="false">
            </when>
        </conditional>
        <!-- Optional multi-line layout. -->
        <conditional name="multiline">
            <param label="Multi Line Plot" name="multi_line" type="boolean" checked="false" help="Breaks up the plot into multiple lines"/>
            <when value="true">
                <param name="nucl_per_line" label="Nucleotides Per Line" type="integer" optional="true"/>
            </when>
            <when value="false">
            </when>
        </conditional>
        <!-- "argument" holds the literal CLI flag; Galaxy derives the parameter
             name from it by stripping the leading dashes, so the name stays
             common_features_excluded. -->
        <param argument="--common_features_excluded" label="Common Feature(s) to EXCLUDE" type="select" multiple="true" help="Common Features to be excluded from the plot">
            <option value="source" selected="true">source</option>
            <option value="gene" selected="true">gene</option>
            <option value="CDS">CDS</option>
            <option value="RBS">RBS</option>
            <option value="misc_feature">misc_feature</option>
            <option value="misc_difference">misc_difference</option>
        </param>
        <param label="Extra Feature(s) to EXCLUDE" name="features_excluded" type="text" optional="true" help="Feature(s) to exclude from plot (example: variation,misc_recomb). Separate by comma(s)."/>
        <param argument="--common_ignore_feature_labels" label="Common Feature(s) label(s) to EXCLUDE" type="select" multiple="true" help="Common Feature labels to exclude from the plot">
            <option value="source" selected="true">source</option>
            <option value="gene" selected="true">gene</option>
            <option value="CDS">CDS</option>
            <option value="RBS" selected="true">RBS</option>
            <option value="misc_feature">misc_feature</option>
            <option value="misc_difference">misc_difference</option>
        </param>
        <param label="Extra Feature(s) label(s) to EXCLUDE" name="ignored_feature_labels" type="text" optional="true" help="If plotting multiple features, especially when using product names, and you want to ignore labeling certain types of features, input the feature(s) (example: variation,misc_recomb). For multiple feature labels to exclude, separate by commas."/>
        <param argument="--common_ignore_product_labels" label="Common Product names to EXCLUDE from labeling" type="select" multiple="true" help="Common product labels to ignore from the plot">
            <option value="hypothetical protein" selected="true">hypothetical protein</option>
            <option value="uncharacterized protein" selected="true">uncharacterized protein</option>
            <!--<option value="phage" selected="true">phage</option> MIGHT ADD THIS LATER-->
        </param>
        <param label="Extra Product(s) name label(s) to EXCLUDE" name="ignore_labeling" type="text" optional="true" help="Product names to ignore as well as character patterns (example: hypothetical protein, JX). Separate by commas"/>
        <param label="Name ordering" help="Order of features to decide naming convention (example: locus_tag,product). Separate by commas." name="feature_label_order" type="text" value="product"/>
        <param label="Title of Graph" name="title" type="text" value="Linear Genome Plot"/>
        <!-- Boolean whose truevalue is the literal CLI flag, like label_box above. -->
        <param label="Label algorithm dictates label placement" name="label_algo" type="boolean" help="Some features will be displayed inside if there is sufficient space. If set to no, all labels will be placed in the box labels above the feature" checked="true" truevalue="--label_algo" falsevalue=""/>
        <!-- Custom colours keyed by feature type; one repeat entry per colour.
             NOTE(review): multiple="true" is documented for select/data
             parameters; confirm it has the intended effect on the text and
             color params below, since the command template iterates over them. -->
        <repeat name="feature_colors" title="Feature Colors">
            <conditional name="feature_color_selector">
                <param name="feature_color_options" type="select" label="Feature Color Options">
                    <option value="none" selected="false"/>
                    <option value="add_colors" selected="false">Add Custom Colors</option>
                </param>
                <when value="add_colors">
                    <param name="feature_id" label="Feature Name" type="text" optional="true" multiple="true" help="Feature name. Example: CDS. Case Sensitive."/>
                    <param name="feature_id_color" label="Feature Color" type="color" optional="true" multiple="true" value="#FFFFFF">
                        <!-- Allow '#' through so hex colours are not mangled by the sanitizer. -->
                        <sanitizer>
                            <valid initial="string.ascii_letters,string.digits">
                                <add value="#"/>
                            </valid>
                        </sanitizer>
                    </param>
                </when>
                <when value="none">
                </when>
            </conditional>
        </repeat>
        <!-- Custom colours keyed by product name; same structure as feature_colors. -->
        <repeat name="gene_colors" title="Product Name Colors">
            <conditional name="gene_color_selector">
                <param name="gene_color_options" type="select" label="Product Color Options">
                    <option value="none" selected="false"/>
                    <option value="add_colors" selected="false">Add Custom Colors</option>
                </param>
                <when value="add_colors">
                    <param name="gene_id" label="Product Name" type="text" optional="true" multiple="true" help="Product Name. Example: holin. Case Sensitive."/>
                    <param name="gene_id_color" label="Product Color" type="color" optional="true" multiple="true" value="#D3D3D3">
                        <!-- Allow '#' through so hex colours are not mangled by the sanitizer. -->
                        <sanitizer>
                            <valid initial="string.ascii_letters,string.digits">
                                <add value="#"/>
                            </valid>
                        </sanitizer>
                    </param>
                </when>
                <when value="none">
                </when>
            </conditional>
        </repeat>
    </inputs>
    <outputs>
        <!-- Plain-text summary; the path is handed to the script via
             file_stats. "txt" is Galaxy's extension for plain text. -->
        <data format="txt" name="file_stats" label="out_stats.txt"/>
        <!-- Rendered plot; the path is handed to the script via out_img. -->
        <data format="svg" name="out_img" label="genome_plot.svg"/>
    </outputs>
    <tests>
        <test>
            <!-- Zoomed plot; the SVG is compared by size with a 5% tolerance. -->
            <param name="input_file" value="mu_reanno.gb"/>
            <param name="plot_width" value="10"/>
            <param name="common_features_excluded" value="source,misc_feature"/>
            <param name="features_excluded" value="misc_difference,gene"/>
            <param name="common_ignore_feature_labels" value="RBS"/>
            <param name="feature_label_order" value="product"/>
            <param name="title" value="Mu Test label"/>
            <!-- Must match the input's name exactly (label_algo, with underscore). -->
            <param name="label_algo" value="true"/>
            <param name="common_ignore_product_labels" value="uncharacterized protein,hypothetical protein"/>
            <conditional name="selectregion">
                <param name="custom_region" value="true"/>
                <param name="start_zoom" value="7000"/>
                <param name="end_zoom" value="12000"/>
            </conditional>
            <output name="file_stats" file="out_stats_zoom.txt"/>
            <output name="out_img" file="out_img_zoom.svg" compare="sim_size" delta_frac="0.05"/>
        </test>
        <test>
            <!-- Whole-genome plot plus the free-text "extra" exclusion params. -->
            <param name="input_file" value="mu_reanno.gb"/>
            <param name="plot_width" value="100"/>
            <param name="common_features_excluded" value="source,misc_feature"/>
            <param name="features_excluded" value="misc_difference,gene"/>
            <param name="common_ignore_feature_labels" value="RBS"/>
            <param name="feature_label_order" value="product"/>
            <param name="title" value="Mu Test label"/>
            <param name="label_algo" value="true"/>
            <param name="common_ignore_product_labels" value="uncharacterized protein,hypothetical protein"/>
            <param name="ignore_labeling" value="testing_ignore_labeling,another_test"/>
            <param name="ignored_feature_labels" value="testing_ignored_feature_labels,another_test"/>
            <assert_command>
                <has_text text="--ignore_labeling &quot;testing_ignore_labeling,another_test&quot; "/>
                <has_text text="--ignored_feature_labels &quot;testing_ignored_feature_labels,another_test&quot; "/>
            </assert_command>
            <output name="file_stats" file="out_stats.txt"/>
            <output name="out_img" file="out_img.svg" compare="sim_size" delta_frac="0.05"/>
        </test>
        <test>
            <!-- Multi-line plot of a zoomed region with custom feature and product colours. -->
            <param name="input_file" value="mu_reanno.gb"/>
            <param name="plot_width" value="100"/>
            <param name="common_features_excluded" value="source,misc_feature"/>
            <param name="features_excluded" value="misc_difference,gene"/>
            <param name="common_ignore_feature_labels" value="RBS"/>
            <param name="feature_label_order" value="product"/>
            <param name="title" value="Mu Test label"/>
            <param name="label_algo" value="true"/>
            <param name="common_ignore_product_labels" value="uncharacterized protein,hypothetical protein"/>
            <conditional name="selectregion">
                <param name="custom_region" value="true"/>
                <param name="start_zoom" value="7000"/>
                <param name="end_zoom" value="12000"/>
            </conditional>
            <conditional name="multiline">
                <param name="multi_line" value="true"/>
                <param name="nucl_per_line" value="1200"/>
            </conditional>
            <repeat name="feature_colors">
                <conditional name="feature_color_selector">
                    <param name="feature_color_options" value="add_colors"/>
                    <param name="feature_id" value="RBS"/>
                    <param name="feature_id_color" value="#CD5C5C"/>
                </conditional>
            </repeat>
            <repeat name="feature_colors">
                <conditional name="feature_color_selector">
                    <param name="feature_color_options" value="add_colors"/>
                    <param name="feature_id" value="CDS"/>
                    <param name="feature_id_color" value="#FFFFFF"/>
                </conditional>
            </repeat>
            <repeat name="gene_colors">
                <conditional name="gene_color_selector">
                    <param name="gene_color_options" value="add_colors"/>
                    <param name="gene_id" value="spanin"/>
                    <param name="gene_id_color" value="#3355FF"/>
                </conditional>
            </repeat>
            <repeat name="gene_colors">
                <conditional name="gene_color_selector">
                    <param name="gene_color_options" value="add_colors"/>
                    <param name="gene_id" value="SAR endolysin"/>
                    <param name="gene_id_color" value="#33FFB2"/>
                </conditional>
            </repeat>
            <output name="file_stats" file="out_stats_multi.txt"/>
            <output name="out_img" file="out_img_multi.svg" compare="sim_size" delta_frac="0.05"/>
            <assert_command>
                <has_text text="--multiline"/>
                <!-- The command template single-quotes this value, so the
                     expected text has to include the quotes. -->
                <has_text text="--nucl_per_line '1200'"/>
                <has_text text="--gene_id &quot;SAR endolysin&quot;"/>
                <has_text text="--gene_id_color &quot;#33FFB2&quot;"/>
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
.. class:: warningmark

**TIP** This tool requires *genbank* format.

-----

Genome Plot

*Description of some of the various parameters:*

* Features to EXCLUDE :: The **"Common Feature(s) to EXCLUDE"** select menu has frequent features that are excluded. **"Extra Feature(s) to EXCLUDE"** is where you can pass one, or multiple, feature(s) to be excluded from the plot. Separate by commas with NO spaces.

* Feature(s) label(s) to EXCLUDE :: The **"Common Feature(s) label(s) to EXCLUDE"** select menu has frequent labels from features that are excluded. Input specific features that you do not want to label in the **"Extra Feature(s) label(s) to EXCLUDE"** field. Use this when you still want to plot the feature but not label it. A good sample case is if you are zoomed into a specific region and looking at overlapping genes but do not want to label all of the RBS sites.

* Product(s) name label(s) to EXCLUDE :: The **"Common Product names to EXCLUDE from labeling"** select menu has frequent product names that are excluded. Input specific names that you do not want to include in the **"Extra Product(s) name label(s) to EXCLUDE"** field. Any product with this name will NOT be labeled. It must be spelled, spaced, and capitalized exactly as it is within the file to be caught. However, if you wish to exclude labels with common patterns, say JX0101.orf01, you could pass "JX" to the input box and it will skip all names containing JX. Be aware that passing something like "protein" will also eliminate labels such as "DNA binding protein". Of note, if you choose to label by locus_tag and want to skip, for example, every hypothetical protein, the script will still skip those features and leave them unlabeled.

* Name ordering :: In case you want to customize the selection method of labeling genes by a specific feature, use this argument. "product" will use the product names from within the file. If you want to use product, and if there is no name, use locus_tag as the next name, pass the following: product,locus_tag.

* Label algorithm dictates label placement :: DNA-features (the original python package) has a very nice spacing algorithm for deciding how to space and where to put the label. Selecting no will force ALL labels to be placed outside of their gene box. If more tuning is desired, a custom pixel control can be implemented as an argument (not currently implemented) to allow more user control for label placement within gene boxes.

* Select Custom Region :: If you would like to zoom in and look at a specific region of the genome, select a start and end site to zoom in on. Values are given in nucleotides.

* Multiline :: Permits plotting of the genome across multiple lines. Select the amount of nucleotides per line you would like in the **"Nucleotides Per Line"** field. Smaller widths (which are recommended for multiline plots) and larger nucleotides per line will have faster compute times. This can also be combined with the custom region parameters.

* Feature Colors :: Customizing Feature colors based on their exact name as listed in the file (check the output stats file for spelling).

* Product Name Colors :: Customizing Product colors based on their exact name as listed in the file (check the output stats file for spelling). **NOW**, in addition to the exact name, general words can be used to grab similarly named features. For example, if the genome has a two-component spanin system (i-spanin + o-spanin), using "spanin" would change the color for both the i-spanin and o-spanin.

**Output**

* file_stats will output the different product names as well as the count of each respective feature
* svg output

**Output Example**

Using the zoom function and custom colors for various product names

.. image:: $PATH_TO_IMAGES/sample.png

    ]]></help>
    <citations>
        <!-- DOI citations take the bare DOI, not a resolver URL. -->
        <citation type="doi">10.1101/2020.01.09.900589</citation>
        <!-- Project-level citation for the CPT Galaxy tool collection. -->
        <citation type="bibtex">
            @unpublished{galaxyTools,
            author = {C. Ross},
            title = {CPT Galaxy Tools},
            year = {2020-},
            note = {https://github.com/tamu-cpt/galaxy-tools/}
            }
        </citation>
    </citations>
</tool>