view osra.xml @ 1:0916781bc971 draft

Uploaded
author bgruening
date Sun, 20 Apr 2014 09:03:07 -0400
parents 34ae5f2ae450
children d2490712b67d
line wrap: on
line source

<tool id="ctb_osra" name="Molecule recognition" version="0.3">
    <description>in Images or PDF documents (OSRA)</description>
    <requirements>
        <!-- Packages this tool depends on, each pinned to an exact version. -->
        <requirement type="package" version="2.0.0">osra</requirement>
        <requirement type="package" version="2.3.2">openbabel</requirement>
        <requirement type="package" version="1.3.18">graphicsmagick</requirement>
    </requirements>
    <command interpreter='python'>
        ## Invoke the osra.py wrapper: -f passes the selected output format and
        ## the input dataset follows as the positional argument.
        ## Dataset paths are single-quoted so that whitespace or shell
        ## metacharacters in them cannot split or alter the command line.
        ##
        ## OSRA_DATA_FILES is set during the toolshed installation;
        ## if it is not set, use the standard configuration and hope for the best.
        osra.py -f $oformat '$infile'
        -l \$OSRA_DATA_FILES/spelling.txt -a \$OSRA_DATA_FILES/superatom.txt

        ## Further OSRA parameters should go after -l and -a,
        ## because -l and -a can be removed by the python wrapper.

        ## Optional switches: each expands to its flag (-p, -i, -j) or to nothing.
        $confidence
        $adaptive
        $thinning

        > '$outfile'
    </command>
    <inputs>
        <!-- Source document that is scanned for molecule drawings. -->
        <param name="infile" type="data" format="png,pdf"
               label="Image or PDF with molecules"/>

        <!-- Selected value is passed to osra.py through the -f option. -->
        <param name="oformat" type="select" label="Output molecule format">
            <option value="can">SMILES</option>
            <option value="sdf">SDF</option>
        </param>

        <!-- Boolean switches: each one contributes its flag to the command
             line when enabled and an empty string otherwise. -->
        <param name="confidence" type="boolean" checked="true"
               truevalue="-p" falsevalue=""
               label="Print out confidence estimate (-p)"/>
        <param name="adaptive" type="boolean" checked="false"
               truevalue="-i" falsevalue=""
               label="Adaptive thresholding pre-processing, useful for low light/low contrast images (-i)"/>
        <param name="thinning" type="boolean" checked="false"
               truevalue="-j" falsevalue=""
               label="Additional thinning/scaling down of low quality documents (-j)"/>
    </inputs>
    <outputs>
        <!-- The result is declared as SDF by default and switched to SMILES
             (smi) when the "can" output format is selected. -->
        <data name="outfile" format="sdf">
            <change_format>
                <when input="oformat" value="can" format="smi"/>
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- SDF output recognised from a PNG image. -->
        <test>
            <param name="infile" ftype="png" value="CID_2244.png"/>
            <param name="oformat" value="sdf"/>
            <output name="outfile" ftype="sdf" file="osra_on_CID2244.sdf"/>
        </test>
        <!-- SMILES output: oformat="can" switches the output datatype to smi
             (see change_format on outfile), so the expected ftype is smi. -->
        <test>
            <param name="infile" ftype="png" value="2008001635_153_chem.png"/>
            <param name="oformat" value="can"/>
            <output name="outfile" ftype="smi" file="2008001635_153_chem.smi"/>
        </test>
    </tests>
    <help>

.. class:: infomark

**What this tool does**

OSRA_ (Optical Structure Recognition Application) is a utility designed to convert graphical representations of chemical structures into SMILES or SDF. It generates the SMILES or SDF representation of any molecular structure image within a document which is parseable by GraphicsMagick.

.. _OSRA: http://cactus.nci.nih.gov/osra/

-----

.. class:: infomark

**Cite**

Igor V Filippov and Marc C Nicklaus - `Optical Structure Recognition Software To Recover Chemical Information: OSRA, An Open Source Solution`_

.. _`Optical Structure Recognition Software To Recover Chemical Information: OSRA, An Open Source Solution`: http://pubs.acs.org/doi/abs/10.1021/ci800067r
    </help>
</tool>