Mercurial > repos > jay > pdaug_word_vector_descriptor

<tool id="pdaug_word_vector_descriptor" name="PDAUG Word Vector Descriptor" version="0.1.0" python_template_version="3.7">
    <description>Calculates word vector descriptor</description>

    <!-- Packages required by this tool, each pinned to an exact version. -->
    <requirements>
        <requirement type="package" version="3.5">nltk</requirement>
        <requirement type="package" version="1.0.3">pandas</requirement>
        <requirement type="package" version="1.76">biopython</requirement>
        <requirement type="package" version="3.8.0">gensim</requirement>
    </requirements>

    <!-- Runs the Python script located in the tool directory. Each option receives the value
         of the matching parameter: -I and -M take the two input datasets, -R, -P and -N the
         three integer settings, and -O the output dataset. With detect_errors="exit_code"
         the job outcome is decided from the script's exit status. -->
    <command detect_errors="exit_code"><![CDATA[

       python '$__tool_directory__/PDAUG_Word_Vector_Descriptor.py' -I '$Input' -M '$ModelInput' -R '$row' -P '$positive' -N '$negative' -O '$output'

    ]]></command>

    <!-- User-facing parameters. Each name is referenced as a $variable in the command
         template, so names, types and defaults must stay in sync with it. -->
    <inputs>
        <param name="ModelInput" type="data" format="txt" value="model.txt" argument="--ModelInput"
               label="Input model file"
               help="Input word vector model file"/>
        <param name="Input" type="data" format="fasta" argument="--InputFasta"
               label="Training data"
               help="Input FASTA file with positive and negative samples"/>
        <param name="row" type="integer" value="200" argument="--row"
               label="Number of training samples"
               help="Total number of positive and negative samples"/>
        <param name="positive" type="integer" value="100" argument="--positive"
               label="Positive samples"
               help="Number of positive samples"/>
        <param name="negative" type="integer" value="100" argument="--negative"
               label="Negative samples"
               help="Number of negative samples"/>
    </inputs>

    <!-- Single tabular dataset; it is handed to the script through the -O option. -->
    <outputs>
        <data name="output" format="tabular" label="${tool.name} on $on_string - (tabular)"/>
    </outputs>

    <!-- Functional test: 276 samples (138 positive, 138 negative) from test.fasta with
         model.txt as the model; Out.tsv is the expected output file. -->
    <tests>
        <test>
            <param name="ModelInput" value="model.txt"/>
            <param name="Input" value="test.fasta"/>
            <param name="row" value="276"/>
            <param name="positive" value="138"/>
            <param name="negative" value="138"/>
            <output name="output" value="Out.tsv"/>
        </test>
    </tests>

    <help><![CDATA[
.. class:: infomark

**What it does**

Calculates word vector based descriptors derived from the Skip-gram model.

-----

**Inputs**
    * **--InputFasta** A FASTA file with negative and positive samples.
    * **--ModelInput** Skip-gram model as ".txt" file generated from "Word_Vector_Model".
    * **--row** Total number of training samples (positive plus negative).
    * **--positive** Number of positive samples.
    * **--negative** Number of negative samples.

-----

**Outputs**
    * **--OutFile** Tabular file with descriptor data.]]></help>
<!-- Bibliographic references for the tool, given as BibTeX entries. The doi field of the
     last entry holds the bare DOI rather than a resolver URL, so that consumers which
     prepend the resolver themselves build a valid link. -->
<citations>

  <citation type="bibtex">
    @misc{PDAUGGITHUB,
      author = {Joshi, Jayadev and Blankenberg, Daniel},
      year = {2020},
      title = {PDAUG - a Galaxy based toolset for peptide library analysis, visualization, and machine learning modeling},
      publisher = {GitHub},
      journal = {GitHub repository},
      url = {https://github.com/jaidevjoshi83/pdaug.git},
    }
  </citation>

  <citation type="bibtex">
    @inproceedings{rehurek_lrec,
      title = {{Software Framework for Topic Modelling with Large Corpora}},
      author = {Radim {\v R}eh{\r u}{\v r}ek and Petr Sojka},
      booktitle = {{Proceedings of the LREC 2010 Workshop on New
           Challenges for NLP Frameworks}},
      pages = {45--50},
      year = 2010,
      month = May,
      day = 22,
      publisher = {ELRA},
      address = {Valletta, Malta},
      url = {http://is.muni.cz/publication/884893/en},
      language = {English}
    }
  </citation>

  <citation type="bibtex">
    @article{Md_Nafiz,
      title = {Identifying antimicrobial peptides using word embedding with deep recurrent neural networks},
      volume = {35},
      DOI = {10.1093/bioinformatics/bty937},
      issue = {12},
      year = {2018},
      pages = {2009-2016},
      journal = {Bioinformatics},
      author = {Hamid, Md-Nafiz and Friedberg, Iddo}
    }
  </citation>

</citations>
</tool>
author	jay
date	Sun, 09 Jan 2022 04:53:41 +0000
parents	a3a1d9bea1ad
children