Mercurial > repos > jay > pdaug_word_vector_model
view PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.xml @ 5:c6a1b09d8846 draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit ac4353ca5c0ac9ce60df9f4bf160ed08b99fbee3"
author | jay |
---|---|
date | Thu, 28 Jan 2021 04:16:01 +0000 |
parents | 3ce435b8d648 |
children | d739a3bc7e39 |
line wrap: on
line source
<!--
  Galaxy tool wrapper for PDAUG_Word_Vector_Model.py.
  Takes a peptide FASTA file plus three training settings (minimum word
  count, context window, training algorithm) and emits one text model file.
-->
<tool id="pdaug_word_vector_model" name="PDAUG Word Vector Model" python_template_version="3.7" version="0.1.0">
    <description>Generates the skip-gram model</description>

    <requirements>
        <requirement type="package" version="1.0.3">pandas</requirement>
        <requirement type="package" version="1.76">biopython</requirement>
        <requirement type="package" version="3.5">nltk</requirement>
        <requirement type="package" version="3.8.0">gensim</requirement>
        <requirement type="package" version="0.23.1">scikit-learn</requirement>
        <requirement type="package" version="1.18.4">numpy</requirement>
    </requirements>

    <stdio>
        <exit_code range="1" level="fatal" />
    </stdio>

    <!-- Every substituted value is single-quoted so it reaches the script as one argument. -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/PDAUG_Word_Vector_Model.py'
            -I '$input'
            -M '$meanCount'
            -W '$window'
            -O '$OutFile'
            --SG '$sg'
    ]]></command>

    <inputs>
        <param name="input" type="data" format="fasta" argument="--Input"
               label="Input fasta file"
               help="Input fasta file with peptides"/>
        <!-- The name "meanCount" is kept for compatibility with the command template and
             saved workflows; the value is the minimum word count passed through the M flag. -->
        <param name="meanCount" type="integer" value="0" argument="--min_count"
               label="Min Count"
               help="Ignores all words with total frequency lower than this"/>
        <param name="window" type="integer" value="5" argument="--window"
               label="window"
               help="Maximum distance between the current and predicted word within a sentence"/>
        <!-- The first option is the default selection. -->
        <param name="sg" type="select" argument="--SG"
               label="Select algorithm"
               help="Training algorithm skip-gram or CBOW.">
            <option value="skip-gram">Skip-gram</option>
            <option value="CBOW">CBOW</option>
        </param>
    </inputs>

    <outputs>
        <data name="OutFile" format="txt" label="${tool.name} on $on_string - (text)" />
    </outputs>

    <tests>
        <test>
            <param name="input" value="test.fasta"/>
            <param name="meanCount" value="0"/>
            <param name="window" value="5"/>
            <!-- Pin the algorithm explicitly rather than relying on the select default. -->
            <param name="sg" value="skip-gram"/>
            <!-- NOTE(review): lines_diff is very permissive; presumably the trained model is
                 not reproducible run to run. Confirm, and consider a stricter assertion. -->
            <output name="OutFile" value="model.txt" lines_diff="2268" />
        </test>
    </tests>

    <help><![CDATA[
.. class:: infomark

**What it does**

This tool calculates the skip-gram model which is a neural network where the inputs and outputs of the network are one-hot vectors calculated based on training data that contains input word and output word.

-----

**Inputs**

* **--Input** Fasta file with protein sequences.
* **--min_count** Ignores all words with total frequency lower than this.
* **--window** Maximum distance between the current and predicted word within a sentence, accepts integer value.
* **--SG** Select training algorithm skip-gram or CBOW.

-----

**Outputs**

* **--OutFile** Return "model.txt" model file.
    ]]></help>

    <citations>
        <citation type="bibtex">
            @misc{PDAUGGITHUB,
                author = {Joshi, Jayadev and Blankenberg, Daniel},
                year = {2020},
                title = {PDAUG - a Galaxy based toolset for peptide library analysis, visualization, and machine learning modeling},
                publisher = {GitHub},
                journal = {GitHub repository},
                url = {https://github.com/jaidevjoshi83/pdaug.git},
            }
        </citation>
        <citation type="bibtex">
            @inproceedings{rehurek_lrec,
                title = {{Software Framework for Topic Modelling with Large Corpora}},
                author = {Radim {\v R}eh{\r u}{\v r}ek and Petr Sojka},
                booktitle = {{Proceedings of the LREC 2010 Workshop on New Challenges for NLP Frameworks}},
                pages = {45--50},
                year = 2010,
                month = May,
                day = 22,
                publisher = {ELRA},
                address = {Valletta, Malta},
                url = {http://is.muni.cz/publication/884893/en},
                language = {English}
            }
        </citation>
        <!-- The doi field holds the bare identifier, not a resolver URL. -->
        <citation type="bibtex">
            @article{Md_Nafiz,
                title = {Identifying antimicrobial peptides using word embedding with deep recurrent neural networks},
                volume = {35},
                doi = {10.1093/bioinformatics/bty937},
                number = {12},
                year = {2018},
                pages = {2009--2016},
                journal = {Bioinformatics},
                author = {Hamid, Md-Nafiz and Friedberg, Iddo}
            }
        </citation>
    </citations>
</tool>