view SideCompoundsScan.xml @ 2:cb9942e68c8c draft default tip

planemo upload for repository https://forge.inrae.fr/metexplore/met4j-galaxy commit 71335d4a636672d264cc4f443c07856907571928
author metexplore
date Fri, 21 Nov 2025 10:24:06 +0000
parents 97e6505bf8da
children
line wrap: on
line source

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<tool id="met4j_SideCompoundsScan" name="SideCompoundsScan" version="@TOOL_VERSION@">
  <description>Scan a network to identify side compounds.</description>
  <macros>
    <import>macros.xml</import>
  </macros>
  <expand macro="bio_tools"/>
  <expand macro="requirements"/>
  <!--
    Cheetah template for the met4j networkAnalysis.SideCompoundsScan command line.
    Boolean params expand to their flag (truevalue) or to nothing (falsevalue).
    Optional params are only emitted when they carry a usable value:
      * an emptied optional float renders as an empty string, so '' is skipped
        (the 'nan' check alone never matched an emptied field);
      * an optional select with nothing selected renders as 'None', so it is skipped too.
    Values are wrapped in single quotes so the shell does not expand $ or backticks in them.
  -->
  <command detect_errors="exit_code"><![CDATA[met4j networkAnalysis.SideCompoundsScan -i '$inputPath'
 $sideOnly
 $noReportValue
#if str($degree):
 -d '$degree'
#end if
#if str($degreePrecentile) not in ('', 'None', 'nan'):
 -dp '$degreePrecentile'
#end if
 $flagInorganic
 $flagNoFormula
#if str($parallelEdge) not in ('', 'None', 'nan'):
 -nc '$parallelEdge'
#end if
#if str($mergingStrat) not in ('', 'None'):
 -m '$mergingStrat'
#end if
 -o '$outputPath'
]]></command>
  <inputs>
    <!-- Network to scan, passed to the tool with -i. -->
    <param argument="-i" format="sbml" label="input SBML file" name="inputPath" optional="false" type="data" value=""/>
    <!-- Output-shaping flags: each one adds its truevalue to the command line when checked. -->
    <param argument="-s" checked="false" falsevalue="" label="output compounds flagged as side compounds only" name="sideOnly" truevalue="-s" type="boolean" value="false"/>
    <param argument="-id" checked="false" falsevalue="" label="do not report values in output, export ids of compounds flagged as side compounds, allowing piping results" name="noReportValue" truevalue="-id" type="boolean" value="false"/>
    <!--
      Degree threshold. This is a free-text field whose value is interpolated into the
      shell command, so the sanitizer only lets digits and a minus sign through; any
      other character is replaced by "_" (which makes the tool reject the value instead
      of letting quotes, $ or backticks reach the shell).
      NOTE(review): assumes -d expects an integer; confirm against the met4j option type.
    -->
    <param argument="-d" label="flag as side compounds any compound with degree above threshold. Ignored if negative" name="degree" optional="true" type="text" value="-1">
      <sanitizer invalid_char="_">
        <valid initial="string.digits">
          <add value="-"/>
        </valid>
      </sanitizer>
    </param>
    <!-- Percentile-based degree criterion; the command template skips it when left empty. -->
    <param argument="-dp" label="flag as side compounds the top x% of compounds according to their degree. Ignored if negative" name="degreePrecentile" optional="true" type="float" value="2.0"/>
    <!-- Formula-based criteria: each one adds its truevalue to the command line when checked. -->
    <param argument="-cc" checked="false" falsevalue="" label="flag as side compound any compound with less than 2 carbons in formula" name="flagInorganic" truevalue="-cc" type="boolean" value="false"/>
    <param argument="-uf" checked="false" falsevalue="" label="flag as side compound any compound with no valid chemical formula" name="flagNoFormula" truevalue="-uf" type="boolean" value="false"/>
    <!-- Parallel-edge (neighbor coupling) criterion; the command template skips it when left empty. -->
    <param argument="-nc" label="flag as side compound any compound with a number of parallel edges shared with a neighbor above the given threshold. Ignored if negative" name="parallelEdge" optional="true" type="float" value="-1.0"/>
    <!-- Strategy for merging a compound's degree across compartments; "no" is preselected. -->
    <param argument="-m" label="degree is shared between compounds in different compartments. Use names if consistent and unambiguous across compartments, or identifiers if compartment suffix is present (id in form &quot;xxx_y&quot; with xxx as base identifier and y as compartment label)." name="mergingStrat" optional="true" type="select" value="no">
      <option selected="true" value="no">no</option>
      <option value="by_name">by_name</option>
      <option value="by_id">by_id</option>
    </param>
  </inputs>
  <outputs>
    <!-- Single TSV dataset; its path is handed to the tool through -o in the command. -->
    <data name="outputPath" format="tsv"/>
  </outputs>
  <tests>
    <!-- Default options on XF_network.sbml: expects a 4-column table of at least 1000 lines. -->
    <test>
      <param name="inputPath" value="XF_network.sbml"/>
      <output name="outputPath" ftype="tsv">
        <assert_contents>
          <has_n_columns n="4"/>
          <has_n_lines min="1000"/>
        </assert_contents>
      </output>
    </test>
    <!-- Same network with noReportValue enabled: expects a single column of at least 30 lines. -->
    <test>
      <param name="inputPath" value="XF_network.sbml"/>
      <param name="noReportValue" value="true"/>
      <output name="outputPath" ftype="tsv">
        <assert_contents>
          <has_n_columns n="1"/>
          <has_n_lines min="30"/>
        </assert_contents>
      </output>
    </test>
  </tests>
  <help><![CDATA[Scan a network to identify side compounds.
Side compounds are metabolites of small relevance for topological analysis. Their definition can be quite subjective and varies between sources.
Side compounds tend to be ubiquitous and not specific to a particular biochemical or physiological process. Compounds usually considered as side compounds include water, ATP or carbon dioxide. By being involved in many reactions and thus connected to many compounds, they tend to significantly lower the average shortest path distances beyond expected metabolic relatedness.
This tool attempts to propose a list of side compounds according to specific criteria:  
- *Degree*: Compounds with an uncommonly high number of neighbors can betray a lack of process specificity.  
High degree compounds typically include water and most main cofactors (CoA, ATP, NADPH...) but can also include central compounds such as pyruvate or acetyl-CoA  
- *Neighbor Coupling*: Similar to degree, this criterion assumes that side compounds are involved in many reactions, but in pairs with other side compounds.
Therefore, the transition from ATP to ADP will appear multiple times in the network, creating redundant 'parallel edges' between these two neighbors.
Being tightly coupled to another compound through a high number of redundant edges, can point out cofactors while keeping converging pathways' products with high degree like pyruvate aside.  
- *Carbon Count*: Metabolic "waste" or degradation end-products such as ammonia or carbon dioxide are usually considered as side compounds.
Most of them are inorganic compounds, another ill-defined concept, sometimes defined as compounds lacking C-C or C-H bonds. Since chemical structure is rarely available in SBML models beyond the chemical formula, we use a less restrictive criterion by flagging compounds with one or no carbon. This covers most inorganic compounds, but includes a few compounds, such as methane, usually considered as organic.  
- *Chemical Formula*: Metabolic networks often contain 'artifacts' that serve modelling purposes (to define a composite objective function for example). Such entities can be considered as 'side entities'. Since they are not actual chemical compounds, they can be detected by their lack of a valid chemical formula. However, this can also flag main compounds with erroneous or missing annotation.
@ATTRIBUTION@]]></help>
  <citations/>
</tool>