changeset 48:e4235b5231e4 draft

Uploaded
author bimib
date Sun, 23 Feb 2020 09:41:14 -0500
parents 3af9d394367c
children 2c2a11aa1e02
files Marea/marea.py Marea/marea.xml Marea/ras_generator.xml
diffstat 3 files changed, 172 insertions(+), 95 deletions(-) [+]
line wrap: on
line diff
--- a/Marea/marea.py	Wed Feb 19 10:44:52 2020 -0500
+++ b/Marea/marea.py	Sun Feb 23 09:41:14 2020 -0500
@@ -18,14 +18,17 @@
     parser = argparse.ArgumentParser(usage = '%(prog)s [options]',
                                      description = 'process some value\'s'+
                                      ' genes to create a comparison\'s map.')
-    parser.add_argument('-rs', '--rules_selector', 
+    parser.add_argument('-cr', '--custom_rules', 
                         type = str,
-                        default = 'HMRcore',
-                        choices = ['HMRcore', 'Recon', 'Custom'], 
-                        help = 'chose which type of dataset you want use')
-    parser.add_argument('-cr', '--custom',
+                        default = 'false',
+                        choices = ['true', 'false'],
+                        help = 'choose whether to use custom rules')
+    parser.add_argument('-cc', '--custom_rule',
                         type = str,
-                        help='your dataset if you want custom rules')
+                        help='custom rules to use')
+    parser.add_argument('-cm', '--custom_map',
+                        type = str,
+                        help='custom map to use')
     parser.add_argument('-n', '--none',
                         type = str,
                         default = 'true',
@@ -55,13 +58,6 @@
     parser.add_argument('-ic', '--input_class', 
                         type = str, 
                         help = 'sample group specification')
-    parser.add_argument('-cm', '--custom_map', 
-                        type = str, 
-                        help = 'custom map')
-    parser.add_argument('-yn', '--yes_no', 
-                        type = str,
-                        choices = ['yes', 'no'],
-                        help = 'if make or not custom map')
     parser.add_argument('-gs', '--generate_svg',
                         type = str,
                         default = 'true',
@@ -551,7 +547,7 @@
         react = recon.reactions
         rules = [react[i].gene_reaction_rule for i in range(len(react))]
         ids = [react[i].id for i in range(len(react))]
-    except cb.io.sbml3.CobraSBMLError:
+    except cb.io.sbml.CobraSBMLError:
         try:
             data = (pd.read_csv(data, sep = '\t', dtype = str, engine='python')).fillna('')
             if len(data.columns) < 2:
@@ -693,8 +689,8 @@
             tmp_csv.to_csv(tab, sep = '\t', index = False, header = header)
             
             if create_svg or create_pdf:
-                if args.rules_selector == 'HMRcore' or (args.rules_selector == 'Custom'
-                                                        and args.yes_no == 'yes'):
+                if args.custom_rules == 'false' or (args.custom_rules == 'true'
+                                                        and args.custom_map != ''):
                     fix_map(tmp, core_map, threshold_P_V, threshold_F_C, max_F_C)
                     file_svg = 'result/' + i + '_vs_' + j + ' (SVG Map).svg'
                     with open(file_svg, 'wb') as new_map:
@@ -760,13 +756,6 @@
                         
     elif comparison == "onevsmany":
         for i, j in it.combinations(class_pat.keys(), 2):
-
-            if i != control and j != control:
-                print(str(control) + " " + str(i) + " " + str(j))
-                #Se è un confronto fra elementi diversi dal controllo, skippo
-                continue
-            
-            print('vado')
             tmp = {}
             count = 0
             max_F_C = 0
@@ -823,13 +812,9 @@
 
     if os.path.isdir('result') == False:
         os.makedirs('result')
-    
-    if args.rules_selector == 'HMRcore':        
-        recon = pk.load(open(args.tool_dir + '/local/HMRcore_rules.p', 'rb'))
-    elif args.rules_selector == 'Recon':
-        recon = pk.load(open(args.tool_dir + '/local/Recon_rules.p', 'rb'))
-    elif args.rules_selector == 'Custom':
-        ids, rules, gene_in_rule = make_recon(args.custom)
+        
+    if args.custom_rules == 'true':
+        ids, rules, gene_in_rule = make_recon(args.custom_rule)
 
     class_pat = {}
     
@@ -885,15 +870,13 @@
         if resolve_rules_float != None:
             class_pat = split_class(classes, resolve_rules_float)
     	
-    if args.rules_selector == 'Custom':
-        if args.yes_no == 'yes':
-            try:
-                core_map = ET.parse(args.custom_map)
-            except (ET.XMLSyntaxError, ET.XMLSchemaParseError):
-                sys.exit('Execution aborted: custom map in wrong format')
-        elif args.yes_no == 'no':
-            core_map = ET.parse(args.tool_dir + '/local/HMRcoreMap.svg')
-    else:       
+       
+    if args.custom_rules == 'true':
+        try:
+            core_map = ET.parse(args.custom_map)
+        except (ET.XMLSyntaxError, ET.XMLSchemaParseError):
+            sys.exit('Execution aborted: custom map in wrong format')
+    else:
         core_map = ET.parse(args.tool_dir+'/local/HMRcoreMap.svg')
         
     maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf, args.comparison, args.control)
--- a/Marea/marea.xml	Wed Feb 19 10:44:52 2020 -0500
+++ b/Marea/marea.xml	Sun Feb 23 09:41:14 2020 -0500
@@ -1,4 +1,4 @@
-<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.7">
+<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.8">
 	<macros>
 		<import>marea_macros.xml</import>
 	</macros>
@@ -28,6 +28,11 @@
                 ${data.input_name}
             #end for
             --comparison ${cond.comparis.comparison}
+            #if $cond.advanced.cond_map == 'true':
+            	--custom_rules true
+            	--custom_rule ${cond.advanced.cond_map.custom_rule}
+            	--custom_map ${cond.advanced.cond_map.custom_map}
+            #end if
             #if $cond.advanced.choice == 'true':
       	       --pValue ${cond.advanced.pValue}
       	       --fChange ${cond.advanced.fChange}
@@ -47,6 +52,11 @@
             #if $cond.comparis.comparison == 'onevsmany'
             	--control ${cond.comparis.controlgroup}
             #end if
+            #if $cond.advanced.cond_map == 'true':
+            	--custom_rules true
+            	--custom_rule ${cond.advanced.cond_map.custom_rule}
+            	--custom_map ${cond.advanced.cond_map.custom_map}
+            #end if
             #if $cond.advanced.choice == 'true':
       	        --pValue ${cond.advanced.pValue}
       	        --fChange ${cond.advanced.fChange}
@@ -90,11 +100,12 @@
 					<when value="false"></when>
 					<when value="true">
 						<conditional name="cond_map">
-						<param name="choice" type="boolean" checked="false" label="Use custom map?" help="Use this option only if you have generated RAS using a custom set of rules">
+						<param name="choice" type="boolean" checked="false" label="Use custom map and rules?" help="Use this option only if you have generated RAS using a custom set of rules">
 							<option value="false" selected="true">No</option>
 							<option value="true">Yes</option>
 						</param>
-						<when value="true">							
+						<when value="true">	
+							<param name="Custom_rule" argument="--custom_rule" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />					
 							<param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/>
 						</when>
 					</conditional>
@@ -131,6 +142,7 @@
 							<option value="true">Yes</option>
 						</param>
 						<when value="true">							
+							<param name="Custom_rule" argument="--custom_rule" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />					
 							<param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/>
 						</when>
 					</conditional>
@@ -155,71 +167,108 @@
 What it does
 -------------
 
-This tool analyzes RNA-seq dataset(s) as described in Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.
+This tool analyzes and visualizes differences in the Reaction Activity Scores (RASs) of groups of samples, as computed by the Expression2RAS tool, of groups of samples.
 
 Accepted files are: 
-    - option 1) two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. "*classA*" and "*classB*");
-    - option 2) one RNA dataset and one class-file specifying the class/condition each sample belongs to.
+    - option 1) two or more RAS datasets, each referring to samples in a given group. The user can specify a label for each group (as e.g. "classA" and "classB");
+    - option 2) one RAS dataset and one group-file specifying the group each sample belongs to.
+    
+RAS datasets format: tab-separated text files, reporting the RAS value of each reaction (row) for a given sample (column).
+
+Column header: sample ID.
+Raw header: reaction ID. 
 
 Optional files:
-    - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:
-
-	* (Cobra Toolbox and CobraPy compliant) xml of metabolic model;
-	* .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2).
     - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example.
 
 The tool generates:
-    1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes;
-    2) a metabolic map file (downlodable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes;
-    3) a log file (.txt).
+    - 1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes;
+    - 2) a metabolic map file (downloadable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes;
+    - 3) a log file (.txt).
+    
+Output options:
+To calculate P-Values and Fold-Changes and to enrich maps, comparisons are performed for each possible pair of groups (default option ‘One vs One’).
 
-RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.
-
-Class-file format: each row of the class-file reports the sample ID (column1) and the label of the class/condition the sample belongs to (column 2).
-
-To calculate P-Values and Fold-Changes and to generate maps, comparisons are performed for each possible pair of classes.
+Alternative options are:
+    - comparison of each group vs. the rest of samples (option ‘One vs Rest’)
+    - comparison of each group vs. a control group (option ‘One vs Control). If this option is selected the user must indicate the control group label.
 
 Output files will be named as classA_vs_classB. Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in class having label "classA".
 
-
 Example input
 -------------
 
-**"Custom Rules"** option:
+"RAS of group 1 + RAS of group 2 + ... + RAS of group N" option:
 
-Custom Rules Dastaset:
+RAS Dataset 1:
 
-@CUSTOM_RULES_EXEMPLE@
-
-**"RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N"** option:
++------------+----------------+----------------+----------------+ 
+| Reaction ID|   TCGAA62670   |   TCGAA62671   |   TCGAA62672   |  
++============+================+================+================+
+| r1642      |    0.523167    |    0.371355    |    0.925661    |  
++------------+----------------+----------------+----------------+    
+| r1643      |    0.568765    |    0.765567    |    0.456789    |    
++------------+----------------+----------------+----------------+    
+| r1640      |    0.876545    |    0.768933    |    0.987654    |  
++------------+----------------+----------------+----------------+
+| r1641      |    0.456788    |    0.876543    |    0.876542    |    
++------------+----------------+----------------+----------------+    
+| r1646      |    0.876543    |    0.786543    |    0.897654    |   
++------------+----------------+----------------+----------------+
 
-RNA-seq Dataset 1:						
-
-@DATASET_EXEMPLE1@
+RAS Dataset 2:
 
-RNA-seq Dataset 2:
++------------+----------------+----------------+----------------+ 
+| Reaction ID|   TCGAA62670   |   TCGAA62671   |   TCGAA62672   |  
++============+================+================+================+
+| r1642      |    0.523167    |    0.371355    |    0.925661    |  
++------------+----------------+----------------+----------------+    
+| r1643      |    0.568765    |    0.765567    |    0.456789    |    
++------------+----------------+----------------+----------------+    
+| r1640      |    0.876545    |    0.768933    |    0.987654    |  
++------------+----------------+----------------+----------------+
+| r1641      |    0.456788    |    0.876543    |    0.876542    |    
++------------+----------------+----------------+----------------+    
+| r1646      |    0.876543    |    0.786543    |    0.897654    |   
++------------+----------------+----------------+----------------+
 
-@DATASET_EXEMPLE2@
-
-**"RNAseq of all samples + sample group specification"** option:
+"RAS of all samples + sample group specification" option:
 
-RNA-seq Dataset:
+RAS Dataset:
 
-@DATASET_EXEMPLE1@
++------------+----------------+----------------+----------------+ 
+| Reaction ID|   TCGAA62670   |   TCGAA62671   |   TCGAA62672   |  
++============+================+================+================+
+| r1642      |    0.523167    |    0.371355    |    0.925661    |  
++------------+----------------+----------------+----------------+    
+| r1643      |    0.568765    |    0.765567    |    0.456789    |    
++------------+----------------+----------------+----------------+    
+| r1640      |    0.876545    |    0.768933    |    0.987654    |  
++------------+----------------+----------------+----------------+
+| r1641      |    0.456788    |    0.876543    |    0.876542    |    
++------------+----------------+----------------+----------------+    
+| r1646      |    0.876543    |    0.786543    |    0.897654    |   
++------------+----------------+----------------+----------------+
 
-Class-file:
+Group-file
 
-+------------+------------+   
-| Patient_ID |    class   |   
-+============+============+   
-| TCGAAA3529 |     MSI    |   
-+------------+------------+    
-| TCGAA62671 |     MSS    |    
-+------------+------------+    
-| TCGAA62672 |     MSI    |   
-+------------+------------+
++---------------+-----------+
+| Patient ID    |   Class   | 
++===============+===========+
+| TCGAAA3529    |    MSI    | 
++---------------+-----------+  
+| TCGAA62671    |    MSS    |    
++---------------+-----------+   
+| TCGAA62672    |    MSI    |
++---------------+-----------+
 
-|
+Advanced options
+----------------
+
+P-Value threshold: the threshold used for significance Kolmogorov-Smirnov (KS) test, to verify whether the distributions of RASs over the samples in two sets are significantly different
+
+Fold-Change threshold: threshold of the fold-change between the average RAS of two groups. Among the reactions that pass the KS test, only fold-change values larger than the indicated threshold will be visualized on the output metabolic map;
+
 
 .. class:: infomark
 
--- a/Marea/ras_generator.xml	Wed Feb 19 10:44:52 2020 -0500
+++ b/Marea/ras_generator.xml	Sun Feb 23 09:41:14 2020 -0500
@@ -1,4 +1,4 @@
-<tool id="MaREA RAS Generator" name="Expression2RAS" version="1.0.2">
+<tool id="MaREA RAS Generator" name="Expression2RAS" version="1.0.3">
     <description>- Reaction Activity Scores computation</description>
     <macros>
         <import>marea_macros.xml</import>
@@ -31,17 +31,6 @@
             </when>
             <when value="Custom">
                 <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />
-                <conditional name="cond_map">
-                    <param name="yes_no" type="select" label="Custom map? (optional)">
-                        <option value="no" selected="true">no</option>
-                        <option value="yes">yes</option>
-                    </param>
-                    <when value="yes">
-                        <param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/>
-                    </when>
-                    <when value="no">
-                    </when>
-                </conditional>
             </when>
         </conditional>
         <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Gene Expression dataset:" />
@@ -59,6 +48,62 @@
 
 What it does
 -------------
+
+This tool computes Reaction Activity Scores from gene expression (RNA-seq) dataset(s), as described in Graudenzi et al. Integration of transcriptomic data and metabolic networks in cancer samples reveals highly significant prognostic power. Journal of Biomedical Informatics, 2018, 87: 37-49.
+ 
+Accepted files:
+    - A gene expression dataset
+ 
+Format:
+Tab-separated text file reporting the normalized expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column).
+Column header: sample ID.
+Row header: gene ID.
+ 
+ 
+Optional files:
+    - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:
+
+	* (Cobra Toolbox and CobraPy compliant) xml of metabolic model;
+	* .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2).
+ 
+Computation option ‘(A and NaN) solved as (A)’:
+In case of missing expression value, referred to as NaN (Not a Number), for a gene joined with an AND operator in a given GPR rule, the rule ‘A and NaN’
+ 
+If YES is selected: the GPR will be solved as A.
+ 
+If NO is selected: the GPR will be disregarded tout-court (i.e., treated as NaN).
+
+Example input
+-------------
+
+Custom GPR rules:
+
++------------+--------------------------------------+   
+| id         |         rule (with entrez-id         |   
++============+======================================+   
+| r1642      |             155060 or 10357          |   
++------------+--------------------------------------+    
+| r1643      |        155060 or 100134869           |    
++------------+--------------------------------------+    
+| r1640      |     155060 and 100134869 or 10357    |   
++------------+--------------------------------------+
+
+RNA-seq dataset:
+
++------------+----------------+----------------+----------------+ 
+| Hugo_ID    |   TCGAA62670   |   TCGAA62671   |   TCGAA62672   |  
++============+================+================+================+
+| HGNC:24086 |    0.523167    |    0.371355    |    0.925661    |  
++------------+----------------+----------------+----------------+    
+| HGNC:24086 |    0.568765    |    0.765567    |    0.456789    |    
++------------+----------------+----------------+----------------+    
+| HGNC:9876  |    0.876545    |    0.768933    |    0.987654    |  
++------------+----------------+----------------+----------------+
+| HGNC:9     |    0.456788    |    0.876543    |    0.876542    |    
++------------+----------------+----------------+----------------+    
+| HGNC:23    |    0.876543    |    0.786543    |    0.897654    |   
++------------+----------------+----------------+----------------+
+
 ]]>
     </help>
     <expand macro="citations" />