Previous changeset 30:e88efefbd015 (2019-10-15) Next changeset 32:b795e3e163e0 (2019-10-16) |
Commit message:
Uploaded |
modified:
Marea/marea.py Marea/marea.xml Marea/marea_cluster.py Marea/marea_cluster.xml |
removed:
Desktop/Marea/marea.py Desktop/Marea/marea.xml Desktop/Marea/marea_cluster.py Desktop/Marea/marea_cluster.xml Desktop/Marea/marea_macros.xml |
b |
diff -r e88efefbd015 -r 944e15aa970a Desktop/Marea/marea.py --- a/Desktop/Marea/marea.py Tue Oct 15 12:21:16 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,861 +0,0 @@\n-from __future__ import division\n-import sys\n-import pandas as pd\n-import itertools as it\n-import scipy.stats as st\n-import collections\n-import lxml.etree as ET\n-import shutil\n-import pickle as pk\n-import math\n-import os\n-import argparse\n-from svglib.svglib import svg2rlg\n-from reportlab.graphics import renderPDF\n-\n-########################## argparse ##########################################\n-\n-def process_args(args):\n- parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n- description = \'process some value\\\'s\'+\n- \' genes to create a comparison\\\'s map.\')\n- parser.add_argument(\'-rs\', \'--rules_selector\', \n- type = str,\n- default = \'HMRcore\',\n- choices = [\'HMRcore\', \'Recon\', \'Custom\'], \n- help = \'chose which type of dataset you want use\')\n- parser.add_argument(\'-cr\', \'--custom\',\n- type = str,\n- help=\'your dataset if you want custom rules\')\n- parser.add_argument(\'-na\', \'--names\', \n- type = str,\n- nargs = \'+\', \n- help = \'input names\')\n- parser.add_argument(\'-n\', \'--none\',\n- type = str,\n- default = \'true\',\n- choices = [\'true\', \'false\'], \n- help = \'compute Nan values\')\n- parser.add_argument(\'-pv\' ,\'--pValue\', \n- type = float, \n- default = 0.05, \n- help = \'P-Value threshold (default: %(default)s)\')\n- parser.add_argument(\'-fc\', \'--fChange\', \n- type = float, \n- default = 1.5, \n- help = \'Fold-Change threshold (default: %(default)s)\')\n- parser.add_argument(\'-td\', \'--tool_dir\',\n- type = str,\n- required = True,\n- help = \'your tool directory\')\n- parser.add_argument(\'-op\', \'--option\', \n- type = str, \n- choices = [\'datasets\', \'dataset_class\', \'datasets_rasonly\'],\n- help=\'dataset or dataset and class\')\n- parser.add_argument(\'-ol\', \'--out_log\', \n- help = "Output log") \n- parser.add_argument(\'-ids\', \'--input_datas\', \n- type = str,\n- nargs = \'+\', \n- help = \'input datasets\')\n- parser.add_argument(\'-id\', \'--input_data\',\n- type = str,\n- help = \'input dataset\')\n- parser.add_argument(\'-ic\', \'--input_class\', \n- type = str, \n- help = \'sample group specification\')\n- parser.add_argument(\'-cm\', \'--custom_map\', \n- type = str, \n- help = \'custom map\')\n- parser.add_argument(\'-yn\', \'--yes_no\', \n- type = str,\n- choices = [\'yes\', \'no\'],\n- help = \'if make or not custom map\')\n- parser.add_argument(\'-gs\', \'--generate_svg\',\n- type = str,\n- default = \'true\',\n- choices = [\'true\', \'false\'], \n- help = \'generate svg map\')\n- parser.add_argument(\'-gp\', \'--generate_pdf\',\n- type = str,\n- default = \'true\',\n- choices = [\'true\', \'false\'], \n- help = \'generate pdf map\')\n- parser.add_argument(\'-gr\', \'--generate_ras\',\n- type = str,\n- default = \'true\',\n- choices = [\'true\', \'false\'],\n- help = \'generate reaction activity score\')\n- parser.add_argument(\'-sr\', \'--single_ras_file\', \n- '..b'taset.iloc[0, 0], name) \n- \n- if args.rules_selector != \'Custom\':\n- genes = data_gene(dataset, type_gene, name, None)\n- ids, rules = load_id_rules(recon.get(type_gene))\n- elif args.rules_selector == \'Custom\':\n- genes = data_gene(dataset, type_gene, name, gene_in_rule)\n- \n- resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n-\n- create_ras(resolve_rules, name, True)\n- \n- if err != None and err:\n- warning(\'Warning: gene\\n\' + str(err) + \'\\nnot found in class \'\n- + name + \', the expression level for this gene \' +\n- \'will be considered NaN\\n\')\n- \n- print(\'execution succeded\')\n- return None\n- \n- \n- elif args.option == \'datasets\':\n- num = 1\n- for i, j in zip(args.input_datas, args.names):\n-\n- name = name_dataset(j, num)\n- dataset = read_dataset(i, name)\n-\n- dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n-\n- type_gene = gene_type(dataset.iloc[0, 0], name) \n- \n- if args.rules_selector != \'Custom\':\n- genes = data_gene(dataset, type_gene, name, None)\n- ids, rules = load_id_rules(recon.get(type_gene))\n- elif args.rules_selector == \'Custom\':\n- genes = data_gene(dataset, type_gene, name, gene_in_rule)\n- \n- resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n-\n- if generate_ras:\n- create_ras(resolve_rules, name, False)\n- \n- if err != None and err:\n- warning(\'Warning: gene\\n\' + str(err) + \'\\nnot found in class \'\n- + name + \', the expression level for this gene \' +\n- \'will be considered NaN\\n\')\n- if resolve_rules != None:\n- class_pat[name] = list(map(list, zip(*resolve_rules.values())))\n- num += 1\n- elif args.option == \'dataset_class\':\n- name = \'RNAseq\'\n- dataset = read_dataset(args.input_data, name)\n- dataset.iloc[:, 0] = (dataset.iloc[:, 0]).astype(str)\n- type_gene = gene_type(dataset.iloc[0, 0], name)\n- classes = read_dataset(args.input_class, \'class\')\n- if not len(classes.columns) == 2:\n- warning(\'Warning: more than 2 columns in class file. Extra\' +\n- \'columns have been disregarded\\n\')\n- classes = classes.astype(str)\n- if args.rules_selector != \'Custom\':\n- genes = data_gene(dataset, type_gene, name, None)\n- ids, rules = load_id_rules(recon.get(type_gene))\n- elif args.rules_selector == \'Custom\':\n- genes = data_gene(dataset, type_gene, name, gene_in_rule)\n- resolve_rules, err = resolve(genes, rules, ids, resolve_none, name)\n- if err != None and err:\n- warning(\'Warning: gene\\n\'+str(err)+\'\\nnot found in class \'\n- + name + \', the expression level for this gene \' +\n- \'will be considered NaN\\n\')\n- if resolve_rules != None:\n- class_pat = split_class(classes, resolve_rules)\n- \n- \t\n- if args.rules_selector == \'Custom\':\n- if args.yes_no == \'yes\':\n- try:\n- core_map = ET.parse(args.custom_map)\n- except (ET.XMLSyntaxError, ET.XMLSchemaParseError):\n- sys.exit(\'Execution aborted: custom map in wrong format\')\n- elif args.yes_no == \'no\':\n- core_map = ET.parse(args.tool_dir + \'/local/HMRcoreMap.svg\')\n- else: \n- core_map = ET.parse(args.tool_dir+\'/local/HMRcoreMap.svg\')\n- \n- maps(core_map, class_pat, ids, args.pValue, args.fChange, create_svg, create_pdf)\n- \n- print(\'Execution succeded\')\n-\n- return None\n-\n-###############################################################################\n-\n-if __name__ == "__main__":\n- main()\n' |
b |
diff -r e88efefbd015 -r 944e15aa970a Desktop/Marea/marea.xml --- a/Desktop/Marea/marea.xml Tue Oct 15 12:21:16 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,274 +0,0 @@\n-<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.3">\n- <description></description>\n- <macros>\n- <import>marea_macros.xml</import>\n- </macros>\n- <requirements>\n- <requirement type="package" version="0.23.0">pandas</requirement>\n- <requirement type="package" version="1.1.0">scipy</requirement>\n- <requirement type="package" version="0.10.1">cobra</requirement>\n- <requirement type="package" version="4.2.1">lxml</requirement>\n- <requirement type="package" version="0.8.1">svglib</requirement>\n- <requirement type="package" version="3.4.0">reportlab</requirement>\n- </requirements>\n- <command detect_errors="exit_code">\n- <![CDATA[\n- \tpython $__tool_directory__/marea.py\n- --rules_selector $cond_rule.rules_selector\n- #if $cond_rule.rules_selector == \'Custom\':\n- --custom ${cond_rule.Custom_rules}\n- --yes_no ${cond_rule.cond_map.yes_no}\n- #if $cond_rule.cond_map.yes_no == \'yes\':\n- --custom_map $cond_rule.cond_map.Custom_map\n- #end if\n- #end if\n-\t\n- \t--tool_dir $__tool_directory__\n- \t--option $cond.type_selector\n- --out_log $log\t\t\n-\t\n- #if $cond.type_selector == \'datasets\':\n- --input_datas\n- #for $data in $cond.input_Datasets:\n- ${data.input}\n- #end for\n- --names\n- #for $data in $cond.input_Datasets:\n- ${data.input_name}\n- #end for\n- #if $cond.advanced.choice == \'true\':\n- \t --none ${cond.advanced.None}\n- \t --pValue ${cond.advanced.pValue}\n- \t --fChange ${cond.advanced.fChange}\n-\t \t--generate_svg ${cond.advanced.generateSvg}\n-\t \t--generate_pdf ${cond.advanced.generatePdf}\n-\t --generate_ras ${cond.advanced.generateRas}\n-\t#else \n-\t --none true\n-\t --pValue 0.05\n-\t --fChange 1.5\n-\t --generate_svg false\n-\t --generate_pdf true\n-\t --generate_ras false\n-\t#end if\n- #elif $cond.type_selector == \'dataset_class\':\n- --input_data ${input_data}\n- --input_class ${input_class}\n- #if $cond.advanced.choice == \'true\':\n- \t --none ${cond.advanced.None}\n- \t --pValue ${cond.advanced.pValue}\n- \t --fChange ${cond.advanced.fChange}\n-\t --generate_svg ${cond.advanced.generateSvg}\n-\t --generate_pdf ${cond.advanced.generatePdf}\n-\t --generate_ras ${cond.advanced.generateRas}\n-\t#else \n-\t --none true\n-\t --pValue 0.05\n-\t --fChange 1.5\n-\t --generate_svg false\n-\t --generate_pdf true\n-\t --generate_ras false\n-\t#end if\n- #end if\n- #if $cond.type_selector == \'datasets_rasonly\':\n- --input_datas ${input_Datasets}\n- --single_ras_file $ras_single\n- --none ${cond.advanced.None}\n- #end if\n- ]]>\n- </command>\n-\n- <inputs>\n- <conditional name="cond_rule">\n- <expand macro="options"/>\n- <when value="HMRcore">\n- </when>\n- <when value="Recon">\n- </when>\n- <when value="Custom">\n- <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />\n- <conditional name="cond_map">\n- <param name="yes_no" type="select" label="Custom map? (optional)">\n- <option value="no" selected="true">no</option>\n- <option value="yes">yes</option>\n- </param>\n- <when value="yes">\n- <param name="Custom_map" argument="--custom_map" type="data" format="xml, svg" label="custom-map.svg"/>\n- </when>\n- <when value="no">\n- </when>\n- </conditional>\n- </when>\n- </conditional>\n- <conditional name="cond">\n- <param name="type_selector" '..b'ctor\'] == "datasets_rasonly"</filter>\n- </data>\n- <collection name="results" type="list" label="MaREA - Results">\n- <filter>cond[\'type_selector\'] == "datasets" or cond[\'type_selector\'] == "dataset_class"</filter>\n- <discover_datasets pattern="__name_and_ext__" directory="result"/>\n- </collection>\n-\t<collection name="ras" type="list" label="MaREA - RAS list" format_source="tabular">\n-\t <filter>cond[\'type_selector\'] != "datasets_rasonly" and cond[\'advanced\'][\'choice\'] and cond[\'advanced\'][\'generateRas\']</filter>\n- \t <discover_datasets pattern="__name_and_ext__" directory="ras" format="tabular"/>\n-\t</collection>\n-\t\n- </outputs>\n- <tests>\n- <test>\n- <param name="pValue" value="0.56"/>\n- <output name="log" file="log.txt"/>\n- </test>\n- </tests>\n- <help>\n-<![CDATA[\n-\n-What it does\n--------------\n-\n-This tool analyzes RNA-seq dataset(s) as described in Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.\n-\n-Accepted files are: \n- - option 1) two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. "*classA*" and "*classB*");\n- - option 2) one RNA dataset and one class-file specifying the class/condition each sample belongs to.\n-\n-Optional files:\n- - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:\n-\n-\t* (Cobra Toolbox and CobraPy compliant) xml of metabolic model;\n-\t* .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2).\n- - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example.\n-\n-The tool generates:\n- 1) a tab-separated file: reporting fold-change and p-values of reaction activity scores (RASs) between a pair of conditions/classes;\n- 2) a metabolic map file (downlodable as .svg): visualizing up- and down-regulated reactions between a pair of conditions/classes;\n- 3) a log file (.txt).\n-\n-RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.\n-\n-Class-file format: each row of the class-file reports the sample ID (column1) and the label of the class/condition the sample belongs to (column 2).\n-\n-To calculate P-Values and Fold-Changes and to generate maps, comparisons are performed for each possible pair of classes.\n-\n-Output files will be named as classA_vs_classB. Reactions will conventionally be reported as up-regulated (down-regulated) if they are significantly more (less) active in class having label "classA".\n-\n-\n-Example input\n--------------\n-\n-**"Custom Rules"** option:\n-\n-Custom Rules Dastaset:\n-\n-@CUSTOM_RULES_EXEMPLE@\n-\n-**"RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N"** option:\n-\n-RNA-seq Dataset 1:\t\t\t\t\t\t\n-\n-@DATASET_EXEMPLE1@\n-\n-RNA-seq Dataset 2:\n-\n-@DATASET_EXEMPLE2@\n-\n-**"RNAseq of all samples + sample group specification"** option:\n-\n-RNA-seq Dataset:\n-\n-@DATASET_EXEMPLE1@\n-\n-Class-file:\n-\n-+------------+------------+ \n-| Patient_ID | class | \n-+============+============+ \n-| TCGAAA3529 | MSI | \n-+------------+------------+ \n-| TCGAA62671 | MSS | \n-+------------+------------+ \n-| TCGAA62672 | MSI | \n-+------------+------------+\n-\n-|\n-\n-.. class:: infomark\n-\n-**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.\n-\n-.. class:: infomark\n-\n-**TIP**: If your dataset is not split into classes, use `MaREA cluster analysis`_.\n-\n-@REFERENCE@\n-\n-.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724\n-.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj\n-.. _MaREA cluster analysis: http://link del tool di cluster.org\n-\n-]]>\n- </help>\n- <expand macro="citations" />\n-</tool>\n-\t\n' |
b |
diff -r e88efefbd015 -r 944e15aa970a Desktop/Marea/marea_cluster.py --- a/Desktop/Marea/marea_cluster.py Tue Oct 15 12:21:16 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,401 +0,0 @@\n-# -*- coding: utf-8 -*-\n-"""\n-Created on Mon Jun 3 19:51:00 2019\n-@author: Narger\n-"""\n-\n-import sys\n-import argparse\n-import os\n-from sklearn.datasets import make_blobs\n-from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering\n-from sklearn.metrics import silhouette_samples, silhouette_score, davies_bouldin_score, cluster\n-import matplotlib\n-matplotlib.use(\'agg\')\n-import matplotlib.pyplot as plt\n-import scipy.cluster.hierarchy as shc \n-import matplotlib.cm as cm\n-import numpy as np\n-import pandas as pd\n-\n-################################# process args ###############################\n-\n-def process_args(args):\n- parser = argparse.ArgumentParser(usage = \'%(prog)s [options]\',\n- description = \'process some value\\\'s\' +\n- \' genes to create class.\')\n-\n- parser.add_argument(\'-ol\', \'--out_log\', \n- help = "Output log")\n- \n- parser.add_argument(\'-in\', \'--input\',\n- type = str,\n- help = \'input dataset\')\n- \n- parser.add_argument(\'-cy\', \'--cluster_type\',\n- type = str,\n- choices = [\'kmeans\', \'meanshift\', \'dbscan\', \'hierarchy\'],\n- default = \'kmeans\',\n- help = \'choose clustering algorythm\')\n- \n- parser.add_argument(\'-k1\', \'--k_min\', \n- type = int,\n- default = 2,\n- help = \'choose minimun cluster number to be generated\')\n- \n- parser.add_argument(\'-k2\', \'--k_max\', \n- type = int,\n- default = 7,\n- help = \'choose maximum cluster number to be generated\')\n- \n- parser.add_argument(\'-el\', \'--elbow\', \n- type = str,\n- default = \'false\',\n- choices = [\'true\', \'false\'],\n- help = \'choose if you want to generate an elbow plot for kmeans\')\n- \n- parser.add_argument(\'-si\', \'--silhouette\', \n- type = str,\n- default = \'false\',\n- choices = [\'true\', \'false\'],\n- help = \'choose if you want silhouette plots\')\n- \n- parser.add_argument(\'-db\', \'--davies\', \n- type = str,\n- default = \'false\',\n- choices = [\'true\', \'false\'],\n- help = \'choose if you want davies bouldin scores\')\n- \n- parser.add_argument(\'-td\', \'--tool_dir\',\n- type = str,\n- required = True,\n- help = \'your tool directory\')\n- \n- parser.add_argument(\'-ms\', \'--min_samples\',\n- type = int,\n- help = \'min samples for dbscan (optional)\')\n- \n- parser.add_argument(\'-ep\', \'--eps\',\n- type = int,\n- help = \'eps for dbscan (optional)\')\n- \n- parser.add_argument(\'-bc\', \'--best_cluster\',\n- type = str,\n- help = \'output of best cluster tsv\')\n- \t\t\t\t\n- \n- \n- args = parser.parse_args()\n- return args\n-\n-########################### warning ###########################################\n-\n-def warning(s):\n- args = process_args(sys.argv)\n- with open(args.out_log, \'a\') as log:\n- log.write(s + "\\n\\n")\n- print(s)\n-\n-########################## read dataset ######################################\n- \n-def read_dataset(dataset):\n- try:\n- dataset = pd.read_csv(dataset, sep = \'\\t\', header = 0)\n- except pd.errors.EmptyDataError:\n- sys.exit(\'Execution aborted: wrong format of dataset\\n\')\n- if len(dataset.columns) < 2:\n- sys.exit(\'Execution aborted: wrong format of dataset\\n\')\n- return datase'..b' # Label the silhouette plots with their cluster numbers at the middle\n- ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))\n- \n- # Compute the new y_lower for next plot\n- y_lower = y_upper + 10 # 10 for the 0 samples\n- \n- ax1.set_title("The silhouette plot for the various clusters.")\n- ax1.set_xlabel("The silhouette coefficient values")\n- ax1.set_ylabel("Cluster label")\n- \n- # The vertical line for average silhouette score of all the values\n- ax1.axvline(x=silhouette_avg, color="red", linestyle="--")\n- \n- ax1.set_yticks([]) # Clear the yaxis labels / ticks\n- ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])\n- \n- \n- plt.suptitle(("Silhouette analysis for clustering on sample data "\n- "with n_clusters = " + str(n_clusters) + "\\nAverage silhouette_score = " + str(silhouette_avg)), fontsize=12, fontweight=\'bold\')\n- \n- \n- plt.savefig(path, bbox_inches=\'tight\')\n- \n-######################## dbscan ##############################################\n- \n-def dbscan(dataset, eps, min_samples):\n- if not os.path.exists(\'clustering\'):\n- os.makedirs(\'clustering\')\n- \n- if eps is not None:\n- \tclusterer = DBSCAN(eps = eps, min_samples = min_samples)\n- else:\n- \tclusterer = DBSCAN()\n- \n- clustering = clusterer.fit(dataset)\n- \n- core_samples_mask = np.zeros_like(clustering.labels_, dtype=bool)\n- core_samples_mask[clustering.core_sample_indices_] = True\n- labels = clustering.labels_\n-\n- # Number of clusters in labels, ignoring noise if present.\n- n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)\n- \n- \n- ##TODO: PLOT SU DBSCAN (no centers) e HIERARCHICAL\n- \n- \n- write_to_csv(dataset, labels, \'clustering/dbscan_results.tsv\')\n- \n-########################## hierachical #######################################\n- \n-def hierachical_agglomerative(dataset, k_min, k_max):\n-\n- if not os.path.exists(\'clustering\'):\n- os.makedirs(\'clustering\')\n- \n- plt.figure(figsize=(10, 7)) \n- plt.title("Customer Dendograms") \n- shc.dendrogram(shc.linkage(dataset, method=\'ward\')) \n- fig = plt.gcf()\n- fig.savefig(\'clustering/dendogram.png\', dpi=200)\n- \n- range_n_clusters = [i for i in range(k_min, k_max+1)]\n-\n- for n_clusters in range_n_clusters:\n- \n- cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity=\'euclidean\', linkage=\'ward\') \n- cluster.fit_predict(dataset) \n- cluster_labels = cluster.labels_\n- \n- silhouette_avg = silhouette_score(dataset, cluster_labels)\n- write_to_csv(dataset, cluster_labels, \'clustering/hierarchical_with_\' + str(n_clusters) + \'_clusters.tsv\')\n- #warning("For n_clusters =", n_clusters,\n- #"The average silhouette_score is :", silhouette_avg)\n- \n- \n- \n-\n- \n-############################# main ###########################################\n-\n-\n-def main():\n- if not os.path.exists(\'clustering\'):\n- os.makedirs(\'clustering\')\n-\n- args = process_args(sys.argv)\n- \n- #Data read\n- \n- X = read_dataset(args.input)\n- X = pd.DataFrame.to_dict(X, orient=\'list\')\n- X = rewrite_input(X)\n- X = pd.DataFrame.from_dict(X, orient = \'index\')\n- \n- for i in X.columns:\n- tmp = X[i][0]\n- if tmp == None:\n- X = X.drop(columns=[i])\n- \n- \n- if args.cluster_type == \'kmeans\':\n- kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.davies, args.best_cluster)\n- \n- if args.cluster_type == \'dbscan\':\n- dbscan(X, args.eps, args.min_samples)\n- \n- if args.cluster_type == \'hierarchy\':\n- hierachical_agglomerative(X, args.k_min, args.k_max)\n- \n-##############################################################################\n-\n-if __name__ == "__main__":\n- main()\n' |
b |
diff -r e88efefbd015 -r 944e15aa970a Desktop/Marea/marea_cluster.xml --- a/Desktop/Marea/marea_cluster.xml Tue Oct 15 12:21:16 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,95 +0,0 @@ -<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.6"> - <description></description> - <macros> - <import>marea_macros.xml</import> - </macros> - <requirements> - <requirement type="package" version="0.25.1">pandas</requirement> - <requirement type="package" version="1.1.0">scipy</requirement> - <requirement type="package" version="0.10.1">cobra</requirement> - <requirement type="package" version="0.21.3">scikit-learn</requirement> - <requirement type="package" version="2.2.2">matplotlib</requirement> - <requirement type="package" version="1.17">numpy</requirement> - </requirements> - <command detect_errors="exit_code"> - <![CDATA[ - python $__tool_directory__/marea_cluster.py - --input $input - --tool_dir $__tool_directory__ - --out_log $log - --best_cluster $best_cluster - --cluster_type ${data.clust_type} - #if $data.clust_type == 'kmeans': - --k_min ${data.k_min} - --k_max ${data.k_max} - --elbow ${data.elbow} - --silhouette ${data.silhouette} - #end if - #if $data.clust_type == 'dbscan': - #if $data.dbscan_advanced.advanced == 'true' - --eps ${data.dbscan_advanced.eps} - --min_samples ${data.dbscan_advanced.min_samples} - #end if - #end if - #if $data.clust_type == 'hierarchy': - --k_min ${data.k_min} - --k_max ${data.k_max} - #end if - ]]> - </command> - <inputs> - <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" /> - - <conditional name="data"> - <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:"> - <option value="kmeans" selected="true">KMeans</option> - <option value="dbscan">DBSCAN</option> - <option value="hierarchy">Agglomerative Hierarchical</option> - </param> - <when value="kmeans"> - <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" /> - <param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" /> - <param name="elbow" argument="--elbow" type="boolean" value="true" label="Draw the elbow plot from k-min to k-max"/> - <param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/> - </when> - <when value="dbscan"> - <conditional name="dbscan_advanced"> - <param name="advanced" type="boolean" value="false" label="Want to use custom params for DBSCAN? (if not optimal values will be used)"> - <option value="true">Yes</option> - <option value="false">No</option> - </param> - <when value="false"></when> - <when value="true"> - <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" /> - <param name="min_samples" argument="min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/> - - </when> - </conditional> - </when> - <when value="hierarchy"> - <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" /> - <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" /> - </when> - </conditional> - </inputs> - - <outputs> - <data format="txt" name="log" label="${tool.name} - Log" /> - <data format="tabular" name="best_cluster" label="${tool.name} - Best cluster" /> - <collection name="results" type="list" label="${tool.name} - Plots and results"> - <discover_datasets pattern="__name_and_ext__" directory="clustering"/> - </collection> - </outputs> - <help> -<![CDATA[ - -What it does -------------- - - -]]> - </help> - <expand macro="citations" /> -</tool> - - |
b |
diff -r e88efefbd015 -r 944e15aa970a Desktop/Marea/marea_macros.xml --- a/Desktop/Marea/marea_macros.xml Tue Oct 15 12:21:16 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,92 +0,0 @@ -<macros> - - <xml name="options"> - <param name="rules_selector" argument="--rules_selector" type="select" label="Gene-Protein-Reaction rules:"> - <option value="HMRcore" selected="true">HMRcore rules</option> - <option value="Recon">Recon 2.2 rules</option> - <option value="Custom">Custom rules</option> - </param> - </xml> - - <token name="@CUSTOM_RULES_EXEMPLE@"> - -+--------------------+-------------------------------+ -| id | rule (with entrez-id) | -+====================+===============================+ -| SHMT1 | 155060 or 10357 | -+--------------------+-------------------------------+ -| NIT2 | 155060 or 100134869 | -+--------------------+-------------------------------+ -| GOT1_GOT2_GOT1L1_2 | 155060 and 100134869 or 10357 | -+--------------------+-------------------------------+ - -| - - </token> - - <token name="@DATASET_EXEMPLE1@"> - -+------------+------------+------------+------------+ -| Hugo_ID | TCGAA62670 | TCGAA62671 | TCGAA62672 | -+============+============+============+============+ -| HGNC:24086 | 0.523167 | 0.371355 | 0.925661 | -+------------+------------+------------+------------+ -| HGNC:24086 | 0.568765 | 0.765567 | 0.456789 | -+------------+------------+------------+------------+ -| HGNC:9876 | 0.876545 | 0.768933 | 0.987654 | -+------------+------------+------------+------------+ -| HGNC:9 | 0.456788 | 0.876543 | 0.876542 | -+------------+------------+------------+------------+ -| HGNC:23 | 0.876543 | 0.786543 | 0.897654 | -+------------+------------+------------+------------+ - -| - - </token> - - <token name="@DATASET_EXEMPLE2@"> - -+-------------+------------+------------+------------+ -| Hugo_Symbol | TCGAA62670 | TCGAA62671 | TCGAA62672 | -+=============+============+============+============+ -| A1BG | 0.523167 | 0.371355 | 0.925661 | -+-------------+------------+------------+------------+ -| A1CF | 0.568765 | 0.765567 | 0.456789 | -+-------------+------------+------------+------------+ -| A2M | 0.876545 | 0.768933 | 0.987654 | -+-------------+------------+------------+------------+ -| A4GALT | 0.456788 | 0.876543 | 0.876542 | -+-------------+------------+------------+------------+ -| M664Y65 | 0.876543 | 0.786543 | 0.897654 | -+-------------+------------+------------+------------+ - -| - - </token> - - <token name="@REFERENCE@"> - -This tool is developed by the `BIMIB`_ at the `Department of Informatics, Systems and Communications`_ of `University of Milan - Bicocca`_. - -.. _BIMIB: http://sito di bio.org -.. _Department of Informatics, Systems and Communications: http://www.disco.unimib.it/go/Home/English -.. _University of Milan - Bicocca: https://www.unimib.it/ - - </token> - - <xml name="citations"> - <citations> <!--esempio di citazione--> - <citation type="bibtex"> -@online{lh32017, - author = {Alex Graudenzi, Davide Maspero, Cluadio Isella, Marzia Di Filippo, Giancarlo Mauri, Enzo Medico, Marco Antoniotti, Chiara Damiani}, - year = {2018}, - title = {MaREA: Metabolic feature extraction, enrichment and visualization of RNAseq}, - publisher = {bioRxiv}, - journal = {bioRxiv}, - url = {https://www.biorxiv.org/content/early/2018/01/16/248724}, -} - </citation> - </citations> - </xml> - -</macros> |
b |
diff -r e88efefbd015 -r 944e15aa970a Marea/marea.py --- a/Marea/marea.py Tue Oct 15 12:21:16 2019 -0400 +++ b/Marea/marea.py Tue Oct 15 12:22:43 2019 -0400 |
[ |
@@ -709,7 +709,7 @@ tab = 'result/' + i + '_vs_' + j + ' (Tabular Result).tsv' tmp_csv = pd.DataFrame.from_dict(tmp, orient = "index") tmp_csv = tmp_csv.reset_index() - header = ['ids', 'P_Value', 'Average'] + header = ['ids', 'P_Value', 'Log2(fold change)'] tmp_csv.to_csv(tab, sep = '\t', index = False, header = header) if create_svg or create_pdf: |
b |
diff -r e88efefbd015 -r 944e15aa970a Marea/marea.xml --- a/Marea/marea.xml Tue Oct 15 12:21:16 2019 -0400 +++ b/Marea/marea.xml Tue Oct 15 12:22:43 2019 -0400 |
[ |
b'@@ -1,4 +1,4 @@\n-<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.2">\n+<tool id="MaREA" name="Metabolic Reaction Enrichment Analysis" version="1.0.3">\n <description></description>\n <macros>\n <import>marea_macros.xml</import>\n@@ -22,21 +22,7 @@\n --custom_map $cond_rule.cond_map.Custom_map\n #end if\n #end if\n-\t#if $advanced.choice == \'true\':\n- \t --none ${advanced.None}\n- \t --pValue ${advanced.pValue}\n- \t --fChange ${advanced.fChange}\n-\t --generate_svg ${advanced.generateSvg}\n-\t --generate_pdf ${advanced.generatePdf}\n-\t --generate_ras ${advanced.generateRas}\n-\t#else \n-\t --none true\n-\t --pValue 0.05\n-\t --fChange 1.5\n-\t --generate_svg false\n-\t --generate_pdf true\n-\t --generate_ras false\n-\t#end if\n+\t\n \t--tool_dir $__tool_directory__\n \t--option $cond.type_selector\n --out_log $log\t\t\n@@ -50,13 +36,44 @@\n #for $data in $cond.input_Datasets:\n ${data.input_name}\n #end for\n+ #if $cond.advanced.choice == \'true\':\n+ \t --none ${cond.advanced.None}\n+ \t --pValue ${cond.advanced.pValue}\n+ \t --fChange ${cond.advanced.fChange}\n+\t \t--generate_svg ${cond.advanced.generateSvg}\n+\t \t--generate_pdf ${cond.advanced.generatePdf}\n+\t --generate_ras ${cond.advanced.generateRas}\n+\t#else \n+\t --none true\n+\t --pValue 0.05\n+\t --fChange 1.5\n+\t --generate_svg false\n+\t --generate_pdf true\n+\t --generate_ras false\n+\t#end if\n #elif $cond.type_selector == \'dataset_class\':\n --input_data ${input_data}\n --input_class ${input_class}\n+ #if $cond.advanced.choice == \'true\':\n+ \t --none ${cond.advanced.None}\n+ \t --pValue ${cond.advanced.pValue}\n+ \t --fChange ${cond.advanced.fChange}\n+\t --generate_svg ${cond.advanced.generateSvg}\n+\t --generate_pdf ${cond.advanced.generatePdf}\n+\t --generate_ras ${cond.advanced.generateRas}\n+\t#else \n+\t --none true\n+\t --pValue 0.05\n+\t --fChange 1.5\n+\t --generate_svg false\n+\t --generate_pdf true\n+\t --generate_ras false\n+\t#end if\n #end if\n #if $cond.type_selector == \'datasets_rasonly\':\n --input_datas ${input_Datasets}\n --single_ras_file $ras_single\n+ --none ${cond.advanced.None}\n #end if\n ]]>\n </command>\n@@ -94,48 +111,66 @@\n <param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" />\t\n <param name="input_name" argument="--names" type="text" label="Dataset\'s name:" value="Dataset" help="Default: Dataset" />\n </repeat>\n+ <conditional name="advanced">\n+\t\t\t\t\t<param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom rules for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps.">\n+\t\t \t\t\t<option value="true" selected="true">No</option>\n+\t\t \t\t\t<option value="false">Yes</option>\n+\t\t\t\t\t</param>\n+\t\t\t\t\t<when value="false">\n+\t\t\t\t\t</when>\n+\t\t\t\t\t<when value="true">\n+\t\t \t\t\t<param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> \n+\t\t \t\t\t<param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" />\n+\t\t \t\t\t<param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" />\n+\t\t \t\t\t<param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />\n+\t\t \t\t\t<param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the pro'..b'one" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> \n+\t\t \t\t\t<param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" />\n+\t\t \t\t\t<param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" />\n+\t\t \t\t\t<param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />\n+\t\t \t\t\t<param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />\t\n+\t\t \t\t\t<param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" />\t\t\n+\t\t\t\t\t</when>\n+ \t</conditional>\n </when>\n </conditional>\n \n- \n- <!--TODO: NASCONDERE ADVANCED SE RAS ONLY-->\n+ \n \n-\t<conditional name="advanced">\n-\t\t<param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom rules for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps.">\n-\t\t <option value="true" selected="true">No</option>\n-\t\t <option value="false">Yes</option>\n-\t\t</param>\n-\t\t<when value="false">\n-\t\t</when>\n-\t\t<when value="true">\n-\t\t <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> \n-\t\t <param name="pValue" argument="--pValue" type="float" size="20" value="0.05" max="1" min="0" label="P-value threshold:" help="min value 0" />\n-\t\t <param name="fChange" argument="--fChange" type="float" size="20" value="1.5" min="1" label="Fold-Change threshold:" help="min value 1" />\n-\t\t <param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />\n-\t\t <param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />\t\n-\t\t <param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" />\t\t\n-\t\t</when>\n- \t</conditional>\n+\t\n </inputs>\n \n <outputs>\n- <data format="txt" name="log" label="${tool.name} - Log" />\n- <data format="tabular" name="ras_single" label="${tool.name} - RAS">\n+ <data format="txt" name="log" label="MaREA - Log" />\n+ <data format="tabular" name="ras_single" label="MaREA - RAS - ${cond.input_name}">\n \t<filter>cond[\'type_selector\'] == "datasets_rasonly"</filter>\n </data>\n- <collection name="results" type="list" label="${tool.name} - Results">\n+ <collection name="results" type="list" label="MaREA - Results">\n <filter>cond[\'type_selector\'] == "datasets" or cond[\'type_selector\'] == "dataset_class"</filter>\n <discover_datasets pattern="__name_and_ext__" directory="result"/>\n </collection>\n-\t<collection name="ras" type="list" label="${tool.name} - RAS list" format_source="tabular">\n-\t <filter>advanced[\'choice\'] and advanced[\'generateRas\']</filter>\n+\t<collection name="ras" type="list" label="MaREA - RAS list" format_source="tabular">\n+\t <filter>cond[\'type_selector\'] != "datasets_rasonly" and cond[\'advanced\'][\'choice\'] and cond[\'advanced\'][\'generateRas\']</filter>\n \t <discover_datasets pattern="__name_and_ext__" directory="ras" format="tabular"/>\n \t</collection>\n \t\n' |
b |
diff -r e88efefbd015 -r 944e15aa970a Marea/marea_cluster.py --- a/Marea/marea_cluster.py Tue Oct 15 12:21:16 2019 -0400 +++ b/Marea/marea_cluster.py Tue Oct 15 12:22:43 2019 -0400 |
[ |
@@ -176,6 +176,10 @@ scores = [] all_labels = [] + clusterer = KMeans(n_clusters=1, random_state=10) + distortions.append(clusterer.fit(dataset).inertia_) + + for n_clusters in range_n_clusters: clusterer = KMeans(n_clusters=n_clusters, random_state=10) cluster_labels = clusterer.fit_predict(dataset) @@ -227,8 +231,10 @@ def elbow_plot (distortions, k_min, k_max): plt.figure(0) - plt.plot(range(k_min, k_max+1), distortions, marker = 'o') - plt.xlabel('Number of cluster') + x = list(range(k_min, k_max + 1)) + x.insert(0, 1) + plt.plot(x, distortions, marker = 'o') + plt.xlabel('Number of clusters (k)') plt.ylabel('Distortion') s = 'clustering/elbow_plot.png' fig = plt.gcf() |
b |
diff -r e88efefbd015 -r 944e15aa970a Marea/marea_cluster.xml --- a/Marea/marea_cluster.xml Tue Oct 15 12:21:16 2019 -0400 +++ b/Marea/marea_cluster.xml Tue Oct 15 12:22:43 2019 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.5"> +<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.6"> <description></description> <macros> <import>marea_macros.xml</import> @@ -38,7 +38,7 @@ ]]> </command> <inputs> - <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" /> + <param name="input" argument="--input" type="data" format="tabular, csv, tsv" label="Input dataset" /> <conditional name="data"> <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:"> @@ -47,8 +47,8 @@ <option value="hierarchy">Agglomerative Hierarchical</option> </param> <when value="kmeans"> - <param name="k_min" argument="--k_min" type="integer" min="1" max="99" value="3" label="Min number of clusters (k) to be tested" /> - <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" /> + <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" /> + <param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" /> <param name="elbow" argument="--elbow" type="boolean" value="true" label="Draw the elbow plot from k-min to k-max"/> <param name="silhouette" argument="--silhouette" type="boolean" value="true" label="Draw the Silhouette plot from k-min to k-max"/> </when> |