Previous changeset 32:b795e3e163e0 (2019-10-16) Next changeset 34:1a97d1537623 (2019-10-26) |
Commit message:
Uploaded |
modified:
Marea/marea.xml Marea/marea_cluster.py Marea/marea_cluster.xml |
b |
diff -r b795e3e163e0 -r abf0bfe01c78 Marea/marea.xml --- a/Marea/marea.xml Wed Oct 16 07:12:37 2019 -0400 +++ b/Marea/marea.xml Wed Oct 16 16:25:56 2019 -0400 |
[ |
b'@@ -22,11 +22,11 @@\n --custom_map $cond_rule.cond_map.Custom_map\n #end if\n #end if\n-\n+\t\n \t--tool_dir $__tool_directory__\n \t--option $cond.type_selector\n- --out_log $log\n-\n+ --out_log $log\t\t\n+\t\n #if $cond.type_selector == \'datasets\':\n --input_datas\n #for $data in $cond.input_Datasets:\n@@ -43,7 +43,7 @@\n \t \t--generate_svg ${cond.advanced.generateSvg}\n \t \t--generate_pdf ${cond.advanced.generatePdf}\n \t --generate_ras ${cond.advanced.generateRas}\n-\t#else\n+\t#else \n \t --none true\n \t --pValue 0.05\n \t --fChange 1.5\n@@ -61,7 +61,7 @@\n \t --generate_svg ${cond.advanced.generateSvg}\n \t --generate_pdf ${cond.advanced.generatePdf}\n \t --generate_ras ${cond.advanced.generateRas}\n-\t#else\n+\t#else \n \t --none true\n \t --pValue 0.05\n \t --fChange 1.5\n@@ -73,7 +73,7 @@\n #if $cond.type_selector == \'datasets_rasonly\':\n --input_datas ${input_Datasets}\n --single_ras_file $ras_single\n- --none ${None}\n+ --none ${cond.None}\n #end if\n ]]>\n </command>\n@@ -108,56 +108,56 @@\n </param>\n <when value="datasets">\n <repeat name="input_Datasets" title="RNAseq" min="2">\n- <param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" />\n+ <param name="input" argument="--input_datas" type="data" format="tabular, csv, tsv" label="add dataset" />\t\n <param name="input_name" argument="--names" type="text" label="Dataset\'s name:" value="Dataset" help="Default: Dataset" />\n </repeat>\n <conditional name="advanced">\n-\t\t\t\t\t<param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom rules for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps.">\n+\t\t\t\t\t<param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom parameters for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps.">\n \t\t \t\t\t<option value="true" selected="true">No</option>\n \t\t \t\t\t<option value="false">Yes</option>\n \t\t\t\t\t</param>\n \t\t\t\t\t<when value="false">\n \t\t\t\t\t</when>\n \t\t\t\t\t<when value="true">\n-\t\t \t\t\t<param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" />\n+\t\t \t\t\t<param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> \n \t\t \t\t\t<param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" />\n \t\t \t\t\t<param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" />\n \t\t \t\t\t<param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />\n-\t\t \t\t\t<param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />\n-\t\t \t\t\t<param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" />\n+\t\t \t\t\t<param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />\t\n+\t\t \t\t\t<param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="G'..b'tions to choose custom rules for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps.">\n+\t\t\t\t\t<param name="choice" type="boolean" checked="false" label="Use advanced options?" help="Use this options to choose custom parameters for evaluation: pValue, Fold-Change threshold, how to solve (A and NaN) and specify output maps.">\n \t\t \t\t\t<option value="true" selected="true">No</option>\n \t\t \t\t\t<option value="false">Yes</option>\n \t\t\t\t\t</param>\n \t\t\t\t\t<when value="false">\n \t\t\t\t\t</when>\n \t\t\t\t\t<when value="true">\n-\t\t \t\t\t<param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" />\n+\t\t \t\t\t<param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" /> \n \t\t \t\t\t<param name="pValue" argument="--pValue" type="float" size="20" value="0.01" max="1" min="0" label="P-value threshold:" help="min value 0" />\n \t\t \t\t\t<param name="fChange" argument="--fChange" type="float" size="20" value="1.2" min="1" label="Fold-Change threshold:" help="min value 1" />\n \t\t \t\t\t<param name="generateSvg" argument="--generateSvg" type="boolean" checked="false" label="Generate SVG map" help="should the program generate an editable svg map of the processes?" />\n-\t\t \t\t\t<param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />\n-\t\t \t\t\t<param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" />\n+\t\t \t\t\t<param name="generatePdf" argument="--generatePdf" type="boolean" checked="true" label="Generate PDF map" help="should the program return a non editable (but displayble) pdf map of the processes?" />\t\n+\t\t \t\t\t<param name="generateRas" argument="--generateRas" type="boolean" checked="false" label="Generate Reaction Activity Score for each table" help="Generate Reaction Activity Score for each table" />\t\t\n \t\t\t\t\t</when>\n \t</conditional>\n </when>\n </conditional>\n-\n-\n-\n-\n+ \n+ \n+ \n+\t\n </inputs>\n \n <outputs>\n@@ -173,7 +173,7 @@\n \t <filter>cond[\'type_selector\'] != "datasets_rasonly" and cond[\'advanced\'][\'choice\'] and cond[\'advanced\'][\'generateRas\']</filter>\n \t <discover_datasets pattern="__name_and_ext__" directory="ras" format="tabular"/>\n \t</collection>\n-\n+\t\n </outputs>\n <tests>\n <test>\n@@ -189,7 +189,7 @@\n \n This tool analyzes RNA-seq dataset(s) as described in Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.\n \n-Accepted files are:\n+Accepted files are: \n - option 1) two or more RNA-seq datasets, each referring to samples in a given condition/class. The user can specify a label for each class (as e.g. "*classA*" and "*classB*");\n - option 2) one RNA dataset and one class-file specifying the class/condition each sample belongs to.\n \n@@ -225,7 +225,7 @@\n \n **"RNAseq of group 1 + RNAseq of group 2 + ... + RNAseq of group N"** option:\n \n-RNA-seq Dataset 1:\n+RNA-seq Dataset 1:\t\t\t\t\t\t\n \n @DATASET_EXEMPLE1@\n \n@@ -241,14 +241,14 @@\n \n Class-file:\n \n-+------------+------------+\n-| Patient_ID | class |\n-+============+============+\n-| TCGAAA3529 | MSI |\n-+------------+------------+\n-| TCGAA62671 | MSS |\n-+------------+------------+\n-| TCGAA62672 | MSI |\n++------------+------------+ \n+| Patient_ID | class | \n++============+============+ \n+| TCGAAA3529 | MSI | \n++------------+------------+ \n+| TCGAA62671 | MSS | \n++------------+------------+ \n+| TCGAA62672 | MSI | \n +------------+------------+\n \n |\n@@ -271,3 +271,4 @@\n </help>\n <expand macro="citations" />\n </tool>\n+\t\n' |
b |
diff -r b795e3e163e0 -r abf0bfe01c78 Marea/marea_cluster.py --- a/Marea/marea_cluster.py Wed Oct 16 07:12:37 2019 -0400 +++ b/Marea/marea_cluster.py Wed Oct 16 16:25:56 2019 -0400 |
[ |
@@ -72,11 +72,11 @@ help = 'your tool directory') parser.add_argument('-ms', '--min_samples', - type = int, + type = float, help = 'min samples for dbscan (optional)') parser.add_argument('-ep', '--eps', - type = int, + type = float, help = 'eps for dbscan (optional)') parser.add_argument('-bc', '--best_cluster', @@ -310,7 +310,7 @@ ######################## dbscan ############################################## -def dbscan(dataset, eps, min_samples): +def dbscan(dataset, eps, min_samples, best_cluster): if not os.path.exists('clustering'): os.makedirs('clustering') @@ -331,12 +331,15 @@ ##TODO: PLOT SU DBSCAN (no centers) e HIERARCHICAL - - write_to_csv(dataset, labels, 'clustering/dbscan_results.tsv') + labels = labels + predict = [x+1 for x in labels] + classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str) + classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class']) + ########################## hierachical ####################################### -def hierachical_agglomerative(dataset, k_min, k_max): +def hierachical_agglomerative(dataset, k_min, k_max, best_cluster): if not os.path.exists('clustering'): os.makedirs('clustering') @@ -349,16 +352,28 @@ range_n_clusters = [i for i in range(k_min, k_max+1)] - for n_clusters in range_n_clusters: - + scores = [] + labels = [] + for n_clusters in range_n_clusters: cluster = AgglomerativeClustering(n_clusters=n_clusters, affinity='euclidean', linkage='ward') cluster.fit_predict(dataset) cluster_labels = cluster.labels_ - + labels.append(cluster_labels) silhouette_avg = silhouette_score(dataset, cluster_labels) write_to_csv(dataset, cluster_labels, 'clustering/hierarchical_with_' + str(n_clusters) + '_clusters.tsv') + scores.append(silhouette_avg) #warning("For n_clusters =", n_clusters, #"The average silhouette_score is :", silhouette_avg) + + best = max_index(scores) + k_min + + for i in range(len(labels)): + if (i + k_min == best): + labels = labels[i] + predict = [x+1 for x in labels] + classe = (pd.DataFrame(list(zip(dataset.index, predict)))).astype(str) + classe.to_csv(best_cluster, sep = '\t', index = False, header = ['Patient_ID', 'Class']) + @@ -390,10 +405,10 @@ kmeans(args.k_min, args.k_max, X, args.elbow, args.silhouette, args.davies, args.best_cluster) if args.cluster_type == 'dbscan': - dbscan(X, args.eps, args.min_samples) + dbscan(X, args.eps, args.min_samples, args.best_cluster) if args.cluster_type == 'hierarchy': - hierachical_agglomerative(X, args.k_min, args.k_max) + hierachical_agglomerative(X, args.k_min, args.k_max, args.best_cluster) ############################################################################## |
b |
diff -r b795e3e163e0 -r abf0bfe01c78 Marea/marea_cluster.xml --- a/Marea/marea_cluster.xml Wed Oct 16 07:12:37 2019 -0400 +++ b/Marea/marea_cluster.xml Wed Oct 16 16:25:56 2019 -0400 |
[ |
@@ -1,4 +1,4 @@ -<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.6"> +<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.7"> <description></description> <macros> <import>marea_macros.xml</import> @@ -75,9 +75,10 @@ <outputs> <data format="txt" name="log" label="${tool.name} - Log" /> - <data format="tabular" name="best_cluster" label="${tool.name} - Best cluster" /> + <data format="tabular" name="best_cluster" label="${tool.name} - best cluster assignment" /> <collection name="results" type="list" label="${tool.name} - Plots and results"> <discover_datasets pattern="__name_and_ext__" directory="clustering"/> + <filter>data['clust_type'] == "kmeans" or data['clust_type'] == "hierarchy"</filter> </collection> </outputs> <help> |