# HG changeset patch # User azomics # Date 1596056633 14400 # Node ID b5453d07f740507abef04282174341e4b5ea2153 # Parent 8283ff163ba6372004e7e195ceaaa730d26339ae "planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/flow_overview commit 65373effef15809f3db0e5f9603ef808f4110aa3" diff -r 8283ff163ba6 -r b5453d07f740 color_palette.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/color_palette.py Wed Jul 29 17:03:53 2020 -0400 @@ -0,0 +1,48 @@ +###################################################################### +# Copyright (c) 2016 Northrop Grumman. +# All rights reserved. +###################################################################### + +color_palette = [ + '#000000', # Black 0 + '#FF0000', # Red 1 + '#FFFF00', # Yellow 2 + '#008000', # Dark Green 3 + '#0000FF', # Blue 4 + '#FFA500', # Orange 5 + '#8A2BE2', # BlueViolet 6 + '#808000', # Olive 7 + '#00FFFF', # Cyan 8 + '#FF00FF', # Magenta 9 + '#00FF00', # Green 10 + '#000080', # Navy Blue 11 + '#F08080', # Light Coral 12 + '#800080', # Purple 13 + '#F0E68C', # Khaki 14 + '#8FBC8F', # Dark Sea Green 15 + '#2F4F4F', # Dark Slate Grey 16 + '#008080', # Teal 17 + '#9932CC', # Dark Orchid 18 + '#FF7F50', # Coral 19 + '#FFD700', # Gold 20 + '#008B8B', # Cyan 4 21 + '#800000', # Maroon 22 + '#5F9EA0', # Cadet Blue 23 + '#FFC0CB', # Pink 24 + '#545454', # Grey 25 + '#7FFFD4', # Aquamarine 26 + '#ADD8E6', # Light Blue 27 + '#DB7093', # Medium Violet Red 28 + '#CD853F', # Tan 3 29 + '#4169E1', # Royal Blue 30 + '#708090', # Slate Grey 31 + '#4682B4', # Steel Blue 32 + '#D8BFD8', # Thistle 33 + '#F5DEB3', # Wheat 34 + '#9ACD32', # Yellow Green 35 + '#BDB76B', # Dark Khaki 36 + '#8B008B', # Magenta 4 37 + '#556B2F', # Dark Olive Green 38 + '#00CED1', # Dark Turquoise 39 + '#FF1493' # Deep Pink +] diff -r 8283ff163ba6 -r b5453d07f740 css/parallelCoordinates.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/css/parallelCoordinates.css Wed Jul 29 17:03:53 2020 -0400 @@ -0,0 +1,170 @@ +/*Chart*/ +.chartDiv { + margin-top: 10px; + margin-bottom: 10px; + margin-left: 10px; + margin-right: 10px; + background-color: #ffffff; +} +/*This for stacked / bar plot*/ +.optionButtons{ + padding-bottom: 8px; +} +.igbtn { + border-radius: 4px; + color: white; + background-color: #0E3163; + border:1px; + font-size: 14px; + font-weight: bold; + border-color: #0E3163; + padding: 6px 12px; + background: -webkit-linear-gradient(#0E3163, #3e6db0); + background: -o-linear-gradient(#0E3163, #3e6db0); + background: -moz-linear-gradient(#0E3163, #3e6db0); + background: linear-gradient(#0E3163, #3e6db0); +} + #togglePlot { + display: block; + margin-left: auto; + margin-right: auto; + height: 60px; + width: 90px; + padding: 4px 8px 8px 4px; +} +/*DataTables config*/ +div.dt-buttons { + float: right; +} +th { + background-color:#3965A4; + color:#fff; + border-radius: 2px; + padding:6px 12px; + font-size:14px; + text-align: center; +} +.igtable th { + background: -webkit-linear-gradient(#0E3163, #3e6db0); + background: -o-linear-gradient(#0E3163, #3e6db0); + background: -moz-linear-gradient(#0E3163, #3e6db0); + background: linear-gradient(#0E3163, #3e6db0) +} +body td { + max-width: 50px; + font-size: 14px; + overflow: hidden; + text-overflow: ellipsis; +} +.pctable td { + max-width: 100px; +} +.dtable td { + max-width: 50px; + word-break: break-all; + word-wrap: break-word; +} +.popt thead { + display:none; +} +.popt td { + max-width: 40px; + word-break: break-all; + word-wrap: break-word; +} +div.dataTables_wrapper { + width: 95%; + margin: 0 auto; +} +/*Opacity widget*/ +.widget { + margin-top: 4px; + padding: 4px 4px 8px 4px; + margin-bottom: 10px + height: 36px; + line-height: 12px; + font-size: 12px; + text-align: center; + // float: right; +} +/*This section for coordinates plot*/ +.table th { + text-align: center; + padding: 10px; +} +.table { + width:100%; + border:0px solid; + border-collapse:collapse; + text-align:center; + font-size: 14px; +} +svg { + font: 10px sans-serif; +} +.background path { + fill: none; + stroke: #F0F0F0; + shape-rendering: crispEdges; +} +.foreground path { + fill: none; + stroke-opacity: .8; +} +.brush .extent { + fill-opacity: .3; + stroke: #fff; + shape-rendering: crispEdges; +} +.axis line, +.axis path { + fill: none; + stroke: #000; + stroke-width: 2; + shape-rendering: crispEdges; +} +.axis text { + text-shadow: 0 1px 0 #fff, 1px 0 0 #fff, 0 -1px 0 #fff, -1px 0 0 #fff; +} +.xlabel { + font-size: 14px; + fill: #808080; +} +.ylabel { + font-size: 16px; + fill: #808080; +} +/*This section for boxplots*/ +.box { + font: 10px sans-serif; +} +.box line, +.box rect, +.box circle { + fill: red; + stroke: #000; + stroke-width: 1px; +} +.box .center { + stroke-dasharray: 3,3; +} +.box .outlier { + fill: none; + stroke: #000; +} +.axisbp path, +.axisbp line { + stroke-width: 1px; + fill: +} +.y.axisbp path { + stroke-width: 2; + stroke: #000; + fill: none; +} +.x.axisbp path { + fill: none; + stroke: #C8C8C8; + stroke-width: 0px; + shape-rendering: crispEdges; +} diff -r 8283ff163ba6 -r b5453d07f740 flow_overview/color_palette.py --- a/flow_overview/color_palette.py Mon Feb 27 12:54:37 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -###################################################################### -# Copyright (c) 2016 Northrop Grumman. -# All rights reserved. -###################################################################### - -color_palette = [ - '#000000', # Black 0 - '#FF0000', # Red 1 - '#FFFF00', # Yellow 2 - '#008000', # Dark Green 3 - '#0000FF', # Blue 4 - '#FFA500', # Orange 5 - '#8A2BE2', # BlueViolet 6 - '#808000', # Olive 7 - '#00FFFF', # Cyan 8 - '#FF00FF', # Magenta 9 - '#00FF00', # Green 10 - '#000080', # Navy Blue 11 - '#F08080', # Light Coral 12 - '#800080', # Purple 13 - '#F0E68C', # Khaki 14 - '#8FBC8F', # Dark Sea Green 15 - '#2F4F4F', # Dark Slate Grey 16 - '#008080', # Teal 17 - '#9932CC', # Dark Orchid 18 - '#FF7F50', # Coral 19 - '#FFD700', # Gold 20 - '#008B8B', # Cyan 4 21 - '#800000', # Maroon 22 - '#5F9EA0', # Cadet Blue 23 - '#FFC0CB', # Pink 24 - '#545454', # Grey 25 - '#7FFFD4', # Aquamarine 26 - '#ADD8E6', # Light Blue 27 - '#DB7093', # Medium Violet Red 28 - '#CD853F', # Tan 3 29 - '#4169E1', # Royal Blue 30 - '#708090', # Slate Grey 31 - '#4682B4', # Steel Blue 32 - '#D8BFD8', # Thistle 33 - '#F5DEB3', # Wheat 34 - '#9ACD32', # Yellow Green 35 - '#BDB76B', # Dark Khaki 36 - '#8B008B', # Magenta 4 37 - '#556B2F', # Dark Olive Green 38 - '#00CED1', # Dark Turquoise 39 - '#FF1493' # Deep Pink -] diff -r 8283ff163ba6 -r b5453d07f740 flow_overview/flowstatlib.py --- a/flow_overview/flowstatlib.py Mon Feb 27 12:54:37 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ -###################################################################### -# Copyright (c) 2016 Northrop Grumman. -# All rights reserved. -###################################################################### -from __future__ import print_function -import sys -import pandas as pd -from scipy.stats import gmean -from argparse import ArgumentParser - - -def gen_overview_stats(file_name): - flow_stats = {} - fcs = pd.read_table(file_name) - (events, columns) = fcs.shape - flow_stats['fcs'] = fcs - flow_stats['events'] = events - flow_stats['columns'] = columns - 1 - flow_stats['data'] = fcs.iloc[:, :-1] - flow_stats['population'] = fcs.iloc[:, -1:].iloc[:, 0] - flow_stats['population_freq'] = flow_stats['population'].value_counts() - flow_stats['population_sample'] = (flow_stats['population_freq'] * (20000/float(events))).round(decimals=0) - flow_stats['population_freq_sort'] = flow_stats['population_freq'].sort_index() - flow_stats['population_per'] = (flow_stats['population'].value_counts(normalize=True) * 100).round(decimals=2) - flow_stats['population_per_sort'] = flow_stats['population_per'].sort_index() - flow_stats['population_all'] = pd.concat([flow_stats['population_freq_sort'], flow_stats['population_per_sort']], axis=1) - flow_stats['population_all'].columns = ['Count', 'Percentage'] - flow_stats['min'] = flow_stats['data'].values.min() - flow_stats['max'] = flow_stats['data'].values.max() - flow_stats['markers'] = list(flow_stats['data'].columns) - flow_stats['mfi'] = fcs.groupby('Population').mean().round(decimals=2) - flow_stats['mfi_pop'] = pd.merge(flow_stats['mfi'], flow_stats['population_all'], left_index=True, right_index=True) - flow_stats['mfi_pop']['Population'] = flow_stats['mfi_pop'].index - flow_stats['gmfi'] = fcs.groupby('Population').agg(lambda x: gmean(list(x))).round(decimals=2) - flow_stats['gmfi_pop'] = pd.merge(flow_stats['gmfi'], flow_stats['population_all'], left_index=True, right_index=True) - flow_stats['gmfi_pop']['Population'] = flow_stats['gmfi_pop'].index - flow_stats['mdfi'] = fcs.groupby('Population').median().round(decimals=2) - flow_stats['mdfi_pop'] = pd.merge(flow_stats['mdfi'], flow_stats['population_all'], left_index=True, right_index=True) - flow_stats['mdfi_pop']['Population'] = flow_stats['mdfi_pop'].index - - # - # If the number of events is less than 20000, then return - # the complete data set, - # Otherwise sample the data to only return 20000 events. - if events <= 20000: - flow_stats['sample'] = fcs - else: - fcs_np = fcs.values - sample_data = [] - pop_found = {} - for i in range(0, events): - population_number = fcs_np[i][columns-1] - if population_number in pop_found: - if pop_found[population_number] < flow_stats['population_sample'][population_number]: - pop_found[population_number] += 1 - sample_data.append(fcs_np[i]) - else: - pop_found[population_number] = 1 - sample_data.append(fcs_np[i]) - flow_stats['sample'] = pd.DataFrame(sample_data) - flow_stats['sample'].columns = fcs.columns - - flow_stats['sample_data'] = flow_stats['sample'].iloc[:, :-1] - flow_stats['sample_population'] = flow_stats['sample'].iloc[:, -1:].iloc[:, 0] - - return flow_stats - - -if __name__ == '__main__': - parser = ArgumentParser( - prog="flowstats", - description="Gets statistics on FLOCK run") - - parser.add_argument( - '-i', - dest="input_file", - required=True, - help="File locations for flow clr file.") - - parser.add_argument( - '-o', - dest="out_file", - required=True, - help="Path to the directory for the output file.") - args = parser.parse_args() - - flow_stats = gen_overview_stats(args.input_file) - with open(args.out_file, "w") as outf: - outf.write("Events: ", flow_stats['events']) - outf.write("Min: ", flow_stats['min']) - outf.write("Max: ", flow_stats['max']) - outf.write("Columns: ", flow_stats['columns']) - outf.write("Markers: ", flow_stats['markers']) - outf.write("Population: ", flow_stats['population']) - outf.write("Population Freq: ", flow_stats['population_freq']) - outf.write("Population Sample: ", flow_stats['population_sample']) - outf.write("Population Per: ", flow_stats['population_per']) - outf.write("Sample Data contains ", len(flow_stats['sample']), " events") - outf.write("MIF_POP ", flow_stats['mfi_pop']) - sys.exit(0) diff -r 8283ff163ba6 -r b5453d07f740 flow_overview/genFlowOverview.py --- a/flow_overview/genFlowOverview.py Mon Feb 27 12:54:37 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,330 +0,0 @@ -#!/usr/bin/env python -###################################################################### -# Copyright (c) 2016 Northrop Grumman. -# All rights reserved. -###################################################################### -from __future__ import print_function -import sys -import os -import pandas as pd -import logging -import fileinput - -from argparse import ArgumentParser -from jinja2 import Environment, FileSystemLoader -from collections import defaultdict - -from color_palette import color_palette -from flowstatlib import gen_overview_stats -import matplotlib as mpl -mpl.use('Agg') -import matplotlib.pyplot as plt - - -profile_key = { - "1": "-", - "2": "lo", - "3": "+", - "4": "hi" -} - - -# flow CL functions -def run_flowCL(phenotype, output_txt, output_pdf, tool): - run_command = " ". join(["Rscript --slave --vanilla", tool, "--args", - output_txt, phenotype]) - os.system(run_command) - - get_graph = " ".join(["mv flowCL_results/*.pdf", output_pdf]) - os.system(get_graph) - return - - -def generate_flowCL_query(list_markers, list_types): - if (len(list_markers) != len(list_types)): - return("pb with headers") - query = [] - # go through both lists, remove fsc/ssc - for i in range(1, len(list_markers)): - if not list_markers[i].startswith("FSC") and not list_markers[i].startswith("SSC"): - query.append(list_markers[i].upper()) - query.append(profile_key[list_types[i]]) - # return concatenated string - return("".join(query)) - - -def translate_profiles(input_file, tool_dir, html_dir): - tool = "/".join([tool_dir, "getOntology.R"]) - html_table = "".join([html_dir, "/CLprofiles.txt"]) - score_table = "".join(["cp ", input_file, " ", html_dir, "/scores.txt"]) - os.system(score_table) - - # read profile - with open(input_file, "r") as flock_profiles, open(html_table, "w") as out: - headers = flock_profiles.readline() - headers = headers.strip() - markers = headers.split("\t")[:-2] - counter = 0 - - out.write("Population\tFlowCL Query\tNb Results\tLink to PDF\t") - out.write("Top Result Label\tTop Result Score\tTop Result CL\n") - queries = {} - # create marker query for each population - for lines in flock_profiles: - lines = lines.strip("\n") - pop_profile = lines.split("\t")[:-2] - flowcl_query = generate_flowCL_query(markers, pop_profile) - counter += 1 - nb_results = "0" - top_label = "no_match" - top_score = "NA" - top_CL = "NA" - pdf_link = "NA" - - # check if query was run before - if flowcl_query not in queries: - # create filenames for results & graphs - txt = "".join(["flowcl_pop", str(counter).zfill(2), ".txt"]) - text_result = "/".join([html_dir, txt]) - graph = "".join(["flowcl_pop", str(counter).zfill(2), ".pdf"]) - graph_output = "/".join([html_dir, graph]) - # run flowCL for each marker profile - run_flowCL(flowcl_query, text_result, graph_output, tool) - - # test that text file exists if not results are all NAs: - if os.path.isfile(text_result): - with open(text_result, "r") as res: - for line in res: - if line.startswith("Score"): - data = line.split(") ") - top_score = data[2][:-2] - tot_results = len(data) - 2 - nb_results = str(tot_results) - if tot_results == 5: - if len(data[6].split("+")) > 1: - nb_results = "5+" - elif line.startswith("Cell ID"): - prep_link = line.split(") ")[1][:-2] - cl = prep_link.replace("_", ":") - link = "".join(['']) - top_CL = "".join([link, prep_link, ""]) - elif line.startswith("Cell Label"): - top_label = line.split(") ")[1][:-2] - pdf_link = "".join(['PDF']) - tmpflowcl_query = "".join(['', flowcl_query, '']) - queries[flowcl_query] = { - "query": tmpflowcl_query, - "results": nb_results, - "pdf": pdf_link, - "label": top_label, - "score": top_score, - "CL": top_CL - } - - # write query results to CLprofiles.txt - out.write("\t".join([pop_profile[0], - queries[flowcl_query]["query"], - queries[flowcl_query]["results"], - queries[flowcl_query]["pdf"], - queries[flowcl_query]["label"], - queries[flowcl_query]["score"], - queries[flowcl_query]["CL"]]) + "\n") - - -# boxplots data massaging -def panel_to_json_string(panel): - # from http://stackoverflow.com/questions/28078118/merge-many-json-strings-with-python-pandas-inputs - def __merge_stream(key, stream): - return '"' + key + '"' + ': ' + stream + ', ' - try: - stream = '{' - for item in panel.items: - stream += __merge_stream(item, panel.loc[item, :, :].to_json()) - # take out extra last comma - stream = stream[:-2] - # add the final paren - stream += '}' - except: - logging.exception('Panel Encoding did not work') - return stream - - -def get_outliers(group, upper, lower): - cat = group.name - out = {} - for marker in group: - out[marker] = group[(group[marker] > upper.loc[cat][marker]) | (group[marker] < lower.loc[cat][marker])][marker] - return out - - -def get_boxplot_stats(all_data, mfi_file, output_json): - # modified code from http://bokeh.pydata.org/en/latest/docs/gallery/boxplot.html - df = pd.read_table(all_data) - pops = df.groupby('Population') - q1 = pops.quantile(q=0.25) - q2 = pops.quantile(q=0.5) - q3 = pops.quantile(q=0.75) - iqr = q3 - q1 - upper = q3 + 1.5*iqr - lower = q1 - 1.5*iqr - resampled = False - # get outliers - out = pops.apply(get_outliers, upper, lower).dropna() - outliers = defaultdict(dict) - for population in set(df.Population): - for marker in df.columns: - if marker != 'Population': - tmp_outliers = list(out[population][marker]) - if (len(list(out[population][marker]))> 100): - tmp_outliers = list(out[population][marker].sample(n=100)) - resampled = True - outliers[population][marker] = tmp_outliers - outdf = pd.DataFrame(outliers) - # Get initial MFI values - mfi = pd.read_table(mfi_file) - mfi = mfi.set_index('Population') - - data = {'q1': q1, - 'q2': q2, - 'q3': q3, - 'upper': upper, - 'lower': lower, - 'outliers': outdf.T, - 'mfi': mfi} - wp = pd.Panel(data) - - with open(output_json, "w") as js_all: - js_all.write(panel_to_json_string(wp)) - - return resampled - -# html generation -def gen_flow_overview(args): - flow_stats = gen_overview_stats(args.input_file) - os.mkdir(args.output_directory) - html_template = "genOverview.template" - - if args.scores: - translate_profiles(args.scores, args.tool_directory, args.output_directory) - html_template = "genOverviewCL.template" - - env = Environment(loader=FileSystemLoader(args.tool_directory + "/templates")) - template = env.get_template(html_template) - - real_directory = args.output_directory.replace("/job_working_directory", "") - context = {'outputDirectory': real_directory} - overview = template.render(**context) - with open(args.output_file, "w") as f: - f.write(overview) - - flow_sample_file_name = args.output_directory + "/flow.sample" - with open(flow_sample_file_name, "w") as flow_sample_file: - flow_stats['sample'].to_csv(flow_sample_file, sep="\t", index=False, float_format='%.0f') - - flow_mfi_file_name = args.output_directory + "/flow.mfi" - with open(flow_mfi_file_name, "w") as flow_mfi_file: - flow_stats[args.mfi_calc].to_csv(flow_mfi_file, sep="\t", float_format='%.0f') - - mpop = "_".join([args.mfi_calc, "pop"]) - flow_mfi_pop_file_name = args.output_directory + "/flow.mfi_pop" - with open(flow_mfi_pop_file_name, "w") as flow_mfi_pop_file: - flow_stats[mpop].to_csv(flow_mfi_pop_file, sep="\t", index=False, float_format="%.2f") - - # box plot data - boxplot_data = args.output_directory + "/boxplotData.json" - resampled = get_boxplot_stats(args.input_file, flow_mfi_file_name, boxplot_data) - - # Generate the Images -- eventually we should change that over to D3 - fcm = flow_stats['sample_data'].values - colors = [] - for i, j in enumerate(flow_stats['sample_population']): - colors.append(color_palette[j]) - - for i in range(flow_stats['columns']): - for j in range(flow_stats['columns']): - file_name = "m" + str(i) + "_m" + str(j) - ax = plt.subplot(1, 1, 1) - plt.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0, wspace=0.0, hspace=0.0) - plt.scatter(fcm[:, i], fcm[:, j], s=1, c=colors, edgecolors='none') - plt.axis([0, 1024, 0, 1024]) - plt.xticks([]) - plt.yticks([]) - F = plt.gcf() - F.set_size_inches(1, 1) - F.set_dpi(90) - png_file = file_name + "_90X90.png" - F.savefig(args.output_directory + "/" + png_file) - plt.clf() - - flow_overview_file_name = args.output_directory + "/flow.overview" - with open(flow_overview_file_name, "w") as flow_overview_file: - flow_overview_file.write("
\n") - for i in range(flow_stats['columns']): - flow_overview_file.write(" | " + flow_stats['markers'][i] + " | \n") - - for i in range(flow_stats['columns']): - flow_overview_file.write("
" + flow_stats['markers'][i] + " | \n") - for j in range(flow_stats['columns']): - file_name = "m" + str(j) + "_m" + str(i) - image_file = file_name + "_90X90.png" - flow_overview_file.write('') - flow_overview_file.write(" |