# HG changeset patch # User immport-devteam # Date 1488218599 18000 # Node ID 8547aedf1350120dbf31981a1d6894d8ac06d86b Uploaded diff -r 000000000000 -r 8547aedf1350 profile_cl/getOntology.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/profile_cl/getOntology.R Mon Feb 27 13:03:19 2017 -0500 @@ -0,0 +1,28 @@ +# Cell Ontology Module for Galaxy +# FlowCL +###################################################################### +# Copyright (c) 2016 Northrop Grumman. +# All rights reserved. +###################################################################### +# +# Version 1 +# Cristel Thomas +# +# + +library(flowCL) +library(base) + +getOntology <- function(output_file, markers) { + res <- flowCL(markers, ResetArch = TRUE) + if (length(res) == 6) { + report <- capture.output(res$Table) + sink(output_file) + cat(report, sep = "\n") + sink() + } +} + +args <- commandArgs(trailingOnly = TRUE) +markers <- paste(args[3:length(args)], collapse="") +getOntology(args[2], markers) diff -r 000000000000 -r 8547aedf1350 profile_cl/profileCLs.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/profile_cl/profileCLs.py Mon Feb 27 13:03:19 2017 -0500 @@ -0,0 +1,163 @@ +#!/usr/bin/env python + +###################################################################### +# Copyright (c) 2016 Northrop Grumman. +# All rights reserved. +###################################################################### +from __future__ import print_function +import sys +import os +from argparse import ArgumentParser +from jinja2 import Environment, FileSystemLoader + +profile_key = { + "1": "-", + "2": "lo", + "3": "+", + "4": "hi" +} + + +def run_flowCL(phenotype, output_txt, output_pdf, tool): + run_command = " ". join(["Rscript --slave --vanilla", tool, "--args", output_txt, phenotype]) + os.system(run_command) + get_graph = " ".join(["mv flowCL_results/*.pdf", output_pdf]) + os.system(get_graph) + return + + +def generate_flowCL_query(list_markers, list_types): + if (len(list_markers) != len(list_types)): + return("pb with headers") + query = [] + # go through both lists, remove fsc/ssc + for i in range(1, len(list_markers)): + if not list_markers[i].startswith("FSC") and not list_markers[i].startswith("SSC"): + query.append(list_markers[i].upper()) + query.append(profile_key[list_types[i]]) + # return concatenated string + return("".join(query)) + + +def translate_profiles(input_file, tool_dir, output, html_dir): + os.mkdir(html_dir) + + tool = "/".join([tool_dir, "getOntology.R"]) + html_table = "".join([html_dir, "/CLprofiles.txt"]) + score_table = "".join(["cp ", input_file, " ", html_dir, "/scores.txt"]) + os.system(score_table) + + # read profile + with open(input_file, "r") as flock_profiles, open(html_table, "w") as out: + headers = flock_profiles.readline() + headers = headers.strip() + # get all headers except for last 2 (count + percentage) + markers = headers.split("\t")[:-2] + counter = 0 + + out.write("Population\tFlowCL Query\tNb Results\tLink to PDF\t") + out.write("Top Result Label\tTop Result Score\tTop Result CL\n") + queries = {} + # create marker query for each population + for lines in flock_profiles: + lines = lines.strip("\n") + pop_profile = lines.split("\t")[:-2] + flowcl_query = generate_flowCL_query(markers, pop_profile) + counter += 1 + nb_results = "0" + top_label = "no_match" + top_score = "NA" + top_CL = "NA" + pdf_link = "NA" + # check if query was run before + if flowcl_query not in queries: + # create filenames for results & graphs + txt = "".join(["flowcl_pop", str(counter).zfill(2), ".txt"]) + text_result = "/".join([html_dir, txt]) + graph = "".join(["flowcl_pop", str(counter).zfill(2), ".pdf"]) + graph_output = "/".join([html_dir, graph]) + # run flowCL for each marker profile + run_flowCL(flowcl_query, text_result, graph_output, tool) + + # test that text file exists if not results are all NAs: + if os.path.isfile(text_result): + with open(text_result, "r") as res: + for line in res: + if line.startswith("Score"): + data = line.split(") ") + top_score = data[2][:-2] + tot_results = len(data) - 2 + nb_results = str(tot_results) + if tot_results == 5: + if len(data[6].split("+")) > 1: + nb_results = "5+" + elif line.startswith("Cell ID"): + prep_link = line.split(") ")[1][:-2] + cl = prep_link.replace("_", ":") + link = "".join(['']) + top_CL = "".join([link, prep_link, ""]) + elif line.startswith("Cell Label"): + top_label = line.split(") ")[1][:-2] + pdf_link = "".join(['PDF']) + tmpflowcl_query = "".join(['', flowcl_query, '']) + + queries[flowcl_query] = { + "query": tmpflowcl_query, + "results": nb_results, + "pdf": pdf_link, + "label": top_label, + "score": top_score, + "CL": top_CL + } + # write query results to CLprofiles.txt + out.write("\t".join([pop_profile[0], + queries[flowcl_query]["query"], + queries[flowcl_query]["results"], + queries[flowcl_query]["pdf"], + queries[flowcl_query]["label"], + queries[flowcl_query]["score"], + queries[flowcl_query]["CL"]]) + "\n") + + env = Environment(loader=FileSystemLoader(tool_dir + "/templates")) + template = env.get_template("profileCLs.template") + + real_directory = html_dir.replace("/job_working_directory", "") + context = {'outputDirectory': real_directory} + overview = template.render(**context) + with open(output, "w") as outf: + outf.write(overview) + + +if __name__ == "__main__": + parser = ArgumentParser( + prog="getCLs_from_profile", + description="runs flowCL on a each population defined by FLOCK.") + + parser.add_argument( + '-i', + dest="input_file", + required=True, + help="File location for the profile.txt from FLOCK.") + + parser.add_argument( + '-o', + dest="output", + required=True, + help="Name of the output html file.") + + parser.add_argument( + '-d', + dest="html_dir", + required=True, + help="Path to html supporting directory.") + + parser.add_argument( + '-t', + dest="tool_dir", + required=True, + help="Path to the tool directory") + + args = parser.parse_args() + + translate_profiles(args.input_file, args.tool_dir, args.output, args.html_dir) + sys.exit(0) diff -r 000000000000 -r 8547aedf1350 profile_cl/profileCLs.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/profile_cl/profileCLs.xml Mon Feb 27 13:03:19 2017 -0500 @@ -0,0 +1,116 @@ + + with cell ontology + + jinja2 + r + bioconductor-flowcl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1093/bioinformatics/btu807 + + diff -r 000000000000 -r 8547aedf1350 profile_cl/static/flowtools/css/flowCL.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/profile_cl/static/flowtools/css/flowCL.css Mon Feb 27 13:03:19 2017 -0500 @@ -0,0 +1,34 @@ +/* +** DataTables config +*/ +div.dt-buttons { + float: right; +} +th { + font-size:16px; + text-align: left; +} +body td { + max-width: 50px; + font-size: 14px; + overflow: hidden; + text-overflow: ellipsis; +} +.cltable thead { + display:none; +} +.cltable td { + max-width: 40px; + word-break: break-all; + word-wrap: break-word; +} +.firstcol { + width: 160px; +} +.smallcols { + width: 100px; +} +div.dataTables_wrapper { + width: 95%; + margin: 0 auto; +} diff -r 000000000000 -r 8547aedf1350 profile_cl/static/flowtools/js/profileCLs.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/profile_cl/static/flowtools/js/profileCLs.js Mon Feb 27 13:03:19 2017 -0500 @@ -0,0 +1,85 @@ +// Copyright (c) 2016 Northrop Grumman. +// All rights reserved. + +var cl_table = './CLprofiles.txt'; +var scores_table = './scores.txt'; + +var displayCLTable = function(){ + d3.text(cl_table, function(error, data){ + var clHdgs = [], + clHTML = '', + clTableData = [], + clHeadings = []; + + if (error){ + alert("Problem retrieving data"); + return; + } + clHdgs = data.split("\n")[0].split("\t"); + data = d3.tsv.parse(data); + clTableData = $.extend(true, [], data); + + clHdgs.forEach(function(d,i){ + clHeadings.push({"data" : d, "title" : d}); + }); + + $('#clprofiles').html(clHTML); + var clTable = $('#cltable').DataTable({ + columns: clHeadings, + dom: '<"top"Bi>t<"bottom"lp><"clear">', + pageLength: 25, + order: [[ 0, "asc" ]], + data: clTableData, + buttons: [ + 'copy', 'pdfHtml5','csvHtml5' + ], + columnDefs: [ + { + targets: [0,2,3], + className: "smallcols" + }, + { + targets: 4, + className: "dt-body-left" + }, + { + targets: [5,6], + className: "firstcol" + }] + }); + }); +}; + +var displayScoresTable = function(){ + d3.text(scores_table, function(error, data){ + var scoreHTML = '
', + scoreHdgs = [], + scoreTableData = [], + scoreHeadings = []; + if (error){ + alert("Problem retrieving data"); + return; + } + scoreHdgs = data.split("\n")[0].split("\t"); + data = d3.tsv.parse(data); + + $('#scores').html(scoreHTML); + + scoreTableData = $.extend(true, [], data); + + scoreHdgs.forEach(function(d,i){ + scoreHeadings.push({"data" : d, "title" : d}); + }); + + var scoreTable = $('#scoretable').DataTable({ + columns: scoreHeadings, + pageLength: 25, + order: [[ 0, "asc" ]], + dom: '<"top"Bi>t<"bottom"lp><"clear">', + data: scoreTableData, + buttons: [ + 'copy', 'pdfHtml5','csvHtml5' + ], + }); + }); +}; diff -r 000000000000 -r 8547aedf1350 profile_cl/static/images/flowtools/popprofiles.png Binary file profile_cl/static/images/flowtools/popprofiles.png has changed diff -r 000000000000 -r 8547aedf1350 profile_cl/static/images/flowtools/profileCL.png Binary file profile_cl/static/images/flowtools/profileCL.png has changed diff -r 000000000000 -r 8547aedf1350 profile_cl/templates/profileCLs.template --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/profile_cl/templates/profileCLs.template Mon Feb 27 13:03:19 2017 -0500 @@ -0,0 +1,91 @@ + + + +FLOCK Populations Ontology + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + +
+ + + + + + diff -r 000000000000 -r 8547aedf1350 profile_cl/test-data/CLprofiles.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/profile_cl/test-data/CLprofiles.txt Mon Feb 27 13:03:19 2017 -0500 @@ -0,0 +1,4 @@ +Population FlowCL Query Nb Results Link to PDF Top Result Label Top Result Score Top Result CL +1 CD4+CCR3-CD8-CCR7- 5+ PDF effector CD4-positive, alpha-beta T cell 0.167 CL_0001044 +2 CD4+CCR3-CD8-CCR7- 5+ PDF effector CD4-positive, alpha-beta T cell 0.167 CL_0001044 +3 CD4+CCR3-CD8+CCR7lo 5+ PDF mature CD8_alpha-negative CD11b-positive dendritic cell 2)" -0.357 CL_0001003 diff -r 000000000000 -r 8547aedf1350 profile_cl/test-data/flowcl_pop01.pdf Binary file profile_cl/test-data/flowcl_pop01.pdf has changed diff -r 000000000000 -r 8547aedf1350 profile_cl/test-data/flowcl_pop01.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/profile_cl/test-data/flowcl_pop01.txt Mon Feb 27 13:03:19 2017 -0500 @@ -0,0 +1,44 @@ + [,1] +Short marker names "CD4+CCR3-CD8-CCR7-" +Ontology marker names "CD4 molecule, C-C chemokine receptor type 3, T cell receptor" + "co-receptor CD8, C-C chemokine receptor type 7" +Experiment markers "CD4,CCR3,CD8,CCR7" +Ontology exper. names "CD4 molecule, C-C chemokine receptor type 3, T cell receptor" + "co-receptor CD8, C-C chemokine receptor type 7" +Successful Match? "No" +Marker ID "1) PR_000001004, PR_000025402, PR_000001203 2) PR_000001004," + "PR_000025402, PR_000001203 3) PR_000001004, PR_000025402," + "PR_000001203 4) PR_000001004, PR_000025402 5) PR_000001004," + "PR_000025402 + more" +Marker Label "1) CD4 molecule, T cell receptor co-receptor CD8, C-C chemokine" + "receptor type 7 2) CD4 molecule, T cell receptor co-receptor CD8, C-C" + "chemokine receptor type 7 3) CD4 molecule, T cell receptor" + "co-receptor CD8, C-C chemokine receptor type 7 4) CD4 molecule, T" + "cell receptor co-receptor CD8 5) CD4 molecule, T cell receptor" + "co-receptor CD8 + more" +Marker Key "1) { CCR3- } CD8-, CCR7-, CD4+ ( ) [ alpha-beta T cell receptor" + "complex+, receptor-type tyrosine-protein phosphatase C isoform" + "CD45RA+, CD3+ ] 2) { CCR3- } CD8-, CCR7-, CD4+ ( ) [ alpha-beta T" + "cell receptor complex+, C-X-C chemokine receptor type 5+, inducible" + "T-cell costimulator+, programmed cell death protein 1+, CD3+ ] 3) {" + "CCR3- } CD8-, CCR7-, CD4+ ( ) [ interleukin-2 receptor subunit" + "alpha-, receptor-type tyrosine-protein phosphatase C isoform CD45RA-," + "alpha-beta T cell receptor complex+, receptor-type tyrosine-protein" + "phosphatase C isoform CD45RO+, interleukin-7 receptor subunit alpha+," + "CD3+, CD44 moleculehi, interleukin-2 receptor subunit betahi ] 4) {" + "CCR3-, CCR7- } CD8-, CD4+ ( ) [ L-selectin-, interleukin-7 receptor" + "subunit alpha-, alpha-beta T cell receptor complex+, CD69 molecule+," + "interleukin-2 receptor subunit alpha+, ADP-ribosyl cyclase 1+, MHC" + "class II histocompatibility antigen alpha chain HLA-DRA+, CD3+ ] 5) {" + "CCR3-, CCR7- } CD8-, CD4+ ( ) [ interleukin-2 receptor subunit" + "alpha-, alpha-beta T cell receptor complex+, receptor-type" + "tyrosine-protein phosphatase C isoform CD45RO+, interleukin-7" + "receptor subunit alpha+, CD3+, CD44 moleculehi, interleukin-2" + "receptor subunit betahi ] + more" +Score (Out of 1) "1) 0.167 2) 0.125 3) 0.091 4) -0.2 5) -0.222 + more" +Cell ID "1) CL_0001044 2) CL_0002038 3) CL_0000905 4) CL_0001043 5) CL_0000897" + "+ more" +Cell Label "1) effector CD4-positive, alpha-beta T cell 2) T follicular helper" + "cell 3) effector memory CD4-positive, alpha-beta T cell 4) activated" + "CD4-positive, alpha-beta T cell, human 5) CD4-positive, alpha-beta" + "memory T cell + more" diff -r 000000000000 -r 8547aedf1350 profile_cl/test-data/flowcl_pop03.pdf Binary file profile_cl/test-data/flowcl_pop03.pdf has changed diff -r 000000000000 -r 8547aedf1350 profile_cl/test-data/flowcl_pop03.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/profile_cl/test-data/flowcl_pop03.txt Mon Feb 27 13:03:19 2017 -0500 @@ -0,0 +1,43 @@ + [,1] +Short marker names "CD4+CCR3-CD8+CCR7lo" +Ontology marker names "CD4 molecule, C-C chemokine receptor type 3, T cell receptor" + "co-receptor CD8, C-C chemokine receptor type 7" +Experiment markers "CD4,CCR3,CD8,CCR7" +Ontology exper. names "CD4 molecule, C-C chemokine receptor type 3, T cell receptor" + "co-receptor CD8, C-C chemokine receptor type 7" +Successful Match? "No" +Marker ID "1) PR_000001004 2) PR_000001004 3) PR_000001004 4) PR_000001004 5)" + "PR_000001004, PR_000025402 + more" +Marker Label "1) CD4 molecule 2) CD4 molecule 3) CD4 molecule 4) CD4 molecule 5)" + "CD4 molecule, T cell receptor co-receptor CD8 + more" +Marker Key "1) { CD8+, CCR3-, CCR7lo } CD4+ ( ) [ CD19 molecule-, CD34 molecule-," + "CD34 molecule-, neural cell adhesion molecule 1-, membrane-spanning" + "4-domains subfamily A member 1-, lymphocyte antigen 75-, T-cell" + "surface glycoprotein CD8 alpha chain-, CD83 molecule+, integrin" + "alpha-M+, MHC class II protein complexhi, CD86 moleculehi, CD80" + "moleculehi, integrin alpha-Xhi ] 2) { CD8+, CCR3-, CCR7lo } CD4+ ( )" + "[ CD19 molecule-, CD34 molecule-, CD34 molecule-, neural cell" + "adhesion molecule 1-, membrane-spanning 4-domains subfamily A member" + "1-, lymphocyte antigen 75-, T-cell surface glycoprotein CD8 alpha" + "chain-, integrin alpha-M+, MHC class II protein complexlo, CD86" + "moleculelo, CD80 moleculelo, integrin alpha-Xhi ] 3) { CD8+, CCR3-," + "CCR7lo } CD4+ ( ) [ CD19 molecule-, CD34 molecule-, CD34 molecule-," + "neural cell adhesion molecule 1-, membrane-spanning 4-domains" + "subfamily A member 1-, integrin alpha-M-, T-cell surface glycoprotein" + "CD8 alpha chain-, receptor-type tyrosine-protein phosphatase C" + "isoform CD45R+, lymphocyte antigen 6G+, integrin alpha-Xlo ] 4) {" + "CD8+, CCR3-, CCR7lo } CD4+ ( ) [ CD19 molecule-, CD34 molecule-, CD34" + "molecule-, neural cell adhesion molecule 1-, membrane-spanning" + "4-domains subfamily A member 1-, integrin alpha-M-, receptor-type" + "tyrosine-protein phosphatase C isoform CD45R+, lymphocyte antigen" + "6G+, T-cell surface glycoprotein CD8 alpha chain+, integrin alpha-Xlo" + "] 5) { CCR3-, CCR7lo } CD4+, CD8+ ( ) [ alpha-beta T cell receptor" + "complexlo, CD69 molecule+ ] + more" +Score (Out of 1) "1) -0.357 2) -0.385 3) -0.455 4) -0.455 5) -0.5 + more" +Cell ID "1) CL_0001003 2) CL_0000997 3) CL_0002455 4) CL_0002456 5) CL_0002429" + "+ more" +Cell Label "1) mature CD8_alpha-negative CD11b-positive dendritic cell 2)" + "immature CD8_alpha-negative CD11b-positive dendritic cell 3)" + "CD8_alpha-negative plasmactyoid dendritic cell 4) CD8_alpha-positive" + "plasmactyoid dendritic cell 5) CD69-positive double-positive" + "thymocyte + more" diff -r 000000000000 -r 8547aedf1350 profile_cl/test-data/input.flowscore --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/profile_cl/test-data/input.flowscore Mon Feb 27 13:03:19 2017 -0500 @@ -0,0 +1,4 @@ +Population_ID FSC SSC CD4 CCR3 CD8 CCR7 Count Percentage +1 1 1 3 1 1 1 1000 5 +2 1 1 3 1 1 1 1000 5 +3 1 1 3 1 3 2 1000 5 diff -r 000000000000 -r 8547aedf1350 profile_cl/test-data/out.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/profile_cl/test-data/out.html Mon Feb 27 13:03:19 2017 -0500 @@ -0,0 +1,91 @@ + + + +FLOCK Populations Ontology + + + + + + + + + + + + + + + + + + + + +
+
+
+ + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + +
+ + + + + + diff -r 000000000000 -r 8547aedf1350 profile_cl/test-data/scores.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/profile_cl/test-data/scores.txt Mon Feb 27 13:03:19 2017 -0500 @@ -0,0 +1,4 @@ +Population_ID FSC SSC CD4 CCR3 CD8 CCR7 Count Percentage +1 1 1 3 1 1 1 1000 5 +2 1 1 3 1 1 1 1000 5 +3 1 1 3 1 3 2 1000 5