# HG changeset patch
# User iuc
# Date 1697035862 0
# Node ID d319dc5f3ea8aac7a77d898357aea4a332a74d08
# planemo upload for repository
# https://github.com/INFRAFRONTIERDIB/tools-iuc/tree/query_impc/tools/query_impc
# commit 991881b5df5f5228ecf4445ee2cc1431b9602ea8
#
# impc_tool.py -- reconstructed from the collapsed hunk
#   diff -r 000000000000 -r d319dc5f3ea8 impc_tool.py
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/impc_tool.py Wed Oct 11 14:51:02 2023 +0000
#   @@ -0,0 +1,759 @@
"""Command-line helper that queries the IMPC bulk-data API.

Positional arguments, as read by the code below:

* ``argv[1]`` -- query input (ids, or a file path when ``argv[5] == "txt"``;
  for query 7 the string ``"True"`` keeps genes without phenotypes)
* ``argv[2]`` -- output file (tab separated)
* ``argv[3]`` -- query selector (``"1"`` .. ``"11"``)
* ``argv[4]`` -- ``"True"`` to write a header row
* ``argv[5]`` -- ``"txt"`` when the input is a file (gene output format for
  query 7)
* ``argv[6]`` / ``argv[7]`` -- separator code and/or gene output format
"""
import sys

import mygene
import pandas as pd
import requests


impc_api_url = "https://www.ebi.ac.uk/mi/impc/bulkdata-api"
impc_api_search_url = f"{impc_api_url}/genes"
impc_api_gene_bundle_url = f"{impc_api_url}/geneBundles"

# Page size used for every fully paginated request.
_PAGE_SIZE = 1000

# Separator codes accepted on the command line -> actual delimiter.
_SEPARATORS = {"t": "\t", "s": " ", ",": ",", ";": ";", ".": "."}

# Output columns of extr_img, in the order they are written.
_IMAGE_COLUMNS = ("externalSampleId", "geneSymbol", "biologicalSampleGroup",
                  "sex", "colonyId", "zygosity", "parameterName",
                  "downloadUrl", "thumbnailUrl")


def stop_err(msg):
    """Terminate the process, reporting *msg* through ``sys.exit``."""
    sys.exit(msg)


def _get_json(url):
    """Return the decoded JSON body of a GET request to *url*."""
    return requests.get(url).json()


def _embedded(payload, key):
    """Return ``payload["_embedded"][key]``, or ``[]`` when it is absent."""
    return payload.get("_embedded", {}).get(key, [])


def _fetch_all_genes(endpoint, query=""):
    """Collect the ``genes`` of every page of a paginated search.

    *endpoint* is the URL without a query string and *query* the
    ``name=value`` filter (may be empty).  Returns ``(genes, total)`` where
    *total* is the ``totalElements`` reported by the first page.
    """
    prefix = f"{endpoint}?{query}&" if query else f"{endpoint}?"
    first = _get_json(f"{prefix}page=0&size={_PAGE_SIZE}")
    genes = list(_embedded(first, "genes"))
    # range() is empty when there is a single page, so no extra request.
    for page_number in range(1, first["page"]["totalPages"]):
        page = _get_json(f"{prefix}page={page_number}&size={_PAGE_SIZE}")
        genes += _embedded(page, "genes")
    return genes, first["page"]["totalElements"]


def _write_table(df, head):
    """Write *df* tab separated to ``argv[2]``; header iff *head* is "True"."""
    df.to_csv(sys.argv[2], header=(head == "True"), index=False,
              sep="\t", index_label=False)


def _split_ids(inp):
    """Split a comma separated string into stripped, non-empty tokens."""
    return [tok.strip() for tok in inp.split(",") if tok.strip()]


def _read_ids_from_file(path, sep_code):
    """Read every cell of a delimited file and return them comma joined.

    Cells are taken row by row.  Reading as ``str`` keeps ids verbatim and
    flattening the values (instead of re-serialising with ``to_csv``) avoids
    stray newlines inside the result.
    """
    sep = _SEPARATORS.get(sep_code)
    if sep is None:
        sys.exit("Separator not valid, please change it.")
    frame = pd.read_csv(path, header=None, delimiter=sep, dtype=str)
    # Missing cells are float NaN even with dtype=str; skip them.
    cells = [v.strip() for v in frame.to_numpy().ravel()
             if isinstance(v, str)]
    return ",".join(c for c in cells if c)


def main():
    """Parse ``sys.argv`` and run the selected query, then exit with 0."""
    inp = str(sys.argv[1])
    query = str(sys.argv[3])

    try:
        if query == "7":
            full_gene_table(str(sys.argv[5]))
            sys.exit(0)

        from_file = str(sys.argv[5]) == "txt"
        if from_file:
            inp = _read_ids_from_file(inp, str(sys.argv[6]))

        def gene_output_format():
            # The separator code shifts this argument by one position.
            return str(sys.argv[7]) if from_file else str(sys.argv[6])

        if query == "8":
            genes_in_pipeline(inp, gene_output_format())
        elif query == "9":
            sign_mp(inp, gene_output_format())
        elif query == "10":
            par_pip_ma(inp)
        elif query == "11":
            # par_gen's g_out is optional, so this call no longer raises
            # TypeError for the missing argument.
            par_gen(inp)
        else:
            # Queries 2 and 4 take phenotype terms, the others gene ids.
            if query in ("2", "4"):
                final_list = pheno_mapping(inp)
            else:
                final_list = gene_mapping(inp)
            inp = ",".join(final_list)

            if query == "1":
                get_pheno(inp)
            elif query == "2":
                get_genes(inp, gene_output_format())
            elif query == "3":
                gene_set(inp)
            elif query == "4":
                extr_img(inp)
            elif query == "5":
                parameters(inp)
            elif query == "6":
                sign_par(inp)
            else:
                stop_err("Error, non-implemented query selected: " + query)
        sys.exit(0)
    except Exception as ex:
        # SystemExit is not an Exception, so deliberate exits pass through.
        stop_err("Error running impc_tool.py:\n" + str(ex))


def get_pheno(inp):
    """Query 1: write the significant MP terms (id, name) of one gene."""
    head = sys.argv[4]
    gene_data = _get_json(f"{impc_api_search_url}/{inp}")
    terms = gene_data["significantMpTerms"]
    if terms is None:
        stop_err("No significant MP terms found for this gene")

    # Ids go under the "id" label and names under the "name" label; the
    # previous version had the two labels swapped.
    df = pd.DataFrame({
        "MP term id": [t["mpTermId"] for t in terms],
        "MP term name": [t["mpTermName"] for t in terms],
    })
    _write_table(df, head)


def get_genes(inp, g_out):
    """Query 2: write the genes having the given phenotype(s).

    Only the first page of 20 hits is requested.  With ``g_out == "sym"``
    accession ids are translated to gene symbols.
    """
    head = sys.argv[4]
    query = (f"{impc_api_search_url}"
             f"/search/findAllBySignificantMpTermIdsContains"
             f"?mpTermIds={inp}&page=0&size=20")
    result = _get_json(query)
    print(f"Genes with {inp}: {result['page']['totalElements']}")

    genes = _embedded(result, "genes")
    acc = [g["mgiAccessionId"] for g in genes]
    if g_out == "sym":
        first_column = {"Gene symbol id": mgi_sym_map(acc)}
    else:
        first_column = {"Gene accession id": acc}
    df = pd.DataFrame({
        **first_column,
        "Gene name": [g["markerName"] for g in genes],
        "Gene bundle url": [g["_links"]["geneBundle"]["href"]
                            for g in genes],
    })
    _write_table(df, head)


def gene_set(inp):
    """Query 3: write each phenotype found in a gene set with its genes."""
    head = sys.argv[4]
    # NOTE(review): no page/size is sent, so only the endpoint's default
    # first page is used -- confirm this is intended for large gene sets.
    query = (f"{impc_api_search_url}/search/"
             f"findAllByMgiAccessionIdIn?"
             f"mgiAccessionIds={inp}")
    genes = _embedded(_get_json(query), "genes")

    by_term = {}
    for gene in genes:
        mp_terms = gene["significantMpTerms"]
        if mp_terms is None:
            continue
        for term in mp_terms:
            entry = by_term.setdefault(
                term["mpTermId"],
                {"mp_term": term["mpTermId"],
                 "mp_name": term["mpTermName"], "genes": []})
            entry["genes"].append(gene["mgiAccessionId"])

    entries = list(by_term.values())
    df = pd.DataFrame({
        "mp_term": [e["mp_term"] for e in entries],
        "mp_name": [e["mp_name"] for e in entries],
        "genes": [",".join(e["genes"]) for e in entries],
    })
    _write_table(df, head)


def extr_img(inp):
    """Query 4: write up to 20 images of genes with the given phenotype(s)."""
    head = sys.argv[4]
    query = (f"{impc_api_search_url}/search/"
             f"findAllBySignificantMpTermIdsContains?"
             f"mpTermIds={inp}&page=0&size=20")
    genes = _embedded(_get_json(query), "genes")
    bundles = [_get_json(g["_links"]["geneBundle"]["href"]) for g in genes]

    images = []
    for bundle in bundles[:20]:
        # NOTE(review): four-key responses are skipped; presumably these are
        # error bodies rather than gene bundles -- confirm against the API.
        if len(bundle) == 4:
            continue
        if bundle["geneImages"] is not None:
            images.extend(bundle["geneImages"])

    print(f"Images related to phenotype {inp}: {len(images)}")
    shown = images[:20]
    # Columns are looked up by name, so the table no longer depends on the
    # key order of the JSON objects.
    df = pd.DataFrame({column: [image.get(column) for image in shown]
                       for column in _IMAGE_COLUMNS})
    _write_table(df, head)


def parameters(inp):
    """Query 5: write the distinct parameter names measured for a knockout."""
    head = sys.argv[4]
    gene_info = _get_json(impc_api_search_url + "/" + inp)
    if not gene_info["phenotypingDataAvailable"]:
        stop_err("No parameters available for this knockout gene")

    bundle = _get_json(gene_info["_links"]["geneBundle"]["href"])
    gene_images = bundle["geneImages"]
    if gene_images is None:
        stop_err("No parameters available for this knockout gene")

    # dict.fromkeys drops duplicates while keeping first-seen order.
    names = list(dict.fromkeys(img["parameterName"] for img in gene_images))
    _write_table(pd.DataFrame({"Parameter": names}), head)


def sign_par(inp):
    """Query 6: write parameters with a significant result for a knockout."""
    head = sys.argv[4]
    # The "/" after the base URL was missing, producing an invalid path.
    results = _get_json(f"{impc_api_url}/statisticalResults/search/"
                        f"findAllByMarkerAccessionIdIsAndSignificantTrue?"
                        f"mgiAccessionId=" + inp)
    gene_stats = _embedded(results, "statisticalResults")
    if len(gene_stats) == 0:
        stop_err("No statistically relevant parameters found "
                 "for this knockout gene")

    df = pd.DataFrame({
        "Parameter name": [g["parameterName"] for g in gene_stats],
        "p-value": [g["pvalue"] for g in gene_stats],
    })
    _write_table(df, head)


def genes_in_pipeline(inp, g_out):
    """Query 8: write ids (or symbols) and names of genes in a pipeline."""
    head = sys.argv[4]
    genes, total = _fetch_all_genes(
        f"{impc_api_search_url}/search/findAllByTestedPipelineId",
        f"pipelineId={inp}")
    print(f"Genes with {inp}: {total}")

    acc = [g["mgiAccessionId"] for g in genes]
    if g_out == "sym":
        first_column = {"Gene symbol": mgi_sym_map(acc)}
    else:
        first_column = {"Gene accession id": acc}
    df = pd.DataFrame({**first_column,
                       "Gene name": [g["markerName"] for g in genes]})
    _write_table(df, head)


def sign_mp(inp, g_out):
    """Query 9: write genes with an MP term plus all their MP terms.

    The id and name columns hold Python lists and are written as such.
    """
    head = sys.argv[4]
    genes, _ = _fetch_all_genes(
        f"{impc_api_search_url}/search/"
        f"findAllBySignificantMpTermIdsContains",
        f"mpTermIds={inp}")

    acc = [g["mgiAccessionId"] for g in genes]
    ids = [[t["mpTermId"] for t in g["significantMpTerms"] or []]
           for g in genes]
    names = [[t["mpTermName"] for t in g["significantMpTerms"] or []]
             for g in genes]

    if g_out == "sym":
        first_column = {"Gene symbol": mgi_sym_map(acc)}
    else:
        first_column = {"Gene Id": acc}
    df = pd.DataFrame({**first_column,
                       "Significant MP terms Ids": ids,
                       "Significant MP terms Names": names})
    _write_table(df, head)


def full_gene_table(g_out):
    """Query 7: write every gene with its list of significant MP term ids.

    Genes without phenotypes get the literal ``"None"`` and are dropped
    unless ``argv[1]`` is ``"True"``.
    """
    head = sys.argv[4]
    genes, _ = _fetch_all_genes(impc_api_search_url)

    acc = [g["mgiAccessionId"] for g in genes]
    # The joined string is built directly; the previous element-wise chained
    # assignment into the DataFrame is not guaranteed to write through.
    phenotypes = [
        "None" if g["significantMpTerms"] is None
        else ", ".join(t["mpTermId"] for t in g["significantMpTerms"])
        for g in genes
    ]

    if g_out == "sym":
        first_column = {"Gene symbol": mgi_sym_map(acc)}
    else:
        first_column = {"MGI id": acc}
    df = pd.DataFrame({**first_column, "MP term list": phenotypes})

    if str(sys.argv[1]) != "True":
        df = df[df["MP term list"] != "None"].reset_index(drop=True)
    _write_table(df, head)


def par_pip_ma(inp):
    """Query 10: write measurements/analysis for a parameter or pipeline."""
    head = sys.argv[4]
    # NOTE(review): ids starting with "IMPC" are treated as parameter ids;
    # confirm pipeline ids never share that prefix.
    if inp[0:4] == "IMPC":
        endpoint = f"{impc_api_search_url}/search/findAllByTestedParameterId"
        query = f"parameterId={inp}"
    else:
        endpoint = f"{impc_api_search_url}/search/findAllByTestedPipelineId"
        query = f"pipelineId={inp}"

    records, total = _fetch_all_genes(endpoint, query)
    print(f"Genes with {inp}: {total}")

    # NOTE(review): the record keys are empty strings in the source this was
    # reconstructed from; the real field names must be filled in before this
    # query can work.
    df = pd.DataFrame({
        "Measurements": [r[""] for r in records],
        "Analysis": [r[""] for r in records],
    })
    _write_table(df, head)


def par_gen(inp, g_out=None):
    """Query 11: write genes and measured values for a parameter.

    *g_out* is optional so the one-argument call in ``main`` works; any
    value other than ``"sym"`` writes accession ids.
    """
    head = sys.argv[4]
    records, total = _fetch_all_genes(
        f"{impc_api_search_url}/search/findAllByTestedParameterId",
        f"parameterId={inp}")
    print(f"Genes with {inp}: {total}")

    acc = [r["mgiAccessionId"] for r in records]
    # NOTE(review): the measured-value key is an empty string in the source
    # this was reconstructed from; fill in the real field name.
    measured = [r[""] for r in records]

    if g_out == "sym":
        first_column = {"Gene symbol": mgi_sym_map(acc)}
    else:
        first_column = {"Gene accession id": acc}
    df = pd.DataFrame({**first_column, "Measured Values": measured})
    _write_table(df, head)


def gene_mapping(inp):
    """Return MGI ids for a comma separated mix of MGI ids and symbols.

    Entries containing ``"MGI:"`` are kept as they are; the rest are looked
    up as mouse gene symbols through mygene.  Exits when nothing maps.
    """
    tokens = _split_ids(inp)
    if not tokens:
        stop_err("Error: it was not possible to map the input.")
    final_list = [t for t in tokens if "MGI:" in t]
    sym_list = [t for t in tokens if "MGI:" not in t]

    # symbol for symbols, mgi for MGI :
    # https://docs.mygene.info/en/latest/doc/query_service.html#available-fields
    if sym_list:
        mg = mygene.MyGeneInfo()
        ginfo = mg.querymany(sym_list, scopes="symbol", fields="symbol,MGI",
                             species="mouse")
        mapped_any = False
        discarded = []
        for hit in ginfo:
            if "MGI" in hit:
                final_list.append(hit["MGI"])
                mapped_any = True
            else:
                discarded.append(hit["query"])
        if not mapped_any and not final_list:
            stop_err("Error: it was not possible to map the input.")
        elif not mapped_any:
            print("Warning: it was not possible to map any of the symbol ids. "
                  "Only MGI ids will be used.")
        elif discarded:
            print("Warning: it was not possible to map these elements: "
                  "" + ",".join(discarded) + "\n")

    return final_list


def pheno_mapping(inp):
    """Return MP ids for a comma separated mix of MP ids and other terms.

    Entries containing ``"MP:"`` are kept; the rest are looked up in the
    mapping CSV (indexed by its third column) and replaced by ``mpId``.
    Exits when nothing maps.
    """
    tokens = _split_ids(inp)
    if not tokens:
        stop_err("Error: it was not possible to map the input.")
    final_list = [t for t in tokens if "MP:" in t]
    other_terms = [t for t in tokens if "MP:" not in t]

    if other_terms:
        url = "https://raw.githubusercontent.com/AndreaFurlani/" \
              "hp_mp_mapping_test/main/hp_mp_mapping.csv"
        mapper = pd.read_csv(url, header=0, index_col=2)
        mapped_any = False
        discarded = []
        for term in other_terms:
            try:
                hit = mapper.loc[term, "mpId"]
            except KeyError:
                discarded.append(term)
                continue
            if isinstance(hit, pd.Series):
                # Repeated index entries yield a Series; keep each distinct
                # id once instead of appending the Series object itself.
                final_list.extend(dict.fromkeys(hit.astype(str)))
            else:
                final_list.append(str(hit))
            mapped_any = True
        if not mapped_any and not final_list:
            stop_err("Error: it was not possible to map the input.")
        elif not mapped_any:
            print("Warning: it was not possible to map any of the "
                  "HP term entries. Only MP entries will be used.")
        elif discarded:
            print("Warning: it was not possible to "
                  "map these elements: " + ",".join(discarded) + "\n")
    return final_list


def mgi_sym_map(mgi_list):
    """Return one gene symbol per entry of *mgi_list*, in the same order.

    Ids without a symbol are returned unchanged and reported on stdout.
    The result always has ``len(mgi_list)`` entries, so it can be used as a
    DataFrame column next to data derived from the same list.
    """
    if not mgi_list:
        return []
    mg = mygene.MyGeneInfo()
    ginfo = mg.querymany(mgi_list, scopes="MGI", fields="symbol,MGI",
                         species="mouse")
    symbols = {}
    for hit in ginfo:
        if "symbol" in hit:
            # First hit wins when a query returns several records.
            symbols.setdefault(hit["query"], hit["symbol"])

    discarded = [m for m in mgi_list if m not in symbols]
    if discarded:
        print("It was not possible to map these genes: " + ",".join(discarded))
    return [symbols.get(m, m) for m in mgi_list]


if __name__ == "__main__":
    main()

# --- remainder of the changeset that shared these lines -------------------
# diff -r 000000000000 -r d319dc5f3ea8 impc_tool.xml
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/impc_tool.xml Wed Oct 11 14:51:02 2023 +0000
# @@ -0,0 +1,351 @@
# (XML markup was lost in this dump; surviving text: "query tool";
#  requirements: requests, pandas, lxml, mygene)
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + https://doi.org/10.1093/nar/gku1193 + https://doi.org/10.12688/f1000research.25369.1 + https://doi.org/10.1038/nature19356 + + \ No newline at end of file diff -r 000000000000 -r d319dc5f3ea8 test-data/test_output_1_1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_output_1_1.tabular Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,10 @@ +MP term name MP term id +MP:0002135 abnormal kidney morphology +MP:0000194 increased circulating calcium level +MP:0002574 increased vertical activity +MP:0005633 increased circulating sodium level +MP:0001303 abnormal lens morphology +MP:0002965 increased circulating serum albumin level +MP:0001304 cataract +MP:0010052 increased grip strength +MP:0001402 decreased locomotor activity diff -r 000000000000 -r d319dc5f3ea8 test-data/test_output_1_2.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_output_1_2.tabular Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,5 @@ +MP term name MP term id +MP:0000194 increased circulating calcium level +MP:0011110 preweaning lethality, incomplete penetrance +MP:0001303 abnormal lens morphology +MP:0010053 decreased grip strength diff -r 000000000000 -r d319dc5f3ea8 test-data/test_output_2.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_output_2.tabular Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,21 @@ +Gene accession id Gene name Gene bundle url +MGI:1345144 sprouty RTK signaling antagonist 4 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1345144 +MGI:2670964 terminal nucleotidyltransferase 5A https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:2670964 +MGI:95490 fibrillin 2 
https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:95490 +MGI:95689 growth differentiation factor 6 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:95689 +MGI:1341886 ajuba LIM protein https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1341886 +MGI:1347352 hormonally upregulated Neu-associated kinase https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1347352 +MGI:109331 nucleoredoxin https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:109331 +MGI:1914061 dual oxidase maturation factor 2 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1914061 +MGI:1915958 RAB, member RAS oncogene family-like 2 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1915958 +MGI:1917363 ciliary microtubule associated protein 1B https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1917363 +MGI:1920858 MARVEL (membrane-associating) domain containing 3 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1920858 +MGI:106576 chondroitin polymerizing factor https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:106576 +MGI:107185 chaperonin containing Tcp1, subunit 5 (epsilon) https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:107185 +MGI:1931881 DnaJ heat shock protein family (Hsp40) member B12 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1931881 +MGI:109327 BCL2/adenovirus E1B interacting protein 2 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:109327 +MGI:1913955 deoxyribonuclease 1-like 2 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1913955 +MGI:107374 paired-like homeodomain transcription factor 1 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:107374 +MGI:1335088 proline-serine-threonine phosphatase-interacting protein 2 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1335088 +MGI:95688 growth differentiation factor 5 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:95688 +MGI:107474 CD38 antigen 
https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:107474 diff -r 000000000000 -r d319dc5f3ea8 test-data/test_output_3.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_output_3.tabular Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,79 @@ +MP:0002764 short tibia MGI:99960,MGI:108071 +MP:0001785 edema MGI:99960 +MP:0002968 increased circulating alkaline phosphatase level MGI:99960 +MPATH:590 fibro-osseous lesion MGI:99960 +MP:0001399 hyperactivity MGI:99960,MGI:1354170 +MP:0011100 preweaning lethality, complete penetrance MGI:99960,MGI:1344380,MGI:1917473 +MP:0010052 increased grip strength MGI:99960,MGI:96709 +MPATH:134 hyperplasia MGI:99960 +MP:0000218 increased leukocyte cell number MGI:99960,MGI:96709 +MP:0005013 increased lymphocyte cell number MGI:99960 +MP:0001363 increased anxiety-related response MGI:1354170 +MP:0001258 decreased body length MGI:1354170,MGI:108071,MGI:1915775,MGI:2443026 +MP:0003795 abnormal bone structure MGI:1354170 +MP:0001417 decreased exploration in new environment MGI:1354170,MGI:96709 +MP:0002797 increased thigmotaxis MGI:1354170 +MP:0002757 decreased vertical activity MGI:1354170 +MP:0011960 abnormal eye anterior chamber depth MGI:1354170 +MP:0010124 decreased bone mineral content MGI:1354170 +MP:0001402 decreased locomotor activity MGI:1354170 +MP:0004924 abnormal behavior MGI:1354170,MGI:96709 +MP:0013279 increased fasting circulating glucose level MGI:99502,MGI:1860418,MGI:103225 +MP:0005333 decreased heart rate MGI:3616082 +MP:0001406 abnormal gait MGI:96709 +MP:0010053 decreased grip strength MGI:96709,MGI:1924093,MGI:1915775 +MP:0001523 impaired righting response MGI:96709 +MP:0005559 increased circulating glucose level MGI:96709 +MP:0000745 tremors MGI:96709 +MPATH:52 lipid depletion MGI:1913564 +MPATH:42 lipid deposition MGI:1913564 +MP:0005419 decreased circulating serum albumin level MGI:1860418 +MP:0000219 increased neutrophil cell number MGI:1860418 +MP:0005567 decreased circulating total 
protein level MGI:1860418,MGI:1915775 +MP:0008810 increased circulating iron level MGI:1914361 +MP:0002875 decreased erythrocyte cell number MGI:1914361 +MP:0000208 decreased hematocrit MGI:1914361 +MP:0002874 decreased hemoglobin content MGI:1914361 +MP:0005566 decreased blood urea nitrogen level MGI:103225,MGI:1915775 +MP:0005343 increased circulating aspartate transaminase level MGI:103225 +MP:0011954 shortened PQ interval MGI:103225 +MP:0005344 increased circulating bilirubin level MGI:103225,MGI:95479 +MP:0002644 decreased circulating triglyceride level MGI:103225 +MP:0001415 increased exploration in new environment MGI:103225 +MP:0010511 shortened PR interval MGI:103225 +MP:0002574 increased vertical activity MGI:1915291 +MP:0003917 increased kidney weight MGI:1915291 +MP:0013292 embryonic lethality prior to organogenesis MGI:1344380 +MP:0000221 decreased leukocyte cell number MGI:95479 +MP:0005016 decreased lymphocyte cell number MGI:95479 +MP:0012361 decreased large unstained cell number MGI:95479 +MP:0001146 abnormal testis morphology MGI:2443598 +MP:0002152 abnormal brain morphology MGI:2443598 +MPATH:127 atrophy MGI:2443598 +MPATH:639 hydrocephalus MGI:2443598 +MP:0001925 male infertility MGI:2443598 +MP:0002092 abnormal eye morphology MGI:2443598 +MP:0005238 increased brain size MGI:2443598 +MP:0001147 small testis MGI:2443598 +MP:0000598 abnormal liver morphology MGI:2441730 +MP:0002833 increased heart weight MGI:2441730 +MP:0011110 preweaning lethality, incomplete penetrance MGI:2441730,MGI:1915775,MGI:2443026 +MP:0004738 abnormal auditory brainstem response MGI:2441730 +MP:0000599 enlarged liver MGI:2441730 +MP:0009476 enlarged cecum MGI:2441730 +MP:0005565 increased blood urea nitrogen level MGI:2441730 +MP:0001284 absent vibrissae MGI:2441730 +MP:0004832 enlarged ovary MGI:2441730 +MP:0005084 abnormal gallbladder morphology MGI:1915775 +MP:0000274 enlarged heart MGI:1915775 +MP:0009142 decreased prepulse inhibition MGI:1915775 +MP:0000692 small 
spleen MGI:1915775 +MP:0030610 absent teeth MGI:1915775 +MP:0001325 abnormal retina morphology MGI:1915775 +MP:0000266 abnormal heart morphology MGI:1915775 +MPATH:64 developmental dysplasia MGI:1915775 +MP:0000494 abnormal cecum morphology MGI:1915775 +MP:0001120 abnormal uterus morphology MGI:1915775 +MP:0000689 abnormal spleen morphology MGI:1915775 +MP:0009709 hydrometra MGI:1915775 +MP:0002060 abnormal skin morphology MGI:1915775 diff -r 000000000000 -r d319dc5f3ea8 test-data/test_output_9.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_output_9.tabular Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,4 @@ +Gene symbol Significant MP terms Ids Significant MP terms Names +Cacna1s ['MP:0001697', 'MP:0001785', 'MP:0003231', 'MP:0005388', 'MP:0001491', 'MP:0001575', 'MP:0003743', 'MP:0001914', 'MP:0011100', 'MP:0005560'] ['abnormal embryo size', 'edema', 'abnormal placenta vasculature', 'respiratory system phenotype', 'unresponsive to tactile stimuli', 'cyanosis', 'abnormal facial morphology', 'hemorrhage', 'preweaning lethality, complete penetrance', 'decreased circulating glucose level'] +Ndel1 ['MP:0001697', 'MP:0003984', 'MP:0002111', 'MP:0005388', 'MP:0011100'] ['abnormal embryo size', 'embryonic growth retardation', 'abnormal tail morphology', 'respiratory system phenotype', 'preweaning lethality, complete penetrance'] +Zfp536 ['MP:0003019', 'MP:0005564', 'MP:0005388', 'MP:0001575', 'MP:0001399', 'MP:0011100', 'MP:0005641'] ['increased circulating chloride level', 'increased hemoglobin content', 'respiratory system phenotype', 'cyanosis', 'hyperactivity', 'preweaning lethality, complete penetrance', 'increased mean corpuscular hemoglobin concentration'] diff -r 000000000000 -r d319dc5f3ea8 test-data/test_query_1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_query_1.txt Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,1 @@ +MGI:1923523 \ No newline at end of file diff -r 000000000000 -r d319dc5f3ea8 
test-data/test_query_2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_query_2.txt Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,1 @@ +MP:0002110 MP:0000559 \ No newline at end of file diff -r 000000000000 -r d319dc5f3ea8 test-data/test_query_3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_query_3.txt Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,1 @@ +MGI:1913564 MGI:1915291 MGI:1914361 MGI:1915775 MGI:1354170 MGI:103225 MGI:2441730 MGI:108071 MGI:2443598 MGI:106643 MGI:1917473 MGI:1338073 MGI:1924093 MGI:99960 MGI:99502 MGI:95479 MGI:1344380 MGI:1860418 MGI:1354721 MGI:3616082 MGI:96709 MGI:2443026 \ No newline at end of file