Mercurial > repos > recetox > freqsap
annotate freqSAP.py @ 0:ddabfd6ee2a2 draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
author | recetox |
---|---|
date | Fri, 18 Jul 2025 13:21:36 +0000 |
parents | |
children |
rev | line source |
---|---|
0
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
1 import argparse |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
2 import json |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
3 import os |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
4 import sys |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
5 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
6 import pandas as pd |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
7 import requests |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
8 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
9 VARIANT_INDEX = "NCBI Reference SNP (rs) Report ALPHA" |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
10 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
11 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
def get_protein_variation(
    accession: str, timeout: float = 300.0
) -> tuple[dict, pd.DataFrame]:
    """
    Fetch the variation record of a protein from the EBI Proteins API.

    Args:
        accession (str): Value sent as the ``accession`` query parameter.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        tuple[dict, pd.DataFrame]: The first entry of the JSON response with
        its "features" list removed, and that list converted to a DataFrame.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error status.
        requests.exceptions.Timeout: If the server does not answer within ``timeout``.
        IndexError: If the response contains no entries.
        KeyError: If the first entry has no "features" key.
    """
    # Let requests build the query string so the accession is URL-encoded.
    r = requests.get(
        "https://www.ebi.ac.uk/proteins/api/variation",
        params={"offset": 0, "size": -1, "accession": accession},
        headers={"Accept": "application/json"},
        timeout=timeout,
    )
    # Raises for every status the old ``if not r.ok`` guard covered.
    r.raise_for_status()

    entries = json.loads(r.text)
    if not entries:
        # Same exception type as indexing an empty list, but with context.
        raise IndexError(f"No variation data returned for accession {accession}")

    data = entries[0]
    features = pd.DataFrame(data.pop("features"))
    return data, features
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
26 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
27 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
def get_position(feature: dict) -> str:
    """
    Get the position of a feature in the protein sequence.

    Args:
        feature (dict): A feature dictionary containing 'begin' and 'end'.

    Returns:
        str: The single position when 'begin' equals 'end' (this includes the
        case where both are missing, which yields "None"), otherwise the
        range in the format "begin-end".
    """
    begin = feature.get("begin")
    end = feature.get("end")
    if begin == end:
        return str(begin)
    return f"{begin}-{end}"
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
43 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
44 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
def get_frequency(variant: str, timeout: float = 60.0) -> str:
    """
    Download the frequency report of a variant from NCBI dbSNP.

    Args:
        variant (str): Variant identifier inserted into the download URL.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        str: The response body, or "" when ``variant`` is empty or the
        request fails (the failure is reported with ``print``).
    """
    if not variant:
        return ""

    freq_url = f"https://www.ncbi.nlm.nih.gov/snp/{variant}/download/frequency"
    try:
        r = requests.get(
            freq_url, headers={"Accept": "application/json"}, timeout=timeout
        )
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Best effort: one unreachable variant must not abort the whole run.
        print(f"Error fetching frequency data for variant {variant}: {e}")
        return ""
    return r.text
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
58 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
59 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
def parse_frequency_reponse(responseBody: str) -> tuple[dict, pd.DataFrame]:
    """
    Parse the frequency response body.

    The body is read line by line as tab-separated text:

    * two fields: a metadata entry (key stripped of "#" and spaces);
    * more than two fields, first one starting with "#": the column header;
    * more than two fields otherwise: a data row, empty fields become "NA";
    * no tab at all: the line is ignored.

    Args:
        responseBody (str): The response body as a string.

    Returns:
        tuple[dict, pd.DataFrame]: The metadata entries and the table of data
        rows. The table gets an extra column named ``VARIANT_INDEX`` filled
        with the metadata value stored under that key (None if absent).
        Both are empty when ``responseBody`` is "".
    """
    if responseBody == "":
        return {}, pd.DataFrame()

    metadata = {}
    header = []
    rows = []

    for line in responseBody.splitlines():
        # str.split always yields at least one token, so the three cases
        # below (== 2, > 2, == 1) are exhaustive.
        tokens = line.split("\t")

        if len(tokens) == 2:
            metadata[tokens[0].strip("# ")] = tokens[1].strip()
        elif len(tokens) > 2:
            if tokens[0].startswith("#"):
                header = [t.strip("# ") for t in tokens]
            else:
                rows.append(["NA" if t == "" else t for t in tokens])
        # A single token means the line has no tab and carries no data.

    df = pd.DataFrame(rows, columns=header)
    df[VARIANT_INDEX] = metadata.get(VARIANT_INDEX)
    return metadata, df
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
106 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
107 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
def get_variant_id(feature: dict) -> str:
    """
    Get the variant ID from a feature.

    Args:
        feature (dict): A feature dictionary; its optional "xrefs" entry is
            expected to be a list of dictionaries.

    Returns:
        str: The "id" of the first cross-reference whose id starts with "rs",
        or "" if there is none.
    """
    xrefs = feature.get("xrefs", [])
    # A missing cell coming from a DataFrame row is NaN, not a list.
    if not isinstance(xrefs, (list, tuple)):
        return ""

    for xref in xrefs:
        if not isinstance(xref, dict):
            continue
        xref_id = xref.get("id")
        # Skip cross-references that carry no usable id.
        if isinstance(xref_id, str) and xref_id.startswith("rs"):
            return xref_id
    return ""
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
123 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
124 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
def combine_alternative_sequences(df: pd.DataFrame) -> pd.DataFrame:
    """
    Merge rows that describe the same variant at the same location.

    Rows are grouped by "begin", "end" and "variant_id" (missing values form
    their own groups). Within a group the distinct "mutatedType" values are
    joined with commas; every other column keeps its first value.

    Args:
        df (pd.DataFrame): Table of features.

    Returns:
        pd.DataFrame: The collapsed table, or ``df`` itself when it has no
        "mutatedType" column.
    """
    if "mutatedType" not in df.columns:
        return df

    def _join_unique(values):
        return ",".join(values.astype(str).unique())

    # Start with "first" everywhere, then override the one special column;
    # the key already exists, so the column order is unchanged.
    aggregations = dict.fromkeys(df.columns, "first")
    aggregations["mutatedType"] = _join_unique

    grouped = df.groupby(
        ["begin", "end", "variant_id"], dropna=False, as_index=False
    )
    return grouped.agg(aggregations)
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
138 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
139 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
def get_populations(regions: list[str]) -> set[str]:
    """Expand region names into the population labels they cover.

    Each entry of ``regions`` is looked up in a fixed region -> population
    table and the matching label sets are merged into one set.

    Args:
        regions (list[str]): Region names to include. Recognised names are
            "Africa", "North America", "Asia", "Europe", "Global",
            "South America", "Middle East" and "Other". Lookup is exact
            (case- and whitespace-sensitive); unrecognised names contribute
            nothing.

    Returns:
        set[str]: Union of the population labels of all recognised regions.
            Empty when ``regions`` is empty or contains no recognised name.
    """
    mapping: dict[str, set[str]] = {
        "Africa": {"African"},
        "North America": {
            "American",
            "African American",
            "Mexican",
            "Cuban",
            "European American",
            "NativeAmerican",
            "NativeHawaiian",
        },
        "Asia": {
            "Asian",
            "East Asian",
            "Central Asia",
            "JAPANESE",
            "KOREAN",
            "South Asian",
            "SouthAsian",
        },
        "Europe": {
            "Europe",
            "European",
            "Finnish from FINRISK project",
            "Spanish controls",
            "TWIN COHORT",
        },
        "Global": {"Global", "Total"},
        "South America": {
            "Latin American 1",
            "Latin American 2",
            "Dominican",
            "PuertoRican",
            "SouthAmerican",
        },
        "Middle East": {"Middle Eastern", "Near_East"},
        "Other": {"Other"},
    }

    # Start from an empty set instance rather than calling the unbound
    # ``set.union``: the latter raises TypeError when ``regions`` is empty,
    # whereas ``set().union()`` with no arguments simply yields an empty set.
    return set().union(*(mapping.get(region, set()) for region in regions))
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
197 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
198 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
199 def main(): |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
200 parser = argparse.ArgumentParser(description="Protein Variance CLI Application") |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
201 parser.add_argument( |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
202 "-a", "--accession", type=str, required=True, help="Protein accession number." |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
203 ) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
204 parser.add_argument( |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
205 "-p", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
206 "--populations", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
207 type=str, |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
208 required=True, |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
209 help="Comma-separated list of populations.", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
210 ) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
211 parser.add_argument( |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
212 "-f", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
213 "--output-format", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
214 type=str, |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
215 choices=["xlsx", "tabular", "csv"], |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
216 default="tabular", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
217 help="Output format: xlsx, tabular (tsv), or csv. Default is tabular.", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
218 ) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
219 parser.add_argument( |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
220 "-o", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
221 "--output-file", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
222 type=str, |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
223 default="protein_variation.tsv", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
224 help="Output file name.", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
225 ) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
226 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
227 args = parser.parse_args() |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
228 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
229 populations = get_populations(args.populations.split(",")) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
230 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
231 _, features_df = get_protein_variation(args.accession) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
232 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
233 features_df["variant_id"] = features_df.apply(get_variant_id, axis=1) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
234 features_df["variation_position"] = features_df.apply(get_position, axis=1) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
235 features_df = combine_alternative_sequences(features_df) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
236 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
237 results = list( |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
238 zip( |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
239 *[ |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
240 parse_frequency_reponse(get_frequency(variant)) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
241 for variant in features_df["variant_id"] |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
242 ] |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
243 ) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
244 ) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
245 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
246 metadata_df = pd.DataFrame(results[0]) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
247 frequencies_df = pd.concat(results[1]) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
248 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
249 merged = pd.concat([features_df, metadata_df], axis=1) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
250 final_merged = pd.merge(merged, frequencies_df, on=VARIANT_INDEX, how="outer") |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
251 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
252 final_merged[["Ref Allele NA", "Ref Allele Prob"]] = final_merged[ |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
253 "Ref Allele" |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
254 ].str.split("=", n=1, expand=True) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
255 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
256 alt_alleles = final_merged["Alt Allele"].str.split(",", expand=True) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
257 if alt_alleles.columns.size == 2: |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
258 final_merged[["Alt Allele 1", "Alt Allele 2"]] = final_merged[ |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
259 "Alt Allele" |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
260 ].str.split(",", expand=True) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
261 final_merged[["Alt Allele NA 1", "Alt Allele Prob 1"]] = final_merged[ |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
262 "Alt Allele 1" |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
263 ].str.split("=", n=1, expand=True) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
264 final_merged[["Alt Allele NA 2", "Alt Allele Prob 2"]] = final_merged[ |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
265 "Alt Allele 2" |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
266 ].str.split("=", n=1, expand=True) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
267 else: |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
268 final_merged[["Alt Allele NA 1", "Alt Allele Prob 1"]] = final_merged[ |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
269 "Alt Allele" |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
270 ].str.split("=", n=1, expand=True) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
271 final_merged[["Alt Allele NA 2", "Alt Allele Prob 2"]] = None |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
272 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
273 subset_cols: list[str] = [ |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
274 "variation_position", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
275 "consequenceType", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
276 "wildType", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
277 "mutatedType", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
278 "variant_id", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
279 "Alleles", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
280 "Study", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
281 "Population", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
282 "Group", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
283 "Samplesize", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
284 "Ref Allele", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
285 "Alt Allele", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
286 "BioProject ID", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
287 "BioSample ID", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
288 "somaticStatus", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
289 "Ref Allele NA", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
290 "Ref Allele Prob", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
291 "Alt Allele NA 1", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
292 "Alt Allele Prob 1", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
293 "Alt Allele NA 2", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
294 "Alt Allele Prob 2", |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
295 ] |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
296 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
297 subset = final_merged[subset_cols] |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
298 subset = subset[subset["Population"].isin(populations)] |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
299 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
300 if args.output_format == "xlsx": |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
301 outdir = os.path.dirname(args.output_file) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
302 outpath = os.path.join(outdir, "results.xlsx") |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
303 subset.to_excel(outpath, index=False, na_rep="NA", engine="openpyxl") |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
304 os.rename(outpath, args.output_file) |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
305 elif args.output_format == "csv": |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
306 subset.to_csv(args.output_file, index=False, na_rep="NA") |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
307 else: # tabular/tsv |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
308 subset.to_csv(args.output_file, index=False, sep="\t", na_rep="NA") |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
309 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
310 |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
311 if __name__ == "__main__": |
ddabfd6ee2a2
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/main/tools/freqsap commit 202a898874d0de51b9923430ea0ef3040084c8d0
recetox
parents:
diff
changeset
|
312 main() |