Mercurial > repos > iuc > openalex_explorer
annotate openalex_fetch.py @ 0:7a27a48d57c0 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
| author | iuc |
|---|---|
| date | Sat, 31 May 2025 12:25:39 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
1 import argparse |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
2 import os |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
3 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
4 import requests |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
5 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
6 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
7 # doi |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
def get_openalex_id_from_doi(doi):
    """Resolve a DOI to the short OpenAlex work ID.

    Args:
        doi: The DOI to look up. A bare DOI (``10.1234/abcd``) is expected,
            but the ``doi:`` prefix and the ``doi.org`` resolver-URL forms
            are accepted as well.

    Returns:
        The last path segment of the ``id`` field of the OpenAlex response
        (e.g. ``W2088676066``).

    Raises:
        requests.HTTPError: If OpenAlex answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    doi = doi.strip()
    lowered = doi.lower()
    # The lookup URL below already adds the resolver prefix, so strip any
    # prefix the caller supplied; otherwise it would be doubled.
    for prefix in (
        'https://doi.org/',
        'http://doi.org/',
        'https://dx.doi.org/',
        'http://dx.doi.org/',
        'doi:',
    ):
        if lowered.startswith(prefix):
            doi = doi[len(prefix):].strip()
            break

    url = f'https://api.openalex.org/works/https://doi.org/{doi}'
    # A timeout keeps the tool from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.json()['id'].split('/')[-1]
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
13 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
14 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
15 # title |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
def get_openalex_id_from_title(title):
    """Search OpenAlex for a work by title and return its short work ID.

    Args:
        title: Free-text title to search for.

    Returns:
        The last path segment of the ``id`` field of the first search
        result (e.g. ``W2088676066``).

    Raises:
        ValueError: If the search returns no results.
        requests.HTTPError: If OpenAlex answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # Pass the title through ``params`` so that requests URL-encodes it;
    # titles containing '&', '#', '+' or '?' would otherwise corrupt the
    # query string.
    response = requests.get(
        'https://api.openalex.org/works',
        params={'search': title},
        timeout=30,
    )
    response.raise_for_status()

    results = response.json().get('results', [])
    if not results:
        raise ValueError("No paper found with the given title.")
    return results[0]['id'].split('/')[-1]
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
24 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
25 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
26 # fetch papers |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
def fetch_citing_papers(openalex_id, max_citations=None):
    """Fetch the works that cite the given OpenAlex work.

    The work record is fetched first to obtain its ``cited_by_api_url``;
    that listing is then walked page by page.

    Args:
        openalex_id: Short OpenAlex work ID (e.g. ``W2088676066``).
        max_citations: Maximum number of citing works to return. ``None``
            or ``0`` means no cap.

    Returns:
        A list of the ``results`` entries returned by OpenAlex, truncated
        to ``max_citations`` when a cap is given.

    Raises:
        ValueError: If ``max_citations`` is negative, or if the work record
            carries no ``cited_by_api_url``.
        requests.HTTPError: If OpenAlex answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    if max_citations is not None and max_citations < 0:
        # A negative cap used to fall through to ``[:max_citations]`` and
        # silently drop the last N papers; reject it explicitly instead.
        raise ValueError("max_citations must be a non-negative integer or None.")
    # 0 has always behaved like "no cap"; keep that for existing callers.
    limit = max_citations or None

    per_page = 200
    # Do not request more records per page than will be kept.
    page_size = per_page if limit is None else min(per_page, limit)

    work_url = f'https://api.openalex.org/works/{openalex_id}'
    response = requests.get(work_url, timeout=30)
    response.raise_for_status()
    work_data = response.json()

    cited_by_url = work_data.get('cited_by_api_url')
    if not cited_by_url:
        raise ValueError("This work has no citing papers.")

    all_citing_papers = []
    # Cursor paging: OpenAlex limits ``page=`` based paging to the first
    # 10,000 results, which breaks "fetch all" on highly cited works.
    cursor = '*'
    while cursor:
        response = requests.get(
            cited_by_url,
            params={'per_page': page_size, 'cursor': cursor},
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()

        results = data.get('results', [])
        if not results:
            break

        all_citing_papers.extend(results)

        if limit is not None and len(all_citing_papers) >= limit:
            del all_citing_papers[limit:]
            break

        # A missing/None cursor means the listing is exhausted.
        cursor = (data.get('meta') or {}).get('next_cursor')

    return all_citing_papers
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
63 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
64 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
def download_pdf(url, title, folder_name):
    """Download a PDF into ``folder_name``, naming the file after ``title``.

    This is best-effort: every failure is reported on stdout and swallowed
    so that one bad download does not abort the caller's loop.

    Args:
        url: Direct URL of the PDF.
        title: Paper title used to build the file name. Characters other
            than alphanumerics, space, ``_`` and ``-`` are dropped. A
            missing or empty title falls back to ``untitled``.
        folder_name: Target directory; created if it does not exist.

    Returns:
        None.
    """
    try:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(folder_name, exist_ok=True)
        # A timeout keeps one stalled server from blocking the whole run.
        response = requests.get(url, timeout=60)
        if response.status_code == 200:
            # Guard against a missing (None) title, which previously raised
            # a TypeError only after the PDF had already been fetched.
            safe_title = "".join(
                ch for ch in (title or "") if ch.isalnum() or ch in " _-"
            ).rstrip()
            # Cap the name by encoded size; common filesystems reject names
            # longer than 255 bytes, and ".pdf" still has to fit.
            safe_title = (
                safe_title.encode("utf-8")[:200]
                .decode("utf-8", errors="ignore")
                .rstrip()
            )
            if not safe_title:
                # Avoid writing a hidden file literally named ".pdf".
                safe_title = "untitled"
            file_path = os.path.join(folder_name, f"{safe_title}.pdf")
            with open(file_path, 'wb') as f:
                f.write(response.content)
            print(f"[✓] Downloaded: {file_path}")
        else:
            print(f"[x] Failed to download: {url}")
    except Exception as e:
        # Deliberately broad: downloads are optional extras, so report and
        # carry on rather than failing the whole job.
        print(f"[!] Error downloading {url}: {e}")
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
80 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
81 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
82 def main(): |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
83 parser = argparse.ArgumentParser(description="Fetch citing papers from OpenAlex") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
84 group = parser.add_mutually_exclusive_group(required=True) |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
85 group.add_argument('--id', help='OpenAlex ID of the paper (e.g., W2088676066)') |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
86 group.add_argument('--doi', help='DOI of the paper') |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
87 group.add_argument('--title', help='Title of the paper') |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
88 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
89 parser.add_argument('--download', action='store_true', help='Download available OA PDFs') |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
90 parser.add_argument('--max-citations', type=str, default="50", dest='max_citations', help="Max citing papers to fetch or 'all'") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
91 parser.add_argument('--output-dir', default='.', help='Directory to save output files') |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
92 args = parser.parse_args() |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
93 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
94 output_dir = args.output_dir |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
95 summary_path = os.path.join(output_dir, "summary.txt") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
96 tsv_path = os.path.join(output_dir, "citing_papers.tsv") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
97 download_dir = os.path.join(output_dir, "downloads") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
98 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
99 if args.max_citations.lower() == "all": |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
100 max_citations = None |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
101 else: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
102 max_citations = int(args.max_citations) |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
103 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
104 try: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
105 if args.title: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
106 openalex_id = get_openalex_id_from_title(args.title) |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
107 elif args.doi: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
108 openalex_id = get_openalex_id_from_doi(args.doi) |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
109 else: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
110 openalex_id = args.id |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
111 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
112 citing_papers = fetch_citing_papers(openalex_id, max_citations=max_citations) |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
113 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
114 is_oa = 0 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
115 is_not_oa = 0 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
116 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
117 for paper in citing_papers: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
118 if not paper['locations']: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
119 continue |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
120 location = paper['locations'][0] |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
121 is_open = location.get('is_oa', False) |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
122 landing_url = location.get('landing_page_url', 'No URL') |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
123 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
124 if is_open: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
125 is_oa += 1 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
126 print("[OA]", landing_url) |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
127 if args.download: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
128 pdf_url = location.get('pdf_url') |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
129 if pdf_url: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
130 download_pdf(pdf_url, paper['title'], download_dir) |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
131 else: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
132 print(f"[!] No direct PDF URL for: {paper['title']}") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
133 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
134 else: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
135 is_not_oa += 1 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
136 print("[Closed]", landing_url) |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
137 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
138 print("\nSummary:") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
139 print("Total citing papers:", len(citing_papers)) |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
140 print("Open Access papers:", is_oa) |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
141 print("Closed Access papers:", is_not_oa) |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
142 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
143 # save summary |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
144 with open(summary_path, "w") as f: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
145 f.write(f"Total citing papers: {len(citing_papers)}\n") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
146 f.write(f"Open Access papers: {is_oa}\n") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
147 f.write(f"Closed Access papers: {is_not_oa}\n") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
148 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
149 # save citing papers to a TSV file |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
150 with open(tsv_path, "w", encoding="utf-8") as f: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
151 f.write("Title\tDOI\tIs_OA\n") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
152 for paper in citing_papers: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
153 raw_title = paper.get("title") or "N/A" |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
154 title = raw_title.replace("\t", " ") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
155 doi = paper.get("doi", "N/A") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
156 location = paper['locations'][0] if paper['locations'] else {} |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
157 is_oa = location.get("is_oa", False) |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
158 # landing_url = location.get("landing_page_url", "N/A") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
159 # pdf_url = location.get("pdf_url", "N/A") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
160 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
161 f.write(f"{title}\t{doi}\t{is_oa}\n") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
162 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
163 except Exception as e: |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
164 print(f"[!] Error: {e}") |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
165 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
166 |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
167 if __name__ == '__main__': |
|
7a27a48d57c0
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff
changeset
|
168 main() |
