annotate openalex_fetch.py @ 0:7a27a48d57c0 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
author iuc
date Sat, 31 May 2025 12:25:39 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
1 import argparse
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
2 import os
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
3
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
4 import requests
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
5
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
6
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
7 # doi
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
def get_openalex_id_from_doi(doi, timeout=30):
    """Resolve a DOI to the short OpenAlex work ID.

    Args:
        doi: The DOI, either bare (``10.1234/abc``) or prefixed with
            ``doi:``, ``http://doi.org/`` or ``https://doi.org/`` (also the
            ``dx.doi.org`` variants). The prefix is removed before the lookup
            so it is not duplicated in the request URL.
        timeout: Seconds to wait for the OpenAlex API before giving up.

    Returns:
        The last ``/``-separated segment of the ``id`` field of the JSON
        document returned by the API.

    Raises:
        ValueError: If ``doi`` is empty after normalisation.
        requests.HTTPError: If the API answers with an error status.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    bare_doi = (doi or "").strip()
    lowered = bare_doi.lower()
    for prefix in (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    ):
        if lowered.startswith(prefix):
            bare_doi = bare_doi[len(prefix):].strip()
            break
    if not bare_doi:
        raise ValueError("DOI must not be empty.")

    # DOIs may legally contain '#', '?', '<', '>' and similar characters that
    # would otherwise be parsed as URL syntax; keep '/' because it separates
    # the DOI prefix from its suffix.
    encoded_doi = quote(bare_doi, safe="/")
    url = f'https://api.openalex.org/works/https://doi.org/{encoded_doi}'
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()['id'].rsplit('/', 1)[-1]
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
13
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
14
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
15 # title
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
def get_openalex_id_from_title(title, timeout=30):
    """Look up a work by title and return the short OpenAlex ID of the top hit.

    Args:
        title: Free-text title passed to the ``search`` query parameter of the
            OpenAlex ``/works`` endpoint.
        timeout: Seconds to wait for the OpenAlex API before giving up.

    Returns:
        The last ``/``-separated segment of the ``id`` field of the first
        entry in the response's ``results`` list.

    Raises:
        ValueError: If ``title`` is blank, or the search returns no results.
        requests.HTTPError: If the API answers with an error status.
    """
    query = (title or "").strip()
    if not query:
        # A blank search term would not narrow the listing at all, so the
        # "first hit" would be an unrelated work.
        raise ValueError("Title must not be empty.")

    # Passing the title through ``params`` lets requests percent-encode it;
    # interpolating it into the URL breaks on '&', '#', '+' and similar.
    # Only the first hit is used, so a single result per page is enough.
    response = requests.get(
        'https://api.openalex.org/works',
        params={'search': query, 'per_page': 1},
        timeout=timeout,
    )
    response.raise_for_status()
    results = response.json().get('results', [])
    if not results:
        raise ValueError("No paper found with the given title.")
    return results[0]['id'].rsplit('/', 1)[-1]
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
24
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
25
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
26 # fetch papers
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
def fetch_citing_papers(openalex_id, max_citations=None, timeout=30):
    """Collect the works that cite the given OpenAlex work.

    The work record is fetched first to obtain its ``cited_by_api_url``; that
    listing is then walked page by page until it is exhausted or
    ``max_citations`` records have been gathered.

    Args:
        openalex_id: Short OpenAlex work ID (e.g. ``W2088676066``).
        max_citations: Upper bound on the number of records returned.
            ``None`` means no limit; ``0`` or a negative number returns an
            empty list.
        timeout: Seconds to wait for each OpenAlex API request.

    Returns:
        A list of work records (dicts, as decoded from the API's ``results``
        arrays), at most ``max_citations`` long when a limit is given.

    Raises:
        ValueError: If the work record has no ``cited_by_api_url``.
        requests.HTTPError: If any request answers with an error status.
    """
    max_page_size = 200

    work_url = f'https://api.openalex.org/works/{openalex_id}'
    response = requests.get(work_url, timeout=timeout)
    response.raise_for_status()

    cited_by_url = response.json().get('cited_by_api_url')
    if not cited_by_url:
        raise ValueError("This work has no citing papers.")

    limited = max_citations is not None
    if limited and max_citations <= 0:
        return []

    # Do not request more records per page than the caller asked for.
    page_size = min(max_page_size, max_citations) if limited else max_page_size

    citing_papers = []
    # Cursor paging: per the OpenAlex paging documentation, page-number paging
    # is limited to the first 10,000 results, whereas a cursor ('*' to start,
    # then ``meta.next_cursor``) can walk the whole listing.
    cursor = '*'
    while cursor:
        # ``params`` is merged into the URL's existing query string, so this
        # works whether or not ``cited_by_url`` already contains a '?'.
        response = requests.get(
            cited_by_url,
            params={'per_page': page_size, 'cursor': cursor},
            timeout=timeout,
        )
        response.raise_for_status()
        payload = response.json()

        results = payload.get('results', [])
        if not results:
            break
        citing_papers.extend(results)

        if limited and len(citing_papers) >= max_citations:
            del citing_papers[max_citations:]
            break

        # A short page means the listing is exhausted; skip the extra request.
        if len(results) < page_size:
            break

        cursor = (payload.get('meta') or {}).get('next_cursor')

    return citing_papers
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
63
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
64
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
def download_pdf(url, title, folder_name, timeout=60):
    """Download ``url`` into ``folder_name`` as ``<sanitised title>.pdf``.

    This is a best-effort helper: every failure is reported on stdout and the
    function returns normally, so one bad download does not abort a batch.

    Args:
        url: Address of the PDF to fetch.
        title: Title used to build the file name. Only alphanumerics, spaces,
            ``_`` and ``-`` are kept. ``None`` or a title with no usable
            characters falls back to ``untitled``.
        folder_name: Target directory; created if it does not exist.
        timeout: Seconds to wait for the download request.

    Returns:
        None.
    """
    try:
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(folder_name, exist_ok=True)

        # The title comes straight from API records and may be None.
        safe_title = "".join(
            ch for ch in (title or "") if ch.isalnum() or ch in " _-"
        ).rstrip()
        if not safe_title:
            # Avoid writing a hidden, nameless ".pdf" file.
            safe_title = "untitled"
        file_path = os.path.join(folder_name, f"{safe_title}.pdf")

        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            with open(file_path, 'wb') as f:
                f.write(response.content)
            print(f"[✓] Downloaded: {file_path}")
        else:
            print(f"[x] Failed to download: {url}")
    except Exception as e:
        # Deliberately broad: network, filesystem and decoding errors are all
        # reported rather than propagated.
        print(f"[!] Error downloading {url}: {e}")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
80
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
81
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
82 def main():
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
83 parser = argparse.ArgumentParser(description="Fetch citing papers from OpenAlex")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
84 group = parser.add_mutually_exclusive_group(required=True)
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
85 group.add_argument('--id', help='OpenAlex ID of the paper (e.g., W2088676066)')
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
86 group.add_argument('--doi', help='DOI of the paper')
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
87 group.add_argument('--title', help='Title of the paper')
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
88
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
89 parser.add_argument('--download', action='store_true', help='Download available OA PDFs')
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
90 parser.add_argument('--max-citations', type=str, default="50", dest='max_citations', help="Max citing papers to fetch or 'all'")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
91 parser.add_argument('--output-dir', default='.', help='Directory to save output files')
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
92 args = parser.parse_args()
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
93
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
94 output_dir = args.output_dir
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
95 summary_path = os.path.join(output_dir, "summary.txt")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
96 tsv_path = os.path.join(output_dir, "citing_papers.tsv")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
97 download_dir = os.path.join(output_dir, "downloads")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
98
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
99 if args.max_citations.lower() == "all":
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
100 max_citations = None
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
101 else:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
102 max_citations = int(args.max_citations)
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
103
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
104 try:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
105 if args.title:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
106 openalex_id = get_openalex_id_from_title(args.title)
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
107 elif args.doi:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
108 openalex_id = get_openalex_id_from_doi(args.doi)
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
109 else:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
110 openalex_id = args.id
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
111
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
112 citing_papers = fetch_citing_papers(openalex_id, max_citations=max_citations)
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
113
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
114 is_oa = 0
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
115 is_not_oa = 0
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
116
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
117 for paper in citing_papers:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
118 if not paper['locations']:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
119 continue
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
120 location = paper['locations'][0]
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
121 is_open = location.get('is_oa', False)
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
122 landing_url = location.get('landing_page_url', 'No URL')
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
123
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
124 if is_open:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
125 is_oa += 1
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
126 print("[OA]", landing_url)
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
127 if args.download:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
128 pdf_url = location.get('pdf_url')
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
129 if pdf_url:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
130 download_pdf(pdf_url, paper['title'], download_dir)
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
131 else:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
132 print(f"[!] No direct PDF URL for: {paper['title']}")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
133
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
134 else:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
135 is_not_oa += 1
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
136 print("[Closed]", landing_url)
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
137
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
138 print("\nSummary:")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
139 print("Total citing papers:", len(citing_papers))
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
140 print("Open Access papers:", is_oa)
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
141 print("Closed Access papers:", is_not_oa)
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
142
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
143 # save summary
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
144 with open(summary_path, "w") as f:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
145 f.write(f"Total citing papers: {len(citing_papers)}\n")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
146 f.write(f"Open Access papers: {is_oa}\n")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
147 f.write(f"Closed Access papers: {is_not_oa}\n")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
148
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
149 # save citing papers to a TSV file
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
150 with open(tsv_path, "w", encoding="utf-8") as f:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
151 f.write("Title\tDOI\tIs_OA\n")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
152 for paper in citing_papers:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
153 raw_title = paper.get("title") or "N/A"
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
154 title = raw_title.replace("\t", " ")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
155 doi = paper.get("doi", "N/A")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
156 location = paper['locations'][0] if paper['locations'] else {}
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
157 is_oa = location.get("is_oa", False)
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
158 # landing_url = location.get("landing_page_url", "N/A")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
159 # pdf_url = location.get("pdf_url", "N/A")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
160
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
161 f.write(f"{title}\t{doi}\t{is_oa}\n")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
162
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
163 except Exception as e:
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
164 print(f"[!] Error: {e}")
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
165
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
166
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
# Script entry point: main() is called only when this file is executed directly, not when it is imported as a module.
167 if __name__ == '__main__':
7a27a48d57c0 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
iuc
parents:
diff changeset
168 main()