Mercurial > repos > iuc > detect_circular_sequences
annotate detect_circular_sequences.py @ 0:faec698e3f98 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
| author | iuc |
|---|---|
| date | Thu, 11 Dec 2025 08:53:37 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
2 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
3 ######################################################################################### |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
4 # This script detects circular contigs by looking for identical k-mers at the two |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
5 # ends of the sequences provided in a FASTA file. In order to be able to predict genes |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
6 # spanning the origin of circular contigs, the first 1,000 nucleotides of each circular |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
7 # contig are duplicated and added at the contig's end. |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
8 # |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
9 # Inspired by Simon Roux's work for Metavir2 (2014) and Corentin Hochart's work in PlasSuite |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
10 # |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
11 ######################################################################################### |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
12 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
13 import argparse |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
14 import logging |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
15 from pathlib import Path |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
16 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
17 from Bio import SeqIO |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
18 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
# Verbosity value -> logging level; larger values produce more output.
log_levels = {
    0: logging.CRITICAL,
    1: logging.ERROR,
    2: logging.WARNING,
    3: logging.INFO,
    4: logging.DEBUG,
}
# Default configuration (INFO) applied at import time, before any
# verbosity has been chosen.
logging.basicConfig(level=log_levels[3])
logger = logging.getLogger()


def setup_logger(verbosity: int) -> None:
    """
    Configure the root logger based on verbosity level.

    :param verbosity: verbosity level, a key of `log_levels`; any other
        value falls back to ``logging.INFO``
    """
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(message)s",
        level=log_levels.get(verbosity, logging.INFO),
        # The module-level basicConfig() call above has already attached a
        # handler to the root logger; without force=True this second call
        # would be silently ignored and the requested level/format dropped.
        force=True,
    )
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
40 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
41 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
def find_occurrences(s: str, substring: str) -> list:
    """
    Find all starting positions of a substring in a string.

    Overlapping matches are all reported, e.g. "aa" occurs in "aaaa" at
    positions 0, 1 and 2.

    :param s: String to be searched
    :param substring: Substring to search in s

    :return: 0-based start positions of every match, in ascending order
    """
    if not substring:
        # An empty pattern matches at every index of `s`.
        return list(range(len(s)))

    positions = []
    hit = s.find(substring)
    while hit != -1:
        positions.append(hit)
        # Resume one character after the hit so overlapping matches are kept.
        hit = s.find(substring, hit + 1)
    return positions
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
50 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
51 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
def is_circular(sequence, length, pos) -> bool:
    """
    Determine if a sequence is circular by comparing its start with the segment at `pos`.

    A sequence is considered circular if the `length` elements at the beginning
    of the sequence match the `length` elements starting at `pos`.

    :param sequence: The input sequence (anything supporting ``len()`` and
        integer indexing)
    :param length: The number of elements to compare for circularity.
    :param pos: The starting index in the sequence to begin the comparison.

    :return bool: True if circular, False otherwise (including when the
        compared window would run past the end of the sequence)
    """
    total = len(sequence)
    # A window that does not fit inside the sequence cannot match; report
    # that as "not circular" instead of failing with an IndexError.
    if length > total or pos + length > total:
        return False
    # Elements are compared one at a time rather than slice-to-slice.
    # NOTE(review): presumably because callers pass a SeqRecord, whose
    # slices may not be comparable with == -- confirm before changing.
    return all(sequence[i] == sequence[pos + i] for i in range(length))
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
70 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
71 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
def check_circularity(seq_record, subseq_length=10) -> int:
    """
    Process a single sequence to detect circularity and return the overlap length if circular.

    The first `subseq_length` letters of the sequence are used as a seed and
    searched for in the remainder of the sequence. Every hit is a candidate
    start of a terminal repeat: the sequence is reported as circular when
    everything from the hit to the end of the sequence is identical to the
    beginning of the sequence.

    :param seq_record: SeqRecord object
    :param subseq_length: Length of the fragment taken from the start of the
        sequence and searched for in the rest of it

    :return: overlap length if circular, 0 otherwise
    """
    seq_len = len(seq_record)

    if seq_len < subseq_length:
        logging.error("Sequence too short (%sbp): %s", seq_len, seq_record.id)
        return 0

    begin = "".join(seq_record[:subseq_length])
    end = "".join(seq_record[subseq_length:])

    # Hits are offsets inside `end`; adding `subseq_length` converts them to
    # positions in the full sequence. They are visited left to right, so the
    # first candidate that verifies is the one with the longest overlap.
    for offset in find_occurrences(end, begin):
        pos = offset + subseq_length
        overlap_length = seq_len - pos
        if is_circular(seq_record, overlap_length, pos):
            return overlap_length
    return 0
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
96 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
97 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
def extend_sequence(
    seq_record,
    overlap_length,
    duplication_length=1000,
):
    """
    Extend the 3' end of a sequence with a copy of its own 5' start.

    The terminal overlap (the last `overlap_length` letters, previously found
    to repeat the start of the sequence) is removed first, then the first
    `duplication_length` letters of the trimmed sequence are appended to its
    end. If the trimmed sequence is shorter than `duplication_length`, the
    whole trimmed sequence is appended.

    The record is modified in place: its ``seq`` attribute is replaced.

    :param seq_record: The input sequence record to be extended.
    :param overlap_length: The length of the overlapping segment that was previously identified as circular.
    :param duplication_length: The length of the 5' start fragment to duplicate and append to the 3' end.

    :return: The same sequence record, with its sequence trimmed and extended.
    """
    # Remove the overlapping segment from the 3' end. The end index is written
    # as `len - overlap` (not `-overlap`) so that an overlap of 0 keeps the
    # whole sequence instead of producing an empty slice.
    modified_seq = seq_record.seq[: len(seq_record.seq) - overlap_length]
    # Slicing already caps at the sequence length, so this yields either the
    # first `duplication_length` letters or the entire trimmed sequence.
    extension = modified_seq[:duplication_length]
    # Update the sequence in the SeqRecord object
    seq_record.seq = modified_seq + extension
    return seq_record
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
131 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
132 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
def detect_circular(
    fasta_in,
    fasta_out,
    id_out,
    subseq_length=10,
    duplication_length=1000,
):
    """
    Find the circular sequences of a FASTA file and write them out, extended.

    Every record of `fasta_in` is passed to `check_circularity`; records for
    which it returns a positive overlap length are handed to `extend_sequence`
    and collected. All collected records are then written to `fasta_out` and
    their IDs to `id_out`, one ID per line.

    :param fasta_in: Path object of the input FASTA file.
    :param fasta_out: Path object of the output FASTA file receiving the extended
        circular sequences.
    :param id_out: Path object of the output text file receiving the IDs of the
        circular sequences.
    :param subseq_length: Forwarded to `check_circularity` as `subseq_length`.
    :param duplication_length: Forwarded to `extend_sequence`; number of leading
        nucleotides that are duplicated and appended after the sequence.
    :raises Exception: Any error raised while reading or processing `fasta_in`
        is logged and re-raised unchanged.
    :raises IOError: Errors raised while writing the output files are logged and
        re-raised unchanged.
    """
    circular_records = []
    circular_ids = []

    # Reading phase: any failure here is logged together with the input path.
    try:
        with fasta_in.open("r") as in_handle:
            for record in SeqIO.parse(in_handle, "fasta"):
                overlap = check_circularity(record, subseq_length=subseq_length)
                if overlap > 0:
                    extended = extend_sequence(record, overlap, duplication_length)
                    circular_records.append(extended)
                    circular_ids.append(record.id)
    except Exception as err:
        logging.error(f"Error processing {fasta_in}: {err}")
        raise

    # An empty result is not an error: both output files are still created.
    if not circular_records:
        logging.warning("Warning: No circular sequences found.")

    # Writing phase: the FASTA file is written first, then the ID list.
    try:
        with fasta_out.open("w") as out_handle:
            SeqIO.write(circular_records, out_handle, "fasta")
        with id_out.open("w") as ids_handle:
            # With no IDs this still writes a single newline character.
            id_lines = "\n".join(circular_ids) + "\n"
            ids_handle.write(id_lines)
    except IOError as err:
        logging.error(f"Error writing output files: {err}")
        raise
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
185 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
186 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
def main():
    """
    Command-line entry point for the circular contig detection.

    Parses the command-line arguments, passes the requested verbosity to
    `setup_logger`, and calls `detect_circular` with the input/output paths
    wrapped in `Path` objects and the two length options.

    Command-line options:
        --fasta-in            Input FASTA file (required).
        --subseq-length       Integer, default 10; forwarded as `subseq_length`.
        --duplication-length  Integer, default 1000; forwarded as
                              `duplication_length`.
        -v / --verbose        Integer verbosity, default 3; must be one of the
                              keys of `log_levels`.
        --fasta-out           Output FASTA file (required).
        --id-out              Output text file with the circular IDs (required).
    """
    parser = argparse.ArgumentParser(
        description="""
        Detect circular contigs by looking for exact identical subsequences at the two
        ends of the sequences provided in a FASTA file and output the circular contigs
        extended on the 3' end by duplication of their first nucleotides (5' end), to
        be able to predict genes spanning the origin of circular contigs.
        """
    )
    parser.add_argument("--fasta-in", required=True, help="Input FASTA file")
    parser.add_argument(
        "--subseq-length",
        type=int,
        default=10,
        help="Length of 3' fragment to check on the 5' end (default: 10)",
    )
    parser.add_argument(
        "--duplication-length",
        type=int,
        default=1000,
        # The extension is taken from the start of the sequence and appended
        # after its end, so the help text names the 5' end as the source.
        help="Length of the 5' end fragment to duplicate and add on the 3' end (default: 1000)",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        type=int,
        default=3,
        choices=log_levels.keys(),
        help="Verbosity level (0=CRITICAL, 1=ERROR, 2=WARN, 3=INFO, 4=DEBUG)",
    )
    parser.add_argument(
        "--fasta-out",
        required=True,
        help="Output FASTA file with extended circular contigs",
    )
    parser.add_argument(
        "--id-out", required=True, help="Output TXT file with circular sequence IDs"
    )

    args = parser.parse_args()
    setup_logger(args.verbose)

    logging.info("Starting script execution.")
    detect_circular(
        Path(args.fasta_in),
        Path(args.fasta_out),
        Path(args.id_out),
        subseq_length=args.subseq_length,
        duplication_length=args.duplication_length,
    )
    logging.info("Script execution completed.")
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
245 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
246 |
|
faec698e3f98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/detect_circular_sequences commit 7ea9f729b44c6351c52b6295c780f496d239488e
iuc
parents:
diff
changeset
|
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
