# HG changeset patch
# User galaxy-australia
# Date 1650328769 0
# Node ID eb085b3dbaf873cfae919e218b923ebebe8a4ae4
# Parent 04e95886cf24283e9269265a3ff5093605209205
"planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 8d9f0ae6af9e8d9313c6cdcc551b24c6c44ae341"
diff -r 04e95886cf24 -r eb085b3dbaf8 README.rst
--- a/README.rst Mon Apr 04 01:46:22 2022 +0000
+++ b/README.rst Tue Apr 19 00:39:29 2022 +0000
@@ -157,6 +157,18 @@
"$job_directory:ro,$tool_directory:ro,$job_directory/outputs:rw,$working_directory:rw,/data/alphafold_databases:/data:ro"
+CUSTOM PARAMETERS
+~~~~~~~~~~~~~~~~~
+
+A few parameters can be customized with the use of environment variables set in the job destination:
+
+- ``ALPHAFOLD_DB``: path to the reference database root (default ``/data``)
+- ``ALPHAFOLD_AA_LENGTH_MIN``: minimum accepted sequence length (default ``30``)
+- ``ALPHAFOLD_AA_LENGTH_MAX``: maximum accepted sequence length (default ``2000``)
+
+For the last two, these could be set to ``0`` and ``50000`` respectively to remove the validation entirely.
+
+
Closing
~~~~~~~
diff -r 04e95886cf24 -r eb085b3dbaf8 alphafold.xml
--- a/alphafold.xml Mon Apr 04 01:46:22 2022 +0000
+++ b/alphafold.xml Tue Apr 19 00:39:29 2022 +0000
@@ -195,17 +195,6 @@
Proteins associate in many biological processes, including intracellular signalling pathways and protein complex formation.
To predict these interactions, other programs may ingest 3D models predicted by AlphaFold. Proprietary softwares include `GOLD `_ and `SeeSAR `_, but many `free and open-source options `_ are available such as `AutoDock `_ and `SwissDock `_.
- *Expected run times*
-
- .. image:: https://github.com/usegalaxy-au/galaxy-local-tools/blob/1a8d3e8daa7ccc5a345ca377697735ab95ed0666/tools/alphafold/static/img/alphafold_runtime_graph.png?raw=true
- :height: 520
- :alt: Run time graph
-
- |
- | In general, we observe a quadratic relationship between sequence length and time to fold.
- | Once your job begins, a sequence of 50aa will take approximately 1hr to complete, while a sequence of 2000aa will take about 18hrs.
- |
-
**Input**
*Amino acid sequence*
diff -r 04e95886cf24 -r eb085b3dbaf8 validate_fasta.py
--- a/validate_fasta.py Mon Apr 04 01:46:22 2022 +0000
+++ b/validate_fasta.py Tue Apr 19 00:39:29 2022 +0000
@@ -86,7 +86,7 @@
}
def validate(self):
- """performs fasta validation"""
+ """Perform FASTA validation."""
self.validate_num_seqs()
self.validate_length()
self.validate_alphabet()
@@ -98,41 +98,41 @@
def validate_num_seqs(self) -> None:
"""Assert that only one sequence has been provided."""
if len(self.fasta_list) > 1:
- raise Exception(
- 'Error encountered validating fasta:'
+ raise ValueError(
+ 'Error encountered validating FASTA:\n'
f' More than 1 sequence detected ({len(self.fasta_list)}).'
- ' Please use single fasta sequence as input.')
+ ' Please use single FASTA sequence as input.')
elif len(self.fasta_list) == 0:
- raise Exception(
- 'Error encountered validating fasta:'
- ' input file has no fasta sequences')
+ raise ValueError(
+ 'Error encountered validating FASTA:\n'
+ ' input file has no FASTA sequences')
def validate_length(self):
"""Confirm whether sequence length is valid."""
fasta = self.fasta_list[0]
if self.min_length:
if len(fasta.aa_seq) < self.min_length:
- raise Exception(
- 'Error encountered validating fasta: Sequence too short'
+ raise ValueError(
+ 'Error encountered validating FASTA:\n Sequence too short'
f' ({len(fasta.aa_seq)}AA).'
f' Minimum length is {self.min_length}AA.')
if self.max_length:
if len(fasta.aa_seq) > self.max_length:
- raise Exception(
- 'Error encountered validating fasta:'
+ raise ValueError(
+ 'Error encountered validating FASTA:\n'
f' Sequence too long ({len(fasta.aa_seq)}AA).'
f' Maximum length is {self.max_length}AA.')
def validate_alphabet(self):
- """
- Confirm whether the sequence conforms to IUPAC codes.
+ """Confirm whether the sequence conforms to IUPAC codes.
+
If not, report the offending character and its position.
"""
fasta = self.fasta_list[0]
for i, char in enumerate(fasta.aa_seq.upper()):
if char not in self.iupac_characters:
- raise Exception(
- 'Error encountered validating fasta: Invalid amino acid'
+ raise ValueError(
+ 'Error encountered validating FASTA:\n Invalid amino acid'
f' found at pos {i}: "{char}"')
def validate_x(self):
@@ -140,8 +140,8 @@
fasta = self.fasta_list[0]
for i, char in enumerate(fasta.aa_seq.upper()):
if char == 'X':
- raise Exception(
- 'Error encountered validating fasta: Unsupported AA code'
+ raise ValueError(
+ 'Error encountered validating FASTA:\n Unsupported AA code'
f' "X" found at pos {i}')
@@ -164,20 +164,31 @@
def main():
# load fasta file
- args = parse_args()
- fas = FastaLoader(args.input)
+ try:
+ args = parse_args()
+ fas = FastaLoader(args.input)
+
+ # validate
+ fv = FastaValidator(
+ fas.fastas,
+ min_length=args.min_length,
+ max_length=args.max_length,
+ )
+ fv.validate()
- # validate
- fv = FastaValidator(
- fas.fastas,
- min_length=args.min_length,
- max_length=args.max_length,
- )
- fv.validate()
+ # write cleaned version
+ fw = FastaWriter()
+ fw.write(fas.fastas[0])
- # write cleaned version
- fw = FastaWriter()
- fw.write(fas.fastas[0])
+ except ValueError as exc:
+ sys.stderr.write(f"{exc}\n\n")
+ raise exc
+
+ except Exception as exc:
+ sys.stderr.write(
+ "Input error: FASTA input is invalid. Please check your input.\n\n"
+ )
+ raise exc
def parse_args() -> argparse.Namespace: