changeset 7:eb085b3dbaf8 draft

"planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 8d9f0ae6af9e8d9313c6cdcc551b24c6c44ae341"
author galaxy-australia
date Tue, 19 Apr 2022 00:39:29 +0000
parents 04e95886cf24
children ca90d17ff51b
files README.rst alphafold.xml validate_fasta.py
diffstat 3 files changed, 52 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/README.rst	Mon Apr 04 01:46:22 2022 +0000
+++ b/README.rst	Tue Apr 19 00:39:29 2022 +0000
@@ -157,6 +157,18 @@
        <param id="singularity_volumes">"$job_directory:ro,$tool_directory:ro,$job_directory/outputs:rw,$working_directory:rw,/data/alphafold_databases:/data:ro"</param>
    </destination>
 
+CUSTOM PARAMETERS
+~~~~~~~~~~~~~~~~~
+
+A few parameters can be customized with the use of environment variables set in the job destination:
+
+- ``ALPHAFOLD_DB``: path to the reference database root (default ``/data``)
+- ``ALPHAFOLD_AA_LENGTH_MIN``: minimum accepted sequence length (default ``30``)
+- ``ALPHAFOLD_AA_LENGTH_MAX``: maximum accepted sequence length (default ``2000``)
+
+The last two can be set to ``0`` and ``50000`` respectively to remove the validation entirely.
+
+
 Closing
 ~~~~~~~
 
--- a/alphafold.xml	Mon Apr 04 01:46:22 2022 +0000
+++ b/alphafold.xml	Tue Apr 19 00:39:29 2022 +0000
@@ -195,17 +195,6 @@
         Proteins associate in many biological processes, including intracellular signalling pathways and protein complex formation.
         To predict these interactions, other programs may ingest 3D models predicted by AlphaFold. Proprietary softwares include `GOLD <https://www.ccdc.cam.ac.uk/solutions/csd-discovery/components/gold/>`_ and `SeeSAR <https://www.biosolveit.de/SeeSAR>`_, but many `free and open-source options <https://en.wikipedia.org/wiki/List_of_protein-ligand_docking_software>`_ are available such as `AutoDock <https://autodock.scripps.edu/>`_ and `SwissDock <http://www.swissdock.ch/>`_.
 
-    *Expected run times*
-
-    .. image:: https://github.com/usegalaxy-au/galaxy-local-tools/blob/1a8d3e8daa7ccc5a345ca377697735ab95ed0666/tools/alphafold/static/img/alphafold_runtime_graph.png?raw=true
-        :height: 520
-        :alt: Run time graph
-
-    |
-    | In general, we observe a quadratic relationship between sequence length and time to fold.
-    | Once your job begins, a sequence of 50aa will take approximately 1hr to complete, while a sequence of 2000aa will take about 18hrs.
-    |
-
     **Input**
 
     *Amino acid sequence*
--- a/validate_fasta.py	Mon Apr 04 01:46:22 2022 +0000
+++ b/validate_fasta.py	Tue Apr 19 00:39:29 2022 +0000
@@ -86,7 +86,7 @@
         }
 
     def validate(self):
-        """performs fasta validation"""
+        """Perform FASTA validation."""
         self.validate_num_seqs()
         self.validate_length()
         self.validate_alphabet()
@@ -98,41 +98,41 @@
     def validate_num_seqs(self) -> None:
         """Assert that only one sequence has been provided."""
         if len(self.fasta_list) > 1:
-            raise Exception(
-                'Error encountered validating fasta:'
+            raise ValueError(
+                'Error encountered validating FASTA:\n'
                 f' More than 1 sequence detected ({len(self.fasta_list)}).'
-                ' Please use single fasta sequence as input.')
+                ' Please use single FASTA sequence as input.')
         elif len(self.fasta_list) == 0:
-            raise Exception(
-                'Error encountered validating fasta:'
-                ' input file has no fasta sequences')
+            raise ValueError(
+                'Error encountered validating FASTA:\n'
+                ' input file has no FASTA sequences')
 
     def validate_length(self):
         """Confirm whether sequence length is valid."""
         fasta = self.fasta_list[0]
         if self.min_length:
             if len(fasta.aa_seq) < self.min_length:
-                raise Exception(
-                    'Error encountered validating fasta: Sequence too short'
+                raise ValueError(
+                    'Error encountered validating FASTA:\n Sequence too short'
                     f' ({len(fasta.aa_seq)}AA).'
                     f' Minimum length is {self.min_length}AA.')
         if self.max_length:
             if len(fasta.aa_seq) > self.max_length:
-                raise Exception(
-                    'Error encountered validating fasta:'
+                raise ValueError(
+                    'Error encountered validating FASTA:\n'
                     f' Sequence too long ({len(fasta.aa_seq)}AA).'
                     f' Maximum length is {self.max_length}AA.')
 
     def validate_alphabet(self):
-        """
-        Confirm whether the sequence conforms to IUPAC codes.
+        """Confirm whether the sequence conforms to IUPAC codes.
+
         If not, report the offending character and its position.
         """
         fasta = self.fasta_list[0]
         for i, char in enumerate(fasta.aa_seq.upper()):
             if char not in self.iupac_characters:
-                raise Exception(
-                    'Error encountered validating fasta: Invalid amino acid'
+                raise ValueError(
+                    'Error encountered validating FASTA:\n Invalid amino acid'
                     f' found at pos {i}: "{char}"')
 
     def validate_x(self):
@@ -140,8 +140,8 @@
         fasta = self.fasta_list[0]
         for i, char in enumerate(fasta.aa_seq.upper()):
             if char == 'X':
-                raise Exception(
-                    'Error encountered validating fasta: Unsupported AA code'
+                raise ValueError(
+                    'Error encountered validating FASTA:\n Unsupported AA code'
                     f' "X" found at pos {i}')
 
 
@@ -164,20 +164,31 @@
 
 def main():
     # load fasta file
-    args = parse_args()
-    fas = FastaLoader(args.input)
+    try:
+        args = parse_args()
+        fas = FastaLoader(args.input)
+
+        # validate
+        fv = FastaValidator(
+            fas.fastas,
+            min_length=args.min_length,
+            max_length=args.max_length,
+        )
+        fv.validate()
 
-    # validate
-    fv = FastaValidator(
-        fas.fastas,
-        min_length=args.min_length,
-        max_length=args.max_length,
-    )
-    fv.validate()
+        # write cleaned version
+        fw = FastaWriter()
+        fw.write(fas.fastas[0])
 
-    # write cleaned version
-    fw = FastaWriter()
-    fw.write(fas.fastas[0])
+    except ValueError as exc:
+        sys.stderr.write(f"{exc}\n\n")
+        raise exc
+
+    except Exception as exc:
+        sys.stderr.write(
+            "Input error: FASTA input is invalid. Please check your input.\n\n"
+        )
+        raise exc
 
 
 def parse_args() -> argparse.Namespace: