Mercurial > repos > galaxy-australia > alphafold2
changeset 7:eb085b3dbaf8 draft
"planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 8d9f0ae6af9e8d9313c6cdcc551b24c6c44ae341"
author | galaxy-australia |
---|---|
date | Tue, 19 Apr 2022 00:39:29 +0000 |
parents | 04e95886cf24 |
children | ca90d17ff51b |
files | README.rst alphafold.xml validate_fasta.py |
diffstat | 3 files changed, 52 insertions(+), 40 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Mon Apr 04 01:46:22 2022 +0000 +++ b/README.rst Tue Apr 19 00:39:29 2022 +0000 @@ -157,6 +157,18 @@ <param id="singularity_volumes">"$job_directory:ro,$tool_directory:ro,$job_directory/outputs:rw,$working_directory:rw,/data/alphafold_databases:/data:ro"</param> </destination> +CUSTOM PARAMETERS +~~~~~~~~~~~~~~~~~ + +A few parameters can be customized with the use of environment variables set in the job destination: + +- ``ALPHAFOLD_DB``: path to the reference database root (default ``/data``) +- ``ALPHAFOLD_AA_LENGTH_MIN``: minimum accepted sequence length (default ``30``) +- ``ALPHAFOLD_AA_LENGTH_MAX``: maximum accepted sequence length (default ``2000``) + +For the last two, these could be set to ``0`` and ``50000`` respectively to remove the validation entirely. + + Closing ~~~~~~~
--- a/alphafold.xml Mon Apr 04 01:46:22 2022 +0000 +++ b/alphafold.xml Tue Apr 19 00:39:29 2022 +0000 @@ -195,17 +195,6 @@ Proteins associate in many biological processes, including intracellular signalling pathways and protein complex formation. To predict these interactions, other programs may ingest 3D models predicted by AlphaFold. Proprietary software includes `GOLD <https://www.ccdc.cam.ac.uk/solutions/csd-discovery/components/gold/>`_ and `SeeSAR <https://www.biosolveit.de/SeeSAR>`_, but many `free and open-source options <https://en.wikipedia.org/wiki/List_of_protein-ligand_docking_software>`_ are available such as `AutoDock <https://autodock.scripps.edu/>`_ and `SwissDock <http://www.swissdock.ch/>`_. - *Expected run times* - - .. image:: https://github.com/usegalaxy-au/galaxy-local-tools/blob/1a8d3e8daa7ccc5a345ca377697735ab95ed0666/tools/alphafold/static/img/alphafold_runtime_graph.png?raw=true - :height: 520 - :alt: Run time graph - - | - | In general, we observe a quadratic relationship between sequence length and time to fold. - | Once your job begins, a sequence of 50aa will take approximately 1hr to complete, while a sequence of 2000aa will take about 18hrs. - | - **Input** *Amino acid sequence*
--- a/validate_fasta.py Mon Apr 04 01:46:22 2022 +0000 +++ b/validate_fasta.py Tue Apr 19 00:39:29 2022 +0000 @@ -86,7 +86,7 @@ } def validate(self): - """performs fasta validation""" + """Perform FASTA validation.""" self.validate_num_seqs() self.validate_length() self.validate_alphabet() @@ -98,41 +98,41 @@ def validate_num_seqs(self) -> None: """Assert that only one sequence has been provided.""" if len(self.fasta_list) > 1: - raise Exception( - 'Error encountered validating fasta:' + raise ValueError( + 'Error encountered validating FASTA:\n' f' More than 1 sequence detected ({len(self.fasta_list)}).' - ' Please use single fasta sequence as input.') + ' Please use single FASTA sequence as input.') elif len(self.fasta_list) == 0: - raise Exception( - 'Error encountered validating fasta:' - ' input file has no fasta sequences') + raise ValueError( + 'Error encountered validating FASTA:\n' + ' input file has no FASTA sequences') def validate_length(self): """Confirm whether sequence length is valid.""" fasta = self.fasta_list[0] if self.min_length: if len(fasta.aa_seq) < self.min_length: - raise Exception( - 'Error encountered validating fasta: Sequence too short' + raise ValueError( + 'Error encountered validating FASTA:\n Sequence too short' f' ({len(fasta.aa_seq)}AA).' f' Minimum length is {self.min_length}AA.') if self.max_length: if len(fasta.aa_seq) > self.max_length: - raise Exception( - 'Error encountered validating fasta:' + raise ValueError( + 'Error encountered validating FASTA:\n' f' Sequence too long ({len(fasta.aa_seq)}AA).' f' Maximum length is {self.max_length}AA.') def validate_alphabet(self): - """ - Confirm whether the sequence conforms to IUPAC codes. + """Confirm whether the sequence conforms to IUPAC codes. + If not, report the offending character and its position. 
""" fasta = self.fasta_list[0] for i, char in enumerate(fasta.aa_seq.upper()): if char not in self.iupac_characters: - raise Exception( - 'Error encountered validating fasta: Invalid amino acid' + raise ValueError( + 'Error encountered validating FASTA:\n Invalid amino acid' f' found at pos {i}: "{char}"') def validate_x(self): @@ -140,8 +140,8 @@ fasta = self.fasta_list[0] for i, char in enumerate(fasta.aa_seq.upper()): if char == 'X': - raise Exception( - 'Error encountered validating fasta: Unsupported AA code' + raise ValueError( + 'Error encountered validating FASTA:\n Unsupported AA code' f' "X" found at pos {i}') @@ -164,20 +164,31 @@ def main(): # load fasta file - args = parse_args() - fas = FastaLoader(args.input) + try: + args = parse_args() + fas = FastaLoader(args.input) + + # validate + fv = FastaValidator( + fas.fastas, + min_length=args.min_length, + max_length=args.max_length, + ) + fv.validate() - # validate - fv = FastaValidator( - fas.fastas, - min_length=args.min_length, - max_length=args.max_length, - ) - fv.validate() + # write cleaned version + fw = FastaWriter() + fw.write(fas.fastas[0]) - # write cleaned version - fw = FastaWriter() - fw.write(fas.fastas[0]) + except ValueError as exc: + sys.stderr.write(f"{exc}\n\n") + raise exc + + except Exception as exc: + sys.stderr.write( + "Input error: FASTA input is invalid. Please check your input.\n\n" + ) + raise exc def parse_args() -> argparse.Namespace: