# HG changeset patch # User cpt # Date 1655468535 0 # Node ID 3f0d07a1040503ef566d5e4ee60f37c23c48ab00 Uploaded diff -r 000000000000 -r 3f0d07a10405 cpt_disruptin_finder/cpt-macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_disruptin_finder/cpt-macros.xml Fri Jun 17 12:22:15 2022 +0000 @@ -0,0 +1,115 @@ + + + + + python + biopython + requests + + + + + + + + 10.1371/journal.pcbi.1008214 + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {C. Ross}, + title = {CPT Galaxy Tools}, + year = {2020-}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + diff -r 000000000000 -r 3f0d07a10405 cpt_disruptin_finder/disruptin_finder.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_disruptin_finder/disruptin_finder.py Fri Jun 17 12:22:15 2022 +0000 @@ -0,0 +1,93 @@ +""" +This program is intended to find gene products that would be acceptable disruptin candidates. + +The criteria can be toggled between selecting for proteins with: + - net charge above a give threshold (default = +4) and length less than given threshold (default = 100 aa) + OR + - ratio of number of charged residues to length of the sequence above a given threshold (default = 0.25 residue/aa) + and length less than given threshold (default = 100 aa) + OR + - net charge above a give threshold (default = +4), ratio of number of charged residues to length of the sequence + above a given threshold (default = 0.25 residue/aa), and length less than given threshold (default = 100 aa) + +Net charge of a sequence is calculated so that for every R or K residue the net charge increases by one, and for every +D or E residue the net charge decreases by one. The ratio of charged residues to length is calculated in a similar manner. +The residues R, K, D, and E each increase the number of charged residues by one, and total for the sequence is then +divided by the length to get the ratio. + +Input a multi fasta file with all of the predicted protein sequences from the genome as well as a threshold +sequence length, net charge, and charge residue to length ratio. The program outputs another fasta file. +The output fasta file includes records for all the sequences meeting the size and charge criteria. + +""" + +from Bio import SeqIO +import argparse +import sys + + +def disruptin_finder( + fasta_file, thresh_size, thresh_net_charge, thresh_charge_ratio, selection_criteria +): + # Iterable variables + net_charge = 0 + charge_res = 0 + + # Create record variable to store record information + total_record = [] + + # Parse the .fasta file and get the sequence + for rec in SeqIO.parse(fasta_file, "fasta"): + sequence = str(rec.seq) + + if len(sequence) <= thresh_size: + for aa in sequence: + # For R and K residues a positive charge is given + if aa in "RK": + net_charge += 1 + charge_res += 1 + # For D and E residues a negative charge is given + elif aa in "DE": + net_charge -= 1 + charge_res += 1 + + # Charge (total charged residues) to size ratio is calculated + Length = len(sequence) + charge_ratio = float(charge_res) / float(Length) + + # Based on the user-specified selection criteria a list of records is compiled + if selection_criteria == "net": + if net_charge >= thresh_net_charge: + total_record = total_record + [rec] + elif selection_criteria == "ratio": + if charge_ratio >= thresh_charge_ratio: + total_record = total_record + [rec] + elif selection_criteria == "both": + if ( + charge_ratio >= thresh_charge_ratio + and net_charge >= thresh_net_charge + ): + total_record = total_record + [rec] + + # Reset the iterable variables + net_charge = 0 + charge_res = 0 + + # The total list of records is returned by the function + yield total_record + + +if __name__ == "__main__": + # Grab all of the filters from our plugin loader + parser = argparse.ArgumentParser(description="Disruptin Finder") + parser.add_argument( + "fasta_file", type=argparse.FileType("r"), help="Multi-FASTA Input" + ) + parser.add_argument("--thresh_net_charge", type=int, default=4) + parser.add_argument("--thresh_size", type=int, default=100) + parser.add_argument("--thresh_charge_ratio", type=float, default=0.25) + parser.add_argument("--selection_criteria", action="store") + args = parser.parse_args() + + for seq in disruptin_finder(**vars(args)): + SeqIO.write(seq, sys.stdout, "fasta") diff -r 000000000000 -r 3f0d07a10405 cpt_disruptin_finder/disruptin_finder.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_disruptin_finder/disruptin_finder.xml Fri Jun 17 12:22:15 2022 +0000 @@ -0,0 +1,54 @@ + + + finds proteins with size and charge criteria + + macros.xml + cpt-macros.xml + + + $output]]> + + + + + + + + + + + + + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {A. Holt}, + title = {CPT Galaxy Tools}, + year = {2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + diff -r 000000000000 -r 3f0d07a10405 cpt_disruptin_finder/macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_disruptin_finder/macros.xml Fri Jun 17 12:22:15 2022 +0000 @@ -0,0 +1,23 @@ + + + + + python + biopython + cpt_gffparser + + + + + + + + + + + ln -s $genome_fasta genomeref.fa; + + + genomeref.fa + +