Mercurial > repos > galaxyp > fragpipe
annotate genericize_db.py @ 1:dc5de2ea607e draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 408c20289d67eb76a2482af828e64265bef29053
author | galaxyp |
---|---|
date | Fri, 12 Jul 2024 18:16:43 +0000 |
parents | 14785481da2b |
children |
rev | line source |
---|---|
0
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
2 # |
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
3 # Adds the '>generic|' prefix to sequence headers in the input FASTA file that are not formatted according to the UniProt, NCBI, or ENSEMBL formats, so that Philosopher does not misinterpret them. |
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
4 # |
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
5 |
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
6 import re |
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
7 import sys |
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
8 |
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
# Command-line interface: <input FASTA path> <output FASTA path>.
if len(sys.argv) < 3:
    # Exit with a readable usage message (written to stderr, non-zero exit
    # status) instead of an IndexError traceback when an argument is missing.
    sys.exit('usage: genericize_db.py <input_fasta> <output_fasta>')

input_db_file = sys.argv[1]
output_db_file = sys.argv[2]
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
11 |
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
12 |
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
# Matches the leading '>' of a FASTA header line unless the text right after
# it starts with one of the listed database prefixes or with 'generic'.
# Compiled once here so the per-line call does not go through the string
# pattern lookup in ``re.sub`` each time.
_NON_GENERIC_HEADER_RE = re.compile(
    r'^>(?!sp\||tr\||db\||AP_|NP_|YP_|XP_|WP_|ENSP|UniRef|nxp|generic)'
)


def sub_header(line):
    """Return *line* with its leading '>' replaced by '>generic|' if needed.

    A line is rewritten only when it starts with '>' and the text that
    follows does not begin with one of the prefixes excluded by
    ``_NON_GENERIC_HEADER_RE`` (``sp|``, ``tr|``, ``db|``, ``AP_``, ``NP_``,
    ``YP_``, ``XP_``, ``WP_``, ``ENSP``, ``UniRef``, ``nxp``, ``generic``).
    Every other line, including the empty string, is returned unchanged.

    Args:
        line: One line of text from a FASTA file, with or without its
            trailing newline.

    Returns:
        The possibly re-prefixed line; any trailing newline is preserved.
    """
    # The pattern is anchored on a leading '>', so a line that does not start
    # with '>' can never match; skip the regex engine for it.
    if not line.startswith('>'):
        return line
    return _NON_GENERIC_HEADER_RE.sub('>generic|', line, count=1)
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
15 |
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
16 |
14785481da2b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
galaxyp
parents:
diff
changeset
|
# Stream the input database to the output file line by line, passing every
# line through sub_header so that header lines are rewritten on the way and
# all other lines are copied as they are.
with open(input_db_file) as source, open(output_db_file, 'w') as sink:
    sink.writelines(map(sub_header, source))