changeset 2:b526dba9dc40 draft default tip

"planemo upload commit 7592fb20f8029142757d5e5fdb8f04ff6d5ed5cd-dirty"
author proteore
date Mon, 10 May 2021 13:56:03 +0000
parents b72ece649392
children
files get_unique_srm.py proteore_get_unique_peptide_SRM-MRM_method.xml
diffstat 2 files changed, 130 insertions(+), 104 deletions(-)
--- a/get_unique_srm.py	Thu Jan 30 09:02:31 2020 -0500
+++ b/get_unique_srm.py	Mon May 10 13:56:03 2021 +0000
@@ -1,36 +1,45 @@
-import argparse, csv, re
+import argparse
+import csv
+import re
+
 
 def get_args():
 
     parser = argparse.ArgumentParser()
-    parser.add_argument("--input_type", help="type of input (list of id or filename)", required=True)
-    parser.add_argument("-i", "--input", help="list of IDs (text or filename)", required=True)
-    parser.add_argument("--header", help="true/false if your file contains a header")
-    parser.add_argument("-c", "--column_number", help="list of IDs (text or filename)")
-    parser.add_argument("-f", "--features", help="Protein features to return from SRM Atlas", required=True)
-    parser.add_argument("-d", "--ref_file", help="path to reference file", required=True)
-    parser.add_argument("-o", "--output", help="output filename", required=True)
+    parser.add_argument("--input_type", help="type of input (list of id or filename)", required=True)  # noqa 501
+    parser.add_argument("-i", "--input", help="list of IDs (text or filename)", required=True)  # noqa 501
+    parser.add_argument("--header", help="true/false if your file contains a header")  # noqa 501
+    parser.add_argument("-c", "--column_number", help="list of IDs (text or filename)")  # noqa 501
+    parser.add_argument("-f", "--features", help="Protein features to return from SRM Atlas", required=True)  # noqa 501
+    parser.add_argument("-d", "--ref_file", help="path to reference file", required=True)  # noqa 501
+    parser.add_argument("-o", "--output", help="output filename", required=True)  # noqa 501
     args = parser.parse_args()
     return args
 
-#return the column number in int format
+# return the column number in int format
+
+
 def nb_col_to_int(nb_col):
-    try :
+    try:
         nb_col = int(nb_col.replace("c", "")) - 1
         return nb_col
-    except :
-        sys.exit("Please specify the column where you would like to apply the filter with valid format")
+    except:  # noqa 722
+        sys.exit("Please specify the column where you would like to apply the filter with valid format")  # noqa 501, 821
+
+# replace all blank cells to NA
+
 
-#replace all blank cells to NA
-def blank_to_NA(csv_file) :
-    tmp=[]
-    for line in csv_file :
-        line = ["NA" if cell=="" or cell==" " or cell=="NaN" else cell for cell in line]
+def blank_to_NA(csv_file):
+    tmp = []
+    for line in csv_file:
+        line = ["NA" if cell == "" or cell == " " or cell == "NaN" else cell for cell in line]  # noqa 501
         tmp.append(line)
-    
+
     return tmp
 
-#convert string to boolean
+# convert string to boolean
+
+
 def str2bool(v):
     if v.lower() in ('yes', 'true', 't', 'y', '1'):
         return True
@@ -39,154 +48,171 @@
     else:
         raise argparse.ArgumentTypeError('Boolean value expected.')
 
-#return list of (unique) ids from string
-def get_input_ids_from_string(input) :
+# return list of (unique) ids from string
+
 
-    ids_list = list(set(re.split(r'\s+',input.replace("_SNP","").replace("d_","").replace("\r","").replace("\n"," ").replace("\t"," "))))
-    if "" in ids_list : ids_list.remove("")
+def get_input_ids_from_string(input):
+
+    ids_list = list(set(re.split(r'\s+', input.replace("_SNP", "").replace("d_", "").replace("\r", "").replace("\n", " ").replace("\t", " "))))  # noqa 501
+    if "" in ids_list:
+        ids_list.remove("")
 
     return ids_list
 
-#return input_file and list of unique ids from input file path
-def get_input_ids_from_file(input,nb_col,header) :
-    with open(input, "r") as csv_file :
-        input_file= list(csv.reader(csv_file, delimiter='\t'))
+# return input_file and list of unique ids from input file path
+
 
-    input_file, ids_list = one_id_one_line(input_file,nb_col,header)
-    if "" in ids_list : ids_list.remove("")
+def get_input_ids_from_file(input, nb_col, header):
+    with open(input, "r") as csv_file:
+        input_file = list(csv.reader(csv_file, delimiter='\t'))
+
+    input_file, ids_list = one_id_one_line(input_file, nb_col, header)
+    if "" in ids_list:
+        ids_list.remove("")
 
     return input_file, ids_list
 
-#function to check if an id is an uniprot accession number : return True or False-
-def check_uniprot (id):
-    uniprot_pattern = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}")
-    if uniprot_pattern.match(id) :
+# function to check if an id is an uniprot accession number:
+# return True or False
+
+
+def check_uniprot(id):
+    uniprot_pattern = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}")  # noqa 501
+    if uniprot_pattern.match(id):
         return True
-    else :
+    else:
         return False
 
-#return input file by adding lines when there are more than one id per line
-def one_id_one_line(input_file,nb_col,header) :
+# return input file by adding lines when there are more than one id per line
+
 
-    if header : 
+def one_id_one_line(input_file, nb_col, header):
+
+    if header:
         new_file = [input_file[0]]
         input_file = input_file[1:]
-    else : 
-        new_file=[]
-    ids_list=[]
+    else:
+        new_file = []
+    ids_list = []
 
-    for line in input_file :
-        if line != [] and set(line) != {''}: 
-            line[nb_col] = re.sub(r"\s+","",line[nb_col])
-            if line[nb_col] == "" : line[nb_col]='NA'
-            if ";" in line[nb_col] :
+    for line in input_file:
+        if line != [] and set(line) != {''}:
+            line[nb_col] = re.sub(r"\s+", "", line[nb_col])
+            if line[nb_col] == "":
+                line[nb_col] = 'NA'
+            if ";" in line[nb_col]:
                 ids = line[nb_col].split(";")
-                for id in ids :
+                for id in ids:
                     new_file.append(line[:nb_col]+[id]+line[nb_col+1:])
                     ids_list.append(id)
-            else : 
+            else:
                 new_file.append(line)
                 ids_list.append(line[nb_col])
 
-    ids_list=[e.replace("_SNP","").replace("d_","") for e in ids_list]
-    ids_list= list(set(ids_list))
+    ids_list = [e.replace("_SNP", "").replace("d_", "") for e in ids_list]
+    ids_list = list(set(ids_list))
 
     return new_file, ids_list
 
-def create_srm_atlas_dictionary(features,srm_atlas_csv):
+
+def create_srm_atlas_dictionary(features, srm_atlas_csv):
 
-    srm_atlas={}
-    features_index = {"PeptideSeq" : 0, "SSRT" : 1 , "Length" : 2 , "type": 3 , "PA_AccNum" : 4, "MW" : 5 }
+    srm_atlas = {}
+    features_index = {"PeptideSeq": 0, "SSRT": 1, "Length": 2, "type":3, "PA_AccNum": 4, "MW": 5}  # noqa 501
     features_to_get = [features_index[feature] for feature in features]
     for line in srm_atlas_csv[1:]:
-        id = line[9].replace("_SNP","").replace("d_","")
+        id = line[9].replace("_SNP", "").replace("d_", "")
         if id not in srm_atlas:
-            srm_atlas[id]=[[line[i] for i in features_to_get]]
-        else: 
+            srm_atlas[id] = [[line[i] for i in features_to_get]]
+        else:
             srm_atlas[id].append([line[i] for i in features_to_get])
     return srm_atlas
 
-def retrieve_srm_features(srm_atlas,ids):
+
+def retrieve_srm_features(srm_atlas, ids):
 
     result_dict = {}
     for id in ids:
         if id in srm_atlas:
             res = srm_atlas[id]
-        else :
-            res=""
-        result_dict[id]=res
+        else:
+            res = ""
+        result_dict[id] = res
     return result_dict
 
-def create_header(input_file,ncol,features):
-    col_names = list(range(1,len(input_file[0])+1))
+
+def create_header(input_file, ncol, features):
+    col_names = list(range(1, len(input_file[0])+1))
     col_names = ["col"+str(e) for e in col_names]
-    col_names[ncol]="Uniprot-AC"
+    col_names[ncol] = "Uniprot-AC"
     col_names = col_names+features
     return(col_names)
 
+
 def main():
-    
-    #Get args from command line
+
+    # Get args from command line
     args = get_args()
-    features=args.features.split(",")
-    header=False
-    if args.input_type=="file" :
+    features = args.features.split(",")
+    header = False
+    if args.input_type == "file":
         column_number = nb_col_to_int(args.column_number)
         header = str2bool(args.header)
 
-    #Get reference file (Human SRM Atlas)
-    with open(args.ref_file, "r") as csv_file :
+    # Get reference file (Human SRM Atlas)
+    with open(args.ref_file, "r") as csv_file:
         srm_atlas_csv = csv.reader(csv_file, delimiter='\t')
         srm_atlas_csv = [line for line in srm_atlas_csv]
 
-    #Create srm Atlas dictionary 
-    srm_atlas = create_srm_atlas_dictionary(features,srm_atlas_csv)
-        
-    #Get file and/or ids from input 
-    if args.input_type == "list" :
+    # Create srm Atlas dictionary
+    srm_atlas = create_srm_atlas_dictionary(features, srm_atlas_csv)
+
+    # Get file and/or ids from input
+    if args.input_type == "list":
         ids = get_input_ids_from_string(args.input)
-    elif args.input_type == "file" :
-        input_file, ids = get_input_ids_from_file(args.input,column_number,header)
+    elif args.input_type == "file":
+        input_file, ids = get_input_ids_from_file(args.input,
+                                                  column_number, header)
 
-    #Check Uniprot-AC
+    # Check Uniprot-AC
     if not any([check_uniprot(id) for id in ids]):
-        print ("No Uniprot-AC found, please check your input")
+        print("No Uniprot-AC found, please check your input")
         exit()
 
-    #retrieve features
-    result_dict = retrieve_srm_features(srm_atlas,ids)
+    # retrieve features
+    result_dict = retrieve_srm_features(srm_atlas, ids)
 
-    #write output
-    with open(args.output,"w") as output :
-        writer = csv.writer(output,delimiter="\t")
+    # write output
+    with open(args.output, "w") as output:
+        writer = csv.writer(output, delimiter="\t")
 
-        #write header
-        if header : 
+        # write header
+        if header:
             writer.writerow(input_file[0]+features)
-            input_file = input_file[1:]  
-        elif args.input_type=="file":
-            col_names = [create_header(input_file,column_number,features)]
+            input_file = input_file[1:]
+        elif args.input_type == "file":
+            col_names = [create_header(input_file, column_number, features)]
             writer.writerow(col_names)
-        else : 
+        else:
             writer.writerow(["Uniprot-AC"]+features)
 
-        #write lines 
-        previous_line=""
-        if args.input_type=="file" :
-            for line in input_file :
+        # write lines
+        previous_line = ""
+        if args.input_type == "file":
+            for line in input_file:
                 for res in result_dict[line[column_number]]:
-                    output_line = ["NA" if cell=="" or cell==" " or cell=="NaN" else cell for cell in line+res]
-                    if previous_line != output_line :
+                    output_line = ["NA" if cell == "" or cell == " " or cell == "NaN" else cell for cell in line+res]  # noqa 501
+                    if previous_line != output_line:
                         writer.writerow(output_line)
-                        previous_line=output_line
-        elif args.input_type=="list" :
-            for id in ids :
+                        previous_line = output_line
+        elif args.input_type == "list":
+            for id in ids:
                 for res in result_dict[id]:
-                    line = ["NA" if cell=="" or cell==" " or cell=="NaN" else cell for cell in [id]+res]
-                    if previous_line != line :
+                    line = ["NA" if cell == "" or cell == " " or cell == "NaN" else cell for cell in [id]+res]  # noqa 501
+                    if previous_line != line:
                         writer.writerow(line)
-                        previous_line=line
-    
+                        previous_line = line
+
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
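
For reference, a minimal sketch of how the refactored input helpers behave (assuming get_unique_srm.py is importable as a module on the Python path; the accession numbers and the printed result are illustrative, not taken from the changeset):

    from get_unique_srm import check_uniprot, get_input_ids_from_string

    # "_SNP" and "d_" decorations are stripped, whitespace-separated IDs are
    # split and de-duplicated (via set(), so the order is not guaranteed)
    ids = get_input_ids_from_string("P31946 d_P62258\nQ04917_SNP")
    print(sorted(ids))   # ['P31946', 'P62258', 'Q04917']

    # check_uniprot() tests each ID against the UniProt accession pattern
    assert all(check_uniprot(acc) for acc in ids)
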
--- a/proteore_get_unique_peptide_SRM-MRM_method.xml	Thu Jan 30 09:02:31 2020 -0500
+++ b/proteore_get_unique_peptide_SRM-MRM_method.xml	Mon May 10 13:56:03 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="proteore_get_unique_peptide_srm_method" name="Get unique peptide SRM-MRM method" version="2020.01.30">
+<tool id="proteore_get_unique_peptide_srm_method" name="Get unique peptide SRM-MRM method" version="2021.04.20">
     <description>[Human SRM Atlas]</description>
     <requirements>
     </requirements>