Previous changeset 1:b72ece649392 (2020-01-30) |
Commit message:
"planemo upload commit 7592fb20f8029142757d5e5fdb8f04ff6d5ed5cd-dirty" |
modified:
get_unique_srm.py proteore_get_unique_peptide_SRM-MRM_method.xml |
b |
diff -r b72ece649392 -r b526dba9dc40 get_unique_srm.py --- a/get_unique_srm.py Thu Jan 30 09:02:31 2020 -0500 +++ b/get_unique_srm.py Mon May 10 13:56:03 2021 +0000 |
[ |
b'@@ -1,36 +1,45 @@\n-import argparse, csv, re\n+import argparse\n+import csv\n+import re\n+\n \n def get_args():\n \n parser = argparse.ArgumentParser()\n- parser.add_argument("--input_type", help="type of input (list of id or filename)", required=True)\n- parser.add_argument("-i", "--input", help="list of IDs (text or filename)", required=True)\n- parser.add_argument("--header", help="true/false if your file contains a header")\n- parser.add_argument("-c", "--column_number", help="list of IDs (text or filename)")\n- parser.add_argument("-f", "--features", help="Protein features to return from SRM Atlas", required=True)\n- parser.add_argument("-d", "--ref_file", help="path to reference file", required=True)\n- parser.add_argument("-o", "--output", help="output filename", required=True)\n+ parser.add_argument("--input_type", help="type of input (list of id or filename)", required=True) # noqa 501\n+ parser.add_argument("-i", "--input", help="list of IDs (text or filename)", required=True) # noqa 501\n+ parser.add_argument("--header", help="true/false if your file contains a header") # noqa 501\n+ parser.add_argument("-c", "--column_number", help="list of IDs (text or filename)") # noqa 501\n+ parser.add_argument("-f", "--features", help="Protein features to return from SRM Atlas", required=True) # noqa 501\n+ parser.add_argument("-d", "--ref_file", help="path to reference file", required=True) # noqa 501\n+ parser.add_argument("-o", "--output", help="output filename", required=True) # noqa 501\n args = parser.parse_args()\n return args\n \n-#return the column number in int format\n+# return the column number in int format\n+\n+\n def nb_col_to_int(nb_col):\n- try :\n+ try:\n nb_col = int(nb_col.replace("c", "")) - 1\n return nb_col\n- except :\n- sys.exit("Please specify the column where you would like to apply the filter with valid format")\n+ except: # noqa 722\n+ sys.exit("Please specify the column where you would like to apply the filter with valid format") # noqa 501, 821\n+\n+# replace all blank cells to NA\n+\n \n-#replace all blank cells to NA\n-def blank_to_NA(csv_file) :\n- tmp=[]\n- for line in csv_file :\n- line = ["NA" if cell=="" or cell==" " or cell=="NaN" else cell for cell in line]\n+def blank_to_NA(csv_file):\n+ tmp = []\n+ for line in csv_file:\n+ line = ["NA" if cell == "" or cell == " " or cell == "NaN" else cell for cell in line] # noqa 501\n tmp.append(line)\n- \n+\n return tmp\n \n-#convert string to boolean\n+# convert string to boolean\n+\n+\n def str2bool(v):\n if v.lower() in (\'yes\', \'true\', \'t\', \'y\', \'1\'):\n return True\n@@ -39,154 +48,171 @@\n else:\n raise argparse.ArgumentTypeError(\'Boolean value expected.\')\n \n-#return list of (unique) ids from string\n-def get_input_ids_from_string(input) :\n+# return list of (unique) ids from string\n+\n \n- ids_list = list(set(re.split(r\'\\s+\',input.replace("_SNP","").replace("d_","").replace("\\r","").replace("\\n"," ").replace("\\t"," "))))\n- if "" in ids_list : ids_list.remove("")\n+def get_input_ids_from_string(input):\n+\n+ ids_list = list(set(re.split(r\'\\s+\', input.replace("_SNP", "").replace("d_", "").replace("\\r", "").replace("\\n", " ").replace("\\t", " ")))) # noqa 501\n+ if "" in ids_list:\n+ ids_list.remove("")\n \n return ids_list\n \n-#return input_file and list of unique ids from input file path\n-def get_input_ids_from_file(input,nb_col,header) :\n- with open(input, "r") as csv_file :\n- input_file= list(csv.reader(csv_file, delimiter=\'\\t\'))\n+# return input_file and list of unique ids from input file path\n+\n \n- input_file, ids_list = one_id_one_line(input_file,nb_col,header)\n- if "" in ids_list : ids_list.remove("")\n+def get_input_ids_from_file(input, nb_col, header):\n+ with open(input, "r") as csv_file:\n+ input_file = list(csv.reader(csv_file, delimiter=\'\\t\'))\n+\n+ input_file, ids_list = one_id_one_line(input_fil'..b' = args.features.split(",")\n+ header = False\n+ if args.input_type == "file":\n column_number = nb_col_to_int(args.column_number)\n header = str2bool(args.header)\n \n- #Get reference file (Human SRM Atlas)\n- with open(args.ref_file, "r") as csv_file :\n+ # Get reference file (Human SRM Atlas)\n+ with open(args.ref_file, "r") as csv_file:\n srm_atlas_csv = csv.reader(csv_file, delimiter=\'\\t\')\n srm_atlas_csv = [line for line in srm_atlas_csv]\n \n- #Create srm Atlas dictionary \n- srm_atlas = create_srm_atlas_dictionary(features,srm_atlas_csv)\n- \n- #Get file and/or ids from input \n- if args.input_type == "list" :\n+ # Create srm Atlas dictionary\n+ srm_atlas = create_srm_atlas_dictionary(features, srm_atlas_csv)\n+\n+ # Get file and/or ids from input\n+ if args.input_type == "list":\n ids = get_input_ids_from_string(args.input)\n- elif args.input_type == "file" :\n- input_file, ids = get_input_ids_from_file(args.input,column_number,header)\n+ elif args.input_type == "file":\n+ input_file, ids = get_input_ids_from_file(args.input,\n+ column_number, header)\n \n- #Check Uniprot-AC\n+ # Check Uniprot-AC\n if not any([check_uniprot(id) for id in ids]):\n- print ("No Uniprot-AC found, please check your input")\n+ print("No Uniprot-AC found, please check your input")\n exit()\n \n- #retrieve features\n- result_dict = retrieve_srm_features(srm_atlas,ids)\n+ # retrieve features\n+ result_dict = retrieve_srm_features(srm_atlas, ids)\n \n- #write output\n- with open(args.output,"w") as output :\n- writer = csv.writer(output,delimiter="\\t")\n+ # write output\n+ with open(args.output, "w") as output:\n+ writer = csv.writer(output, delimiter="\\t")\n \n- #write header\n- if header : \n+ # write header\n+ if header:\n writer.writerow(input_file[0]+features)\n- input_file = input_file[1:] \n- elif args.input_type=="file":\n- col_names = [create_header(input_file,column_number,features)]\n+ input_file = input_file[1:]\n+ elif args.input_type == "file":\n+ col_names = [create_header(input_file, column_number, features)]\n writer.writerow(col_names)\n- else : \n+ else:\n writer.writerow(["Uniprot-AC"]+features)\n \n- #write lines \n- previous_line=""\n- if args.input_type=="file" :\n- for line in input_file :\n+ # write lines\n+ previous_line = ""\n+ if args.input_type == "file":\n+ for line in input_file:\n for res in result_dict[line[column_number]]:\n- output_line = ["NA" if cell=="" or cell==" " or cell=="NaN" else cell for cell in line+res]\n- if previous_line != output_line :\n+ output_line = ["NA" if cell == "" or cell == " " or cell == "NaN" else cell for cell in line+res] # noqa 501\n+ if previous_line != output_line:\n writer.writerow(output_line)\n- previous_line=output_line\n- elif args.input_type=="list" :\n- for id in ids :\n+ previous_line = output_line\n+ elif args.input_type == "list":\n+ for id in ids:\n for res in result_dict[id]:\n- line = ["NA" if cell=="" or cell==" " or cell=="NaN" else cell for cell in [id]+res]\n- if previous_line != line :\n+ line = ["NA" if cell == "" or cell == " " or cell == "NaN" else cell for cell in [id]+res] # noqa 501\n+ if previous_line != line:\n writer.writerow(line)\n- previous_line=line\n- \n+ previous_line = line\n+\n \n if __name__ == "__main__":\n- main()\n\\ No newline at end of file\n+ main()\n' |
b |
diff -r b72ece649392 -r b526dba9dc40 proteore_get_unique_peptide_SRM-MRM_method.xml --- a/proteore_get_unique_peptide_SRM-MRM_method.xml Thu Jan 30 09:02:31 2020 -0500 +++ b/proteore_get_unique_peptide_SRM-MRM_method.xml Mon May 10 13:56:03 2021 +0000 |
[ |
@@ -1,4 +1,4 @@ -<tool id="proteore_get_unique_peptide_srm_method" name="Get unique peptide SRM-MRM method" version="2020.01.30"> +<tool id="proteore_get_unique_peptide_srm_method" name="Get unique peptide SRM-MRM method" version="2021.04.20"> <description>[Human SRM Atlas]</description> <requirements> </requirements> |