# HG changeset patch # User triasteran # Date 1655805049 0 # Node ID be394fb4725069b085f58c3c537e4034a6362bfe # Parent 1ce4b52212c4b8ff8a36516a5eea6229ea3b74f0 Uploaded diff -r 1ce4b52212c4 -r be394fb47250 UMI_riboseq_processing/UMI.py --- a/UMI_riboseq_processing/UMI.py Tue Jun 21 09:33:27 2022 +0000 +++ b/UMI_riboseq_processing/UMI.py Tue Jun 21 09:50:49 2022 +0000 @@ -27,18 +27,21 @@ seq = lines[1] sep = lines[2] qual = lines[3] - trimmed_seq = seq[2:-6]+"\n" # fooprint + barcode - UMI = seq[0:2]+seq.rstrip()[-5:] #7nt in total; 5'NN and last 3'NNNNN - split_header = header.split(" ") - new_header = split_header[0]+"_"+UMI+" "+split_header[1] - if qual[-1:] == "\n": - new_qual = qual[2:-6]+"\n" - else: - new_qual = qual[2:-6] - output.write(new_header) - output.write(trimmed_seq) - output.write(sep) - output.write(new_qual) + # check if header is OK + if (header.startswith('@')): + trimmed_seq = seq[2:-6]+"\n" # fooprint + barcode + UMI = seq[0:2]+seq.rstrip()[-5:] #7nt in total; 5'NN and last 3'NNNNN + split_header = header.split(" ") + print (split_header) + new_header = split_header[0]+"_"+UMI+" "+split_header[1] + if qual[-1:] == "\n": + new_qual = qual[2:-6]+"\n" + else: + new_qual = qual[2:-6] + output.write(new_header) + output.write(trimmed_seq) + output.write(sep) + output.write(new_qual) output.close() diff -r 1ce4b52212c4 -r be394fb47250 UMI_riboseq_processing/UMI_riboseq.xml --- a/UMI_riboseq_processing/UMI_riboseq.xml Tue Jun 21 09:33:27 2022 +0000 +++ b/UMI_riboseq_processing/UMI_riboseq.xml Tue Jun 21 09:50:49 2022 +0000 @@ -1,4 +1,4 @@ - +