changeset 2:908024e80b3d draft

Uploaded
author p.lucas
date Mon, 24 Jun 2024 14:31:34 +0000
parents 54d44429c585
children 67061ff36f1c
files VCF_create_major_and_minor_reference.py
diffstat 1 files changed, 21 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/VCF_create_major_and_minor_reference.py	Tue Jun 11 10:05:47 2024 +0000
+++ b/VCF_create_major_and_minor_reference.py	Mon Jun 24 14:31:34 2024 +0000
@@ -26,7 +26,7 @@
   for line in open(vcf_file):
     li = line.strip()
     if not li.startswith("#"):
-      # print(li)
+      print(li)
       name_seq = li.split("\t")[0]
       if name_seq == name_seq_ref:
         pos = str(li.split("\t")[1])
@@ -45,22 +45,25 @@
         if pos in final_dict.keys():
           old_majo = final_dict[pos][0]
           old_mino = final_dict[pos][1]
-          freq_majo = final_dict[pos][2]
-          freq_mino = final_dict[pos][3]
-          # Cas où la nouvelle fréquence alt est > à la freq majo déjà enregistrée
-          if freq_alt >= freq_majo:
-            iupac_to_test = "".join(sorted(old_majo + old_mino))
-            if iupac_to_test not in iupac.keys():
-              convert_iupac = list(iupac.keys())[list(iupac.values()).index(old_mino)]
-              iupac_to_test = "".join(sorted(old_majo + convert_iupac))
-            final_dict[pos] = [alt, iupac[iupac_to_test], freq_alt, freq_majo + freq_mino, name_seq]
+          old_freq_majo = final_dict[pos][2]
+          old_freq_mino = final_dict[pos][3]
+          if freq_alt > freq_ref:
+            majo = alt
+            freq_majo = freq_alt 
+            mino = ref
+            freq_mino = freq_ref
           else:
-            iupac_to_test = "".join(sorted(alt + old_mino))
-            if iupac_to_test not in iupac.keys():
-              convert_iupac = list(iupac.keys())[list(iupac.values()).index(old_mino)]
-              iupac_to_test = "".join(sorted(alt + convert_iupac))
-            freq_add = freq_alt + freq_mino
-            final_dict[pos] = [old_majo, iupac[iupac_to_test], freq_majo, freq_add, name_seq]
+            majo = ref
+            freq_majo = freq_ref
+            mino = alt
+            freq_mino = freq_alt
+          if freq_majo > old_freq_majo:
+              if freq_mino < old_freq_majo:
+                final_dict[pos] = [majo, old_majo, freq_majo, old_freq_majo, name_seq]
+              else:
+                final_dict[pos] = [majo, mino, freq_majo, freq_mino, name_seq]
+          elif freq_majo > old_freq_mino:
+            final_dict[pos] = [old_majo, majo, old_freq_majo, freq_majo, name_seq]
         # Nouvelle position
         else:
           # definition colonne:
@@ -116,14 +119,8 @@
       # print(mut_dict)
       for mut in mut_dict:
         # print(mut_dict[mut])
-        if bool(re.search('[A-Z]', mut_dict[mut][0])) is False:
-          major_seq[int(mut)-1] = 'N'
-        else:
-          major_seq[int(mut)-1] = mut_dict[mut][0]
-        if bool(re.search('[A-Z]', mut_dict[mut][1])) is False:
-          minor_seq[int(mut)-1] = 'N'
-        else:
-          minor_seq[int(mut)-1] = mut_dict[mut][1]
+        major_seq[int(mut)-1] = mut_dict[mut][0]
+        minor_seq[int(mut)-1] = mut_dict[mut][1]
       major_ref.append(SeqRecord(major_seq, id=f"{ide}_MAJOR", description=""))
       minor_ref.append(SeqRecord(minor_seq, id=f"{ide}_MINOR", description=""))