comparison Snakemake_files/Snakefile_wget_panaroo_heatmap_upset_COG @ 3:e42d30da7a74 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:52:25 +0000
parents
children
comparison
equal deleted inserted replaced
2:97e4e3e818b6 3:e42d30da7a74
1 import glob
2 import os
3 import shutil
4
def _first_accession(genbank_path):
    """Return the identifier that follows the first ACCESSION keyword.

    Scans the GenBank file at ``genbank_path`` for lines containing the
    text ``ACCESSION`` and returns the second whitespace-separated token
    found on those lines (the token right after the keyword in a regular
    ``ACCESSION   <id>`` line).

    Raises:
        ValueError: if the file holds no ACCESSION line with an identifier.
    """
    tokens = []
    with open(genbank_path) as handle:
        for record_line in handle:
            if "ACCESSION" not in record_line:
                continue
            tokens.extend(record_line.split())
            if len(tokens) > 1:
                # Stop at the first identifier; later records are ignored.
                return tokens[1]
    raise ValueError(
        "no ACCESSION identifier found in {!r}".format(genbank_path)
    )


# Sample identifiers listed explicitly, one per line.
with open("genbank_ids") as f:
    SAMPLES = f.read().splitlines()

# Additional samples: one GenBank file path per line; the sample name is the
# accession read from each file.  The files are parsed directly in Python
# rather than through a shell `grep`, so paths containing spaces or shell
# metacharacters are handled and no extra module is required.
with open("genbank_files") as f:
    for line in f:
        genbank_path = line.strip()
        if not genbank_path:
            # Blank lines carry no path to inspect.
            continue
        SAMPLES.append(_first_accession(genbank_path))
14
# Aggregate target rule: it produces nothing itself and only lists the
# files that the other rules of this workflow are expected to build.
rule final:
    input:
        "pav_matrix.csv",
        "GCskew.txt",
        "pav_matrix.tsv",
        "heatmap.svg",
        "cog_output.txt",
22
# Runs wget.pl on the identifier list with `data` as its second argument.
# The rule declares one FASTA and one GenBank file per entry of SAMPLES as
# its products (presumably fetched by the script -- confirm in wget.pl).
rule wget:
    input:
        "genbank_ids"
    output:
        expand("data/{sample}.fasta", sample=SAMPLES),
        expand("data/{sample}.gb", sample=SAMPLES),
    shell:
        "perl wget.pl {input} data"
33
# Runs SkewIT's gcskew.py on a single sample FASTA file.
# The result path is taken from the declared output instead of being rebuilt
# from the input name, so the declaration and the written file cannot drift.
rule gcskew:
    input:
        "data/{sample}.fasta"
    output:
        "data/{sample}.fasta.gcskew.txt"
    shell:
        """
        python3 SkewIT/src/gcskew.py -i {input} -o {output} -k 1000 -w 1000
        """
43
# Concatenates the per-sample GC skew tables into a single file.
# A truncating redirect is used so the result never accumulates content from
# a file that already exists at the output path.
rule concat_gcskew:
    input:
        expand("data/{sample}.fasta.gcskew.txt", sample=SAMPLES)
    output:
        out2="GCskew.txt"
    shell:
        """
        cat {input} > {output.out2}
        """
53
# Two-step conversion of one GenBank file:
#   1. bp_genbank2gff3.pl is run with `data` as its output directory;
#   2. remove_duplicates_in_gff.pl reads the first declared GFF and writes
#      the second one.
rule genbank2gff3:
    input:
        "data/{sample}.gb"
    output:
        gff1="data/{sample}.gb.gff",
        gff2="data/{sample}.gb.rmdup.gff",
    shell:
        """
        perl bp_genbank2gff3.pl -o data {input}
        perl remove_duplicates_in_gff.pl {output.gff1} {output.gff2}
        """
65
# Runs Panaroo on the de-duplicated GFF files of all samples and copies its
# Roary-style presence/absence table to the declared output.
rule panaroo:
    input:
        expand("data/{sample}.gb.rmdup.gff", sample=SAMPLES)
    output:
        pav="pav_matrix.csv"
    shell:
        # `mkdir -p` keeps a re-run from failing when the directory is left
        # over from an earlier attempt.  The declared inputs are passed
        # explicitly so stale GFF files in data/ that are not part of SAMPLES
        # are not picked up by a glob.
        """
        mkdir -p panaroo_outdir
        panaroo --clean-mode strict -o panaroo_outdir -i {input}
        cp -rf panaroo_outdir/gene_presence_absence_roary.csv {output.pav}
        """
77
# Runs ConvertPanarooMatrix.pl on the CSV matrix to obtain the TSV matrix.
# File names come from the rule declaration so they are written only once.
# NOTE(review): data/strains.txt is handed to the script but is declared
# neither as input nor as output -- confirm which one it is.
rule convert_matrix:
    input:
        pav="pav_matrix.csv"
    output:
        "pav_matrix.tsv"
    shell:
        """
        perl ConvertPanarooMatrix.pl data {input.pav} {output} data/strains.txt
        """
87
# Runs GenerateHeatmapFromPAV.pl on the TSV matrix to produce the SVG file.
# The command refers to the declared input and output rather than repeating
# their literal names.
rule heatmap_upset:
    input:
        pav="pav_matrix.tsv"
    output:
        "heatmap.svg"
    shell:
        """
        perl GenerateHeatmapFromPAV.pl {input.pav} {output}
        """
97
# Runs GetCogOfCluster.pl with the TSV matrix, the `data` directory and the
# declared output file.  The input is referenced through its placeholder,
# matching how the output was already passed.
rule cog:
    input:
        pav="pav_matrix.tsv"
    output:
        "cog_output.txt"
    shell:
        """
        perl GetCogOfCluster.pl {input.pav} data {output}
        """