comparison Snakemake_files/Snakefile_orthofinder_heatmap_upset @ 3:e42d30da7a74 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:52:25 +0000
parents
children
comparison
equal deleted inserted replaced
2:97e4e3e818b6 3:e42d30da7a74
1 import glob
2 import os
3 import shutil
4
5 import yaml
6 configfile: "config.yaml"
7
8 SAMPLES = []
9 with open("config.yaml", "r") as yaml_file:
10 genome_data = yaml.safe_load(yaml_file)
11 for genome_name, genome_info in genome_data["input_genomes"].items():
12 name = genome_info["name"]
13 SAMPLES.append(name)
14
15
16
17 rule final:
18 input:
19 expand("outputs/genomes/{sample}.fasta", sample=SAMPLES),
20 expand("outputs/genomes/{sample}.gff", sample=SAMPLES),
21 expand("outputs/genomes/{sample}.pep", sample=SAMPLES),
22 "outputs/pav_matrix.tsv",
23 "outputs/heatmap.svg.gz",
24 "outputs/rarefaction_curves.txt"
25
26 rule get_data:
27 input:
28 "config.yaml"
29 output:
30 "outputs/genomes/strains.txt",
31 expand("outputs/genomes/{sample}.fasta", sample=SAMPLES),
32 expand("outputs/genomes/{sample}.gff", sample=SAMPLES),
33 run:
34 with open(input[0], "r") as yaml_file, open(output[0], "w") as output_file:
35 genome_data = yaml.safe_load(yaml_file)
36 for genome_name, genome_info in genome_data["input_genomes"].items():
37 fasta_file = genome_info["fasta"]
38 name = genome_info["name"]
39 gff_file = genome_info["gff3"]
40 shutil.copy(gff_file, "outputs/genomes/"+str(name)+".gff")
41 shutil.copy(fasta_file, "outputs/genomes/"+str(name)+".fasta")
42 output_file.write(f"{name}\t{name}\n")
43
44 rule gffread:
45 input:
46 fasta = "outputs/genomes/{sample}.fasta",
47 gff = "outputs/genomes/{sample}.gff"
48 output:
49 "outputs/genomes/{sample}.pep"
50 shell:
51 """
52 gffread -y {output} -g {input.fasta} {input.gff}
53 """
54
55 rule orthofinder:
56 input:
57 expand("outputs/genomes/{sample}.pep", sample=SAMPLES)
58 params:
59 identity=80
60 output:
61 pav="outputs/Orthogroups.tsv"
62 shell:
63 """
64 mkdir -p outputs/proteomes
65 sed -i '/^>/! s/\./N/g' outputs/genomes/*.pep
66 cp -rf outputs/genomes/*.pep outputs/proteomes
67 orthofinder -f outputs/proteomes
68 mv outputs/proteomes/OrthoFinder/R*/Orthogroups/Orthogroups.tsv outputs/Orthogroups.tsv
69 """
70
71 rule convert_matrix:
72 input:
73 pav="outputs/Orthogroups.tsv"
74 output:
75 "outputs/pav_matrix.tsv"
76 shell:
77 """
78 perl $PANEX_PATH/Perl/ConvertOrthofinderMatrix.pl outputs/genomes {input} {output} outputs/genomes/strains.txt
79 """
80
81 rule heatmap_upset:
82 input:
83 pav="outputs/pav_matrix.tsv"
84 output:
85 heatmap="outputs/heatmap.svg.gz",
86 html="outputs/heatmap.svg.heatmap_plotly.html",
87 upsetr="outputs/upsetr.svg",
88 binpav="outputs/heatmap.svg.pangenes_01matrix.txt"
89 shell:
90 """
91 perl $PANEX_PATH/Perl/GenerateHeatmapFromPAV.pl {input.pav} outputs/heatmap.svg
92 mv outputs/heatmap.svg.upsetr.svg {output.upsetr}
93 """
94
95 rule micropan:
96 input:
97 binpav="outputs/heatmap.svg.pangenes_01matrix.txt"
98 output:
99 txt="outputs/rarefaction_curves.txt",
100 pdf="outputs/rarefaction_curves.pdf",
101 svg="outputs/rarefaction_curves.svg",
102 heaps="outputs/heaps.tsv"
103 shell:
104 """
105 Rscript $PANEX_PATH/R/micropan_rarefaction.R -f {input.binpav} -p {output.pdf} -a {output.heaps} -o {output.txt}
106 pdf2svg {output.pdf} {output.svg}
107 """
108