Mercurial > repos > dereeper > pangenome_explorer
comparison Snakemake_files/Snakefile_wget_panaroo_heatmap_upset_COG @ 3:e42d30da7a74 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 30 May 2024 11:52:25 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:97e4e3e818b6 | 3:e42d30da7a74 |
---|---|
1 import glob | |
2 import os | |
3 import shutil | |
4 | |
def _accession_of(genbank_path):
    """Return the accession announced in the GenBank file *genbank_path*.

    Equivalent to taking the second whitespace-separated word of the output
    of ``grep 'ACCESSION' <file>``: lines containing ``ACCESSION`` are
    scanned in order and the second word found is returned.

    Parsed in Python rather than by shelling out to grep: this needs no
    ``subprocess`` import, has no shell-quoting problems with unusual paths,
    and a missing file raises instead of turning grep's error message into
    a sample name.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if no accession can be found in the file.
    """
    words = []
    with open(genbank_path, errors="replace") as handle:
        for record_line in handle:
            if "ACCESSION" in record_line:
                words.extend(record_line.split())
                if len(words) > 1:
                    return words[1]
    raise ValueError(f"no ACCESSION value found in GenBank file {genbank_path!r}")


# Sample names listed in "genbank_ids", one per line. Blank lines are
# skipped because an empty sample name would expand to paths such as
# "data/.fasta".
with open("genbank_ids") as f:
    SAMPLES = [entry.strip() for entry in f if entry.strip()]

# "genbank_files" lists one GenBank file path per line; each file contributes
# the accession found inside it as an additional sample name.
with open("genbank_files") as f:
    for line in f:
        genbank_path = line.strip()
        if not genbank_path:
            # A blank line is not a path; there is nothing to look up.
            continue
        SAMPLES.append(_accession_of(genbank_path))
14 | |
# Aggregate target: requesting this rule asks for every end product below.
# It is the first rule in this file, which Snakemake uses as the default
# target when none is given on the command line.
rule final:
    input:
        # Presence/absence matrix, as copied from panaroo and as converted.
        "pav_matrix.csv",
        "pav_matrix.tsv",
        # Concatenated per-sample GC-skew tables.
        "GCskew.txt",
        # Outputs derived from the converted matrix.
        "heatmap.svg",
        "cog_output.txt"
22 | |
# Runs wget.pl on the "genbank_ids" list with "data" as its second argument,
# and declares one FASTA and one GenBank file per sample as the result.
# NOTE(review): SAMPLES also holds accessions read from the files listed in
# "genbank_files"; this rule promises data/<accession>.fasta/.gb for those
# too, although only "genbank_ids" is passed to the script -- confirm that
# those files are provided some other way.
rule wget:
    input:
        "genbank_ids"
    output:
        fasta=expand("data/{sample}.fasta", sample=SAMPLES),
        gb=expand("data/{sample}.gb", sample=SAMPLES)
    shell:
        """
        perl wget.pl {input} data
        """
33 | |
# Runs SkewIT's gcskew.py on one sample's FASTA file.
rule gcskew:
    input:
        "data/{sample}.fasta"
    output:
        "data/{sample}.fasta.gcskew.txt"
    shell:
        # The destination is taken from the declared output rather than
        # rebuilt from the input name, so the path the script writes and the
        # path Snakemake checks for cannot drift apart.
        """
        python3 SkewIT/src/gcskew.py -i {input} -o {output} -k 1000 -w 1000
        """
43 | |
# Concatenates the per-sample GC-skew tables, in SAMPLES order, into one file.
rule concat_gcskew:
    input:
        expand("data/{sample}.fasta.gcskew.txt", sample=SAMPLES)
    output:
        out2="GCskew.txt"
    shell:
        # Truncating redirect: the result must contain exactly the inputs of
        # this run and never be appended to content left by an earlier run.
        """
        cat {input} > {output.out2}
        """
53 | |
# Converts one sample's GenBank file to GFF3, then produces a second GFF
# with remove_duplicates_in_gff.pl.
rule genbank2gff3:
    input:
        "data/{sample}.gb"
    output:
        # gff1: expected to be written into "data" by bp_genbank2gff3.pl
        #       (it is given "-o data"; the file name itself is chosen by
        #       the script, not passed on the command line).
        # gff2: written by remove_duplicates_in_gff.pl from gff1.
        gff1="data/{sample}.gb.gff",
        gff2="data/{sample}.gb.rmdup.gff"
    shell:
        """
        perl bp_genbank2gff3.pl -o data {input}
        perl remove_duplicates_in_gff.pl {output.gff1} {output.gff2}
        """
65 | |
# Runs panaroo on the de-duplicated GFF files of all samples and copies its
# Roary-style presence/absence table to the workflow-level matrix file.
rule panaroo:
    input:
        expand("data/{sample}.gb.rmdup.gff", sample=SAMPLES)
    output:
        pav="pav_matrix.csv"
    shell:
        # mkdir -p: a plain mkdir fails when panaroo_outdir is left over from
        # a previous or interrupted run, which made the rule impossible to
        # re-run.
        # {input}: panaroo receives exactly the files this rule depends on
        # (in SAMPLES order) instead of whatever matches a glob in data/,
        # which would also pick up stale GFF files from earlier runs.
        """
        mkdir -p panaroo_outdir
        panaroo --clean-mode strict -o panaroo_outdir -i {input}
        cp -rf panaroo_outdir/gene_presence_absence_roary.csv {output.pav}
        """
77 | |
# Converts the panaroo CSV matrix to the TSV matrix used by later rules.
rule convert_matrix:
    input:
        pav="pav_matrix.csv"
    output:
        "pav_matrix.tsv"
    shell:
        # File names come from the declared input/output so that renaming
        # them in one place keeps the command in sync.
        # NOTE(review): data/strains.txt is passed to the script but is not
        # declared as an input or output of any rule -- confirm which it is.
        """
        perl ConvertPanarooMatrix.pl data {input.pav} {output} data/strains.txt
        """
87 | |
# Runs GenerateHeatmapFromPAV.pl on the TSV matrix to produce the SVG file.
rule heatmap_upset:
    input:
        pav="pav_matrix.tsv"
    output:
        "heatmap.svg"
    shell:
        # File names come from the declared input/output instead of being
        # repeated as literals in the command.
        """
        perl GenerateHeatmapFromPAV.pl {input.pav} {output}
        """
97 | |
# Runs GetCogOfCluster.pl on the TSV matrix and the "data" directory to
# produce the COG output file.
rule cog:
    input:
        pav="pav_matrix.tsv"
    output:
        "cog_output.txt"
    shell:
        # The matrix path comes from the declared input, matching the way
        # the output path is already passed.
        """
        perl GetCogOfCluster.pl {input.pav} data {output}
        """