annotate Snakemake_files/Snakefile_wget_panaroo_heatmap_upset_COG @ 11:ae74fc0cb39c draft

Uploaded
author dereeper
date Thu, 30 May 2024 16:41:08 +0000
parents e42d30da7a74
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
1 import glob
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
2 import os
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
3 import shutil
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
4
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
def _read_accession(genbank_path):
    """Return the accession found on the ACCESSION line(s) of a GenBank file.

    Mirrors ``grep 'ACCESSION' <file>`` followed by taking the second
    whitespace-separated token of the matches, but without spawning a
    shell, so paths containing spaces or shell metacharacters are safe.

    Raises:
        ValueError: if no accession token can be found in the file.
    """
    tokens = []
    with open(genbank_path) as handle:
        for record_line in handle:
            if "ACCESSION" in record_line:
                tokens.extend(record_line.split())
                if len(tokens) > 1:
                    return tokens[1]
    raise ValueError("No ACCESSION entry found in " + genbank_path)


# Sample identifiers listed explicitly, one per line; blank lines are skipped
# so they cannot turn into empty {sample} wildcards.
with open("genbank_ids") as f:
    SAMPLES = [entry.strip() for entry in f if entry.strip()]

# Additional samples: one GenBank file path per line, each contributing the
# accession read from the file itself.
with open("genbank_files") as f:
    for line in f:
        genbank_path = line.strip()
        if not genbank_path:
            # A blank line used to produce "grep 'ACCESSION' " with no file
            # argument, which makes grep wait on stdin.
            continue
        SAMPLES.append(_read_accession(genbank_path))
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
14
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
# Aggregate target rule: it has no command of its own and only lists the
# end products that the workflow is expected to build.
rule final:
    input:
        "pav_matrix.csv",
        "GCskew.txt",
        "pav_matrix.tsv",
        "heatmap.svg",
        "cog_output.txt",
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
22
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
# Runs wget.pl once with the id list and the "data" directory as arguments.
# The rule declares one FASTA and one GenBank file per entry of SAMPLES as
# its outputs (presumably what wget.pl fetches -- confirm against the script).
rule wget:
    input:
        "genbank_ids",
    output:
        expand("data/{sample}.fasta", sample=SAMPLES),
        expand("data/{sample}.gb", sample=SAMPLES),
    shell:
        """
        perl wget.pl {input} data
        """
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
33
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
# Per-sample step: runs SkewIT's gcskew.py on one FASTA file.
# The result path is passed as {output} so the command always writes exactly
# the file Snakemake is waiting for, even if the output pattern is changed.
rule gcskew:
    input:
        "data/{sample}.fasta"
    output:
        "data/{sample}.fasta.gcskew.txt"
    shell:
        """
        python3 SkewIT/src/gcskew.py -i {input} -o {output} -k 1000 -w 1000
        """
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
43
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
# Concatenates the per-sample GC skew tables of every entry in SAMPLES into
# a single file.
# The target is truncated (">") rather than appended to (">>"), so running
# the command again can never duplicate content from an earlier run.
rule concat_gcskew:
    input:
        expand("data/{sample}.fasta.gcskew.txt", sample=SAMPLES)
    output:
        out2="GCskew.txt"
    shell:
        """
        cat {input} > {output.out2}
        """
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
53
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
# Per-sample step with two commands:
#   1. bp_genbank2gff3.pl is run on the GenBank file with "data" as its -o
#      argument; the rule expects data/{sample}.gb.gff to exist afterwards.
#   2. remove_duplicates_in_gff.pl reads that GFF and writes the ".rmdup.gff"
#      file.
rule genbank2gff3:
    input:
        "data/{sample}.gb",
    output:
        raw="data/{sample}.gb.gff",
        rmdup="data/{sample}.gb.rmdup.gff",
    shell:
        """
        perl bp_genbank2gff3.pl -o data {input}
        perl remove_duplicates_in_gff.pl {output.raw} {output.rmdup}
        """
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
65
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
# Runs Panaroo over the de-duplicated GFF of every entry in SAMPLES and
# copies its Roary-style presence/absence table to the workflow-level name.
rule panaroo:
    input:
        expand("data/{sample}.gb.rmdup.gff", sample=SAMPLES)
    output:
        pav="pav_matrix.csv"
    shell:
        # mkdir -p: a plain mkdir fails when the directory is left over from
        # an earlier (possibly failed) run, which would abort the job.
        # {input}: pass exactly the declared GFF files instead of a
        # data/*gb.rmdup.gff glob, which would also pick up stale files
        # belonging to samples that are no longer in SAMPLES.
        """
        mkdir -p panaroo_outdir
        panaroo --clean-mode strict -o panaroo_outdir -i {input}
        cp -rf panaroo_outdir/gene_presence_absence_roary.csv {output.pav}
        """
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
77
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
# Runs ConvertPanarooMatrix.pl to turn the CSV matrix into the TSV matrix.
# The matrix paths come from the rule's own input/output declarations so the
# command cannot drift from what Snakemake tracks.
# NOTE(review): the script also receives the "data" directory and
# data/strains.txt; whether strains.txt is read or written is not visible
# here -- if it is written, consider declaring it as an output.
rule convert_matrix:
    input:
        pav="pav_matrix.csv"
    output:
        "pav_matrix.tsv"
    shell:
        """
        perl ConvertPanarooMatrix.pl data {input.pav} {output} data/strains.txt
        """
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
87
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
# Runs GenerateHeatmapFromPAV.pl on the TSV matrix to produce the SVG.
# Paths are taken from the input/output declarations instead of being
# repeated as literals in the command.
rule heatmap_upset:
    input:
        pav="pav_matrix.tsv"
    output:
        "heatmap.svg"
    shell:
        """
        perl GenerateHeatmapFromPAV.pl {input.pav} {output}
        """
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
97
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
# Runs GetCogOfCluster.pl with the TSV matrix, the "data" directory and the
# output file as arguments.
# The matrix path now comes from {input.pav}, matching the way {output} was
# already used, so the command follows the rule's declarations.
rule cog:
    input:
        pav="pav_matrix.tsv"
    output:
        "cog_output.txt"
    shell:
        """
        perl GetCogOfCluster.pl {input.pav} data {output}
        """