comparison cpt_phageqc_annotation/phageqc_report_genomea.tex @ 0:c3140b08d703 draft default tip

Uploaded
author cpt
date Fri, 17 Jun 2022 13:00:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c3140b08d703
1 \documentclass[]{article}
2 \usepackage{lmodern}
3 \usepackage{amssymb,amsmath}
4 \usepackage{ifxetex,ifluatex}
5 \usepackage{fixltx2e} % provides \textsubscript
6 \usepackage[T1]{fontenc}
7 \usepackage[utf8]{inputenc}
8
9 \addtolength{\oddsidemargin}{-.875in}
10 \addtolength{\evensidemargin}{-.875in}
11 \addtolength{\textwidth}{1.75in}
12
13 \addtolength{\topmargin}{-.875in}
14 \addtolength{\textheight}{1.75in}
15
16 \usepackage{fancyhdr}
17 \pagestyle{fancy}
18 \lhead{GenomeA Compliance Report}
19 \chead{}
20 \rhead{ {{record_name | texify}} }
21 \lfoot{}
22 \cfoot{\thepage}
23 \rfoot{}
24
25
26
27 \usepackage{microtype}
28 \usepackage{hyperref}
29 \hypersetup{unicode=true,
30 pdfborder={0 0 0},
31 breaklinks=true}
32 \urlstyle{same} % don't use monospace font for urls
33 \usepackage{longtable,booktabs}
34 \date{Compiled \today}
35 \title{GenomeA Compliance Report for {{record_nice_name | texify}}}
36
37 \begin{document}
38 %\pagestyle{plain}
39 \maketitle
40 This report details possible issues with your submitted genome annotations.
41
42 \section{Required Changes}
43
44 The changes detailed in this section are required for acceptance of your
45 submission.
46
47 \subsection{Missing Gene Features}
48
49 Every coding sequence (``CDS'' in your GenBank file) must have an
50 associated gene feature (``gene''). This is required for validation by NCBI's
51 rules, which are encoded in the sequin and tbl2asn programs.
52 {%if missing_genes_bad > 0 %}
53
54 {{ missing_genes_bad }} out of {{ missing_genes_good + missing_genes_bad
55 }} features are lacking their associated gene feature.
56
57 \begin{longtable}{ll}
58 \hline
59 Feature ID & Location\\
60 \hline
61 \endhead
62 {% for row in missing_genes %}
63 {{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}}\tabularnewline
64 {% endfor %}
65 \end{longtable}
66 {% else %}
67 You are not missing any gene features.
68 {% endif %}
69
70 \subsection{Missing Product Tags}\label{missing-product-tags}
71
72 {{missing_tags_good}} out of {{missing_tags_good + missing_tags_bad}} features have product tags (\texttt{/product="..."}).
73 {% if missing_tags_bad > 0 %}
74 The following features are missing product tags:
75 \begin{longtable}{ll}
76 \hline
77 Feature & Location\\
78 \hline
79 \endhead
80 {% for row in missing_tags %}
81 {{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}}\tabularnewline
82 {% endfor %}
83 \end{longtable}
84 {% endif %}
85
86 \subsection{Missing Locus Tags}\label{missing-locus-tags}
87
88 {{gene_model_correction_good}} out of {{gene_model_correction_good + gene_model_correction_bad}} features have valid locus tags (\texttt{/locus\_tag="..."}).
89 {% if gene_model_correction_bad > 0 %}
90 The following features have issues with their locus tags:
91 \begin{longtable}{lllll}
92 \hline
93 ID & Location & Gene Locus Tag & CDS Locus Tag & Issue \\
94 \hline
95 \endhead
96 {% for row in gene_model_correction %}
97 {{ row[0].id | texify }} & \texttt{{'{'}}{{row[1].location}}{{'}'}} & {{ row[0].qualifiers['locus_tag'][0] | texify }} & {{ row[1].qualifiers['locus_tag'][0] | texify }} & {{ row[2] | texify }}\tabularnewline
98 {% endfor %}
99 \end{longtable}
100 {% endif %}
101
102
103 \section{Suggested Changes}\label{suggested-changes}
104
105 These changes are not required, but are strongly encouraged in order to
106 provide a uniform genome annotation within the phage community.
107
108 \subsection{Start Codons}\label{start-codons}
109 Nearly all phage genes use ATG, GTG or TTG as start codons. The start codon distribution is as
110 follows:
111
112
113 \begin{longtable}{ll}
114 \hline
115 Start Codon & Count\\
116 \hline
117 \endhead
118 {% for codon_key in weird_starts_overall_sorted_keys %}
119 {{ codon_key }} & {{ weird_starts_overall[codon_key] }} \\
120 {% endfor %}
121 \end{longtable}
122
123 {% if weird_starts_bad != 0 %}
124 There are {{weird_starts_bad }} unusual start codons in the genome; these
125 should be carefully justified. If there is evidence for these starts, the
126 GenomeA text should note this.
127
128 \begin{longtable}{lll}
129 \hline
130 Feature ID & Location & Start Codon\\
131 \hline
132 \endhead
133 {% for row in weird_starts %}
134 {{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}} & {{row.__start}} \\
135 {% endfor %}
136 \end{longtable}
137
138 {% endif %}
139
140 \subsection{Unannotated RBSs}\label{unannotated-rbss}
141
142 The following CDSs either do not have a detectable ribosome binding site (RBS;
143 Shine-Dalgarno sequence), in which case there is a strong possibility that
144 this is not the correct start, or there is one but it is not annotated.
145 Annotating the RBS as part of the gene feature is the best practice.
146
147 \begin{longtable}{llll}
148 \hline
149 ID & Location & Error & Upstream (-{{upstream_max}} .. -{{upstream_min}})\\
150 \hline
151 \endhead
152 {% for row in missing_rbs %}
153 {% if 'Unannotated' not in row.__message%}
154 {{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}} & {{row.__message | texify}} & \texttt{{'{'}}{{row.__upstream}}{{'}'}} \\
155 {% endif %}
156 {% endfor %}
157 {% for row in missing_rbs %}
158 {% if 'Unannotated' in row.__message%}
159 {{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}} & {{row.__message | texify}} & \texttt{{'{'}}{{row.__upstream}}{{'}'}} \\
160 {% endif %}
161 {% endfor %}
162 \end{longtable}
163
164 \section{Areas for Further Examination}\label{notes}
165
166 These areas may be indicative of a problem, or may simply be
167 informational. You should examine the areas mentioned in detail to ensure
168 that the annotations are valid and that no genes are missed.
169
170
171
172
173
174 \subsection{Unusual Gaps}\label{excessive-gaps}
175
176 {% if excessive_gap | length == 0 %}
177 No gaps over {{ params['excessive_gap_dist'] }} nt (for genes on the same
178 strand) or {{ params['excessive_gap_divergent_dist'] }} nt (for genes on
179 opposite strands) were found.
180 {% else %}
181 Gaps over {{ params['excessive_gap_dist'] }} nt (for genes on the same
182 strand) or {{ params['excessive_gap_divergent_dist'] }} nt (for genes on
183 opposite strands) were found.
184
185 \begin{longtable}{llll}
186 \hline
187 Region & Size & Surroundings & Messages\\
188 \hline
189 \endhead
190 {% for row in excessive_gap %}
191 \texttt{{'{'}}{{row[0]}}..{{row[1]}}{{'}'}} & {{row[1] - row[0]}} & {{row[2] | nice_strand_tex}} {{row[3] | nice_strand_tex}} & {% if row[4] != 0 %}{{row[4]}} ORFs found in this region{% endif %} \\
192
193 {% endfor %}
194 \end{longtable}
195 {% endif %}
196
197
198
199
200 \subsection{Unusual Overlaps}\label{excessive-overlaps}
201
202 {% if excessive_overlap | length == 0 %}
203 No overlaps over {{ params['excessive_overlap_dist'] }} nt (for genes on the same
204 strand) or {{ params['excessive_overlap_divergent_dist'] }} nt (for genes on
205 opposite strands) were found.
206 {% else %}
207 Overlaps over {{ params['excessive_overlap_dist'] }} nt (for genes on the same
208 strand) or {{ params['excessive_overlap_divergent_dist'] }} nt (for genes on
209 opposite strands) were found.
210 \begin{longtable}{llllll}
211 \hline
212 \multicolumn{2}{l}{Feature A} & \multicolumn{2}{l}{Feature B} & & \\
213 ID & Location & ID & Location & Region & Length\\
214 \hline
215 \endhead
216 {% for row in excessive_overlap %}
217 {{row[0].id | texify}} & \texttt{{'{'}}{{row[0].location}}{{'}'}} & {{row[1].id | texify}} & \texttt{{'{'}}{{row[1].location}}{{'}'}} & {{row[2]}}..{{row[3]}} & {{row[3] - row[2]}} \\
218 {% endfor %}
219 \end{longtable}
220 {% endif %}
221
222 \subsection{Coding Density}\label{coding-density}
223
224 You have a coding density of {{ coding_density_real }}\% which scores
225 {{ coding_density }} / 100 on our scale. Most genomes should be in the 90\% to 100\%
226 coding density range.
227
228
229
230
231
232
233
234
235 \end{document}