Mercurial > repos > fubar > egapx_runner
comparison nf/subworkflows/ncbi/main.nf @ 0:d9c5c5b87fec draft
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
author | fubar |
---|---|
date | Sat, 03 Aug 2024 11:16:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d9c5c5b87fec |
---|---|
1 #!/usr/bin/env nextflow | |
2 // main nextflow script for EGAPx execution | |
3 // route data to subworkflows | |
4 | |
5 nextflow.enable.dsl=2 | |
6 | |
7 include { rnaseq_short_plane } from './rnaseq_short/main' | |
8 include { target_proteins_plane } from './target_proteins/main' | |
9 include { gnomon_plane; post_gnomon_plane } from './gnomon/main' | |
10 include { orthology_plane } from './orthology/main' | |
11 include { setup_genome; setup_proteins } from './setup/main' | |
12 include { annot_builder } from './default/annot_builder/main' | |
13 include { annotwriter } from './default/annotwriter/main' | |
14 | |
15 | |
16 params.intermediate = false // default; not referenced in the visible part of this file - NOTE(review): presumably read by the included subworkflows, confirm | |
17 params.use_orthology = false // default; when true, egapx runs orthology_plane after annot_builder | |
18 params.use_post_gnomon = false // default; when true and use_orthology is also true, egapx runs post_gnomon_plane | |
19 | |
20 | |
21 // egapx - top-level EGAPx annotation workflow. | |
22 // Steps: set up the genome, optionally align proteins and short RNA-seq reads, | |
23 // feed the alignments to GNOMON, build the annotation and write it out. | |
24 // Orthology and post-GNOMON steps run only when params.use_orthology / | |
25 // params.use_post_gnomon are set. | |
26 // Emits: | |
27 // out_files - annotwriter.out.annoted_file | |
28 // annot_builder_output - annot_builder.out.outputs | |
29 workflow egapx { | |
30 take: | |
31 genome // path to genome | |
32 proteins // path to proteins, optional | |
33 | |
34 // Alternative groups of parameters, one of them should be set | |
35 // reads_query - SRA query in the form accepted by NCBI | |
36 // reads_ids - list of SRA IDs | |
37 // reads, reads_metadata - path to reads accompanied by metadata | |
38 reads_query // SRA query | |
39 reads_ids // list of SRA IDs | |
40 reads // path to reads | |
41 reads_metadata // path to reads metadata: 13 tab-delimited fields, 1st - SRA ID, 3rd - paired or unpaired, everything else - not used, but must be present | |
42 // fields 4, 5, 13 - numbers, 5 - non-zero number | |
43 | |
44 organelles // path to organelle list | |
45 // Alternative parameters, one of them should be set | |
46 // tax_id - NCBI tax id of the closest taxon to the genome | |
47 // hmm_params - HMM parameters | |
48 tax_id // NCBI tax id of the closest taxon to the genome | |
49 hmm_params // HMM parameters | |
50 hmm_taxid // NCBI tax id of the HMM | |
51 // | |
52 softmask // softmask for GNOMON, optional | |
53 // | |
54 max_intron // max intron length | |
55 genome_size_threshold // the threshold for calculating actual max intron length | |
56 task_params // task parameters for every task | |
57 main: | |
58 print "workflow.container: ${workflow.container}" | |
59 | |
60 // NOTE: assuming task_params is a Groovy Map, get(key, default) returns the map | |
61 // stored under 'setup' (storing the default first when the key is absent), so the | |
62 // two keys added below are also visible to the setup_proteins call further down. | |
63 def setup_genome_params = task_params.get('setup', [:]) | |
64 setup_genome_params['max_intron'] = max_intron | |
65 setup_genome_params['genome_size_threshold'] = genome_size_threshold | |
66 def (scaffolds, gencoll_asn, unpacked_genome, genome_asn, genome_asnb, eff_max_intron) = setup_genome(genome, organelles, setup_genome_params) | |
67 | |
68 // True when any of the alternative read inputs was supplied | |
69 def has_reads = reads_query || reads_ids || reads | |
70 | |
71 // Protein alignments (stay empty when no proteins are given) | |
72 def protein_alignments = [] | |
73 def unpacked_proteins | |
74 def proteins_asn = [] | |
75 if (proteins) { | |
76 // miniprot plane | |
77 (unpacked_proteins, proteins_asn) = setup_proteins(proteins, task_params.get('setup', [:])) | |
78 target_proteins_plane(unpacked_genome, genome_asn, gencoll_asn, unpacked_proteins, proteins_asn, eff_max_intron, task_params) | |
79 protein_alignments = target_proteins_plane.out.protein_alignments | |
80 } | |
81 | |
82 // RNASeq short alignments (stay empty when no reads are given) | |
83 def rnaseq_alignments = [] | |
84 if (has_reads) { | |
85 rnaseq_short_plane(genome_asn, scaffolds, unpacked_genome, reads_query, reads_ids, reads, reads_metadata, organelles, tax_id, eff_max_intron, task_params) | |
86 rnaseq_alignments = rnaseq_short_plane.out.rnaseq_alignments | |
87 } | |
88 | |
89 // Combine RNASeq and protein alignments | |
90 def alignments | |
91 if (proteins && has_reads) { | |
92 alignments = rnaseq_alignments.combine(protein_alignments) | |
93 } else if (proteins) { | |
94 alignments = protein_alignments | |
95 } else { | |
96 alignments = rnaseq_alignments | |
97 } | |
98 | |
99 // GNOMON | |
100 gnomon_plane(genome_asn, scaffolds, gencoll_asn, proteins_asn, alignments, tax_id, hmm_params, hmm_taxid, softmask, eff_max_intron, task_params) | |
101 def gnomon_models = gnomon_plane.out.gnomon_models | |
102 | |
103 // outputs | |
104 annot_builder(gencoll_asn, gnomon_models, genome_asn, task_params.get('annot_builder', [:])) | |
105 def accept_annot_file = annot_builder.out.accept_ftable_annot | |
106 def annot_files = annot_builder.out.annot_files | |
107 | |
108 if (params.use_orthology) { | |
109 // ORTHOLOGY | |
110 orthology_plane(genome_asnb, gencoll_asn, gnomon_models, annot_files, task_params) | |
111 def orthologs = orthology_plane.out.orthologs | |
112 if (params.use_post_gnomon) { | |
113 // POST GNOMON (needs the orthologs, hence nested under use_orthology) | |
114 post_gnomon_plane(gnomon_models, gencoll_asn, orthologs, tax_id, task_params) | |
115 } | |
116 } | |
117 | |
118 annotwriter(accept_annot_file, [:]) | |
119 | |
120 emit: | |
121 out_files = annotwriter.out.annoted_file | |
122 annot_builder_output = annot_builder.out.outputs | |
123 // locus = post_gnomon_plane.out.locus | |
124 } |