Mercurial > repos > mvdbeek > r_goseq_1_22_0
comparison get_length_and_gc_content.r @ 7:15ce6435ab83 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
author | mvdbeek |
---|---|
date | Thu, 25 Feb 2016 06:09:34 -0500 |
parents | d4b5942ed347 |
children | 7f8d888e3355 |
comparison
equal
deleted
inserted
replaced
6:d4b5942ed347 | 7:15ce6435ab83 |
---|---|
19 FASTAfile = args$fasta | 19 FASTAfile = args$fasta |
20 output = args$output | 20 output = args$output |
21 | 21 |
22 #Load the annotation and reduce it | 22 #Load the annotation and reduce it |
23 GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon") | 23 GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon") |
24 grl <- reduce(split(GTF, elementMetadata(GTF)$gene_name)) | 24 grl <- reduce(split(GTF, elementMetadata(GTF)$gene_id)) |
25 reducedGTF <- unlist(grl, use.names=T) | 25 reducedGTF <- unlist(grl, use.names=T) |
26 elementMetadata(reducedGTF)$gene_name <- rep(names(grl), elementLengths(grl)) | 26 elementMetadata(reducedGTF)$gene_id <- rep(names(grl), elementLengths(grl)) |
27 | 27 |
28 #Open the fasta file | 28 #Open the fasta file |
29 FASTA <- FaFile(FASTAfile) | 29 FASTA <- FaFile(FASTAfile) |
30 open(FASTA) | 30 open(FASTA) |
31 | 31 |
37 calc_GC_length <- function(x) { | 37 calc_GC_length <- function(x) { |
38 nGCs = sum(elementMetadata(x)$nGCs) | 38 nGCs = sum(elementMetadata(x)$nGCs) |
39 width = sum(elementMetadata(x)$widths) | 39 width = sum(elementMetadata(x)$widths) |
40 c(width, nGCs/width) | 40 c(width, nGCs/width) |
41 } | 41 } |
42 output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_name), calc_GC_length)) | 42 output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length)) |
43 colnames(output) <- c("Length", "GC") | 43 colnames(output) <- c("Length", "GC") |
44 | 44 |
45 write.table(output, file="GC_lengths.tsv", sep="\t") | 45 write.table(output, file="GC_lengths.tsv", sep="\t") |