# HG changeset patch # User Nick Stoler # Date 1393822061 18000 # Node ID 534516105ca967a4f107217d8013c240ea722e3a # Parent 2d99feb4a8e389359e361c8cdc01aa1a0ff3908e visualize.py: The second tool, for a more aesthetic, abstract visualization. diff -r 2d99feb4a8e3 -r 534516105ca9 visualize.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/visualize.py Sun Mar 02 23:47:41 2014 -0500 @@ -0,0 +1,120 @@ +#!/usr/bin/python +import sys +import random +import hashlib +import Image +import ImageDraw + +def main(): + + if len(sys.argv) == 1: + print """USAGE: + $ ./visualize.py genome.fa outfile.png pxsize +The last two arguments are optional, but must be in those positions. +If no outfile name is given, it will attempt to display the image directly. +If no pxsize is given, the default is 512. A power of 2 is highly +recommended as the resulting image will be much better laid out.""" + sys.exit(0) + + DEFAULT_SIZE = (512,512) + size = DEFAULT_SIZE + if len(sys.argv) > 3: + user_size = int(sys.argv[3]) + size = (user_size,user_size) + + # can skip hashing and specify the seed as an option (for testing) + if len(sys.argv) > 4: + seed = sys.argv[4] + else: + seed = get_hash(sys.argv[1]) + print "seed: "+seed + random.seed(seed) + level = 1 + layers = [] + image = draw_layer(size, level) + while 1: + level+=1 + layers.append(draw_layer(size, level)) + if layers[-1] == False: + break + # opacity = 256/2**level # opacity #1 + opacity = 256/2**(level-1) # opacity #2 + # opacity = 256*(level/2)/2**(level-1) # opacity #3 + # opacity = 256*(level/float(2**level)) # opacity #4 + print "opacity: "+str(int(opacity/2.56))+"%" + mask = Image.new("L", size, color=opacity) + image.paste(layers[-1], (0,0), mask) + + if len(sys.argv) > 2: + image.save(sys.argv[2], 'PNG') + else: + image.show() + +def draw_layer(image_size, level): + """Draw every block for a particular layer + (blocks of a particular pixel size). + Returns an image of the finished layer.""" + (image_width,image_height) = image_size + (width, height) = block_size(image_size, level) + if width < 1 or height < 1: + return False + print "width, height: "+str(width)+", "+str(height) + layer = Image.new("RGB", image_size, (0,0,0)) + draw = ImageDraw.Draw(layer) + (x,y) = (0,0) + while y < image_height: + while x < image_width: + draw.rectangle([(x,y), (x+width-1, y+height-1)], fill=randcolor()) + x += width + y += height + x = 0 + return layer + +def get_hash(filepath): + """Compute hash of the file""" + hashed = hashlib.sha256() + with open(filepath, 'rb') as filehandle: + for chunk in iter(lambda: filehandle.read(65536), b''): + hashed.update(chunk) + return hashed.hexdigest() + +def randcolor(): + """Return a tuple of random color values""" + color = [] + for i in range(3): + color.append(random.randrange(0,255)) + return tuple(color) + +def block_size(image_size, level): + """Compute the block width and height for this layer.""" + (width,height) = image_size + while level > 0: + width = width/2 + height = height/4 + level-=1 + if level < 1: + break + width = width/4 + height = height/2 + level-=1 + return (width,height) + +def format_genome(): + """Eventually I'd like to attempt to process the genome file into a standard + format, stripping out details of representation that can change the output, + such as: + - upper/lowercase + - line endings + - chromosome naming + - chromosome order + - noncanonical chromosomes""" + +def check_genome(): + """Eventually I'd like this to check assumptions I might make about the + genome file. A first check would be that it contains all the chromosomes, in + the format ''>chr1' or ''>1'. If these assumptions (which format_genome() + depends on) aren't met, and the format isn't recognized, then default to a + straight digest of the unmodified file.""" + +if __name__ == "__main__": + main() \ No newline at end of file diff -r 2d99feb4a8e3 -r 534516105ca9 visualize.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/visualize.xml Sun Mar 02 23:47:41 2014 -0500 @@ -0,0 +1,24 @@ + + Create a graphic from a DNA sequence + visualize.py $input $output $size + + + + + + + + + + + +.. class:: infomark + +**What it does** + +This tool creates a graphic representing a DNA sequence. +The graphic is derived from the sequence, but is not a literal representation. It is intended to be an aesthetic visual based on the sequence. Even a small change in the input sequence will result in a drastically different image. See [here](http://nstoler.com/misc/yourgenome.html) for a full description and rationale. + + + +