view extract_archive_and_merge.py @ 0:78d968479d52 draft default tip

Imported from capsule None
author cmonjeau
date Tue, 07 Jul 2015 08:49:40 -0400
parents
children
line wrap: on
line source

#!/usr/bin/env python
"""


Created by Cyril MONJEAUD
Cyril.Monjeaud@irisa.fr
Last modification: 11/19/2014

And with the help of Anthony Bretaudeau for some stuff with bz2.

"""

import argparse, os, sys, subprocess, tempfile, shutil, gzip, zipfile, tarfile, gzip, bz2, shutil
import glob 
from galaxy import eggs
from galaxy import util
from galaxy.datatypes.checkers import *

def stop_err(msg):
    """Report a fatal error on standard error and terminate the script.

    Parameters:
        msg: text of the error; it is written followed by a newline.

    Raises:
        SystemExit: always, with exit status 1 so that the calling process
            can tell the run failed.
    """
    sys.stderr.write('%s\n' % msg)
    # A bare sys.exit() would report status 0 (success) for an error exit.
    sys.exit(1)

def main(archive, archivename, logfile, logid, workdir, merge, rm_header=0, concat=''):
    """Extract an archive into ``decompress_files`` and optionally merge its files.

    The archive is unpacked into a freshly created ``decompress_files``
    directory (relative to the current working directory).  Then either all
    extracted files are concatenated into ``concat`` and the directory is
    removed, or the extracted files are kept and some of them are renamed
    in place so that they carry a more explicit extension.

    Parameters:
        archive: path of the archive to read (zip, tar, gzip or bzip2).
        archivename: original name of the archive; for a plain gzip/bzip2
            stream its basename without the last extension names the
            decompressed file.
        logfile: path of the text report written by this function.
        logid: accepted for command-line compatibility; not used here.
        workdir: accepted for command-line compatibility; not used here.
        merge: the string "true" enables merging; any other value keeps the
            extracted files separate.
        rm_header: number of leading lines dropped from every file when
            merging (an int or a numeric string).
        concat: path of the merged output file; only used when merging.

    Raises:
        OSError: if ``decompress_files`` already exists or a file cannot be
            read, written or moved.
        ValueError: if merging is requested and ``rm_header`` is not numeric.
    """
    extract_dir = "decompress_files"

    # create the extraction directory (fails if a previous one is still there)
    os.mkdir(extract_dir)

    # the context manager closes the log on every path, including on errors
    with open(logfile, "w") as mylog:
        _extract_archive(archive, archivename, extract_dir, mylog)

        if merge == "true":
            _merge_files(extract_dir, concat, rm_header, mylog)
            shutil.rmtree(extract_dir)
        else:
            _normalise_extensions(extract_dir, mylog)
            mylog.write("\nPlease refresh your history if all files are not present\n")


def _extract_archive(archive, archivename, dest, mylog):
    """Unpack ``archive`` into the directory ``dest`` according to its format.

    Formats are probed in this order: zip, tar, gzip, bzip2.  Nothing is
    extracted when none of them matches.
    """
    # the checkers come from galaxy.datatypes.checkers; the gzip/bz2 ones
    # return a pair that is unpacked as (is compressed, is valid)
    is_gzipped, is_gzvalid = check_gzip(archive)
    is_bzipped, is_bzvalid = check_bz2(archive)

    if check_zip(archive):
        # zipfile itself strips absolute prefixes and ".." components
        with zipfile.ZipFile(archive, 'r') as myarchive:
            myarchive.extractall(dest)

    elif tarfile.is_tarfile(archive):
        with tarfile.open(archive) as mytarfile:
            mytarfile.extractall(dest, members=_safe_tar_members(mytarfile, dest, mylog))

    elif is_gzipped and is_gzvalid:
        with gzip.open(archive, 'rb') as stream:
            _write_stream(stream, archivename, dest)

    elif is_bzipped and is_bzvalid:
        with bz2.BZ2File(archive, 'rb') as stream:
            _write_stream(stream, archivename, dest)


def _safe_tar_members(tar, dest, mylog):
    """Yield the members of ``tar`` whose path stays inside ``dest``.

    tarfile does not sanitise member names, so an entry with an absolute name
    or with ".." components could otherwise be written outside ``dest``.
    Rejected members are reported in the log instead of being extracted.
    """
    root = os.path.realpath(dest)
    for member in tar.getmembers():
        # evaluated lazily, so links extracted earlier are resolved as well
        target = os.path.realpath(os.path.join(root, member.name))
        if target == root or target.startswith(root + os.sep):
            yield member
        else:
            mylog.write(member.name + " is skipped: its path points outside the extraction directory\n")


def _write_stream(stream, archivename, dest):
    """Copy a decompressed binary ``stream`` to a file in ``dest``.

    The file is named after ``archivename`` without its last extension.
    """
    target = os.path.join(dest, os.path.splitext(os.path.basename(archivename))[0])
    with open(target, 'wb', 2**20) as out:
        # copyfileobj stops on an empty read for both str and bytes, whereas
        # comparing chunks against the '' sentinel never ends on Python 3
        shutil.copyfileobj(stream, out, 2**20)


def _merge_files(source_dir, concat, rm_header, mylog):
    """Concatenate every file under ``source_dir`` into ``concat``.

    The first ``rm_header`` lines of each file are left out.
    """
    header_lines = int(rm_header)
    mylog.write("Merge option is enabled with "+str(rm_header)+" lines to deleted\n\n")

    # binary mode copies the lines byte for byte, whatever their encoding
    with open(concat, "wb") as myfinalfile:
        for myfile in listdirectory(source_dir):
            mylog.write(os.path.basename(myfile)+" is extracted from the archive and is added into the result file\n")
            with open(myfile, "rb") as myopenfile:
                for line_number, line in enumerate(myopenfile):
                    # skip the header lines, write everything after them
                    if line_number >= header_lines:
                        myfinalfile.write(line)


def _normalise_extensions(source_dir, mylog):
    """Log every file under ``source_dir`` and rename some of them in place.

    Files without extension get ".txt"; ".fa" becomes ".fasta" and ".fq"
    becomes ".fastq".  Other files are left untouched.
    """
    long_extensions = {'fa': '.fasta', 'fq': '.fastq'}

    mylog.write("Merge option is disabled\n\n")

    for myfile in listdirectory(source_dir):
        # spaces are escaped with a backslash in the report only
        myfileclean = myfile.replace(" ", "\\ ")
        mylog.write(os.path.basename(myfileclean)+" is extracted from the archive \n")

        fullpath = os.path.abspath(myfile)
        stem, extension = os.path.splitext(fullpath)
        fileext = extension.replace(".", "")

        if fileext == '':
            # if no extension
            shutil.move(fullpath, fullpath+".txt")
        elif fileext in long_extensions:
            # only the final extension is rewritten: replacing ".fa"/".fq"
            # anywhere in the path would corrupt names such as "my.family/"
            shutil.move(fullpath, stem+long_extensions[fileext])



# walk the directory recursively and return the paths of the files it contains (as a list)
def listdirectory(path):
    """Return the paths of the files found under ``path``, recursively.

    Sub-directories are descended into and are not listed themselves.
    Entries whose name starts with a dot are ignored.  Within a directory the
    entries are visited in sorted name order, so the result is deterministic.

    Parameters:
        path: directory to walk.

    Returns:
        A list of paths, each one prefixed with ``path``.  The list is empty
        when ``path`` cannot be listed (missing, not a directory, unreadable).
    """
    try:
        names = sorted(os.listdir(path))
    except OSError:
        # same outcome as a glob pattern that matches nothing
        return []

    myfile = []
    for name in names:
        # hidden entries were never matched by the former "*" glob pattern
        if name.startswith('.'):
            continue

        entry = os.path.join(path, name)
        if os.path.isdir(entry):
            # if directory, collect its content instead
            myfile.extend(listdirectory(entry))
        else:
            myfile.append(entry)
    return myfile


# Script entry point: the command-line arguments are forwarded positionally to main().
if __name__ == "__main__":
    main(*sys.argv[1:])