view mrcanavar-0.34/utils.c @ 0:86522a0b5f59 default tip

Uploaded source code for mrCaNaVaR
author calkan
date Tue, 21 Feb 2012 10:44:13 -0500
parents
children
line wrap: on
line source


#include "utils.h"



void print_error(char *msg){
  fprintf(stderr, "\nmrCaNaVaR version %s.\nLast update: %s.\n", VERSION, LAST_UPDATE);
  fprintf(stderr, "\n%s\n", msg);
  fprintf(stderr, "Invoke parameter -h for help.\n");
  exit (0);
}

void set_runmode(enum MODETYPE NEWMODE){
  if (RUNMODE != NONE && RUNMODE != NEWMODE){
    print_error("Select one run mode only.\n"); 
  }
  RUNMODE = NEWMODE;
}

void set_gender(enum GENDERTYPE SET){
  if (GENDER != AUTODETECT && RUNMODE != SET){
    print_error("Select only one of --xx or --xy.\n"); 
  }
  GENDER= SET;
}

void set_str(char **target, char *source){

  if (*target != NULL) free((*target));

  (*target) = (char *) malloc(sizeof(char) * (strlen(source)+1));

  strncpy((*target), source, (strlen(source)+1));

}


void init_globals(void){
  GENOME_FASTA = NULL;
  GENOME_CONF  = NULL;
  GENOME_GAPS  = NULL;
  RUNMODE      = NONE;
  GENDER       = AUTODETECT;
  MULTGC       = 0;
  MAX_GC_CORR  = 20.0;
  MIN_GC_CORR  = 0.05;
  VERBOSE      = 0;
  
  num_chrom    = 0;

  CHECKSAM     = 1;

  /* set window size defaults */

  LW_SIZE  = 5000;
  SW_SIZE  = 1000;
  CW_SIZE  = 1000;
  LW_SLIDE = 1000;
  SW_SLIDE = 1000;

  LW_MEAN  = 0.0;
  SW_MEAN  = 0.0;
  CW_MEAN  = 0.0;

  LW_STD   = 0.0;
  SW_STD   = 0.0;
  CW_STD   = 0.0;

  CONT_WINDOW = 7;
  CUT_WINDOW  = 6;

}


FILE *my_fopen(char *fname, char *mode, char GZ){
  FILE *fp;
  

  if (!GZ)
    fp = fopen(fname, mode);
  else
    fp = gzopen(fname, mode);
  
  if (fp == NULL){
    fprintf(stderr, "Unable to open file %s in %s mode.", fname, mode[0]=='w' ? "write" : "read");
    exit (0);
  }

  return fp;
}


void saveRefConfig(char *configFile){
  
  int i;
  int wcnt;
  char chrom_name_len;
  int retVal;
  FILE *config;
  
  config = my_fopen(configFile, "w", 0);

  /* sort the chromosomes pointer array */

  qsort(chromosomes, num_chrom, sizeof (struct chrom *), compChr);


  /* start with the magicNum, I will use this as a format check when loading */
  retVal = fwrite(&magicNum, sizeof(magicNum), 1, config);

  /* window sizes / slides */
  
  retVal = fwrite(&LW_SIZE,  sizeof(LW_SIZE),  1, config);
  retVal = fwrite(&SW_SIZE,  sizeof(SW_SIZE),  1, config);
  retVal = fwrite(&CW_SIZE,  sizeof(CW_SIZE),  1, config);
  retVal = fwrite(&LW_SLIDE, sizeof(LW_SLIDE), 1, config);
  retVal = fwrite(&SW_SLIDE, sizeof(SW_SLIDE), 1, config);

  
  /* reference genome numbers */
  
  /* number of chromosomes */
  retVal = fwrite(&num_chrom, sizeof(num_chrom), 1, config);
  
  /* iterate through chromosomes, write their names, and window counts */

  for (i=0; i<num_chrom; i++){
    chrom_name_len = (char) strlen(chromosomes[i]->name);
    retVal = fwrite(&chrom_name_len, sizeof(chrom_name_len), 1, config);
    retVal = fwrite(chromosomes[i]->name, chrom_name_len * sizeof(char), 1, config);

    retVal = fwrite(&(chromosomes[i]->length), sizeof(chromosomes[i]->length), 1, config);
    retVal = fwrite(&(chromosomes[i]->lw_cnt), sizeof(chromosomes[i]->lw_cnt), 1, config);
    retVal = fwrite(&(chromosomes[i]->sw_cnt), sizeof(chromosomes[i]->sw_cnt), 1, config);
    retVal = fwrite(&(chromosomes[i]->cw_cnt), sizeof(chromosomes[i]->cw_cnt), 1, config);    

    /* long windows */
    for (wcnt=0; wcnt<chromosomes[i]->lw_cnt; wcnt++){
      retVal = fwrite(&(chromosomes[i]->lw[wcnt].start), sizeof(chromosomes[i]->lw[wcnt].start), 1, config);
      retVal = fwrite(&(chromosomes[i]->lw[wcnt].end),   sizeof(chromosomes[i]->lw[wcnt].end),   1, config);
      retVal = fwrite(&(chromosomes[i]->lw[wcnt].gc),    sizeof(chromosomes[i]->lw[wcnt].gc),    1, config);
    }

    /* short windows */
    for (wcnt=0; wcnt<chromosomes[i]->sw_cnt; wcnt++){
      retVal = fwrite(&(chromosomes[i]->sw[wcnt].start), sizeof(chromosomes[i]->sw[wcnt].start), 1, config);
      retVal = fwrite(&(chromosomes[i]->sw[wcnt].end),   sizeof(chromosomes[i]->sw[wcnt].end),   1, config);
      retVal = fwrite(&(chromosomes[i]->sw[wcnt].gc),    sizeof(chromosomes[i]->sw[wcnt].gc),    1, config);
    }

    /* copy windows */
    for (wcnt=0; wcnt<chromosomes[i]->cw_cnt; wcnt++){
      retVal = fwrite(&(chromosomes[i]->cw[wcnt].start), sizeof(chromosomes[i]->cw[wcnt].start), 1, config);
      retVal = fwrite(&(chromosomes[i]->cw[wcnt].end),   sizeof(chromosomes[i]->cw[wcnt].end),   1, config);
      retVal = fwrite(&(chromosomes[i]->cw[wcnt].gc),    sizeof(chromosomes[i]->cw[wcnt].gc),    1, config);
    }

  }

  fclose(config);

}




void loadRefConfig(char *configFile){
  
  int i;
  int wcnt;
  char chrom_name_len;
  char readString[MAX_STR];
  int retVal;

  int isMagicNum;

  FILE *config;

  config = my_fopen(configFile, "r", 0);

  /* start with the magicNum,  use this as a format check when loading */
  retVal = fread(&isMagicNum, sizeof(isMagicNum), 1, config);

  if (isMagicNum != magicNum)
    print_error("Reference configuration file seems to be invalid or corrupt.\n");


  fprintf(stdout, "Loading reference configuration, hold on ... ");
  fflush(stdout);

  /* window sizes / slides */
  
  retVal = fread(&LW_SIZE,  sizeof(LW_SIZE),  1, config);
  retVal = fread(&SW_SIZE,  sizeof(SW_SIZE),  1, config);
  retVal = fread(&CW_SIZE,  sizeof(CW_SIZE),  1, config);
  retVal = fread(&LW_SLIDE, sizeof(LW_SLIDE), 1, config);
  retVal = fread(&SW_SLIDE, sizeof(SW_SLIDE), 1, config);

  
  /* reference genome numbers */
  
  /* number of chromosomes */
  retVal = fread(&num_chrom, sizeof(num_chrom), 1, config);

  /* create chromosomes data structure */

  chromosomes = (struct chrom **) malloc(sizeof(struct chrom *) * num_chrom);
  for (i=0; i<num_chrom; i++)
    chromosomes[i] = (struct chrom *) malloc(sizeof(struct chrom) * num_chrom);

  /* iterate through chromosomes, write their names, and window counts */

  for (i=0; i<num_chrom; i++){

    retVal = fread(&chrom_name_len, sizeof(chrom_name_len), 1, config);

    retVal = fread(readString, chrom_name_len * sizeof(char), 1, config);
    readString[chrom_name_len] = 0;
    trimspace(readString);

    set_str(&(chromosomes[i]->name), readString);

    retVal = fread(&(chromosomes[i]->length), sizeof(chromosomes[i]->length), 1, config);
    retVal = fread(&(chromosomes[i]->lw_cnt), sizeof(chromosomes[i]->lw_cnt), 1, config);
    retVal = fread(&(chromosomes[i]->sw_cnt), sizeof(chromosomes[i]->sw_cnt), 1, config);
    retVal = fread(&(chromosomes[i]->cw_cnt), sizeof(chromosomes[i]->cw_cnt), 1, config);    

    /* create windows structures */


    chromosomes[i]->lw = (struct window *) malloc (sizeof(struct window) * chromosomes[i]->lw_cnt);
    chromosomes[i]->sw = (struct window *) malloc (sizeof(struct window) * chromosomes[i]->sw_cnt);
    chromosomes[i]->cw = (struct window *) malloc (sizeof(struct window) * chromosomes[i]->cw_cnt);


    
    /* long windows */
    for (wcnt=0; wcnt<chromosomes[i]->lw_cnt; wcnt++){
      retVal = fread(&(chromosomes[i]->lw[wcnt].start), sizeof(chromosomes[i]->lw[wcnt].start), 1, config);
      retVal = fread(&(chromosomes[i]->lw[wcnt].end),   sizeof(chromosomes[i]->lw[wcnt].end),   1, config);
      retVal = fread(&(chromosomes[i]->lw[wcnt].gc),    sizeof(chromosomes[i]->lw[wcnt].gc),    1, config);
      // this is the default, auto-control-picker will fix this
      chromosomes[i]->lw[wcnt].isControl = 1;
      chromosomes[i]->lw[wcnt].depth = 0.0;
    }

    /* short windows */
    for (wcnt=0; wcnt<chromosomes[i]->sw_cnt; wcnt++){
      retVal = fread(&(chromosomes[i]->sw[wcnt].start), sizeof(chromosomes[i]->sw[wcnt].start), 1, config);
      retVal = fread(&(chromosomes[i]->sw[wcnt].end),   sizeof(chromosomes[i]->sw[wcnt].end),   1, config);
      retVal = fread(&(chromosomes[i]->sw[wcnt].gc),    sizeof(chromosomes[i]->sw[wcnt].gc),    1, config);
      chromosomes[i]->sw[wcnt].isControl = 1;
      chromosomes[i]->sw[wcnt].depth = 0.0;
    }

    /* copy windows */
    for (wcnt=0; wcnt<chromosomes[i]->cw_cnt; wcnt++){
      retVal = fread(&(chromosomes[i]->cw[wcnt].start), sizeof(chromosomes[i]->cw[wcnt].start), 1, config);
      retVal = fread(&(chromosomes[i]->cw[wcnt].end),   sizeof(chromosomes[i]->cw[wcnt].end),   1, config);
      retVal = fread(&(chromosomes[i]->cw[wcnt].gc),    sizeof(chromosomes[i]->cw[wcnt].gc),    1, config);
      chromosomes[i]->cw[wcnt].isControl = 1;
      chromosomes[i]->cw[wcnt].depth = 0.0;
    }

    /* fill in the rest of the chromosomes structure */
    
  }

  fprintf(stdout, "[OK]. %d chromosomes loaded.\n", num_chrom);

  fclose(config);
}



static int compChr(const void *p1, const void *p2){
  /* compare function to sort the chromosome pointer array */
  struct chrom *a, *b;

  a = *((struct chrom **)p1);
  b = *((struct chrom **)p2);


  return (strcmp ( a->name, b->name) );

}


int endswith(char *src, char *end){
  

  int endlen = strlen(end);
  int srclen = strlen(src);
  int copyindex;


  if (endlen > srclen)
    return 0;

  copyindex = srclen - endlen;

  if (memcmp(src+copyindex, end, endlen))
    return 0;
  else 
    return 1;

  
}

void trimspace(char *str){
  int len;
  len = strlen(str) - 1;
  while(1){
    if (isspace(str[len]))
      str[len--]=0;
    else 
      break;
  }
}