#include "clustddp.h"

/*
 * Count the lines between the current position of fp and end-of-file.
 *
 * A line is counted when its first byte is read, so a final line that has
 * no terminating newline is still counted and an empty stream yields 0.
 * The stream is read byte by byte rather than through a fixed-size buffer,
 * so a line of any length counts exactly once.
 *
 * fp is left at end-of-file; callers that need the contents again must
 * rewind it themselves.
 */
int nrow(FILE *fp) {
  int n = 0;
  int c;
  int prev = '\n';  /* treat the starting position as the start of a line */
  while((c = getc(fp)) != EOF) {
    if(prev == '\n') n++;  /* c is the first byte of a new line */
    prev = c;
  }
  return n;
}

/*
 * Report whether a newline occurs among the first k bytes of buf.
 *
 * Exactly k bytes are examined (a '\0' does not stop the scan), so the
 * caller must guarantee that buf holds at least k readable bytes.
 * Returns 1 if a '\n' is found, 0 otherwise (including when k <= 0).
 */
int newlinechar(char *buf, int k) {
  int pos = 0;
  while(pos < k) {
    if(buf[pos] == '\n') return 1;
    pos++;
  }
  return 0;
}

/*
 * Count the tab characters on the next line of fp.
 *
 * The line is consumed up to and including its newline (or up to
 * end-of-file), whatever its length.
 *
 * If nothing can be read at the current position -- for example because a
 * caller has already scanned the stream to end-of-file -- the stream is
 * rewound once and the first line of the file is used instead. An empty
 * stream yields 0.
 */
int ncol(FILE *fp) {
  int cont = 0;
  int c = getc(fp);
  if(c == EOF) {
    /* Nothing left here: fall back to the first line of the file. */
    rewind(fp);
    c = getc(fp);
  }
  while(c != EOF && c != '\n') {
    if(c == '\t') cont++;
    c = getc(fp);
  }
  return cont;
}

/*
 * Read "key value" pairs from the parameter file fpp into model.
 *
 * The file is scanned from its beginning; at most one pair is read per
 * line counted by nrow(). Recognised keys are np, nb, nburn, niter
 * (parsed with atoi) and lambda, nu, a, b, alpha, rho, gamma, minvar
 * (parsed with atof). Unrecognised keys are skipped. Fields whose key is
 * absent from the file are left untouched.
 *
 * Tokens are limited to 63 characters so they cannot overrun the local
 * buffers, and reading stops as soon as a complete pair is unavailable.
 */
void readHyperParam(FILE *fpp, MODEL *model) {
  int i,n;
  char buf1[64];
  char buf2[64];
  n = nrow(fpp);
  rewind(fpp);
  for(i=0;i<n;i++) {
    /* Stop at the first incomplete pair instead of reusing stale tokens. */
    if(fscanf(fpp, "%63s %63s", buf1, buf2) != 2) break;
    if(strcmp(buf1,"np") == 0) {
      model->np = atoi(buf2);
    }
    else if(strcmp(buf1,"nb") == 0) {
      model->nb = atoi(buf2);
    }
    else if(strcmp(buf1,"lambda") == 0) {
      model->lambda = atof(buf2);
    }
    else if(strcmp(buf1,"nu") == 0) {
      model->nu = atof(buf2);
    }
    else if(strcmp(buf1,"a") == 0) {
      model->a = atof(buf2);
    }
    else if(strcmp(buf1,"b") == 0) {
      model->b = atof(buf2);
    }
    else if(strcmp(buf1,"alpha") == 0) {
      model->dp.alpha = atof(buf2);
    }
    else if(strcmp(buf1,"rho") == 0) {
      model->hdp.rho = atof(buf2);
    }
    else if(strcmp(buf1,"gamma") == 0) {
      model->hdp.gamma = atof(buf2);
    }
    else if(strcmp(buf1,"nburn") == 0) {
      model->nburn = atoi(buf2);
    }
    else if(strcmp(buf1,"niter") == 0) {
      model->niter = atoi(buf2);
    }
    else if(strcmp(buf1,"minvar") == 0) {
      model->minvar = atof(buf2);
    }
  }
}

/*
 * Write the tab-separated header row of the per-iteration output to fpout:
 * four fixed column titles followed by one column per bait name, the last
 * of which ends the line.
 */
void makeOutHeader(FILE *fpout, DATA *data) {
  const int last = data->nbait - 1;
  int col = 0;
  fprintf(fpout, "Iteration\tloglik\tNumBaitCL\tVarUsed\t");
  while(col < last) {
    fprintf(fpout, "%s\t", data->baitName[col]);
    col++;
  }
  /* The final name is followed by a newline instead of a tab. */
  fprintf(fpout, "%s\n", data->baitName[last]);
}

/*
 * Append one tab-separated record for iteration iter to fpout.
 *
 * The record holds iter, model->curlik, nclust() applied to the bait
 * assignment vector model->dp.z, the moveType label, and then for every
 * bait the value of nclust() applied to the hdp.y row selected by that
 * bait's entry in dp.z.
 */
void writeOut(FILE *fpout, MODEL *model, DATA *data, const int iter, char *moveType) {
  int bait = 0;
  fprintf(fpout, "%d\t%.3f\t%d\t%s\t",
          iter, model->curlik, nclust(model->dp.z, data->nbait), moveType);
  while(bait < (data->nbait-1)) {
    fprintf(fpout, "%d\t", nclust(model->hdp.y[model->dp.z[bait]], data->nprey));
    bait++;
  }
  /* bait now indexes the last bait; its column terminates the line. */
  fprintf(fpout, "%d\n", nclust(model->hdp.y[model->dp.z[bait]], data->nprey));
}

/*
 * Write the file "Ering" in the current directory: a header line followed
 * by one row per ladder level giving its 1-based index, model->H[i] and
 * model->T[i].
 *
 * If the file cannot be created a message is printed to stderr and the
 * function returns without writing anything.
 */
void writeERING(MODEL *model) {
  int i;
  FILE *fper = fopen("Ering", "w");
  if(fper == NULL) {
    fprintf(stderr, "Cannot open Ering for writing.\n");
    return;
  }
  fprintf(fper, "Ring\tMinEnergy\tTemperature\n");
  for(i=0;i<model->numLadder;i++) fprintf(fper, "%d\t%.2f\t%.2f\n", i+1, model->H[i], model->T[i]);
  fclose(fper);
}

/*
 * Zero-initialised allocation that terminates the program on failure.
 * The check is a real statement, so it is still performed when assert()
 * is compiled out with NDEBUG.
 */
static void *sortBaitIP_calloc(size_t count, size_t size) {
  void *p = calloc(count, size);
  if(p == NULL) {
    fprintf(stderr, "sortBaitIP: memory allocation failed.\n");
    abort();
  }
  return p;
}

/*
 * Collapse the IP columns of data onto unique baits and derive the
 * bait-level lookup tables.
 *
 * On entry data->baitName[0..nIP-1] holds one name per IP column and
 * data->d[ip][prey] holds the observations. On return:
 *   nbait      number of distinct bait names (in order of first appearance)
 *   ip2b[ip]   bait index of IP column ip
 *   n_b2ip[b]  number of IP columns belonging to bait b
 *   b2ip[b][]  those IP column indices, ascending
 *   baitName   first nbait entries overwritten with the distinct names
 *   inter[b][] baits c (b itself included) that share at least one prey
 *              with a value > 0 in some IP of b and some IP of c
 *   ninter[b]  number of valid entries in inter[b]
 *
 * The inter lists are also written to the file "bait_lists"; if that file
 * cannot be created a warning is printed and the tables are still built.
 * All arrays are allocated here and are not freed by this function.
 */
void sortBaitIP(DATA *data) {
  int i,j,k,l,cur,hit,hit1,hit2;
  int firstIP[data->nIP];  /* firstIP[b]: first IP column carrying bait b's name */
  FILE *fpinter;

  /* Assign every IP column to a bait, creating a new bait the first time
     a name is seen. Names are compared in place, so no fixed-size copy of
     the names is needed. */
  data->ip2b = sortBaitIP_calloc(data->nIP, sizeof(int));
  cur = 0;
  for(j=0;j<data->nIP;j++) {
    for(i=0;i<cur;i++) {
      if(strcmp(data->baitName[j], data->baitName[firstIP[i]]) == 0) break;
    }
    if(i == cur) {
      firstIP[cur] = j;
      cur++;
    }
    data->ip2b[j] = i;
  }
  data->nbait = cur;

  /* Count the IP columns of each bait, then list them in ascending order. */
  data->n_b2ip = sortBaitIP_calloc(data->nbait, sizeof(int));
  for(j=0;j<data->nIP;j++) (data->n_b2ip[data->ip2b[j]])++;

  data->b2ip = sortBaitIP_calloc(data->nbait, sizeof(int *));
  for(i=0;i<data->nbait;i++) {
    data->b2ip[i] = sortBaitIP_calloc(data->n_b2ip[i], sizeof(int));
  }
  for(i=0;i<data->nbait;i++) {
    cur = 0;
    for(j=0;j<data->nIP;j++) {
      if(data->ip2b[j] == i) {
        data->b2ip[i][cur] = j;
        cur++;
      }
    }
  }

  /* Compact the distinct names to the front of baitName. firstIP[i] >= i
     and slots are written in increasing order, so a source slot is never
     overwritten before it has been copied. */
  for(i=0;i<data->nbait;i++) {
    if(firstIP[i] != i) strcpy(data->baitName[i], data->baitName[firstIP[i]]);
  }

  data->inter = sortBaitIP_calloc(data->nbait, sizeof(int *));
  for(i=0;i<data->nbait;i++) {
    data->inter[i] = sortBaitIP_calloc(data->nbait, sizeof(int));
  }
  data->ninter = sortBaitIP_calloc(data->nbait, sizeof(int));

  /* Two baits interact when some prey is positive in at least one IP of
     each. The scans stop at the first witness; the result is unchanged. */
  for(i=0;i<data->nbait;i++) {
    cur = 0;
    for(j=0;j<data->nbait;j++) {
      hit = 0;
      for(k=0;k<data->nprey && !hit;k++) {
        hit1 = 0;
        for(l=0;l<data->n_b2ip[i] && !hit1;l++) {
          if(data->d[data->b2ip[i][l]][k] > 0.0) hit1 = 1;
        }
        if(!hit1) continue;
        hit2 = 0;
        for(l=0;l<data->n_b2ip[j] && !hit2;l++) {
          if(data->d[data->b2ip[j][l]][k] > 0.0) hit2 = 1;
        }
        if(hit2) hit = 1;
      }

      if(hit) {
        data->inter[i][cur] = j;
        cur++;
      }
    }
    data->ninter[i] = cur;
  }

  fpinter = fopen("bait_lists", "w");
  if(fpinter == NULL) {
    fprintf(stderr, "Warning: cannot open bait_lists for writing.\n");
    return;
  }
  for(i=0;i<data->nbait;i++) {
    fprintf(fpinter, "%s\t", data->baitName[i]);
    for(j=0;j<data->ninter[i];j++) fprintf(fpinter, "%d\t", data->inter[i][j]);
    fprintf(fpinter, "\n");
  }
  fclose(fpinter);
}


/*
 * Append one tab-separated line to fp: the 1-based current ladder index,
 * the EEaccept and EEattempt counters, their ratio to two decimals, and
 * then the count field of every ering entry (numLadder of them).
 * The data argument is not used.
 */
void writeEE(FILE *fp, MODEL *model, DATA *data) {
  const int lastRing = model->numLadder - 1;
  const double accepted = (double) model->EEaccept;
  const double attempted = (double) model->EEattempt;
  int ring;

  fprintf(fp, "%d\t%d\t%d\t%.2f\t",
          model->curLadder+1, model->EEaccept, model->EEattempt,
          accepted / attempted);
  for(ring=0; ring<lastRing; ring++) {
    fprintf(fp, "%d\t", model->ering[ring].count);
  }
  /* Last ring closes the line. */
  fprintf(fp, "%d\n", model->ering[lastRing].count);
}

/*
 * Print model->sigma2[0..nprey-1] to stderr on a single line, each value
 * with two decimals and followed by a space.
 */
void printVAR(MODEL *model, DATA *data) {
  int prey = 0;
  while(prey < data->nprey) {
    fprintf(stderr, "%.2f ", model->sigma2[prey]);
    prey++;
  }
  fprintf(stderr, "\n");
}

/*
 * Print the first two components of each of the model->np rows of
 * model->hdp.omega to stderr as "(x y)" pairs, ten pairs per line,
 * followed by a closing newline.
 */
void printHDP(MODEL *model) {
  int row = 0;
  while(row < model->np) {
    fprintf(stderr, "(%.2f %.2f)   ", model->hdp.omega[row][0], model->hdp.omega[row][1]);
    row++;
    /* Break the line after every tenth pair. */
    if(row % 10 == 0) fprintf(stderr, "\n");
  }
  fprintf(stderr, "\n");
}



/*
 * Program entry point: nestedcluster [data] [parameter]
 *
 * Reads the data table (argv[1]) and the hyper-parameter file (argv[2]),
 * maps IP columns to unique baits, initialises the model, runs
 * model.nburn burn-in sweeps and model.niter recorded sweeps of
 * blockedGibbs(), writes one record per recorded sweep to
 * "MCMCparameters", and finally reports the retained clusterings through
 * shuffleOPTCL() and outputClustering().
 *
 * Returns 0 on success and 1 on a usage error, an unreadable input file,
 * an unwritable output file, or a readData() failure.
 */
int main(int argc, char **argv) {

  int iter;
  int nprey, nIP;
  DATA data;
  MODEL model;
  gsl_rng *r;
  FILE *fp, *fpi, *fpp, *fpout;

  if (argc != 3) {
    fprintf(stderr, "usage: nestedcluster [data] [parameter]\n");
    return 1;
  }

  /* Validate every input stream before it is used. */
  fp = fopen(argv[1], "r");
  fpi = fopen(argv[1], "r");
  if(fpi == NULL || fp == NULL) {
    fprintf(stderr, "Spectral count data %s does not exist.\n", argv[1]);
    if(fp != NULL) fclose(fp);
    if(fpi != NULL) fclose(fpi);
    return 1;
  }
  fpp = fopen(argv[2], "r");
  if(fpp == NULL) {
    fprintf(stderr, "Essential Hyper Prior Parameters must be set.\n");
    fclose(fp);
    fclose(fpi);
    return 1;
  }

  /* Table dimensions: two of the counted lines are not prey rows. */
  nprey = nrow(fpi)-2;
  /* nrow() leaves the stream at end-of-file; go back so that ncol()
     counts the tabs of the first line. */
  rewind(fpi);
  nIP = ncol(fpi);
  fclose(fpi);

  readHyperParam(fpp, &model);
  fclose(fpp);

  fpout = fopen("MCMCparameters", "w");
  if(fpout == NULL) {
    fprintf(stderr, "Cannot open MCMCparameters for writing.\n");
    fclose(fp);
    return 1;
  }

  gsl_rng_env_setup();
  r = gsl_rng_alloc(gsl_rng_default);

  data.nprey = nprey;
  data.nbait = nIP;  /* provisional; sortBaitIP() reduces it to the unique baits */
  data.nIP = nIP;

  if(readData(fp, &model, &data)) {
    gsl_rng_free(r);
    fclose(fp);
    fclose(fpout);
    return 1;
  }

  sortBaitIP(&data);
  model.nprey = data.nprey;
  model.nbait = data.nbait;
  model.nIP = data.nIP;

  fprintf(stderr, "%d IP's are mapped to %d unique baits.\n", data.nIP, data.nbait);

  model.numLadder = 1;

  initModel(&model, &data, r);
  for(iter=0;iter<_NUM_RECORDS_;iter++) initOptimalCluster(&model, &(model.opt[iter]), &data);

  /* if nbait > model->nb, then stop. */
  fprintf(stderr, "%d clusters\t%.2f\n",
                  nclust(model.dp.z, data.nbait),
                  model.curlik);

  /* Burn-in's with no energy truncation for setting energy boundaries */
  model.curLadder = 0;
  model.curlik = loglik(&model, &data, r) + logPrior(&model, &data);
  setEnergyTruncation(&model, &data);

  for(iter=0;iter<model.nburn;iter++) {
    blockedGibbs(&model, &data, r, 1);
    if((iter+1) % _PRINT_FREQ_ == 0) printSampler(&model, &data, iter);
  }
  for(iter=0;iter<model.niter;iter++) {
    blockedGibbs(&model, &data, r, 1);
    if((iter+1) % _PRINT_FREQ_ == 0) printSampler(&model, &data, iter);
    writeOut(fpout, &model, &data, iter+1, "G");
    optimalCluster(&model, &data, r);
  }
  fprintf(stderr, "Done\n");

  shuffleOPTCL(&model, &data);

  fprintf(stderr, "Main Iterations Are Finished.\n");
  outputClustering(&model, &data);

  gsl_rng_free(r);
  fclose(fp);
  fclose(fpout);
  return 0;
}

/*
 * Fill model->reportOrder with the record indices 0.._NUM_RECORDS_-1
 * arranged by decreasing model->optScores value. Records whose scores
 * compare equal are placed next to each other in increasing index order.
 * Nothing is done when _NUM_RECORDS_ is 1 or less. The data argument is
 * not used.
 */
void shuffleOPTCL(MODEL *model, DATA *data) {
  int rec, earlier, slot, nuniq;
  double negScores[_NUM_RECORDS_];
  double distinct[_NUM_RECORDS_];

  if(_NUM_RECORDS_ <= 1) return;

  /* Sort the negated scores ascending, i.e. the scores descending. */
  for(rec=0; rec<_NUM_RECORDS_; rec++) {
    negScores[rec] = -model->optScores[rec];
  }
  gsl_sort(negScores, 1, _NUM_RECORDS_);

  /* Keep a value only if no earlier sorted entry compares equal to it. */
  nuniq = 0;
  for(rec=0; rec<_NUM_RECORDS_; rec++) {
    int repeated = 0;
    for(earlier=0; earlier<rec; earlier++) {
      if(negScores[earlier] == negScores[rec]) repeated = 1;
    }
    if(!repeated) {
      distinct[nuniq] = -negScores[rec];
      nuniq++;
    }
  }

  /* For each distinct score, best first, append every record holding it. */
  slot = 0;
  for(rec=0; rec<nuniq; rec++) {
    int idx;
    for(idx=0; idx<_NUM_RECORDS_; idx++) {
      if(model->optScores[idx] == distinct[rec]) {
        model->reportOrder[slot] = idx;
        slot++;
      }
    }
  }
}


/*
 * Dump the sampler state to stderr when (iter+1) is a multiple of
 * _PRINT_FREQ_; otherwise do nothing.
 *
 * The dump lists H and T for every ladder level, the 1-based iteration
 * number with nclust() of the bait assignments and model->curlik, each
 * bait name with its 1-based dp.z value, and the printHDP() output.
 */
void printSampler(MODEL *model, DATA *data, int iter) {
  int idx;

  if((iter+1) % _PRINT_FREQ_ != 0) return;

  fprintf(stderr, "Ladder\tEnergy\tTemperature\n");
  idx = 0;
  while(idx < model->numLadder) {
    fprintf(stderr, "%d\t%.2f\t%.2f\n", idx, model->H[idx], model->T[idx]);
    idx++;
  }
  fprintf(stderr, "\n");

  fprintf(stderr, "%d\t", iter+1);
  fprintf(stderr, "%d clusters\t%.2f\n",
          nclust(model->dp.z, data->nbait),
          model->curlik);

  fprintf(stderr, "Bait\tCluster\n");
  idx = 0;
  while(idx < data->nbait) {
    fprintf(stderr, "%s\t%d\t", data->baitName[idx], model->dp.z[idx]+1);
    fprintf(stderr, "\n");
    idx++;
  }
  fprintf(stderr, "\n");

  printHDP(model);
  /* printVAR(model, data); */
  fprintf(stderr, "\n\n");
}
