/* Identification string for this file.  "$Id$" looks like an RCS/CVS keyword
   placeholder, presumably expanded by the version-control system on
   checkout -- TODO confirm. */
static char rcsid[] = "$Id$";
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "mdprint.h"
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#include "mem.h"
#include "complement.h"
#include "assert.h"


/* debug(x) expands to x when DEBUG is defined and to nothing otherwise, so
   statements wrapped in debug(...) are compiled out unless DEBUG is set. */
#ifdef DEBUG
#define debug(x) x
#else
#define debug(x)
#endif


/* Complement lookup table with 128 entries, indexed by character code in
   make_complement_buffered.  Initialized from COMPLEMENT_LC, which is defined
   outside this file (presumably in complement.h -- TODO confirm). */
static char complCode[128] = COMPLEMENT_LC;

/* Writes the reverse complement of sequence[0..length-1] into complement,
   followed by a terminating '\0'.

   complement: caller-provided buffer with room for at least length+1 chars.
   sequence:   input characters; only the first length chars are read.
   length:     number of characters to process.

   Each character is translated through complCode.  The character is
   converted to unsigned char before indexing so that a negative plain char
   cannot produce a negative index, and any byte whose code lies outside the
   table is copied through unchanged rather than reading past the end of
   complCode. */
static void
make_complement_buffered (char *complement, char *sequence, unsigned int length) {
  unsigned int j;
  unsigned char c;

  for (j = 0; j < length; j++) {
    /* Walk the input from its last character to its first */
    c = (unsigned char) sequence[length - 1 - j];
    complement[j] = (c < sizeof(complCode)) ? complCode[c] : (char) c;
  }
  complement[length] = '\0';
}


/* Emits the MD-string pieces for one plus-strand segment and returns the
   match count still pending after the segment.

   printp:               set to true whenever something is written to fp.
   nmismatches_refdiff:  incremented by the number of non-uppercase positions
                         of genomicfwd_refdiff inside the processed range
                         (unchanged when genomicfwd_refdiff is NULL).
   nmismatches_bothdiff: incremented by the number of non-uppercase positions
                         of genomicfwd_bothdiff inside the processed range
                         (unchanged when genomicfwd_bothdiff is NULL).
   fp:                   output destination handed to FPRINTF.
   matchlength:          run of matches carried in from the caller.
   genomicfwd_refdiff:   characters for this segment, where uppercase means
                         a match and anything else a mismatch; NULL means
                         every position in range counts as a match.
   genomicfwd_bothdiff:  companion string; read only when
                         md_lowercase_variant_p is true or when it differs
                         from genomicfwd_refdiff.
   stringlength, querypos, querylength, hardclip_low, hardclip_high:
                         used to restrict the processed index range
                         [starti, endi) to the part not hard-clipped.
   md_lowercase_variant_p: when true, a position that mismatches in
                         genomicfwd_refdiff but is uppercase in
                         genomicfwd_bothdiff is printed unchanged (lower
                         case); all other mismatches are printed upper case.
   lastp:                when true, a pending positive match count is
                         written out and 0 is returned; otherwise the
                         pending count is returned for the next call.

   Characters are converted to unsigned char before being passed to
   isupper/toupper, because those functions are undefined for negative
   values other than EOF. */
static int
print_md_string_plus (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdiff,
                      Filestring_T fp, int matchlength, char *genomicfwd_refdiff, char *genomicfwd_bothdiff,
                      int stringlength, int querypos, int querylength,
                      int hardclip_low, int hardclip_high, bool md_lowercase_variant_p, bool lastp) {
  int starti, endi, i;
  int local_nmismatches = 0;
  int refc;
  bool hardclip_end_p = false;

  /* The strings may be NULL, which must not be handed to %s */
  debug(printf("\nEntering print_md_string_plus with matchlength %d, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus: %s ref, %s both\n",
               matchlength,stringlength,querypos,querylength,hardclip_low,hardclip_high,
               genomicfwd_refdiff ? genomicfwd_refdiff : "(null)",
               genomicfwd_bothdiff ? genomicfwd_bothdiff : "(null)"));

  /* Lower bound of the range.  hardclip_end_p forces the match count to be
     printed before the first mismatch even when that count is 0. */
  if (hardclip_low == 0) {
    debug(printf("  (plus) hardclip_low is 0, so setting starti to be 0\n"));
    starti = 0;
    hardclip_end_p = true;
  } else if (hardclip_low > querypos) {
    /* startpos = hardclip_low; */
    starti = hardclip_low - querypos;
    hardclip_end_p = true;
    debug(printf("  (plus) Setting starti %d = hardclip_low %d - querypos %d\n",
                 starti,hardclip_low,querypos));
  } else {
    debug(printf("  (plus) hardclip_low %d <= querypos %d, so setting starti to be 0\n",
                 hardclip_low,querypos));
    /* startpos = querypos; */
    starti = 0;
  }

  /* Upper bound of the range */
  if (querylength - hardclip_high < querypos + stringlength) {
    /* endpos = querylength - hardclip_high; */
    endi = (querylength - hardclip_high) - querypos;
    debug(printf("  (plus) Setting endi %d = (querylength %d - hardclip_high %d) - querypos %d\n",
                 endi,querylength,hardclip_high,querypos));
  } else {
    /* endpos = querypos + stringlength; */
    endi = stringlength;
  }

  debug(printf("  Counting matches from %d to %d\n",starti,endi));

  if (genomicfwd_refdiff == NULL) {
    /* No mismatch information: the whole range counts as matches */
    if (endi > starti) {
      matchlength += (endi - starti);
    }

  } else {
    for (i = starti; i < endi; i++) {
      refc = (unsigned char) genomicfwd_refdiff[i];
      if (isupper(refc)) {
        matchlength++;

      } else {
        /* Flush the pending run of matches before the mismatch */
        if (matchlength > 0 || hardclip_end_p == true) {
          FPRINTF(fp,"%d",matchlength);
          hardclip_end_p = false;
        }

        if (md_lowercase_variant_p == true &&
            isupper((unsigned char) genomicfwd_bothdiff[i])) {
          /* A mismatch against the reference only => alternate variant */
          FPRINTF(fp,"%c",refc); /* Leave as lower case */
        } else {
          /* A true mismatch against both variants */
          FPRINTF(fp,"%c",toupper(refc));
        }
        *printp = true;
        local_nmismatches += 1;
        matchlength = 0;
      }
    }
    *nmismatches_refdiff += local_nmismatches;
  }


  /* Update nmismatches_bothdiff */
  if (genomicfwd_bothdiff == NULL) {
    /* No change to nmismatches_bothdiff */
  } else if (genomicfwd_bothdiff == genomicfwd_refdiff) {
    *nmismatches_bothdiff += local_nmismatches;
  } else {
    for (i = starti; i < endi; i++) {
      if (!isupper((unsigned char) genomicfwd_bothdiff[i])) {
        *nmismatches_bothdiff += 1;
      }
    }
  }

  debug(printf("  Ending with matchlength %d\n",matchlength));

  if (lastp == false) {
    return matchlength;
  } else if (matchlength > 0) {
    FPRINTF(fp,"%d",matchlength);
    *printp = true;
    return 0;
  } else {
    return 0;
  }
}


/* Emits the MD-string pieces for one minus-strand segment and returns the
   match count still pending after the segment.

   printp:               set to true whenever something is written to fp.
   nmismatches_refdiff:  incremented by the number of non-uppercase positions
                         of genomicfwd_refdiff inside the processed range
                         (unchanged when genomicfwd_refdiff is NULL).
   nmismatches_bothdiff: incremented by the number of non-uppercase positions
                         of genomicfwd_bothdiff inside the processed range
                         (unchanged when genomicfwd_bothdiff is NULL).
   fp:                   output destination handed to FPRINTF.
   matchlength:          run of matches carried in from the caller.
   genomicfwd_refdiff:   characters for this segment, where uppercase means
                         a match and anything else a mismatch; NULL means
                         every position in range counts as a match.
   genomicfwd_bothdiff:  companion string; read only when
                         md_lowercase_variant_p is true or when it differs
                         from genomicfwd_refdiff.
   stringlength:         number of positions in the segment; the processed
                         range is always [0, stringlength) here.
   querypos, querylength, hardclip_low, hardclip_high:
                         querypos is recomputed relative to the other end of
                         the query, but the hard-clip trimming that would use
                         it is disabled with "0 &&" (see the notes in the
                         body), so only hardclip_low == 0 has an effect.
   md_lowercase_variant_p: when true, a position that mismatches in
                         genomicfwd_refdiff but is uppercase in
                         genomicfwd_bothdiff is printed unchanged (lower
                         case); all other mismatches are printed upper case.
   lastp:                when true, a pending positive match count is
                         written out and 0 is returned; otherwise the
                         pending count is returned for the next call.

   Characters are converted to unsigned char before being passed to
   isupper/toupper, because those functions are undefined for negative
   values other than EOF. */
static int
print_md_string_minus (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdiff,
                       Filestring_T fp, int matchlength, char *genomicfwd_refdiff, char *genomicfwd_bothdiff,
                       int stringlength, int querypos, int querylength,
                       int hardclip_low, int hardclip_high, bool md_lowercase_variant_p, bool lastp) {
  int starti, endi, i;
  int local_nmismatches = 0;
  int refc;
  bool hardclip_end_p = false;

  /* The strings may be NULL, which must not be handed to %s */
  debug(printf("\nEntering print_md_string_minus with matchlength %d, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus: %s ref, %s both\n",
               matchlength,stringlength,querypos,querylength,hardclip_low,hardclip_high,
               genomicfwd_refdiff ? genomicfwd_refdiff : "(null)",
               genomicfwd_bothdiff ? genomicfwd_bothdiff : "(null)"));
  querypos = querylength - querypos - stringlength;
  debug(printf("  Revising querypos to be %d\n",querypos));

  /* hardclip_end_p forces the match count to be printed before the first
     mismatch even when that count is 0. */
  if (hardclip_low == 0) {
    starti = 0;
    hardclip_end_p = true;
  } else if (0 && hardclip_low > querypos) {
    /* Intentionally disabled */
    /* ? Leads to a zero-length MD string in the minus case */
    /* startpos = hardclip_low; */
    starti = hardclip_low - querypos;
    hardclip_end_p = true;
    debug(printf("  (minus) Setting starti %d = hardclip_low %d - querypos %d\n",
                 starti,hardclip_low,querypos));
  } else {
    /* startpos = querypos; */
    starti = 0;
  }

  if (0 && querylength - hardclip_high < querypos + stringlength) {
    /* Intentionally disabled */
    /* ? Leads to a zero-length MD string in the minus case */
    /* endpos = querylength - hardclip_high; */
    endi = (querylength - hardclip_high) - querypos;
    debug(printf("  (minus) Setting endi %d = (querylength %d - hardclip_high %d) - querypos %d\n",
                 endi,querylength,hardclip_high,querypos));
  } else {
    /* endpos = querypos + stringlength; */
    endi = stringlength;
  }

  debug(printf("  Counting matches from %d to %d\n",starti,endi));

  if (genomicfwd_refdiff == NULL) {
    /* No mismatch information: the whole range counts as matches */
    if (endi > starti) {
      matchlength += (endi - starti);
    }

  } else {
    for (i = starti; i < endi; i++) {
      refc = (unsigned char) genomicfwd_refdiff[i];
      if (isupper(refc)) {
        matchlength++;

      } else {
        /* Flush the pending run of matches before the mismatch */
        if (matchlength > 0 || hardclip_end_p == true) {
          FPRINTF(fp,"%d",matchlength);
          hardclip_end_p = false;
        }

        if (md_lowercase_variant_p == true &&
            isupper((unsigned char) genomicfwd_bothdiff[i])) {
          /* A mismatch against the reference only => alternate variant */
          FPRINTF(fp,"%c",refc); /* Leave as lower case */
        } else {
          /* A true mismatch against both variants */
          FPRINTF(fp,"%c",toupper(refc));
        }
        *printp = true;
        local_nmismatches += 1;
        matchlength = 0;
      }
    }
    *nmismatches_refdiff += local_nmismatches;
  }

  /* Update nmismatches_bothdiff */
  if (genomicfwd_bothdiff == NULL) {
    /* No change to nmismatches_bothdiff */
  } else if (genomicfwd_bothdiff == genomicfwd_refdiff) {
    *nmismatches_bothdiff += local_nmismatches;
  } else {
    for (i = starti; i < endi; i++) {
      if (!isupper((unsigned char) genomicfwd_bothdiff[i])) {
        *nmismatches_bothdiff += 1;
      }
    }
  }

  debug(printf("  Ending with matchlength %d\n",matchlength));

  if (lastp == false) {
    return matchlength;
  } else if (matchlength > 0) {
    FPRINTF(fp,"%d",matchlength);
    *printp = true;
    return 0;
  } else {
    return 0;
  }
}


/* Prints the MD-string contribution of one substring and returns the match
   count left pending for the next call.

   The strand (plusp) selects print_md_string_plus or print_md_string_minus.
   When Substring_genomic_bothdiff returns NULL, the printer is called with
   NULL strings.  On the plus strand the strings are passed in place, offset
   by substring_start.  On the minus strand substring_length characters
   starting at substring_start are first reverse-complemented into temporary
   buffers obtained with MALLOCA and released with FREEA; a single buffer is
   shared when the refdiff and bothdiff strings are the same pointer.

   All remaining arguments are forwarded unchanged, with substring_start
   serving as the printer's querypos. */
int
MD_print_substring (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdiff,
                    int matchlength, Filestring_T fp, Substring_T substring,
                    int substring_length, int substring_start, int querylength,
                    int hardclip_low, int hardclip_high, bool md_lowercase_variant_p,
                    bool plusp, bool lastp) {
  char *bothdiff, *refdiff;
  char *rc_refdiff, *rc_bothdiff;

  bothdiff = Substring_genomic_bothdiff(substring);

  if (plusp == true) {
    if (bothdiff == NULL) {
      debug(printf("(+1) Calling print_md_string\n"));
      return print_md_string_plus(printp,nmismatches_refdiff,nmismatches_bothdiff,fp,matchlength,
                                  /*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
                                  substring_length,/*querypos*/substring_start,querylength,
                                  hardclip_low,hardclip_high,md_lowercase_variant_p,lastp);
    }

    refdiff = Substring_genomic_refdiff(substring);
    debug(printf("(+2) Calling print_md_string\n"));
    return print_md_string_plus(printp,nmismatches_refdiff,nmismatches_bothdiff,fp,matchlength,
                                refdiff + substring_start,bothdiff + substring_start,
                                substring_length,/*querypos*/substring_start,querylength,
                                hardclip_low,hardclip_high,md_lowercase_variant_p,lastp);
  }

  /* Minus */
  if (bothdiff == NULL) {
    debug(printf("(-1) Calling print_md_string\n"));
    return print_md_string_minus(printp,nmismatches_refdiff,nmismatches_bothdiff,
                                 fp,matchlength,/*genomicfwd_refdiff*/NULL,/*genomicfwd_bothdiff*/NULL,
                                 substring_length,/*querypos*/substring_start,querylength,
                                 hardclip_low,hardclip_high,md_lowercase_variant_p,lastp);
  }

  refdiff = Substring_genomic_refdiff(substring);
  if (refdiff == bothdiff) {
    /* One string serves as both refdiff and bothdiff */
    rc_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
    make_complement_buffered(rc_refdiff,refdiff + substring_start,substring_length);
    debug(printf("(-2) Calling print_md_string\n"));
    matchlength = print_md_string_minus(printp,nmismatches_refdiff,nmismatches_bothdiff,
                                        fp,matchlength,rc_refdiff,/*genomicfwd_bothdiff*/rc_refdiff,
                                        substring_length,/*querypos*/substring_start,querylength,
                                        hardclip_low,hardclip_high,md_lowercase_variant_p,lastp);
    FREEA(rc_refdiff);

  } else {
    /* Distinct strings: complement each into its own buffer */
    rc_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
    rc_bothdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
    make_complement_buffered(rc_refdiff,refdiff + substring_start,substring_length);
    make_complement_buffered(rc_bothdiff,bothdiff + substring_start,substring_length);
    debug(printf("(-3) Calling print_md_string\n"));
    matchlength = print_md_string_minus(printp,nmismatches_refdiff,nmismatches_bothdiff,
                                        fp,matchlength,rc_refdiff,rc_bothdiff,
                                        substring_length,/*querypos*/substring_start,querylength,
                                        hardclip_low,hardclip_high,md_lowercase_variant_p,lastp);
    FREEA(rc_bothdiff);
    FREEA(rc_refdiff);
  }

  return matchlength;
}

