/*
  conserv_sort.c -- sorting CONSERV result

    Copyright (C) 1999-2005 Naohisa Goto <ngoto@gen-info.osaka-u.ac.jp>
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
   
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <unistd.h>
#include <alloca.h>
#include "mfstlib3.h"
#include "my_malloc.h"

/* Size of the fixed line buffer used when reading input lines. */
#define LINE_BUF 1024

/* Default values of the -sort=, -sortkey=, -isort= and -isortkey= options;
   main() applies them before parsing the command line and print_usage()
   shows them in the help text. */
#define DEFAULT_SORT "+"
#define DEFAULT_SORTKEY "L-v-n-s+p+c+"
#define DEFAULT_ISORT "+"
#define DEFAULT_ISORTKEY "s+p+c+"

/* Default string inserted into output file names (-out-ext=). */
#define DEFAULT_OUT_EXT ".sorted"
/* NOTE(review): not referenced in the visible part of this file; presumably
   the documented default of -out-mixed= -- confirm. */
#define DEFAULT_OUT_MIXED "auto"

/* Default file name extensions of the position, fasta, seqinfo and
   mixed-mode files. */
#define DEFAULT_EXT_POSITION ".position"
#define DEFAULT_EXT_FST ".fst"
#define DEFAULT_EXT_SEQINFO ".seqinfo"
#define DEFAULT_EXT_MIXED ".mixed"

/* fopen() modes and the directory separator (as a character, as a string,
   and the length of that string). */
#define FOPEN_MODE_W "w"
#define FOPEN_MODE_R "r"
#define DIR_SEPARATOR '/'
#define STR_DIR_SEPARATOR "/"
#define L_STR_DIR_SEPARATOR 1

/* global variables */
/* Verbose: set by -verbose and -more-verbose; enables progress messages on
   stderr and the extended help text.
   More_Verbose: set by -more-verbose only.
   g_opt_fastawidth: number of sequence characters per line when fasta
   records are written. */
int Verbose = 0;
int More_Verbose = 0;
int g_opt_fastawidth = 70;

/* countN_t is used for counts and sequence numbers, seqLen_t for lengths
   and positions. */
typedef int countN_t;
typedef int seqLen_t;
typedef long long LONG_LONG;

/* One line of a position record, as written by print_a_position_data(). */
struct position_single {
  countN_t seqno;  /* sequence number, printed first on the line */
  int cflag;       /* 0: printed without a flag; otherwise printed as a
		      character in front of pos */
  seqLen_t pos;    /* position */
  countN_t group;  /* printed as "( g=... )" when non-zero */
  seqLen_t pos_c;  /* second position, printed only when cflag and pos_c
		      are both non-zero */
  countN_t order; /* for sorting */
};

/* One position record: the "n=... L=... v=..." header values, its name,
   the optional rest of the header line, the associated sequence and the
   array of individual positions. */
struct position_data {
  countN_t dn;     /* printed as n=; number of entries in darray */
  seqLen_t dL;     /* printed as L= */
  countN_t dv;     /* printed as v= */
  char *name;      /* printed after "---- " on the header line */
  char *rest;      /* printed right after name; may be NULL */
  SEQUENCE *seq;   /* sequence written as fasta; may be NULL */
  struct position_single *darray;
  countN_t order; /* for sorting */
};

/* One row of the table built by make_lsc_table(), indexed by dL. */
struct lsc_table_single {
  countN_t sort;   /* number of records having this dL */
  countN_t count;  /* sum of dn over the records having this dL */
};

/* Table of per-length statistics; lsc holds maxlen + 1 rows so that it can
   be indexed directly with a length in 0..maxlen. */
struct lsc_table {
  seqLen_t maxlen;
  struct lsc_table_single *lsc;
};

/* A header line kept as two strings; print_firstline_data() writes them
   back to back followed by a newline. */
struct firstline_data {
  char *str[2];
};

/* Sort configuration (global variables) */

/* Comparison function over two position records. */
typedef int (*func_compare_spd)(const struct position_data *,
				const struct position_data *);

/* Comparison functions used for sorting whole records and the order
   belonging to each of them; the arrays have two spare slots beyond
   N_COMPARE_FUNC_ARRAY. */
#define N_COMPARE_FUNC_ARRAY 7
func_compare_spd g_compare_func_array[N_COMPARE_FUNC_ARRAY + 2];
int g_compare_func_array_order[N_COMPARE_FUNC_ARRAY + 2];
/* sort order: ascend: 1 descend: -1 do not sort: 0 */
int g_sort_all_position_data_compare_order = 1;

/* Identifiers of the keys available for sorting inside a single record. */
#define SORT_INTERNAL_COMPARE_NULL 0
#define SORT_INTERNAL_COMPARE_SEQNO 1
#define SORT_INTERNAL_COMPARE_POS 2
#define SORT_INTERNAL_COMPARE_CFLAG 3
#define SORT_INTERNAL_COMPARE_ORDER 4
#define MAX_SORT_INTERNAL_COMPARE 4
#define N_SORT_INTERNAL_COMPARE 4
/* Selected internal sort keys and the order belonging to each of them. */
int g_sort_internal_compare[N_SORT_INTERNAL_COMPARE + 2];
int g_sort_internal_compare_order[N_SORT_INTERNAL_COMPARE + 2];

/* Overall order of the internal sort; set through m_init_isort(). */
int g_sort_internal_position_data_compare_order = 1;
int g_sort_internal_position_data_perfect = 0;

/* ---- forward declarations ---- */

/* length/sort/count table and header-line helpers */
struct lsc_table *make_lsc_table(struct position_data **spda, countN_t na,
 seqLen_t maxlen);
void free_lsc_table(struct lsc_table *lsct);
void print_lsc_table(FILE *fpo, struct lsc_table *lsct);
void init_firstline_data(struct firstline_data *fld);
void free_firstline_data_variables(struct firstline_data *fld);
void swap_firstline_data_c1(struct firstline_data *f0,
 struct firstline_data *f1);
void print_firstline_data(FILE *fpo, struct firstline_data *fld);
int get_firstline_data(char *buf, struct firstline_data *fld);

/* DNA mode helpers */
int delete_equal_DNA_data(struct position_data **spda, countN_t na);
int compact_arrayof_position_data(struct position_data **spda, countN_t na);

/* output */
void print_a_fasta(FILE *fpo_fst, SEQUENCE *s, int width);
void print_all_fasta_data(FILE *fpo_fst, struct position_data **spda, countN_t na);

void print_all_position_data(int mixed_o, FILE *fpo_pos,
 struct position_data **spda, countN_t na);
void print_a_position_data(int mixed_o, FILE *fpo_pos,
 struct position_data *spd);

int diff_a_position_data_only_cflag(const struct position_data *spd1,
 const struct position_data *spd2);
int diff_a_position_data_without_cflag(const struct position_data *spd1,
 const struct position_data *spd2);

/* sort order setup; the two macros select which global order variable
   init_sort_order_main() fills in */
void init_sort_order_main(const char *str, int *order);
#define m_init_sort(str) \
 init_sort_order_main(str, &g_sort_all_position_data_compare_order)
#define m_init_isort(str) \
 init_sort_order_main(str, &g_sort_internal_position_data_compare_order)

static int select_order_from_a_char(int chr_order, int *order);

void init_compare_func_array(const char *str);
int init_compare_func_array_main(const char *str, int func_max, 
 func_compare_spd *funcarray, int *funcarray_order);
static int init_compare_func_array_select(int chr_func, int chr_order,
 func_compare_spd *func, int *order);

/* comparison functions over whole records */
int compare_a_position_data_L(const struct position_data *spd1,
 const struct position_data *spd2);
int compare_a_position_data_v(const struct position_data *spd1,
 const struct position_data *spd2);
int compare_a_position_data_n(const struct position_data *spd1,
 const struct position_data *spd2);
int compare_a_position_data_seqno(const struct position_data *spd1,
 const struct position_data *spd2);
int compare_a_position_data_pos(const struct position_data *spd1,
 const struct position_data *spd2);
int compare_a_position_data_cflag(const struct position_data *spd1,
 const struct position_data *spd2);
int compare_a_position_data_keep_order(const struct position_data *spd1,
 const struct position_data *spd2);
int compare_a_position_data_null(const struct position_data *spd1,
 const struct position_data *spd2);

/* sorting of the whole record array */
static int sort_all_position_data_compare(const void *a1, const void *a2);
void sort_all_position_data(struct position_data **spda, countN_t na);
void sort_all_position_data_step2(struct position_data **spda, countN_t na);

/* reading and freeing of the whole record array */
struct position_data **read_all_position_data(countN_t *ret_na, int mixed_i,
 FILE *fpi_pos, FILE *fpi_fst, char *first_line, char **next_line);

void free_all_position_data(struct position_data **spda, countN_t na);

/* sorting inside a single record */
static int sort_internal_position_data_compare_default(const void *a1,
 const void *a2);
static int sort_internal_position_data_compare_restore_order(const void *a1,
 const void *a2);
static int sort_internal_position_data_compare(const void *a1, const void *a2);
void sort_internal_position_data(struct position_data *spd);
void sort_internal_position_data_restore_order(struct position_data *spd);
void sort_internal_position_data_default(struct position_data *spd);

static int init_sort_internal_compare_select(int chr_func, int chr_order,
 int *func, int *order);
void init_sort_internal_compare(const char *str);

/* construction, destruction and reading of a single record */
static struct position_data *make_struct_position_data(countN_t dn,
 seqLen_t dL, countN_t dv, char *name_src, char *rest_src, FILE *fpi_fst);

void free_struct_position_data(struct position_data *spd);
struct position_data *read_a_position_data(countN_t *ret_n, int mixed_i, 
 FILE *fpi_pos, FILE *fpi_fst, char *firstline);

/* string and file helpers; the _MM suffix marks functions whose result is
   allocated with my_malloc() */
static char *chomp_dup_string(const char *str);

static char *add_three_strings_MM( const char *str0, size_t len_str0,
 const char *str1, size_t len_str1, const char *str2, size_t len_str2);
static char *get_string_without_suffix_MM(const char *filename,
 const char *str_suffix);

static char *filename_basename(const char *filename);
static int select_yes_no_auto(const char *str);
static void fclose_with_check(FILE *fp);
static FILE *write_open_file_with_check(const char *fn, int force_overwrite);

int replace_mixed_separate(char *buf, size_t sizeof_buf,
 int mixed_i, int mixed_o);

/* program driver */
int main2(int dnamode, int mixed_i, int mixed_o,
 FILE *fpi_pos, FILE *fpo_pos, FILE *fpi_fst, FILE *fpo_fst,
 FILE *fpi_seqinfo, FILE *fpo_seqinfo);
void print_usage(char *argv0, FILE *fpo);

/*
 * print_usage -- write the command-line help text to fpo.
 *
 *   argv0  program path; only its basename is shown in the "Usage:" line
 *   fpo    stream that receives the text
 *
 * The advanced options and the list of sort keys are printed only when
 * the global Verbose flag is set; otherwise a one-line hint pointing to
 * "-verbose -help" is printed in their place.
 */
void print_usage(char *argv0, FILE *fpo)
{
  const char *progname = filename_basename(argv0);

  fprintf(fpo, "Usage: %s [option...] [filename]\n", progname);
  fputs("Options: \n"
        " -h | --help      print help message\n"
        " -verbose         verbose message to stderr\n"
        " -more-verbose    more verbose\n"
        " -force-overwrite overwrite output files\n"
        " -out-path=DIR    output files to DIR (default: same as input files)\n"
        " -out-basename=X  basename of output filenames\n"
        " -out-ext=X       string added to output filenames (default=.sorted)\n"
        " -input-ext=X     input file extension (default=.position)\n"
        " -mix | -mixed    mixed mode input\n"
        " -DNA             DNA mode: delete same position data\n"
        " -sort={+|-|0}    sort order of whole data.   +:ascend -:descend 0: don\'t sort\n"
        " -sortkey=XY...   sort keys(X) and orders(Y): X={L|v|n|s|p|c} Y={+|-|0}\n"
        " -isort={+|-|0}   sort order of single data.  +:ascend -:descend 0: don\'t sort\n"
        " -isortkey=XY...  sort keys(X) and orders(Y): X={s|p|c} Y={+|-|0}\n"
        " --               end of options (using if capital letter of filename is \'-\')\n",
        fpo);

  if (!Verbose) {
    fputs("(There are more options. Please do \"-verbose -help\" to see all options.)\n",
          fpo);
  } else {
    /* advanced options */
    fputs("\n"
          " -out-stdout      output to stdout\n"
          " -out-mixed={yes|no|auto} output mixed mode (default=auto)\n"
          " -ext-fst=.FST    fst file extension (default=.fst)\n"
          " -ext-seqinfo=.SEQINFO seqinfo file extension (default=.seqinfo)\n"
          " -out-ext-position=.POSITON output position file extension (default: same as input file)\n"
          " -out-ext-fst=.FST output fst file extension (default: same as -ext-fst=)\n"
          " -out-ext-seqinfo=.SEQINFO output seqinfo file extension (default: same as -ext-seqinfo=)\n"
          " -out-ext-mixed=.MIXED output mixed file extension (default=mixed or same as input file)\n",
          fpo);

    /* the key list belongs to the verbose part of the help as well */
    fputs("\n", fpo);
    fputs(" sort keys: \n"
          "   L     L(length)\n"
          "   v     v(number of hit sequences or sequence groups)\n"
          "   n     n(number of hits)\n"
          "   s     seq. No.\n"
          "   p     position\n"
          "   c     strand(complement or not)\n",
          fpo);
  }

  fputs("\n", fpo);
  fprintf(fpo, "Default sort options: \n"
               " -sort=%s -sortkey=%s -isort=%s -isortkey=%s\n",
          DEFAULT_SORT, DEFAULT_SORTKEY,
          DEFAULT_ISORT, DEFAULT_ISORTKEY);

  fputs("\n", fpo);
  fputs("Input files:   file.position and file.fst\n"
        "               or file and file.fst\n"
        "               or stdin (only for mixed mode)\n",
        fpo);
  fputs("Output files:  file.sorted.position and file.sorted.fst\n"
        "               or file.sorted and file.sorted.fst\n"
        "               or stdout (only for mixed mode)\n"
        "               (\"file\" can be changed with option -out-basename=)\n"
        "               (\".sorted\" can be changed with option -out-ext=)\n",
        fpo);
} /* end of func */

/*
 * get_string_without_suffix_MM -- strip a trailing suffix from a file name.
 *
 * Returns a newly allocated (my_malloc) copy of filename without
 * str_suffix when filename is strictly longer than str_suffix and ends
 * with it; returns NULL otherwise.  The caller owns the returned string.
 */
static char *get_string_without_suffix_MM(const char *filename,
 const char *str_suffix)
{
  const size_t name_len = strlen(filename);
  const size_t suffix_len = strlen(str_suffix);
  size_t stem_len;
  char *stem;

  /* a name that consists of nothing but the suffix does not count */
  if (name_len <= suffix_len) return NULL;

  stem_len = name_len - suffix_len;
  if (strncmp(filename + stem_len, str_suffix, suffix_len) != 0) return NULL;

  /* suffix of filename is str_suffix: copy everything in front of it */
  stem = my_malloc(stem_len + 1);
  memcpy(stem, filename, stem_len);
  stem[stem_len] = '\0';
  return stem;
} /* end of func */

/*
 * add_three_strings_MM -- concatenate three strings into a new buffer.
 *
 * len_str0..len_str2 are the lengths the caller states for str0..str2;
 * the buffer is sized from them (plus the terminator) and the result is
 * cut off there if the strings are actually longer.  The result is
 * allocated with my_malloc() and owned by the caller.
 */
static char *add_three_strings_MM(const char *str0, size_t len_str0,
 const char *str1, size_t len_str1, const char *str2, size_t len_str2)
{
  const size_t capacity = len_str0 + len_str1 + len_str2 + 1;
  char *joined = my_malloc(capacity);

  snprintf(joined, capacity, "%s%s%s", str0, str1, str2);
  return joined;
} /* end of func */

/*
 * filename_basename -- return the part of filename behind the last
 * DIR_SEPARATOR, or filename itself when it contains no separator.
 * The result points into filename; nothing is allocated.
 */
static char *filename_basename(const char *filename)
{
  const char *last_sep = strrchr(filename, DIR_SEPARATOR);

  return (char *)((last_sep != NULL) ? last_sep + 1 : filename);
} /* end of func */

/*
 * make_output_filename_MM -- build an output file name.
 *
 *   path        output directory, or NULL to keep the directory part of
 *               mainstr
 *   mainstr     base name (may include a directory part), l_mainstr its
 *               length
 *   middle_ext  string inserted between base and extension, with length
 *   ext         final extension, with length
 *
 * With path == NULL the result is mainstr + middle_ext + ext.  Otherwise
 * the directory part of mainstr is dropped and the result is
 * path + separator + basename(mainstr) + middle_ext + ext, where the
 * separator is added only when path is non-empty and does not already end
 * with STR_DIR_SEPARATOR.
 *
 * The result is allocated with my_malloc() and owned by the caller.
 */
static char *make_output_filename_MM(const char *path,
 const char *mainstr, size_t l_mainstr,
 const char *middle_ext, size_t l_middle_ext,
 const char *ext, size_t l_ext)
{
  char *result;
  char *base;
  size_t l_base;
  size_t l_path;
  size_t len_whole;
  size_t l_sep = 0;
  const char *sep = "";

  if (path == NULL) {
    return add_three_strings_MM(mainstr, l_mainstr,
                                middle_ext, l_middle_ext, ext, l_ext);
  }

  l_path = strlen(path);
  /* Test against the separator macros rather than a hard-coded '/', so
     this check stays in step with the separator that is appended. */
  if (l_path >= 1
      && (l_path < L_STR_DIR_SEPARATOR
          || strcmp(path + l_path - L_STR_DIR_SEPARATOR,
                    STR_DIR_SEPARATOR) != 0)) {
    l_sep = L_STR_DIR_SEPARATOR;
    sep = STR_DIR_SEPARATOR;
  }
  base = filename_basename(mainstr);
  l_base = strlen(base);
  len_whole = l_path + l_sep + l_base + l_middle_ext + l_ext + 1;
  result = my_malloc(len_whole);
  snprintf(result, len_whole, "%s%s%s%s%s", path, sep, base, middle_ext, ext);

  return result;
} /* end of func */

/*
 * select_yes_no_auto -- translate an option value into a tri-state.
 *
 * Returns 1 for "yes"/"YES", -1 for "no"/"NO" and 0 for "auto"/"AUTO".
 * Any other string is reported on stderr and treated as auto (0).
 */
static int select_yes_no_auto(const char *str)
/* 1: yes    -1: no    0: auto */
{
  static const struct {
    const char *word;
    int value;
  } choices[] = {
    { "yes",   1 }, { "YES",   1 },
    { "no",   -1 }, { "NO",   -1 },
    { "auto",  0 }, { "AUTO",  0 }
  };
  size_t k;

  for (k = 0; k < sizeof(choices) / sizeof(choices[0]); k++) {
    if (strcmp(str, choices[k].word) == 0) return choices[k].value;
  }

  fprintf(stderr, "Error: \"%s\": cannot understand, reset to auto\n", str);
  return 0;
} /* end of func */

/*
 * fclose_with_check -- close fp and report a failing fclose() on stderr.
 * NULL and the three standard streams are left alone.
 */
static void fclose_with_check(FILE *fp)
{
  /* nothing to do for a missing stream or a standard stream */
  if (fp == NULL || fp == stdin || fp == stdout || fp == stderr) return;

  if (fclose(fp) != 0) fprintf(stderr, "Error: fclose failed.\n");
} /* end of func */

/*
 * write_open_file_with_check -- open fn for writing.
 *
 * Unless force_overwrite is non-zero, a file that can already be opened
 * for reading is considered to exist and is not overwritten.  Returns the
 * open stream, or NULL after printing an error message to stderr.
 */
static FILE *write_open_file_with_check(const char *fn, int force_overwrite)
{
  FILE *out;

  if (!force_overwrite) {
    /* probe for an existing file by trying to read it */
    FILE *probe = fopen(fn, FOPEN_MODE_R);

    if (probe != NULL) {
      fprintf(stderr, "Error: file %s exists;"
              " if you\'d like to overwrite it,"
              " plese use --force-overwrite option.\n", fn);
      fclose(probe);
      return NULL;
    }
  }

  out = fopen(fn, FOPEN_MODE_W);
  if (out != NULL) return out;

  fprintf(stderr, "Error: cannot open file %s\n", fn);
  return NULL;
} /* end of func */

/*
 * main -- parse the command line, derive the input and output file names,
 * open the files and hand them to main2().
 *
 * The process exits with the value returned by main2(), with 1 when a
 * file cannot be opened or no input file was given, and with 2 when a
 * seqinfo file name cannot be determined.
 */
int main(int argc, char *argv[])
{
  int r;
  char *argv0;
  char *carg;
  char *fn0 = NULL, *fnb = NULL;
  size_t l_fn0 = 0, l_fnb = 0;
  char *fno_pos = NULL, *fno_fst = NULL, *fni_fst = NULL;
  FILE *fpi_pos = NULL, *fpo_pos = NULL;
  FILE *fpi_fst = NULL, *fpo_fst = NULL;
  char *fni_seqinfo = NULL, *fno_seqinfo = NULL;
  FILE *fpi_seqinfo = NULL, *fpo_seqinfo = NULL;
  const char *in_ext_pos = DEFAULT_EXT_POSITION;
  const char *out_ext_pos = NULL;
  const char *in_ext_fst = DEFAULT_EXT_FST;
  const char *out_ext_fst = NULL;
  const char *in_ext_seqinfo = DEFAULT_EXT_SEQINFO;
  const char *out_ext_seqinfo = NULL;
  const char *out_ext_mixed = NULL;
  const char *ext_sorted = DEFAULT_OUT_EXT;
  size_t l_sorted;
  const char *output_path = NULL;
  const char *out_base = NULL;
  size_t l_out_base = 0;
  int dnamode = 0;
  int mixed_i = 0;
  int mixed_o = 0;
  int out_mode_mixed = 0; /* 0:auto 1:mixed -1:normal */
  int out_stdout = 0;
  int force_overwrite = 0;
/* Option names (without the leading dash or dashes) and their lengths;
   names ending in '=' are matched as prefixes and take a value. */
#define OPTSTR_SORT "sort="
#define L_OPTSTR_SORT 5
#define OPTSTR_ISORT "isort="
#define L_OPTSTR_ISORT 6
#define OPTSTR_SORTKEY "sortkey="
#define L_OPTSTR_SORTKEY 8
#define OPTSTR_ISORTKEY "isortkey="
#define L_OPTSTR_ISORTKEY 9

#define OPTSTR_OUT_STDOUT "out-stdout"
#define L_OPTSTR_OUT_STDOUT 10
#define OPTSTR_OUT_MIXED "out-mixed="
#define L_OPTSTR_OUT_MIXED 10
#define OPTSTR_OUT_EXT "out-ext="
#define L_OPTSTR_OUT_EXT 8
#define OPTSTR_OUT_PATH "out-path="
#define L_OPTSTR_OUT_PATH 9
#define OPTSTR_OUT_BASENAME "out-basename="
#define L_OPTSTR_OUT_BASENAME 13

#define OPTSTR_EXT_FST "ext-fst="
#define L_OPTSTR_EXT_FST 8
#define OPTSTR_EXT_POSITION "input-ext="
#define L_OPTSTR_EXT_POSITION 10
#define OPTSTR_OUT_EXT_FST "out-ext-fst="
#define L_OPTSTR_OUT_EXT_FST 12
#define OPTSTR_OUT_EXT_POSITION "out-ext-position="
#define L_OPTSTR_OUT_EXT_POSITION 17
#define OPTSTR_OUT_EXT_MIXED "out-ext-mixed="
#define L_OPTSTR_OUT_EXT_MIXED 14
#define OPTSTR_EXT_SEQINFO "ext-seqinfo="
#define L_OPTSTR_EXT_SEQINFO 12
#define OPTSTR_OUT_EXT_SEQINFO "out-ext-seqinfo="
#define L_OPTSTR_OUT_EXT_SEQINFO 16

  argv0 = *argv++;
  argc--;

  /* install the default sort settings; options below may replace them */
  m_init_sort(DEFAULT_SORT);
  init_compare_func_array(DEFAULT_SORTKEY);
  m_init_isort(DEFAULT_ISORT);
  init_sort_internal_compare(DEFAULT_ISORTKEY);

  /* consume every leading argument that starts with '-' */
  while (argc > 0 && *argv[0] == '-') {
    carg = *argv++;
    argc--;
    if (strcmp(carg, "--") == 0) {
      break; /* while */
    }
    /* accept both "--option" and "-option" */
    if (strncmp(carg, "--", 2) == 0) {
      carg += 2;
    } else if (strncmp(carg, "-", 1) == 0) {
      carg += 1;
    }
    if (strcmp(carg, "DNA") == 0) {
      dnamode = 1; /* DNA mode */
    } else if (strcmp(carg, "mix") == 0 || strcmp(carg, "mixed") == 0) {
      mixed_i = 1;
    } else if (strcmp(carg, "verbose") == 0) {
      Verbose = 1; /* verbose mode */
    } else if (strcmp(carg, "more-verbose") == 0) {
      Verbose = 1; /* verbose mode */
      More_Verbose = 1; /* more verbose mode */
    } else if (strcmp(carg, "help") == 0 || strcmp(carg, "h") == 0) {
      print_usage(argv0, stdout);
      exit(0);
    } else if (strcmp(carg, "force-overwrite") == 0) {
      force_overwrite = 1; /* force overwrite output files */
    } else if (strcmp(carg, OPTSTR_OUT_STDOUT) == 0) {
      out_stdout = 1; /* force output stdout */
    } else if (strncmp(carg, OPTSTR_OUT_EXT, L_OPTSTR_OUT_EXT) == 0) {
      carg += L_OPTSTR_OUT_EXT;
      ext_sorted = carg;
    } else if (strncmp(carg, OPTSTR_OUT_MIXED, L_OPTSTR_OUT_MIXED) == 0) {
      carg += L_OPTSTR_OUT_MIXED;
      out_mode_mixed = select_yes_no_auto(carg);
    } else if (strncmp(carg, OPTSTR_OUT_PATH, L_OPTSTR_OUT_PATH) == 0) {
      carg += L_OPTSTR_OUT_PATH;
      output_path = carg;
    } else if (strncmp(carg, OPTSTR_OUT_BASENAME, L_OPTSTR_OUT_BASENAME) == 0) {
      carg += L_OPTSTR_OUT_BASENAME;
      out_base = carg;
    } else if (strncmp(carg, OPTSTR_SORTKEY, L_OPTSTR_SORTKEY) == 0) {
      carg += L_OPTSTR_SORTKEY;
      init_compare_func_array(carg);
    } else if (strncmp(carg, OPTSTR_SORT, L_OPTSTR_SORT) == 0) {
      carg += L_OPTSTR_SORT;
      m_init_sort(carg);
    } else if (strncmp(carg, OPTSTR_ISORTKEY, L_OPTSTR_ISORTKEY) == 0) {
      carg += L_OPTSTR_ISORTKEY;
      init_sort_internal_compare(carg);
    } else if (strncmp(carg, OPTSTR_ISORT, L_OPTSTR_ISORT) == 0) {
      carg += L_OPTSTR_ISORT;
      m_init_isort(carg);
    } else if (strncmp(carg, OPTSTR_OUT_EXT_FST, L_OPTSTR_OUT_EXT_FST) == 0) {
      carg += L_OPTSTR_OUT_EXT_FST;
      out_ext_fst = carg;
    } else if (strncmp(carg, OPTSTR_OUT_EXT_POSITION, L_OPTSTR_OUT_EXT_POSITION) == 0) {
      carg += L_OPTSTR_OUT_EXT_POSITION;
      out_ext_pos = carg;
    } else if (strncmp(carg, OPTSTR_OUT_EXT_MIXED, L_OPTSTR_OUT_EXT_MIXED) == 0) {
      carg += L_OPTSTR_OUT_EXT_MIXED;
      out_ext_mixed = carg;
    } else if (strncmp(carg, OPTSTR_EXT_FST, L_OPTSTR_EXT_FST) == 0) {
      carg += L_OPTSTR_EXT_FST;
      in_ext_fst = carg;
    } else if (strncmp(carg, OPTSTR_EXT_POSITION, L_OPTSTR_EXT_POSITION) == 0) {
      carg += L_OPTSTR_EXT_POSITION;
      in_ext_pos = carg;
    } else if (strncmp(carg, OPTSTR_EXT_SEQINFO, L_OPTSTR_EXT_SEQINFO) == 0) {
      carg += L_OPTSTR_EXT_SEQINFO;
      in_ext_seqinfo = carg;
    } else if (strncmp(carg, OPTSTR_OUT_EXT_SEQINFO, L_OPTSTR_OUT_EXT_SEQINFO) == 0) {
      carg += L_OPTSTR_OUT_EXT_SEQINFO;
      out_ext_seqinfo = carg;
    } else {
      fprintf(stderr, "Warning: ignored unknown option %s\n", carg);
    } /* if */
  } /* while */

  /* the first remaining argument is the input file; without one, only
     mixed mode input (read from stdin) is possible */
  if (argc < 1) {
    if (mixed_i) {
      fn0 = NULL;
    } else {
      print_usage(argv0, stdout);
      exit(1);
    }
  } else {
    fn0 = *argv;
  }

  l_sorted = strlen(ext_sorted);

  /* decide whether the output is written in mixed mode */
  switch (out_mode_mixed) {
  case 1: /* yes */
    mixed_o = 1;
    break;
  case -1: /* no */
    mixed_o = 0;
    break;
  case 0: /* auto */
  default:
    if (mixed_i) mixed_o = 1;
    break;
  } /* switch */

  /* when input and output modes differ, the extension of the main output
     file cannot be taken over from the input file */
  if (mixed_o && mixed_i == 0) {
    out_ext_pos = out_ext_mixed;
    if (out_ext_pos == NULL) out_ext_pos = DEFAULT_EXT_MIXED;
  }
  if (mixed_i && mixed_o == 0) {
    if (out_ext_pos == NULL) out_ext_pos = DEFAULT_EXT_POSITION;
  }

  /* open the input file(s) */
  if (fn0 == NULL) {
    if (Verbose) fprintf(stderr, "input(mixed) from stdin...\n");
    mixed_i = 1;
    fpi_pos = stdin;
    fpi_fst = NULL;
  } else {
    l_fn0 = strlen(fn0);
    fpi_pos = fopen(fn0, FOPEN_MODE_R);
    if (Verbose) {
      if (mixed_i) fprintf(stderr, "input file (mixed): %s\n", fn0);
      else fprintf(stderr, "input file (position): %s\n", fn0);
    }
    if (fpi_pos == NULL) {
      fprintf(stderr, "Error: cannot open file %s\n", fn0);
      exit(1);
    }

    /* fnb is the input name without its extension (or the whole name when
       it does not end in in_ext_pos); the fst and seqinfo input names are
       derived from it */
    fnb = get_string_without_suffix_MM(fn0, in_ext_pos);
    if (fnb != NULL) {
      /* suffix of fn0 is in_ext_pos */
      l_fnb = strlen(fnb);
      if (out_ext_pos == NULL) out_ext_pos = in_ext_pos;
    } else {
      fnb = my_malloc(l_fn0 + 1);
      strncpy(fnb, fn0, l_fn0 + 1);
      l_fnb = l_fn0;
      if (out_ext_pos == NULL) out_ext_pos = "";
    }
    fni_fst = add_three_strings_MM(fnb, l_fnb, "", 0,
				   in_ext_fst, strlen(in_ext_fst));
    fni_seqinfo = add_three_strings_MM(fnb, l_fnb, "", 0,
				   in_ext_seqinfo, strlen(in_ext_seqinfo));
    if (mixed_i == 0) {
      fpi_fst = fopen(fni_fst, FOPEN_MODE_R);
      if (fpi_fst == NULL) {
	fprintf(stderr, "Error: cannot open file %s\n", fni_fst);
	r = 1;
	goto ERR_EXIT;
      }
      if (Verbose) fprintf(stderr, "input file (fst): %s\n", fni_fst);
    } /* if (mixed_i == 0) */
  } /* if (fn0 == NULL) ... else ... */

  /* the output base name defaults to the input base name */
  if (out_base == NULL) {
    out_base = fnb;
    l_out_base = l_fnb;
  } else {
    l_out_base = strlen(out_base);
  }

  /* without any base name (input from stdin and no -out-basename=) the
     output can only go to stdout */
  if (out_base == NULL) {
    out_stdout = 1;
  } else {
    if (out_ext_pos == NULL) out_ext_pos = in_ext_pos;
    if (out_ext_fst == NULL) out_ext_fst = in_ext_fst;
    if (out_ext_seqinfo == NULL) out_ext_seqinfo = in_ext_seqinfo;

    fno_pos = make_output_filename_MM(output_path, out_base, l_out_base,
				      ext_sorted, l_sorted,
				      out_ext_pos, strlen(out_ext_pos));
    fno_fst = make_output_filename_MM(output_path, out_base, l_out_base,
				      ext_sorted, l_sorted,
				      out_ext_fst, strlen(out_ext_fst));
    fno_seqinfo = make_output_filename_MM(output_path, out_base, l_out_base,
					  ext_sorted, l_sorted,
					  out_ext_seqinfo,
					  strlen(out_ext_seqinfo));
  }

  /* open the output file(s); output to stdout is always mixed mode */
  if (out_stdout) {
    if (Verbose) fprintf(stderr, "output(mixed) to stdout...\n");
    fpo_pos = stdout;
    fpo_fst = NULL;
    mixed_o = 1;
  } else {
    fpo_pos = write_open_file_with_check(fno_pos, force_overwrite);
    if (fpo_pos == NULL) {
      r = 1;
      goto ERR_EXIT;
    }
    if (Verbose) {
      if (mixed_o) fprintf(stderr, "output file (mixed): %s\n", fno_pos);
      else fprintf(stderr, "output file (position): %s\n", fno_pos);
    }

    if (mixed_o == 0) {
      fpo_fst = write_open_file_with_check(fno_fst, force_overwrite);
      if (fpo_fst == NULL) {
	r = 1;
	goto ERR_EXIT;
      }
      if (Verbose) fprintf(stderr, "output file (fst): %s\n", fno_fst);
    } /* if (mixed_o == 0) */
  } /* if (out_stdout) */

  /* a seqinfo file is opened only when the mode changes: it is read for
     separate -> mixed and written for mixed -> separate */
  if (mixed_o && mixed_i == 0) {
    if (fni_seqinfo == NULL) {
      fprintf(stderr, "Error: cannot determine input seqinfo filename.\n");
      r = 2;
      goto ERR_EXIT;
    }
    fpi_seqinfo = fopen(fni_seqinfo, FOPEN_MODE_R);
    if (fpi_seqinfo == NULL) {
      fprintf(stderr, "Error: cannot open file %s\n", fni_seqinfo);
      r = 1;
      goto ERR_EXIT;
    }
    if (Verbose) fprintf(stderr, "input file (seqinfo): %s\n", fni_seqinfo);
  } /* if */
  if (mixed_i && mixed_o == 0) {
    if (fno_seqinfo == NULL) {
      fprintf(stderr, "Error: cannot determine output seqinfo filename.\n");
      r = 2;
      goto ERR_EXIT;
    }
    fpo_seqinfo = write_open_file_with_check(fno_seqinfo, force_overwrite);
    if (fpo_seqinfo == NULL) {
      r = 1;
      goto ERR_EXIT;
    }
    if (Verbose) fprintf(stderr, "output file (seqinfo): %s\n", fno_seqinfo);
  } /* if */

  r = main2(dnamode, mixed_i, mixed_o, fpi_pos, fpo_pos, fpi_fst, fpo_fst,
	    fpi_seqinfo, fpo_seqinfo);

  /* common exit, reached on success as well: close all streams (the
     standard streams are skipped by fclose_with_check) and release the
     file name strings */
 ERR_EXIT:
  fclose_with_check(fpi_pos);
  fclose_with_check(fpi_fst);
  fclose_with_check(fpo_pos);
  fclose_with_check(fpo_fst);
  fclose_with_check(fpi_seqinfo);
  fclose_with_check(fpo_seqinfo);

  if (fnb != NULL) my_free(fnb);
  if (fni_fst != NULL) my_free(fni_fst);
  if (fno_pos != NULL) my_free(fno_pos);
  if (fno_fst != NULL) my_free(fno_fst);
  if (fni_seqinfo != NULL) my_free(fni_seqinfo);
  if (fno_seqinfo != NULL) my_free(fno_seqinfo);
  exit(r);
} /* end of func */

/* Magic strings that start the line naming the file format, and their
   lengths. */
#define MAGIC_SEPARATE "% separate"
#define L_MAGIC_SEPARATE 10
#define MAGIC_MIXED "% mixed"
#define L_MAGIC_MIXED 7

/*
 * If buf starts with old_prefix, replace that prefix by new_prefix in
 * place and return 1; otherwise leave buf alone and return 0.  The result
 * never uses more than sizeof_buf bytes including the terminator; text
 * that no longer fits is cut off at the end.
 */
static int replace_magic_prefix(char *buf, size_t sizeof_buf,
 const char *old_prefix, size_t l_old,
 const char *new_prefix, size_t l_new)
{
  size_t l_tail;
  size_t n_prefix;
  size_t n_tail;

  if (strncmp(buf, old_prefix, l_old) != 0) return 0;
  if (sizeof_buf == 0) return 1; /* matched, but no room to write anything */

  l_tail = strlen(buf + l_old);
  n_prefix = (l_new < sizeof_buf - 1) ? l_new : sizeof_buf - 1;
  n_tail = sizeof_buf - 1 - n_prefix;
  if (l_tail < n_tail) n_tail = l_tail;

  /* move the tail first: source and destination may overlap, and the new
     prefix may be longer than the old one */
  memmove(buf + n_prefix, buf + l_old, n_tail);
  memcpy(buf, new_prefix, n_prefix);
  buf[n_prefix + n_tail] = '\0';
  return 1;
} /* end of func */

/*
 * replace_mixed_separate -- adapt the format line to the output mode.
 *
 *   buf         line to examine; modified in place
 *   sizeof_buf  capacity of buf in bytes
 *   mixed_i     non-zero when the input is in mixed mode
 *   mixed_o     non-zero when the output is in mixed mode
 *
 * When input and output modes differ and buf starts with the magic string
 * of the input mode, that string is replaced by the magic string of the
 * output mode.  Returns 1 if buf was rewritten, 0 otherwise.
 */
int replace_mixed_separate(char *buf, size_t sizeof_buf,
 int mixed_i, int mixed_o)
{
  if (mixed_i && mixed_o == 0) {
    /* mixed -> separate */
    return replace_magic_prefix(buf, sizeof_buf,
                                MAGIC_MIXED, L_MAGIC_MIXED,
                                MAGIC_SEPARATE, L_MAGIC_SEPARATE);
  }
  if (mixed_o && mixed_i == 0) {
    /* separate -> mixed */
    return replace_magic_prefix(buf, sizeof_buf,
                                MAGIC_SEPARATE, L_MAGIC_SEPARATE,
                                MAGIC_MIXED, L_MAGIC_MIXED);
  }
  return 0;
} /* end of func */

int main2(int dnamode, int mixed_i, int mixed_o,
 FILE *fpi_pos, FILE *fpo_pos, FILE *fpi_fst, FILE *fpo_fst,
 FILE *fpi_seqinfo, FILE *fpo_seqinfo)
{
  int c;
  char buf0[LINE_BUF];
  char *next_line = NULL;
  countN_t na = 0;
  struct position_data **spda = NULL;
  struct firstline_data fld_pos, fld_tbl;
  struct lsc_table *lsct = NULL;
  countN_t r;
  FILE *fpo_tmp;
  int mr_ch = 0;

  init_firstline_data(&fld_pos);
  init_firstline_data(&fld_tbl);

  if (fpo_seqinfo != NULL) fpo_tmp = fpo_seqinfo;
  else fpo_tmp = fpo_pos;

  if (fpi_seqinfo != NULL) {
    while (fgets(buf0, sizeof(buf0), fpi_seqinfo) != NULL) {
      if (mr_ch == 0) {
	mr_ch = replace_mixed_separate(buf0, sizeof(buf0), mixed_i, mixed_o);
      } /* if (mr_ch == 0) */
      fprintf(fpo_tmp, "%s", buf0);
    } /* while */
  } /* if */

  r = 0;
  while (1) {
    if (fgets(buf0, sizeof(buf0), fpi_pos) == NULL) goto ERR_EOF;
    r += get_firstline_data(buf0, &fld_pos);
    if (r) {
      fpo_tmp = fpo_pos;
    } else {
      if (mr_ch == 0) {
	mr_ch = replace_mixed_separate(buf0, sizeof(buf0), mixed_i, mixed_o);
      } /* if (mr_ch == 0) */
    } /* if (r) ... else ... */
    if (buf0[0] == 'n') break;
    fprintf(fpo_tmp, "%s", buf0);
  } /* while */
  if (r == 0) {
    fprintf(stderr,   "Warning: cannnot get header of data.\n");
    if (dnamode) {
      fprintf(stderr, "         option -DNA ignored\n");
      dnamode = 0;
    }
  } else if (Verbose) {
    print_firstline_data(stderr, &fld_pos);
  }

  if (Verbose) fprintf(stderr, "reading all position/fasta data...\n");
  spda = read_all_position_data(&na, mixed_i, fpi_pos, fpi_fst,
				buf0, &next_line);
  if (spda == NULL) goto ERR_READ;
  if (Verbose) fprintf(stderr, "sort all position/fasta data...\n");
  sort_all_position_data(spda, na);

  if (dnamode) {
    delete_equal_DNA_data(spda, na);
    na = compact_arrayof_position_data(spda, na);
    lsct = make_lsc_table(spda, na, spda[0]->dL);
  }

  if (Verbose) fprintf(stderr, "sort step 2 (if needed)...\n");
  sort_all_position_data_step2(spda, na);

  if (Verbose) {
    if (mixed_o) fprintf(stderr, "output all data (mixed)...\n");
    else fprintf(stderr, "output all position data...\n");
  }
  print_all_position_data(mixed_o, fpo_pos, spda, na);

  if (mixed_o == 0) {
    if (Verbose) fprintf(stderr, "output all fasta data...\n");
    print_all_fasta_data(fpo_fst, spda, na);
  }

  if (dnamode) {
    if (next_line != NULL) {
      if ((r = get_firstline_data(next_line, &fld_tbl)) == 0) {
	fprintf(fpo_pos, "%s", next_line);
      }
      my_free(next_line);
      buf0[0] = '\0';
    }
    while (r == 0) {
      fprintf(fpo_pos, "%s", buf0);
      if (fgets(buf0, sizeof(buf0), fpi_pos) == NULL) goto ERR_EOF;
      r = get_firstline_data(buf0, &fld_tbl);
    }
    swap_firstline_data_c1(&fld_pos, &fld_tbl);
    print_firstline_data(fpo_pos, &fld_tbl);
    if (Verbose) print_firstline_data(stderr, &fld_tbl);
    print_lsc_table(fpo_pos, lsct);
    free_lsc_table(lsct);
    /* dummy read */
    while (fgets(buf0, sizeof(buf0), fpi_pos) != NULL) {
      if (buf0[0] == '#' || buf0[0] == '%') {
	fprintf(fpo_pos, "%s", buf0);
	break; /* while */
      }
    } /* while */
  } else {
    if (next_line != NULL) {
      fprintf(fpo_pos, "%s", next_line);
      my_free(next_line);
    }
  }
  while ((c = getc(fpi_pos)) != EOF) putc(c, fpo_pos);

  free_firstline_data_variables(&fld_pos);
  free_firstline_data_variables(&fld_tbl);
  free_all_position_data(spda, na);
  if (Verbose) fprintf(stderr, "done.\n");
  return 0;

 ERR_EOF:
  fprintf(stderr, "Unexpected EOF\n");
  return 1;

 ERR_READ:
  fprintf(stderr, "Read error???\n");
  return 2;
} /* end of func */

/*
 * print_all_fasta_data -- write the sequences of the first na records of
 * spda to fpo_fst in fasta format.  A record without a sequence produces
 * a warning on stderr and a placeholder entry in the output.
 */
void print_all_fasta_data(FILE *fpo_fst, struct position_data **spda,
 countN_t na)
{
  countN_t idx = 0;

  while (idx < na) {
    struct position_data *entry = spda[idx++];

    if (entry->seq == NULL) {
      fprintf(stderr, "Warning: not found sequence of %s\n", entry->name);
      fprintf(fpo_fst, "> \n");
      fprintf(fpo_fst, " \n");
      continue;
    }
    print_a_fasta(fpo_fst, entry->seq, g_opt_fastawidth);
  }
} /* end of func */

/*
 * print_a_fasta -- write one sequence to fpo_fst: the sequence name on a
 * line of its own, then the sequence characters broken into lines of
 * width characters.  The last line is always terminated by a newline.
 */
void print_a_fasta(FILE *fpo_fst, SEQUENCE *s, int width)
{
  seqLen_t remaining = seq_getsize(s);
  char *residue = seq_getseq(s);
  int column = 0;

  fprintf(fpo_fst, "%s\n", seq_getname(s));

  for (; remaining > 0; remaining--, residue++) {
    fputc(*residue, fpo_fst);
    column++;
    if (column >= width) {
      /* line is full */
      fputc('\n', fpo_fst);
      column = 0;
    }
  }

  /* terminate a partly filled last line */
  if (column != 0) fputc('\n', fpo_fst);
} /* end of func */

/*
 * print_all_position_data -- write the first na records of spda to
 * fpo_pos, one after another, using print_a_position_data().
 */
void print_all_position_data(int mixed_o, FILE *fpo_pos,
 struct position_data **spda, countN_t na)
{
  countN_t idx = 0;

  while (idx < na) {
    print_a_position_data(mixed_o, fpo_pos, spda[idx]);
    idx++;
  }
} /* end of func */

void print_a_position_data(int mixed_o, FILE *fpo_pos,
 struct position_data *spd)
{
  countN_t dn;
  countN_t i;
  struct position_single *sps = NULL;

  dn = spd->dn;

  fprintf(fpo_pos, "n=%lu L=%lu v=%lu ---- %s",
	  (unsigned long)dn, (unsigned long)(spd->dL),
	  (unsigned long)(spd->dv), spd->name);
  if (spd->rest != NULL) fprintf(fpo_pos, "%s\n", spd->rest);
  else fprintf(fpo_pos, "\n");

  for (sps = spd->darray, i = 0; i < dn; i++, sps++) {
    switch (sps->cflag) {
    case 0:
      fprintf(fpo_pos, "%lu %lu", (unsigned long)(sps->seqno),
	      (unsigned long)(sps->pos));
      break;
    default:
      fprintf(fpo_pos, "%lu %c%lu", (unsigned long)(sps->seqno),
	      sps->cflag, (unsigned long)(sps->pos));
      if (sps->pos_c != 0) {
	fprintf(fpo_pos, " ( %lu%c %lu )", 
		(unsigned long)(sps->seqno),
		sps->cflag, (unsigned long)(sps->pos_c));
      }
      break;
    } /* switch */
    if (sps->group != 0) {
      fprintf(fpo_pos, " ( g=%lu )", (unsigned long)(sps->group));
    }
    fprintf(fpo_pos, "\n");
  } /* for */

  if (mixed_o) {
    if (spd->seq != NULL) {
      print_a_fasta(fpo_pos, spd->seq, g_opt_fastawidth);
    } else {
      fprintf(stderr, "Warning: not found sequence of %s\n", spd->name);
      fprintf(fpo_pos, "> \n");
    }
    fprintf(fpo_pos, "> \n");
  } /* if (mixed_o) */
  return;
} /* end of func */

/*
 * Build a table with maxlen + 1 zero-initialised rows indexed by dL.
 * For every record in spda[0..na-1], the row at index dL gets its `sort`
 * field incremented by one and its `count` field increased by the
 * record's dn.  The result is released with free_lsc_table().
 * NOTE(review): every dL is assumed to lie within 0..maxlen -- confirm
 * against the caller, nothing here checks it.
 */
struct lsc_table *make_lsc_table(struct position_data **spda, countN_t na,
 seqLen_t maxlen)
{
  struct lsc_table *table;
  struct lsc_table_single *rows;
  countN_t k;

  table = my_calloc(1, sizeof(struct lsc_table));
  table->maxlen = maxlen;
  rows = my_calloc(maxlen + 1, sizeof(struct lsc_table_single));
  table->lsc = rows;

  for (k = 0; k < na; k++) {
    const struct position_data *item = spda[k];
    struct lsc_table_single *row = &rows[item->dL];

    row->sort += 1;
    row->count += item->dn;
  }

  return table;
} /* end of func */

/* Release the row array of a table and then the table itself. */
void free_lsc_table(struct lsc_table *lsct)
{
  struct lsc_table_single *rows = lsct->lsc;

  my_free(rows);
  my_free(lsct);
} /* end of func */

/*
 * Print the rows 1..maxlen of the table whose `count` is non-zero, one
 * per line, as three tab-separated numbers: row index, `sort`, `count`.
 */
void print_lsc_table(FILE *fpo, struct lsc_table *lsct)
{
  const seqLen_t last = lsct->maxlen;
  countN_t len;

  for (len = 1; len <= last; len++) {
    const countN_t total = lsct->lsc[len].count;

    if (total == 0) {
      continue; /* nothing recorded for this index */
    }
    fprintf(fpo, "%lu\t%lu\t%lu\n", (unsigned long)len,
            (unsigned long)(lsct->lsc[len].sort),
            (unsigned long)total);
  }
} /* end of func */

/* Reset both string slots of fld to NULL. */
void init_firstline_data(struct firstline_data *fld)
{
  int slot;

  for (slot = 0; slot < 2; slot++) {
    fld->str[slot] = NULL;
  }
} /* end of func */

/*
 * Release the two strings held by fld (slot 0 first, then slot 1).
 * The structure itself is not freed and its pointers are left as is.
 */
void free_firstline_data_variables(struct firstline_data *fld)
{
  int slot;

  for (slot = 0; slot < 2; slot++) {
    my_free(fld->str[slot]);
  }
} /* end of func */

/* Exchange the second string (str[1]) of f0 and f1; str[0] is untouched. */
void swap_firstline_data_c1(struct firstline_data *f0,
 struct firstline_data *f1)
{
  char *held = f1->str[1];

  f1->str[1] = f0->str[1];
  f0->str[1] = held;
} /* end of func */

/* Write str[0] immediately followed by str[1] and a line break to fpo. */
void print_firstline_data(FILE *fpo, struct firstline_data *fld)
{
  const char *head = fld->str[0];
  const char *tail = fld->str[1];

  fprintf(fpo, "%s%s\n", head, tail);
} /* end of func */

/*
 * Split a "first line" into two freshly allocated strings stored in fld.
 *
 * buf must start with one of the known prefixes ("# location" or
 * "# len") and contain a '('.  fld->str[1] receives the text from the
 * first '(' onwards as returned by chomp_dup_string(); fld->str[0]
 * receives everything before that '('.
 *
 * Returns non-zero on success, 0 when the line is not recognised (fld is
 * then left untouched).  Previous contents of fld->str[] are overwritten
 * without being freed.
 */
int get_firstline_data(char *buf, struct firstline_data *fld)
{
  static const char *firstline_magic_strings[] = {
    "# location",
    "# len",
    NULL
  };

  size_t k;
  int known = 0;
  char *paren;
  char *tail;
  char *head;
  size_t head_len;

  if (buf[0] != '#') {
    return 0;
  }

  for (k = 0; firstline_magic_strings[k] != NULL; k++) {
    const char *magic = firstline_magic_strings[k];

    if (strncmp(magic, buf, strlen(magic)) == 0) {
      known = 1;
      break;
    }
  }
  if (!known) {
    return 0;
  }

  paren = strchr(buf, '(');
  if (paren == NULL) {
    return 0;
  }
  tail = chomp_dup_string(paren);
  if (tail == NULL) {
    return 0;
  }
  fld->str[1] = tail;

  /* copy the part in front of '(' and terminate it */
  head_len = (size_t)(paren - buf);
  head = my_malloc(head_len + 1);
  memcpy(head, buf, head_len);
  head[head_len] = '\0';
  fld->str[0] = head;

  return 1;
} /* end of func */

/* Reduce a signed value to 1, -1 or 0 according to its sign.
   The argument is evaluated more than once; pass side-effect-free
   expressions only. */
#define m_get_sign_only(d) (((d) > 0) ? 1 : (((d) < 0) ? -1 : 0))

int compare_a_position_data_cflag(const struct position_data *spd1,
 const struct position_data *spd2)
/* spd1's proportion of 'c' is +:greater; 0:eaqual; -:shorter; than spd2 */
{
  countN_t i;
  LONG_LONG d;
  countN_t dn;

  dn = (*spd1).dn;
  if (dn != (*spd2).dn) return 0; /* cannot estimate */

  d = 0;
  for (i = 0; i < dn; i++) {
    d += ((LONG_LONG)(*spd1).darray[i].cflag - (*spd2).darray[i].cflag);
  }
  if (d != 0) return m_get_sign_only(d);

  d = 0;
  for (i = 0; i < dn; i++) {
    d += ((LONG_LONG)(*spd1).darray[i].cflag * (*spd1).darray[i].pos 
	  -(LONG_LONG)(*spd2).darray[i].cflag * (*spd1).darray[i].pos);
  }
  if (d != 0) return m_get_sign_only(d);

  d = 0;
  for (i = 0; i < dn; i++) {
    d += ((LONG_LONG)(*spd1).darray[i].cflag * (*spd1).darray[i].seqno 
	  -(LONG_LONG)(*spd2).darray[i].cflag * (*spd1).darray[i].seqno);
  }
  if (d != 0) return m_get_sign_only(d);

  return 0; /* perhaps completely equal */
} /* end of func */

int compare_a_position_data_n(const struct position_data *spd1,
 const struct position_data *spd2)
{
  LONG_LONG d;

  d = (LONG_LONG)(*spd1).dn - (*spd2).dn;
  return m_get_sign_only(d);
} /* end of func */

int compare_a_position_data_L(const struct position_data *spd1,
 const struct position_data *spd2)
{
  LONG_LONG d;

  d = (LONG_LONG)(*spd1).dL - (*spd2).dL;
  return m_get_sign_only(d);
} /* end of func */

int compare_a_position_data_v(const struct position_data *spd1,
 const struct position_data *spd2)
{
  LONG_LONG d;

  d = (LONG_LONG)(*spd1).dv - (*spd2).dv;
  return m_get_sign_only(d);
} /* end of func */

int compare_a_position_data_seqno(const struct position_data *spd1,
 const struct position_data *spd2)
{
  countN_t i;
  LONG_LONG d;
  countN_t dn;

  if ((*spd1).dn != (*spd2).dn) return 0; /* cannot estimate */

  dn = (*spd1).dn; /* note that (*spd1).dn == (*spd2).dn now */
  for (i = 0; i < dn; i++) {
    d = (LONG_LONG)(*spd1).darray[i].seqno - (*spd2).darray[i].seqno;
    if (d != 0) return m_get_sign_only(d);
  }

  return 0; /* seems equal */
} /* end of func */

int compare_a_position_data_pos(const struct position_data *spd1,
 const struct position_data *spd2)
{
  countN_t i;
  LONG_LONG d;
  countN_t dn;

  if ((*spd1).dn != (*spd2).dn) return 0; /* cannot estimate */

  dn = (*spd1).dn; /* note that (*spd1).dn == (*spd2).dn now */
  for (i = 0; i < dn; i++) {
    d = (LONG_LONG)(*spd1).darray[i].pos - (*spd2).darray[i].pos;
    if (d != 0) return m_get_sign_only(d);
  }

  return 0; /* seems equal */
} /* end of func */

int compare_a_position_data_keep_order(const struct position_data *spd1,
 const struct position_data *spd2)
{
  LONG_LONG d;

  d = (LONG_LONG)(*spd1).order - (*spd2).order;
  return m_get_sign_only(d);
} /* end of func */

/* Comparator that treats every pair as equal; both arguments are ignored. */
int compare_a_position_data_null(const struct position_data *spd1,
 const struct position_data *spd2)
{
  (void)spd1;
  (void)spd2;
  return 0;
} /* end of func */

/*
 * Translate an order character into a multiplier stored in *order:
 * '+' gives 1, '-' gives -1, '0' gives 0.
 * Returns 0 for these three characters; any other character stores 0
 * in *order and returns 1.
 */
static int select_order_from_a_char(int chr_order, int *order)
{
  if (chr_order == '+') {
    *order = 1;
    return 0;
  }
  if (chr_order == '-') {
    *order = -1;
    return 0;
  }

  *order = 0;
  return (chr_order == '0') ? 0 : 1;
} /* end of func */

/*
 * Set *order from the first character of str ('+', '-' or '0').
 * When that character is not understood, an error is reported on stderr
 * and *order is set to 1.
 */
void init_sort_order_main(const char *str, int *order)
{
  int parsed;

  if (select_order_from_a_char(*str, &parsed) == 0) {
    *order = parsed;
    return;
  }

  fprintf(stderr, "Error: \"%s\": cannot understand.\n", str);
  *order = 1;
} /* end of func */

/*
 * Fill the global comparator list from a sort-key string.
 *
 * The string is parsed by init_compare_func_array_main(), which may use
 * at most N_COMPARE_FUNC_ARRAY - 1 slots.  When no key was accepted,
 * g_sort_all_position_data_compare_order is set to 0.  Otherwise the
 * 'o' (keep original order) key with '+' is appended as the final
 * tie-breaker and the list is re-terminated after it.
 * NOTE(review): writes index r + 1 of both global arrays -- confirm
 * they are declared with room for that.
 */
void init_compare_func_array(const char *str)
{
  const int used = init_compare_func_array_main(str,
                                                N_COMPARE_FUNC_ARRAY - 1,
                                                g_compare_func_array,
                                                g_compare_func_array_order);

  if (used == 0) {
    g_sort_all_position_data_compare_order = 0;
    return;
  }

  init_compare_func_array_select('o', '+', &(g_compare_func_array[used]),
                                 &(g_compare_func_array_order[used]));
  g_compare_func_array[used + 1] = NULL;
  g_compare_func_array_order[used + 1] = 0;
} /* end of func */

/*
 * Map a sort-key character and an order character to a comparator and a
 * multiplier.
 *
 * Keys: 'n', 'L', 'v', 's' (seqno), 'p' (pos), 'c' (cflag), 'o' (order).
 * Returns 0 on success, 1 when the order character is not accepted by
 * select_order_from_a_char() (*func is already set in that case), and 2
 * when the key is unknown (*func and *order are left untouched).
 */
static int init_compare_func_array_select(int chr_func, int chr_order,
 func_compare_spd *func, int *order)
{
  static const struct {
    int key;
    func_compare_spd compare;
  } known_keys[] = {
    { 'n', compare_a_position_data_n },
    { 'L', compare_a_position_data_L },
    { 'v', compare_a_position_data_v },
    { 's', compare_a_position_data_seqno },
    { 'p', compare_a_position_data_pos },
    { 'c', compare_a_position_data_cflag },
    { 'o', compare_a_position_data_keep_order }
  };
  const size_t n_keys = sizeof(known_keys) / sizeof(known_keys[0]);
  size_t k;

  for (k = 0; k < n_keys; k++) {
    if (known_keys[k].key == chr_func) break;
  }
  if (k == n_keys) {
    return 2; /* unknown sort key */
  }
  *func = known_keys[k].compare;

  return (select_order_from_a_char(chr_order, order) != 0) ? 1 : 0;
} /* end of func */

/*
 * Parse a sort-key string made of (key, order) character pairs and store
 * the selected comparators and their multipliers in funcarray /
 * funcarray_order.
 *
 * Pairs whose order is '0' are accepted but not stored.  Invalid pairs
 * are reported on stderr and skipped.  Parsing stops after func_max
 * stored entries, at the end of the string, or when a key has no order
 * character; any unparsed remainder is reported as ignored.
 *
 * The arrays are terminated with NULL / 0 at the returned index, so they
 * need room for func_max + 1 entries.
 * Returns the number of stored entries.
 */
int init_compare_func_array_main(const char *str, int func_max, 
func_compare_spd *funcarray, int *funcarray_order)
{
  int used = 0;

  while (used < func_max && *str != '\0') {
    const int key_chr = *str++;
    const int order_chr = *str;
    func_compare_spd func;
    int order;
    int status;

    if (order_chr == '\0') {
      fprintf(stderr, "Error: missing \'+\' or \'-\' or \'0\'.\n");
      break;
    }
    str++;

    status = init_compare_func_array_select(key_chr, order_chr,
                                            &func, &order);
    if (status == 0) {
      if (order != 0) {
        funcarray[used] = func;
        funcarray_order[used] = order;
        used += 1;
      }
    } else if (status == 1) {
      fprintf(stderr, "Error: \'%c\': \'+\' or \'-\' or \'0\' required.\n", order_chr);
    } else if (status == 2) {
      fprintf(stderr, "Error: \'%c\': unknown sort key.\n", key_chr);
    } else {
      fprintf(stderr, "Error: internal: unknown error\n");
    }
  }

  if (*str != '\0') {
    fprintf(stderr, "Warning: ignore \"%s\".\n", str);
  }

  funcarray[used] = NULL;
  funcarray_order[used] = 0;
  return used;
} /* end of func */

int diff_a_position_data_without_cflag(const struct position_data *spd1,
 const struct position_data *spd2)
{
  int d;

  d = compare_a_position_data_L(spd1, spd2);
  if (d != 0) return d;
  d = compare_a_position_data_v(spd1, spd2);
  if (d != 0) return d;
  d = compare_a_position_data_n(spd1, spd2);
  if (d != 0) return d;
  d = compare_a_position_data_seqno(spd1, spd2);
  if (d != 0) return d;
  d = compare_a_position_data_pos(spd1, spd2);
  if (d != 0) return d;

  return 0; /* seems equal */
} /* end of func */

/* Negated result of compare_a_position_data_cflag(spd1, spd2). */
int diff_a_position_data_only_cflag(const struct position_data *spd1,
 const struct position_data *spd2)
{
  const int by_cflag = compare_a_position_data_cflag(spd1, spd2);

  return -by_cflag;
} /* end of func */




/*
 * qsort() callback for an array of `struct position_data *`.
 * Walks the NULL-terminated global comparator list; the first comparator
 * that reports a difference decides, its result being multiplied by the
 * per-key order and by the global order.  Returns 0 when no comparator
 * finds a difference.
 */
static int sort_all_position_data_compare(const void *a1, const void *a2)
{
  const struct position_data *lhs = *(struct position_data * const *)a1;
  const struct position_data *rhs = *(struct position_data * const *)a2;
  int k;

  for (k = 0; g_compare_func_array[k] != NULL; k++) {
    const int verdict = g_compare_func_array[k](lhs, rhs);

    if (verdict != 0) {
      return verdict * g_compare_func_array_order[k] * g_sort_all_position_data_compare_order;
    }
  }

  return 0; /* perhaps completely equal */
} /* end of func */

/*
 * Second pass over the elements inside every record.
 *
 *   internal order == 0, global order != 0 : restore the original
 *                                            element order
 *   internal order != 0, global order == 0 : sort with the configured
 *                                            internal keys
 *   internal order != 0, global order != 0 : sort with the configured
 *                                            internal keys unless the
 *                                            "perfect" flag is set
 *   otherwise                              : nothing to do
 */
void sort_all_position_data_step2(struct position_data **spda, countN_t na)
{
  void (*fixup)(struct position_data *) = NULL;
  countN_t k;

  if (g_sort_internal_position_data_compare_order == 0) {
    if (g_sort_all_position_data_compare_order != 0) {
      fixup = sort_internal_position_data_restore_order;
    }
  } else if (g_sort_all_position_data_compare_order == 0 ||
             g_sort_internal_position_data_perfect == 0) {
    fixup = sort_internal_position_data;
  }

  if (fixup == NULL) {
    return;
  }
  for (k = 0; k < na; k++) {
    fixup(spda[k]);
  }
} /* end of func */

/*
 * Sort the record array.
 *
 * When the global order is 0 the records keep their positions; only the
 * elements inside each record are sorted, and only if the internal
 * order is non-zero.
 *
 * Otherwise the elements inside each record are sorted first -- with the
 * default comparison when the internal order is 0 or the "perfect" flag
 * is 0, with the configured internal keys otherwise -- and then the
 * record array is sorted with qsort() and the global comparator list.
 */
void sort_all_position_data(struct position_data **spda, countN_t na)
{
  void (*prepare)(struct position_data *);
  countN_t k;

  if (g_sort_all_position_data_compare_order == 0) {
    if (g_sort_internal_position_data_compare_order != 0) {
      for (k = 0; k < na; k++) {
        sort_internal_position_data(spda[k]);
      }
    }
    return;
  }

  if (g_sort_internal_position_data_compare_order == 0 ||
      g_sort_internal_position_data_perfect == 0) {
    prepare = sort_internal_position_data_default;
  } else {
    prepare = sort_internal_position_data;
  }
  for (k = 0; k < na; k++) {
    prepare(spda[k]);
  }

  qsort(spda, na, sizeof(struct position_data *),
        sort_all_position_data_compare);
} /* end of func */

/*
 * Remove one record of each adjacent pair that describes the same data
 * on opposite strands.  spda must be sorted!!
 *
 * Two neighbours are candidates when diff_a_position_data_without_cflag()
 * reports 0.  The sequence of the first is copied, converted with
 * seq_convertall_reverse() and the CONVERT_COMPLEMENT_DNA table, and
 * compared with the sequence of the second.  When they match,
 * diff_a_position_data_only_cflag() decides which record is freed; its
 * slot in spda is set to NULL (use compact_arrayof_position_data()
 * afterwards to close the gaps).
 *
 * Returns 0 normally, -1 when the scan had to stop because a sequence was
 * missing or could not be duplicated (records deleted so far stay
 * deleted).
 *
 * The local pointer to a freed record is cleared as well, so that the
 * next iteration skips it instead of reading freed memory.  As a result
 * the record following a deleted one is not compared with its
 * predecessor.
 */
int delete_equal_DNA_data(struct position_data **spda, countN_t na)
{
  countN_t i;
  int r;
  int d;
  struct position_data *spd0 = NULL, *spd1 = NULL;
  SEQUENCE *s;

  if (na < 1) return 0; /* nothing to compare; do not touch spda[0] */

  for (i = 1, spd1 = spda[0]; i < na; i++) {
    spd0 = spd1;
    spd1 = spda[i];
    if (spd0 == NULL || spd1 == NULL) continue; /* for */
    d = diff_a_position_data_without_cflag(spd0, spd1);
    if (d == 0) {
      if (spd0->seq == NULL || spd1->seq == NULL) {
        fprintf(stderr, "Warning: cannot get sequence; delete stopped.\n");
        return -1;
      }
      s = seq_open();
      r = seq_duplicate(spd0->seq, s);
      if (r != 0) {
        seq_close(s);
        fprintf(stderr, "Warning: cannot get memory; delete stopped\n");
        return -1;
      }
      seq_convertall_reverse(s, get_convert_table(CONVERT_COMPLEMENT_DNA));
      if (strcmp(seq_getseq(s), seq_getseq(spd1->seq)) != 0) {
        fprintf(stderr, "Warning: sequence data not match; not delete both\n");
      } else {
        d = diff_a_position_data_only_cflag(spd0, spd1);
        if (d > 0) {
          if (More_Verbose) fprintf(stderr, "Delete %s (== %s)\n",
                                    spd1->name, spd0->name);
          free_struct_position_data(spd1);
          spda[i] = NULL;
          spd1 = NULL; /* must not become spd0 of the next round */
        } else if (d < 0) {
          if (More_Verbose) fprintf(stderr, "Delete %s (== %s)\n",
                                    spd0->name, spd1->name);
          free_struct_position_data(spd0);
          spda[i - 1] = NULL;
        } else { /* d == 0 */
          fprintf(stderr, "Warning: position data completely equal??; not delete both.\n");
        } /* if (d) */
      } /* if (strcmp...) */
      seq_close(s);
    } /* if (d == 0) */
  } /* for */

  return 0;
} /* end of func */

/*
 * Move the non-NULL entries of spda[0..na-1] to the front, keeping their
 * relative order.  Returns the number of entries kept; slots beyond that
 * count keep whatever they held before.
 */
int compact_arrayof_position_data(struct position_data **spda, countN_t na)
{
  countN_t rd;
  countN_t wr = 0;

  for (rd = 0; rd < na; rd++) {
    struct position_data *item = spda[rd];

    if (item == NULL) {
      continue;
    }
    spda[wr] = item;
    wr++;
  }

  return wr;
} /* end of func */

/*
 * Free the first `na` records of spda with free_struct_position_data()
 * and then the array itself.  Every one of those slots is passed on
 * as is, so none of them may be NULL.
 */
void free_all_position_data(struct position_data **spda, countN_t na)
{
  countN_t idx = 0;

  while (idx < na) {
    free_struct_position_data(spda[idx]);
    idx++;
  }
  my_free(spda);
} /* end of func */

/*
 * Read consecutive records (lines starting with 'n') from fpi_pos.
 *
 * first_line, when not NULL, is used as the first input line instead of
 * reading one from fpi_pos.  Each record is read by
 * read_a_position_data(); a record that fails to parse is reported on
 * stderr and skipped.  Reading stops at the first line that does not
 * start with 'n'; a copy of that line is stored in *next_line (allocated
 * with my_malloc, NULL when no such line was reached).
 *
 * On success returns a newly allocated array of records in input order,
 * each with its `order` field set to its index, and stores the count in
 * *ret_na.  On failure returns NULL and stores -1 in *ret_na when the
 * input ended while reading, or -2 when no record was read; all records
 * collected so far are freed.
 */
struct position_data **read_all_position_data(countN_t *ret_na, int mixed_i,
 FILE *fpi_pos, FILE *fpi_fst, char *first_line, char **next_line)
{
  /* records are collected on a singly linked stack, newest on top */
  struct spd_list {
    struct position_data *spd;
    struct spd_list *before;
  };
  char line[LINE_BUF];
  struct spd_list *top = NULL;
  struct spd_list *node;
  struct position_data **result = NULL;
  struct position_data *item;
  countN_t status = 0;
  countN_t count = 0;
  countN_t k;
  size_t need;

  *next_line = NULL;

  if (first_line != NULL) {
    strncpy(line, first_line, sizeof(line));
    line[sizeof(line) - 1] = '\0';
  } else if (fgets(line, sizeof(line), fpi_pos) == NULL) {
    goto ERR_EOF;
  }

  while (line[0] == 'n') {
    item = read_a_position_data(&status, mixed_i, fpi_pos, fpi_fst, line);
    if (item == NULL) {
      fprintf(stderr, "read error (code %d)\n", status);
    } else {
      node = my_malloc(sizeof(struct spd_list));
      node->spd = item;
      node->before = top;
      top = node;
      count += 1;
    }
    if (fgets(line, sizeof(line), fpi_pos) == NULL) goto ERR_EOF;
  }

  /* hand the first non-record line back to the caller */
  need = strlen(line) + 1;
  *next_line = my_malloc(need);
  memcpy(*next_line, line, need);

  if (count <= 0) goto ERR_NODATA;

  /* unwind the stack from the back so the array is in input order */
  result = my_malloc(sizeof(struct position_data *) * count);
  for (k = count - 1; k >= 0; k--) {
    node = top;
    result[k] = node->spd;
    result[k]->order = k;
    top = node->before;
    my_free(node);
  }

  *ret_na = count;
  return result;

 ERR_EOF:
  *ret_na = -1;
  goto ABNORMAL_END;
 ERR_NODATA:
  *ret_na = -2;
  goto ABNORMAL_END;

 ABNORMAL_END:
  while (top != NULL) {
    node = top;
    free_struct_position_data(node->spd);
    top = node->before;
    my_free(node);
  }
  if (result != NULL) my_free(result);
  return NULL;
} /* end of func */

static int sort_internal_position_data_compare_restore_order(const void *a1,
 const void *a2)
{
  struct position_single *sps1, *sps2;
  LONG_LONG d;

  sps1 = (struct position_single *)a1;
  sps2 = (struct position_single *)a2;

  d = (LONG_LONG)sps1->order - sps2->order;
  return m_get_sign_only(d);
} /* end of func */

static int sort_internal_position_data_compare_default(const void *a1,
 const void *a2)
{
  struct position_single *sps1, *sps2;
  LONG_LONG d;

  sps1 = (struct position_single *)a1;
  sps2 = (struct position_single *)a2;

  d = (LONG_LONG)sps1->seqno - sps2->seqno;
  if (d != 0) return m_get_sign_only(d);
  d = (LONG_LONG)sps1->pos - sps2->pos;
  if (d != 0) return m_get_sign_only(d);
  d = (LONG_LONG)sps1->cflag - sps2->cflag;
  if (d != 0) return m_get_sign_only(d);
  d = (LONG_LONG)sps1->order - sps2->order;
  return m_get_sign_only(d);
} /* end of func */

static int sort_internal_position_data_compare(const void *a1, const void *a2)
{
  struct position_single *sps1, *sps2;
  LONG_LONG d[MAX_SORT_INTERNAL_COMPARE + 1];
  int i, j;

  sps1 = (struct position_single *)a1;
  sps2 = (struct position_single *)a2;

  d[SORT_INTERNAL_COMPARE_SEQNO] = (LONG_LONG)sps1->seqno - sps2->seqno;
  d[SORT_INTERNAL_COMPARE_POS] = (LONG_LONG)sps1->pos - sps2->pos;
  d[SORT_INTERNAL_COMPARE_CFLAG] = sps1->cflag - sps2->cflag;
  d[SORT_INTERNAL_COMPARE_ORDER] = (LONG_LONG)sps1->order - sps2->order;

  i = 0;
  while ((j = g_sort_internal_compare[i]) != SORT_INTERNAL_COMPARE_NULL) {
    if (d[j] != 0) return m_get_sign_only(d[j]) * g_sort_internal_compare_order[i];
    i++;
  } /* for */

  return 0;
} /* end of func */

/*
 * Map an internal sort-key character and an order character to a key id
 * and a multiplier.
 *
 * Keys: 's' (seqno), 'p' (pos), 'c' (cflag), 'o' (order).
 * Returns 0 on success, 1 when the order character is not accepted by
 * select_order_from_a_char(), and 2 when the key is unknown (*func is
 * then set to SORT_INTERNAL_COMPARE_NULL and *order is left untouched).
 */
static int init_sort_internal_compare_select(int chr_func, int chr_order,
 int *func, int *order)
{
  int key;

  if (chr_func == 's') {
    key = SORT_INTERNAL_COMPARE_SEQNO;
  } else if (chr_func == 'p') {
    key = SORT_INTERNAL_COMPARE_POS;
  } else if (chr_func == 'c') {
    key = SORT_INTERNAL_COMPARE_CFLAG;
  } else if (chr_func == 'o') {
    key = SORT_INTERNAL_COMPARE_ORDER;
  } else {
    *func = SORT_INTERNAL_COMPARE_NULL;
    return 2;
  }
  *func = key;

  return (select_order_from_a_char(chr_order, order) != 0) ? 1 : 0;
} /* end of func */

/*
 * Configure the global internal key list from a string of (key, order)
 * character pairs.
 *
 * Pairs whose order is '0' are accepted but not stored; invalid pairs
 * are reported on stderr and skipped.  At most N_SORT_INTERNAL_COMPARE
 * keys are stored; an unparsed remainder is reported as ignored.  When
 * no key was stored, g_sort_internal_position_data_compare_order is set
 * to 0.  The `order` key (ascending) is always appended as the last
 * tie-breaker, followed by the terminator.
 *
 * Finally g_sort_internal_position_data_perfect is set to 1 when the
 * first three keys are seqno, pos, cflag or pos, seqno, cflag, and to 0
 * otherwise.
 * NOTE(review): writes index i + 1 of both global arrays -- confirm they
 * are declared with room for N_SORT_INTERNAL_COMPARE + 2 entries.
 */
void init_sort_internal_compare(const char *str)
{
  int used = 0;
  int perfect = 0;

  while (used < N_SORT_INTERNAL_COMPARE && *str != '\0') {
    const int key_chr = *str++;
    const int order_chr = *str;
    int key;
    int order;
    int status;

    if (order_chr == '\0') {
      fprintf(stderr, "Error: missing \'+\' or \'-\' or \'0\'.\n");
      break;
    }
    str++;

    status = init_sort_internal_compare_select(key_chr, order_chr,
                                               &key, &order);
    if (status == 0) {
      if (order != 0) {
        g_sort_internal_compare[used] = key;
        g_sort_internal_compare_order[used] = order;
        used += 1;
      }
    } else if (status == 1) {
      fprintf(stderr, "Error: \'%c\': \'+\' or \'-\' or \'0\' required.\n", order_chr);
    } else if (status == 2) {
      fprintf(stderr, "Error: \'%c\': unknown sort key.\n", key_chr);
    } else {
      fprintf(stderr, "Error: internal: unknown error\n");
    }
  }

  if (*str != '\0') {
    fprintf(stderr, "Warning: ignore \"%s\".\n", str);
  }

  if (used == 0) {
    g_sort_internal_position_data_compare_order = 0;
  }

  /* final tie-breaker and terminator */
  g_sort_internal_compare[used] = SORT_INTERNAL_COMPARE_ORDER;
  g_sort_internal_compare_order[used] = 1;
  g_sort_internal_compare[used + 1] = SORT_INTERNAL_COMPARE_NULL;
  g_sort_internal_compare_order[used + 1] = 0;

  /* "perfect": {seqno, pos} in either order, followed by cflag */
  if (g_sort_internal_compare[0] == SORT_INTERNAL_COMPARE_SEQNO ||
      g_sort_internal_compare[0] == SORT_INTERNAL_COMPARE_POS) {
    const int other =
      (g_sort_internal_compare[0] == SORT_INTERNAL_COMPARE_SEQNO)
      ? SORT_INTERNAL_COMPARE_POS : SORT_INTERNAL_COMPARE_SEQNO;

    if (g_sort_internal_compare[1] == other &&
        g_sort_internal_compare[2] == SORT_INTERNAL_COMPARE_CFLAG) {
      perfect = 1;
    }
  }
  g_sort_internal_position_data_perfect = perfect;
} /* end of func */

void sort_internal_position_data(struct position_data *spd)
{
  qsort(spd->darray, spd->dn, sizeof(struct position_single),
	sort_internal_position_data_compare);
  return;
} /* end of func */

void sort_internal_position_data_restore_order(struct position_data *spd)
{
  qsort(spd->darray, spd->dn, sizeof(struct position_single),
	sort_internal_position_data_compare_restore_order);
  return;
} /* end of func */

void sort_internal_position_data_default(struct position_data *spd)
{
  qsort(spd->darray, spd->dn, sizeof(struct position_single),
	sort_internal_position_data_compare_default);
  return;
} /* end of func */

/*
 * Return a my_malloc'ed copy of str without its final '\n' (if any).
 * Returns NULL when the resulting string would be empty.
 */
static char *chomp_dup_string(const char *str)
{
  size_t keep = strlen(str);
  char *copy;

  /* drop one trailing line break */
  if (keep > 0 && str[keep - 1] == '\n') {
    keep--;
  }
  if (keep == 0) {
    return NULL;
  }

  copy = my_malloc(keep + 1);
  memcpy(copy, str, keep);
  copy[keep] = '\0';
  return copy;
} /* end of func */

/*
 * Allocate a record and fill in its header fields.
 *
 *   dn, dL, dv : stored as given; darray gets dn zero-initialised
 *                elements
 *   name_src   : copied into spd->name
 *   rest_src   : when not NULL, stored via chomp_dup_string() (which
 *                yields NULL for an empty remainder)
 *   fpi_fst    : when not NULL, one sequence is read from it with
 *                get_fasta(); on failure an error is reported and
 *                spd->seq stays NULL
 *
 * The `order` field is not set here.  The record is released with
 * free_struct_position_data().
 *
 * The length check is made only when a sequence was actually read;
 * the earlier code also called seq_getsize() on the NULL pointer left
 * behind by a failed read.
 */
static struct position_data *make_struct_position_data(countN_t dn,
 seqLen_t dL, countN_t dv, char *name_src, char *rest_src, FILE *fpi_fst)
{
  struct position_data *spd;
  size_t l;
  int r;

  spd = my_malloc(sizeof(struct position_data));
  spd->dn = dn;
  spd->dL = dL;
  spd->dv = dv;
  spd->darray = my_calloc(dn, sizeof(struct position_single));

  l = strlen(name_src) + 1;
  spd->name = my_malloc(l);
  memcpy(spd->name, name_src, l);

  spd->rest = NULL;
  if (rest_src != NULL) {
    spd->rest = chomp_dup_string(rest_src);
  }

  spd->seq = NULL;
  if (fpi_fst != NULL) {
    spd->seq = seq_open();
    r = get_fasta(spd->seq, fpi_fst);
    if (r != 0) {
      seq_close(spd->seq);
      spd->seq = NULL;
      fprintf(stderr, "Error: cannot get fasta sequence.\n");
    } else if (seq_getsize(spd->seq) != spd->dL) {
      fprintf(stderr, "Warning: sequence length not match.\n");
    }
  } /* if */

  return spd;
} /* end of func */

/*
 * Release one record: its sequence, element array, name and rest string
 * (each only when present), then the record itself.  spd must not be
 * NULL.
 */
void free_struct_position_data(struct position_data *spd)
{
  if (spd->seq != NULL) {
    seq_close(spd->seq);
  }
  if (spd->darray != NULL) {
    my_free(spd->darray);
  }
  if (spd->name != NULL) {
    my_free(spd->name);
  }
  if (spd->rest != NULL) {
    my_free(spd->rest);
  }
  my_free(spd);
} /* end of func */

/*
 * Read one record.
 *
 * The header line is taken from `firstline` when it is not NULL,
 * otherwise it is read from fpi_pos.  It consists of "<key>=<number>"
 * items (keys 'n', 'L' and 'v' are stored, others are ignored), then one
 * separator token, then the record name (at most 127 characters); the
 * remainder of the line is kept as the record's `rest`.
 *
 * After the header, n data lines are read from fpi_pos, each of the form
 *   <seqno> [c]<pos> [ ( <seqno>c <pos_c> ) ] [ ... g=<group> ... ]
 * When fpi_fst is not NULL the sequence is read from it while the record
 * is created; when mixed_i is non-zero the sequence is read from fpi_pos
 * after the data lines, followed by one more line that is discarded.
 *
 * Returns the new record and stores n in *ret_n.  On failure returns
 * NULL and stores -2 (syntax error) or -1 (unexpected end of input) in
 * *ret_n; a partially built record is freed.
 *
 * Robustness notes:
 *   - the header scan never steps past the terminating NUL;
 *   - a number is used only when sscanf() really converted one, so the
 *     %n count is always valid when it is added to the read position;
 *   - blanks are skipped with isspace() instead of a " %n" scan, so no
 *     stale %n count can be applied;
 *   - in mixed mode a sequence already attached to the record is closed
 *     before it is replaced.
 */
struct position_data *read_a_position_data(countN_t *ret_n, int mixed_i,
 FILE *fpi_pos, FILE *fpi_fst, char *firstline)
{
  int r;
  int c;
  countN_t n;
  countN_t dn, dv;
  seqLen_t dl;
  countN_t strno, gid;
  int cflag;
  seqLen_t pos;
  seqLen_t pos_c;
  countN_t strno_c;
  unsigned long t0;
  int tn0;
  char tk0[LINE_BUF];
  char tk1[128];
  char *p;
  char *p2;
  struct position_data *spd = NULL;
  struct position_single *sps;

  if (firstline == NULL) {
    if (fgets(tk0, sizeof(tk0), fpi_pos) == NULL) goto ERR_EOF;
    p = tk0;
  } else {
    p = firstline;
  }

  /* reading "n=? L=? v=? ----" */
  dn = 0, dl = 0, dv = 0;
  while (1) {
    c = (unsigned char)*p;
    if (c == '\0') break; /* line ended before the name */
    p++;
    if (*p != '=') {
      /* not a "key=" item: skip the rest of this token and stop */
      if (*p != '\0') p++;
      while (*p != '\0' && isspace((unsigned char)*p) == 0) p++;
      while (isspace((unsigned char)*p)) p++;
      break; /* while */
    }
    p++; /* skip '=' */
    tn0 = 0;
    r = sscanf(p, "%lu%n", &t0, &tn0);
    if (r >= 1) {
      p += tn0;
      switch (c) {
      case 'n':
        dn = t0;
        break;
      case 'L':
        dl = t0;
        break;
      case 'v':
        dv = t0;
        break;
      default:
        break;
      }
    }
    while (isspace((unsigned char)*p)) p++;
  } /* while (1) */

  /* reading "No_???" */
  tn0 = 0;
  r = sscanf(p, "%127s%n", tk1, &tn0);
  if (r <= 0) goto ERR_SYNTAX; /* data syntax error? */
  p += tn0;
  if (More_Verbose) fprintf(stderr, "%s\n", tk1); /* for DEBUG */

  /* allocate data area */
  if (dn <= 0 || dl <= 0) goto ERR_SYNTAX; /* data syntax error? */
  spd = make_struct_position_data(dn, dl, dv, tk1, p, fpi_fst);
  sps = spd->darray;

  /* reading data */
  for (n = 0; n < dn; n++) {
    if (fgets(tk0, sizeof(tk0), fpi_pos) == NULL) goto ERR_EOF;
    p = tk0;
    r = sscanf(p, " %lu%n", &t0, &tn0);
    if (r <= 0) goto ERR_SYNTAX; /* data syntax error? */
    p += tn0;
    strno = t0;
    while (isspace((unsigned char)*p)) p++;
    if (*p == 'c') {
      cflag = *p;
      p++;
    } else {
      cflag = 0;
    }
    r = sscanf(p, "%lu%n", &t0, &tn0);
    if (r <= 0) goto ERR_SYNTAX; /* data syntax error? */
    p += tn0;
    pos = t0;
    while (isspace((unsigned char)*p)) p++;

    /* read "( ?c ????" */
    pos_c = 0;
    if (*p == '(') {
      p++;
    }
    r = sscanf(p, " %lu%n", &t0, &tn0);
    if (r > 0) do {
      strno_c = t0;
      p += tn0;
      if (strno_c != strno) {
        fprintf(stderr, "Warning: sequence No. not match.\n");
        break; /* do */
      }
      if (*p != 'c') {
        fprintf(stderr, "Warning: unknown data syntax?\n");
        break; /* do */
      } else p++;
      r = sscanf(p, " %lu%n", &t0, &tn0);
      if (r <= 0) {
        fprintf(stderr, "Warning: unknown data syntax?\n");
        break; /* do */
      } else {
        p += tn0;
        pos_c = t0;
      }
    } while (0); /* if (r > 0) do */

    /* search "g=?" */
    gid = 0;
    p2 = strstr(p, "g=");
    if (p2 != NULL) {
      p2 += 2; /* strlen("g=") */
      r = sscanf(p2, "%lu", &t0);
      if (r >= 1) gid = t0;
    }

    sps->seqno = strno;
    sps->pos = pos;
    sps->cflag = cflag;
    sps->group = gid;
    sps->pos_c = pos_c;
    sps->order = n;
    sps++;
  } /* for (n) */

  /* when mixed, read fasta sequence from fpi_pos */
  if (mixed_i) {
    if (spd->seq != NULL) {
      /* already read from fpi_fst: do not leak it */
      seq_close(spd->seq);
      spd->seq = NULL;
    }
    spd->seq = seq_open();
    r = get_fasta(spd->seq, fpi_pos);
    if (r != 0) {
      seq_close(spd->seq);
      spd->seq = NULL;
      fprintf(stderr, "Error: cannot get fasta sequence.\n");
      goto ERR_EOF;
    }
    if (seq_getsize(spd->seq) != spd->dL) {
      fprintf(stderr, "Warning: sequence length not match.\n");
    }
    /* dummy read last '> ' line */
    if (fgets(tk0, sizeof(tk0), fpi_pos) == NULL) goto ERR_EOF;
  } /* if (mixed_i) */

  /* normal ending */
  *ret_n = dn;
  return spd;

 ERR_SYNTAX:
  if (spd != NULL) free_struct_position_data(spd);
  *ret_n = -2; /* syntax error */
  return NULL;
 ERR_EOF:
  if (spd != NULL) free_struct_position_data(spd);
  *ret_n = -1; /* unexpected EOF */
  return NULL;
} /* end of func */

