/* locate -- search databases for filenames that match patterns
   Copyright (C) 1994 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* Usage: locate [options] pattern...

   Scan a pathname list for the full pathname of a file, given only
   a piece of the name (possibly containing shell globbing metacharacters).
   The list has been processed with front-compression, which reduces
   the list size by a factor of 4-5.
   Recognizes two database formats, old and new.  The old format is
   bigram coded, which reduces space by a further 20-25% and uses the
   following encoding of the database bytes:

   0-28		likeliest differential counts + offset (14) to make nonnegative
   30		escape code for out-of-range count to follow in next halfword
   128-255 	bigram codes (the 128 most common, as determined by `updatedb')
   32-127  	single character (printable) ASCII remainder

   Uses a novel two-tiered string search technique:

   First, match a metacharacter-free subpattern and a partial pathname
   BACKWARDS to avoid full expansion of the pathname list.
   The time savings is 40-50% over forward matching, which cannot efficiently
   handle overlapped search patterns and compressed path remainders.

   Then, match the actual shell glob-style regular expression (if in this form)
   against the candidate pathnames using the slower shell filename
   matching routines.

   Described more fully in Usenix ;login:, Vol 8, No 1,
   February/March, 1983, p. 8.

   Written by James A. Woods <jwoods@adobe.com>.
   Modified by David MacKenzie <djm@gnu.ai.mit.edu>.  */

#include <config.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <time.h>
#include <fnmatch.h>
#include <getopt.h>

#define NDEBUG
#include <assert.h>

#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
#include <string.h>
#else
#include <strings.h>
#define strchr index
#endif

#ifdef STDC_HEADERS
#include <stdlib.h>
#else
char *getenv ();
#endif

#ifdef STDC_HEADERS
#include <errno.h>
#include <stdlib.h>
#else
extern int errno;
#endif

#include "locatedb.h"

typedef enum {false, true} boolean;

/* Warn if a database is older than this.  8 days allows for a weekly
   update that takes up to a day to perform.  */
#define WARN_SECONDS (60 * 60 * 24 * 8)

/* Printable version of WARN_SECONDS.  */
#define WARN_MESSAGE "8 days"

char *next_element ();
char *xmalloc ();
char *xrealloc ();
void error ();

/* Read in a 16-bit int, high byte first (network byte order).  */

static int
get_short (fp)
     FILE *fp;
{
  register short x;

  x = fgetc (fp);
  return (x << 8) | (fgetc (fp) & 0xff);
}

/* Return a pointer to the last character in a static copy of the last
   glob-free subpattern in NAME,
   with '\0' prepended for a fast backwards pre-match.  */

static char *
last_literal_end (name)
     char *name;
{
  static char *globfree = NULL;	/* A copy of the subpattern in NAME.  */
  static size_t gfalloc = 0;	/* Bytes allocated for `globfree'.  */
  register char *subp;		/* Return value.  */
  register char *p;		/* Search location in NAME.  */

  /* Find the end of the subpattern.
     Skip trailing metacharacters and [] ranges. */
  for (p = name + strlen (name) - 1; p >= name && strchr ("*?]", *p) != NULL;
       p--)
    {
      if (*p == ']')
	while (p >= name && *p != '[')
	  p--;
    }
  if (p < name)
    p = name;

  if (p - name + 3 > gfalloc)
    {
      gfalloc = p - name + 3 + 64; /* Room to grow.  */
      globfree = xrealloc (globfree, gfalloc);
    }
  subp = globfree;
  *subp++ = '\0';

  /* If the pattern has only metacharacters, make every path match the
     subpattern, so it gets checked the slow way.  */
  if (p == name && strchr ("?*[]", *p) != NULL)
    *subp++ = '/';
  else
    {
      char *endmark;
      /* Find the start of the metacharacter-free subpattern.  */
      for (endmark = p; p >= name && strchr ("]*?", *p) == NULL; p--)
	;
      /* Copy the subpattern into globfree.  */
      for (++p; p <= endmark; )
	*subp++ = *p++;
    }
  *subp-- = '\0';		/* Null terminate, though it's not needed.  */

  return subp;
}


#if 0
/* thayer: cut because it ran 50% slower than original code */
/* Compare two characters.  If ignore_case is true then 'a' == 'A' */
static int 
char_case_cmp(a, b, ignore_case)
     char a, b;
     int ignore_case;
{
  return (a == b 
	  || (ignore_case && (tolower(a) == tolower(b))));
}
#endif

/* Compare two characters.  If ic (ignore_case) is true then 'a' == 'A' */
/* BEWARE OF SIDE-EFFECTS */
#define char_case_cmp(a,b,ic)((a)==(b)||((ic)&&(((a)-(b)=='a'-'A')||(b)-(a)=='a'-'A')))

/* Print the entries in DBFILE that match shell globbing pattern PATHPART.
   Return the number of entries printed.  */

static int
locate (pathpart, dbfile, ignore_case)
     char *pathpart, *dbfile;
     int ignore_case;
{
  /* The pathname database.  */
  FILE *fp;
  /* An input byte.  */
  int c;
  /* Number of bytes read from an entry.  */
  int nread;

  /* true if PATHPART contains globbing metacharacters.  */
  boolean globflag;
  /* The end of the last glob-free subpattern in PATHPART.  */
  char *patend;

  /* The current input database entry.  */
  char *path;
  /* Amount allocated for it.  */
  size_t pathsize;

  /* The length of the prefix shared with the previous database entry.  */
  int count = 0;
  /* Where in `path' to stop the backward search for the last character
     in the subpattern.  Set according to `count'.  */
  char *cutoff;

  /* true if we found a fast match (of patend) on the previous path.  */
  boolean prev_fast_match = false;
  /* The return value.  */
  int printed = 0;

  /* true if reading a bigram-encoded database.  */
  boolean old_format = false;
  /* For the old database format,
     the first and second characters of the most common bigrams.  */
  char bigram1[128], bigram2[128];

  /* To check the age of the database.  */
  struct stat st;
  time_t now;

  if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
    {
      error (0, errno, "%s", dbfile);
      return 0;
    }
  time(&now);
  if (now - st.st_mtime > WARN_SECONDS)
    {
      error (0, 0, "warning: database `%s' is more than %s old",
	     dbfile, WARN_MESSAGE);
    }

  pathsize = 1026;		/* Increased as necessary by getstr.  */
  path = xmalloc (pathsize);

  nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
  if (nread != sizeof (LOCATEDB_MAGIC)
      || memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
    {
      int i;
      /* Read the list of the most common bigrams in the database.  */
      fseek (fp, 0, 0);
      for (i = 0; i < 128; i++)
	{
	  bigram1[i] = getc (fp);
	  bigram2[i] = getc (fp);
	}
      old_format = true;
    }

  globflag = strchr (pathpart, '*') || strchr (pathpart, '?')
    || strchr (pathpart, '[');

  patend = last_literal_end (pathpart);

  c = getc (fp);
  while (c != EOF)
    {
      register char *s;		/* Scan the path we read in.  */

      if (old_format)
	{
	  /* Get the offset in the path where this path info starts.  */
	  if (c == LOCATEDB_OLD_ESCAPE)
	    count += getw (fp) - LOCATEDB_OLD_OFFSET;
	  else
	    count += c - LOCATEDB_OLD_OFFSET;

	  /* Overlay the old path with the remainder of the new.  */
	  for (s = path + count; (c = getc (fp)) > LOCATEDB_OLD_ESCAPE;)
	    if (c < 0200)
	      *s++ = c;		/* An ordinary character.  */
	    else
	      {
		/* Bigram markers have the high bit set. */
		c &= 0177;
		*s++ = bigram1[c];
		*s++ = bigram2[c];
	      }
	  *s-- = '\0';
	}
      else
	{
	  if (c == LOCATEDB_ESCAPE)
	    count += get_short (fp);
	  else if (c > 127)
	    count += c - 256;
	  else
	    count += c;

	  /* Overlay the old path with the remainder of the new.  */
	  nread = getstr (&path, &pathsize, fp, '\0', count);
	  if (nread < 0)
	    break;
	  c = getc (fp);
	  s = path + count + nread - 2; /* Move to the last char in path.  */
	  assert (s[0] != '\0');
	  assert (s[1] == '\0'); /* Our terminator.  */
	  assert (s[2] == '\0'); /* Added by getstr.  */
	}

      /* If the previous path matched, scan the whole path for the last
	 char in the subpattern.  If not, the shared prefix doesn't match
	 the pattern, so don't scan it for the last char.  */
      cutoff = prev_fast_match ? path : path + count;

      /* Search backward starting at the end of the path we just read in,
	 for the character at the end of the last glob-free subpattern
	 in PATHPART.  */
      for (prev_fast_match = false; s >= cutoff; s--)
	/* Fast first char check. */
	if (char_case_cmp(*s, *patend, ignore_case))
	  {
	    char *s2;		/* Scan the path we read in. */
	    register char *p2;	/* Scan `patend'.  */

	    for (s2 = s - 1, p2 = patend - 1; 
		 *p2 != '\0' && char_case_cmp(*s2, *p2, ignore_case);
		 s2--, p2--)
	      ;
	    if (*p2 == '\0')
	      {
		/* Success on the fast match.  Compare the whole pattern
		   if it contains globbing characters.  */
		prev_fast_match = true;
		if (globflag == false || fnmatch (pathpart, path, 0) == 0)
		  {
		    puts (path);
		    ++printed;
		  }
		break;
	      }
	  }
    }

  if (ferror (fp))
    {
      error (0, errno, "%s", dbfile);
      return 0;
    }
  if (fclose (fp) == EOF)
    {
      error (0, errno, "%s", dbfile);
      return 0;
    }

  return printed;
}

extern char *version_string;

/* The name this program was run with. */
char *program_name;

static void
usage (stream, status)
     FILE *stream;
     int status;
{
  fprintf (stream, "\
Usage: %s [-d path] [--database=path] [-i] [--ignore-case] [--version] [--help] pattern...\n",
	   program_name);
  exit (status);
}

static struct option const longopts[] =
{
  {"database", required_argument, NULL, 'd'},
  {"ignore-case", no_argument, NULL, 'i'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, no_argument, NULL, 0}
};

void
main (argc, argv)
     int argc;
     char **argv;
{
  char *dbpath;
  int found = 0, optc, ignore_case = 0;

  program_name = argv[0];

  dbpath = getenv ("LOCATE_PATH");
  if (dbpath == NULL)
    dbpath = LOCATE_DB;

  while ((optc = getopt_long (argc, argv, "id:", longopts, (int *) 0)) != -1)
    switch (optc)
      {
      case 'd':
	dbpath = optarg;
	break;

      case 'i':
	ignore_case++;
	break;

      case 'h':
	usage (stdout, 0);

      case 'v':
	printf ("GNU locate version %s\n", version_string);
	exit (0);

      default:
	usage (stderr, 1);
      }

  if (optind == argc)
    usage (stderr, 1);

  for (; optind < argc; optind++)
    {
      char *e;
      next_element (dbpath);	/* Initialize.  */
      while ((e = next_element ((char *) NULL)) != NULL)
	found |= locate (argv[optind], e, ignore_case);
    }

  exit (!found);
}
