Goto sanos source index
//
// glob.c
//
// Path name pattern matching
//
// Copyright (C) 2002 Michael Ringgaard. All rights reserved.
//
// This code is derived from software contributed to Berkeley by
// Guido van Rossum.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. Neither the name of the project nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
// SUCH DAMAGE.
//
//
// glob(3) -- a superset of the one defined in POSIX 1003.2.
//
// The [!...] convention to negate a range is supported (SysV, Posix, ksh).
//
// Optional extra services, controlled by flags not defined by POSIX:
//
// GLOB_QUOTE:
// Escaping convention: \ inhibits any special meaning the following
// character might have (except \ at end of string is retained).
// GLOB_MAGCHAR:
// Set in gl_flags if pattern contained a globbing character.
// GLOB_NOMAGIC:
// Same as GLOB_NOCHECK, but it will only append pattern if it did
// not contain any magic characters. [Used in csh style globbing]
// GLOB_TILDE:
// expand ~user/foo to the /home/dir/of/user/foo
// GLOB_BRACE:
// expand {1,2}{a,b} to 1a 1b 2a 2b
//
// gl_matchc:
// Number of matches in the current invocation of glob.
//
#include <os.h>
#ifdef DEBUG
#include <stdio.h>
#endif
#include <glob.h>
#include <limits.h>
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
#define DOLLAR '$'
#define DOT '.'
#define EOS '\0'
#define LBRACKET '['
#define NOT '!'
#define QUESTION '?'
#define QUOTE '\\'
#define RANGE '-'
#define RBRACKET ']'
#define SEP '/'
#define STAR '*'
#define TILDE '~'
#define UNDERSCORE '_'
#define LBRACE '{'
#define RBRACE '}'
#define SLASH '/'
#define COMMA ','
#define M_QUOTE 0x80
#define M_PROTECT 0x40
#define M_MASK 0xff
#define M_CHAR 0x7f
#define CHAR(c) ((int)((unsigned char)(c) & M_CHAR))
#define META(c) ((int)((unsigned char)(c) | M_QUOTE))
#define M_ALL META('*')
#define M_END META(']')
#define M_NOT META('!')
#define M_ONE META('?')
#define M_RNG META('-')
#define M_SET META('[')
#define ismeta(c) (((c) & M_QUOTE) != 0)
static int compare(const void *p, const void *q);
static int glob0(const char *pattern, glob_t *pglob, size_t *limit);
static int glob1(char *pattern, glob_t *pglob, size_t *limit);
static int glob2(char *pathbuf, char *pathend, char *pathend_last, char *pattern, glob_t *pglob, size_t *limit);
static int glob3(char *pathbuf, char *pathend, char *pathend_last, char *pattern, char *restpattern, glob_t *pglob, size_t *limit);
static int globextend(const char *path, glob_t *pglob, size_t *limit);
static const char *globtilde(const char *pattern, char *patbuf, size_t patbuf_len, glob_t *pglob);
static int globexp1(const char *pattern, glob_t *pglob, size_t *limit);
static int globexp2(const char *ptr, const char *pattern, glob_t *pglob, int *rv, size_t *limit);
static int match(char *name, char *pat, char *patend);
#ifdef DEBUG
static void qprintf(const char *, char *);
#endif
int glob(const char *pattern, int flags, int (*errfunc)(const char *, int), glob_t *pglob) {
const char *patnext;
size_t limit;
char *bufnext, *bufend, patbuf[MAXPATH], prot;
patnext = pattern;
if (!(flags & GLOB_APPEND)) {
pglob->gl_pathc = 0;
pglob->gl_pathv = NULL;
if (!(flags & GLOB_DOOFFS)) pglob->gl_offs = 0;
}
if (flags & GLOB_LIMIT) {
limit = pglob->gl_matchc;
if (limit == 0) limit = ARG_MAX;
} else {
limit = 0;
}
pglob->gl_flags = flags & ~GLOB_MAGCHAR;
pglob->gl_errfunc = errfunc;
pglob->gl_matchc = 0;
bufnext = patbuf;
bufend = bufnext + MAXPATH - 1;
if (flags & GLOB_NOESCAPE) {
while (bufend - bufnext >= 1) {
if (*patnext == EOS) break;
*bufnext++ = *patnext++;
}
} else {
// Protect the quoted characters
while (bufend - bufnext >= 1) {
if (*patnext == EOS) break;
if (*patnext == QUOTE) {
if (*++patnext == EOS) {
*bufnext++ = QUOTE | M_PROTECT;
continue;
}
prot = M_PROTECT;
} else {
prot = 0;
}
*bufnext++ = *patnext++ | prot;
}
}
*bufnext = EOS;
if (flags & GLOB_BRACE) {
return globexp1(patbuf, pglob, &limit);
} else {
return glob0(patbuf, pglob, &limit);
}
}
//
// Expand recursively a glob {} pattern. When there is no more expansion
// invoke the standard globbing routine to glob the rest of the magic
// characters
//
static int globexp1(const char *pattern, glob_t *pglob, size_t *limit) {
const char *ptr = pattern;
int rv;
// Protect a single {}, for find(1), like csh
if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) {
return glob0(pattern, pglob, limit);
}
while ((ptr = (const char *) strchr((char *) ptr, LBRACE)) != NULL) {
if (!globexp2(ptr, pattern, pglob, &rv, limit)) return rv;
}
return glob0(pattern, pglob, limit);
}
//
// Recursive brace globbing helper. Tries to expand a single brace.
// If it succeeds then it invokes globexp1 with the new pattern.
// If it fails then it tries to glob the rest of the pattern and returns.
//
static int globexp2(const char *ptr, const char *pattern, glob_t *pglob, int *rv, size_t *limit) {
int i;
char *lm, *ls;
const char *pe, *pm, *pm1, *pl;
char patbuf[MAXPATH];
// Copy part up to the brace
for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++) continue;
*lm = EOS;
ls = lm;
// Find the balanced brace
for (i = 0, pe = ++ptr; *pe; pe++) {
if (*pe == LBRACKET) {
// Ignore everything between []
for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++) continue;
if (*pe == EOS) {
// We could not find a matching RBRACKET. Ignore and just look for RBRACE.
pe = pm;
}
} else if (*pe == LBRACE) {
i++;
} else if (*pe == RBRACE) {
if (i == 0) break;
i--;
}
}
// Non matching braces; just glob the pattern
if (i != 0 || *pe == EOS) {
*rv = glob0(patbuf, pglob, limit);
return 0;
}
for (i = 0, pl = pm = ptr; pm <= pe; pm++) {
switch (*pm) {
case LBRACKET:
// Ignore everything between []
for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++) continue;
if (*pm == EOS) {
// We could not find a matching RBRACKET. Ignore and just look for RBRACE.
pm = pm1;
}
break;
case LBRACE:
i++;
break;
case RBRACE:
if (i) {
i--;
break;
}
// FALLTHROUGH
case COMMA:
if (i && *pm == COMMA) {
break;
} else {
// Append the current string
for (lm = ls; (pl < pm); *lm++ = *pl++) continue;
// Append the rest of the pattern after the closing brace
for (pl = pe + 1; (*lm++ = *pl++) != EOS;) continue;
// Expand the current pattern
#ifdef DEBUG
qprintf("globexp2:", patbuf);
#endif
*rv = globexp1(patbuf, pglob, limit);
// Move after the comma, to the next string
pl = pm + 1;
}
break;
default:
break;
}
}
*rv = 0;
return 0;
}
//
// Expand tilde from the passwd file.
//
static const char *globtilde(const char *pattern, char *patbuf, size_t patbuf_len, glob_t *pglob) {
struct passwd *pwd;
char *h;
const char *p;
char *b, *eb;
if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE)) return pattern;
// Copy up to the end of the string or /
eb = &patbuf[patbuf_len - 1];
for (p = pattern + 1, h = (char *) patbuf; h < (char *)eb && *p && *p != SLASH; *h++ = *p++) continue;
*h = EOS;
if (patbuf[0] == EOS) {
// Handle a plain ~ or ~/
if ((h = getenv("HOME")) == NULL) {
if ((pwd = getpwuid(getuid())) != NULL) {
h = pwd->pw_dir;
} else {
return pattern;
}
}
} else {
// Expand a ~user
if ((pwd = getpwnam((char *) patbuf)) == NULL) {
return pattern;
} else {
h = pwd->pw_dir;
}
}
// Copy the home directory
for (b = patbuf; b < eb && *h; *b++ = *h++) continue;
// Append the rest of the pattern
while (b < eb && (*b++ = *p++) != EOS) continue;
*b = EOS;
return patbuf;
}
//
// The main glob() routine: compiles the pattern (optionally processing
// quotes), calls glob1() to do the real pattern matching, and finally
// sorts the list (unless unsorted operation is requested). Returns 0
// if things went well, nonzero if errors occurred.
//
static int glob0(const char *pattern, glob_t *pglob, size_t *limit) {
const char *qpatnext;
int c, err;
size_t oldpathc;
char *bufnext, patbuf[MAXPATH];
qpatnext = globtilde(pattern, patbuf, MAXPATH, pglob);
oldpathc = pglob->gl_pathc;
bufnext = patbuf;
// We don't need to check for buffer overflow any more
while ((c = *qpatnext++) != EOS) {
switch (c) {
case LBRACKET:
c = *qpatnext;
if (c == NOT) ++qpatnext;
if (*qpatnext == EOS || strchr(qpatnext + 1, RBRACKET) == NULL) {
*bufnext++ = LBRACKET;
if (c == NOT) --qpatnext;
break;
}
*bufnext++ = M_SET;
if (c == NOT) *bufnext++ = M_NOT;
c = *qpatnext++;
do {
*bufnext++ = CHAR(c);
if (*qpatnext == RANGE && (c = qpatnext[1]) != RBRACKET) {
*bufnext++ = M_RNG;
*bufnext++ = CHAR(c);
qpatnext += 2;
}
} while ((c = *qpatnext++) != RBRACKET);
pglob->gl_flags |= GLOB_MAGCHAR;
*bufnext++ = M_END;
break;
case QUESTION:
pglob->gl_flags |= GLOB_MAGCHAR;
*bufnext++ = M_ONE;
break;
case STAR:
pglob->gl_flags |= GLOB_MAGCHAR;
// Collapse adjacent stars to one, to avoid exponential behavior
if (bufnext == patbuf || bufnext[-1] != M_ALL) *bufnext++ = M_ALL;
break;
default:
*bufnext++ = CHAR(c);
break;
}
}
*bufnext = EOS;
#ifdef DEBUG
qprintf("glob0:", patbuf);
#endif
if ((err = glob1(patbuf, pglob, limit)) != 0) return err;
//
// If there was no match we are going to append the pattern
// if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
// and the pattern did not contain any magic characters
// GLOB_NOMAGIC is there just for compatibility with csh.
//
if (pglob->gl_pathc == oldpathc) {
if (((pglob->gl_flags & GLOB_NOCHECK) ||
((pglob->gl_flags & GLOB_NOMAGIC) &&
!(pglob->gl_flags & GLOB_MAGCHAR)))) {
return globextend(pattern, pglob, limit);
} else {
return GLOB_NOMATCH;
}
}
if (!(pglob->gl_flags & GLOB_NOSORT)) {
qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
pglob->gl_pathc - oldpathc, sizeof(char *), compare);
}
return 0;
}
static int compare(const void *p, const void *q) {
return strcmp(*(char **) p, *(char **) q);
}
static int glob1(char *pattern, glob_t *pglob, size_t *limit) {
char pathbuf[MAXPATH];
// A null pathname is invalid -- POSIX 1003.1 sect. 2.4.
if (*pattern == EOS) return 0;
return glob2(pathbuf, pathbuf, pathbuf + MAXPATH - 1, pattern, pglob, limit);
}
//
// The functions glob2 and glob3 are mutually recursive; there is one level
// of recursion for each segment in the pattern that contains one or more
// meta characters.
//
static int glob2(char *pathbuf, char *pathend, char *pathend_last, char *pattern, glob_t *pglob, size_t *limit) {
struct stat sb;
char *p, *q;
int anymeta;
//
// Loop over pattern segments until end of pattern or until
// segment with meta character found.
//
for (anymeta = 0;;) {
if (*pattern == EOS) { // End of pattern?
*pathend = EOS;
if (lstat(pathbuf, &sb)) return 0;
if (((pglob->gl_flags & GLOB_MARK) &&
pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
|| (S_ISLNK(sb.st_mode) &&
(stat(pathbuf, &sb) == 0) &&
S_ISDIR(sb.st_mode)))) {
if (pathend + 1 > pathend_last) return GLOB_ABORTED;
*pathend++ = SEP;
*pathend = EOS;
}
pglob->gl_matchc++;
return globextend(pathbuf, pglob, limit);
}
// Find end of next segment, copy tentatively to pathend.
q = pathend;
p = pattern;
while (*p != EOS && *p != SEP) {
if (ismeta(*p)) anymeta = 1;
if (q + 1 > pathend_last) return GLOB_ABORTED;
*q++ = *p++;
}
if (!anymeta) {
// No expansion, do next segment.
pathend = q;
pattern = p;
while (*pattern == SEP) {
if (pathend + 1 > pathend_last) return GLOB_ABORTED;
*pathend++ = *pattern++;
}
} else {
// Need expansion, recurse
return glob3(pathbuf, pathend, pathend_last, pattern, p, pglob, limit);
}
}
}
static int glob3(char *pathbuf, char *pathend, char *pathend_last, char *pattern, char *restpattern, glob_t *pglob, size_t *limit) {
struct dirent *dp;
DIR *dirp;
int err;
if (pathend > pathend_last) return GLOB_ABORTED;
*pathend = EOS;
errno = 0;
if ((dirp = opendir(pathbuf)) == NULL) {
if (pglob->gl_errfunc) {
if (pglob->gl_errfunc(pathbuf, errno) || pglob->gl_flags & GLOB_ERR) {
return GLOB_ABORTED;
}
}
return 0;
}
err = 0;
// Search directory for matching names
while ((dp = readdir(dirp))) {
// Initial DOT must be matched literally
if (dp->d_name[0] == DOT && *pattern != DOT) continue;
if (pathend + dp->d_namlen > pathend_last) return GLOB_ABORTED;
memcpy(pathend, dp->d_name, dp->d_namlen);
pathend[dp->d_namlen] = 0;
if (!match(pathend, pattern, restpattern)) {
*pathend = EOS;
continue;
}
err = glob2(pathbuf, pathend + dp->d_namlen, pathend_last, restpattern, pglob, limit);
if (err) break;
}
closedir(dirp);
return err;
}
//
// Extend the gl_pathv member of a glob_t structure to accomodate a new item,
// add the new item, and update gl_pathc.
//
// Return 0 if new item added, error code if memory couldn't be allocated.
//
// Invariant of the glob_t structure:
// Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
// gl_pathv points to (gl_offs + gl_pathc + 1) items.
//
static int globextend(const char *path, glob_t *pglob, size_t *limit) {
char **pathv;
size_t i, newsize, len;
char *copy;
const char *p;
if (*limit && pglob->gl_pathc > *limit) {
errno = 0;
return GLOB_NOSPACE;
}
newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
pathv = realloc(pglob->gl_pathv, newsize);
if (pathv == NULL) {
if (pglob->gl_pathv) {
free(pglob->gl_pathv);
pglob->gl_pathv = NULL;
}
return GLOB_NOSPACE;
}
if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
// First time around -- clear initial gl_offs items
pathv += pglob->gl_offs;
for (i = pglob->gl_offs + 1; --i > 0; ) *--pathv = NULL;
}
pglob->gl_pathv = pathv;
for (p = path; *p++;) continue;
len = p - path;
if ((copy = malloc(len)) != NULL) {
memcpy(copy, path, len);
pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
}
pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
return copy == NULL ? GLOB_NOSPACE : 0;
}
//
// Pattern matching function for filenames. Each occurrence of the *
// pattern causes a recursion level.
//
static int match(char *name, char *pat, char *patend) {
int ok, negate_range;
char c, k;
while (pat < patend) {
c = *pat++;
switch (c & M_MASK) {
case M_ALL:
if (pat == patend) return 1;
do {
if (match(name, pat, patend)) return 1;
} while (*name++ != EOS);
return 0;
case M_ONE:
if (*name++ == EOS) return 0;
break;
case M_SET:
ok = 0;
if ((k = *name++) == EOS) return 0;
if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS) pat++;
while (((c = *pat++) & M_MASK) != M_END) {
if ((*pat & M_MASK) == M_RNG) {
if (CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1])) ok = 1;
pat += 2;
} else if (c == k) {
ok = 1;
}
}
if (ok == negate_range) return 0;
break;
default:
if (*name++ != c) return 0;
}
}
return *name == EOS;
}
//
// Free allocated data belonging to a glob_t structure
//
void globfree(glob_t *pglob) {
size_t i;
char **pp;
if (pglob->gl_pathv != NULL) {
pp = pglob->gl_pathv + pglob->gl_offs;
for (i = pglob->gl_pathc; i--; ++pp) {
if (*pp) free(*pp);
}
free(pglob->gl_pathv);
pglob->gl_pathv = NULL;
}
}
#ifdef DEBUG
static void qprintf(const char *str, char *s) {
char *p;
printf("%s:\n", str);
for (p = s; *p; p++) printf("%c", CHAR(*p));
printf("\n");
for (p = s; *p; p++) printf("%c", *p & M_PROTECT ? '"' : ' ');
printf("\n");
for (p = s; *p; p++) printf("%c", ismeta(*p) ? '_' : ' ');
printf("\n");
}
#endif