minimatch source code analysis
text
0. Basic information
- version: v3.0.4
- Functions: brace expansion, glob expression matching, globstar extension (**)
- Npm Repository: https://www.npmjs.com/package/minimatch
1. Source code analysis
minimatch has a fair amount of content: it is mainly a JavaScript reimplementation of Bash's glob parsing, so reading it is also a good way to learn how glob expressions are parsed
The core code is concentrated in the minimatch.js single file. Let's move on
1.1 project deployment
First, let's take a look at the general structure of the minimatch.js file
- minimatch.js (reading notes: / minimatch.js/0_structure.js)
module.exports = minimatch; minimatch.Minimatch = Minimatch; var path = { sep: '/' }; try { path = require('path'); } catch (er) {} var GLOBSTAR = (minimatch.GLOBSTAR = Minimatch.GLOBSTAR = {}); var expand = require('brace-expansion'); /* 2_4_parse */ function charSet(s) {} /* 1_1_filter */ minimatch.filter = filter; function filter(pattern, options) {} /* 3_3_ext */ function ext(a, b) {} /* 1_0_defaults */ minimatch.defaults = function (def) {}; /* 2_0_defaults */ Minimatch.defaults = function (def) {}; /* 1_minimatch */ function minimatch(p, pattern, options) {} /* 2_Minimatch */ function Minimatch(pattern, options) {} /* 2_1_make */ Minimatch.prototype.debug = function () {}; /* 2_1_make */ Minimatch.prototype.make = make; function make() {} /* 2_2_parseNegate */ Minimatch.prototype.parseNegate = parseNegate; function parseNegate() {} /* 1_2_braceExpand */ minimatch.braceExpand = function (pattern, options) {}; /* 2_3_braceExpand */ Minimatch.prototype.braceExpand = braceExpand; function braceExpand(pattern, options) {} /* 2_4_parse */ Minimatch.prototype.parse = parse; function parse(pattern, isSub) {} /* 1_3_makeRe */ minimatch.makeRe = function (pattern, options) {}; /* 2_5_makeRe */ Minimatch.prototype.makeRe = makeRe; function makeRe() {} /* 1_4_match */ minimatch.match = function (list, pattern, options) {}; /* 2_6_match */ Minimatch.prototype.match = match; function match(f, partial) {} /* 2_7_matchOne */ Minimatch.prototype.matchOne = function (file, pattern, partial) {}; /* 3_1_globUnescape */ function globUnescape(s) {} /* 3_2_regExpEscape */ function regExpEscape(s) {}
Note: the comment in front of each method names the reading-notes file that corresponds to it
1.2 Minimatch
Although the README recommends using the default minimatch function directly, that function is really just a thin wrapper around the Minimatch class, so let's go straight to the definition and implementation of the core class
1.2.1 constructor
- minimatch.js (reading notes: / minimatch.js/2_Minimatch.js)
/**
 * Core matcher class. Validates the arguments, normalises the pattern and
 * then compiles it by calling `this.make()`.
 *
 * Can be called with or without `new`.
 *
 * @param {string} pattern - glob pattern; surrounding whitespace is trimmed.
 * @param {Object} [options] - matcher options; defaults to `{}`.
 * @throws {TypeError} when `pattern` is not a string.
 */
function Minimatch(pattern, options) {
  // Allow `Minimatch(pattern)` without `new`.
  if (!(this instanceof Minimatch)) {
    return new Minimatch(pattern, options);
  }

  if (typeof pattern !== 'string') {
    throw new TypeError('glob pattern string required');
  }

  if (!options) options = {};
  pattern = pattern.trim();

  // windows support: need to use /, not \
  if (path.sep !== '/') {
    pattern = pattern.split(path.sep).join('/');
  }

  this.options = options;
  this.set = [];
  this.pattern = pattern;
  this.regexp = null;
  this.negate = false;
  this.comment = false; // set by make() when the pattern is a comment
  this.empty = false; // set by make() when the pattern is empty

  // make the set of regexps etc.
  this.make();
}
The constructor implements the parameter validation and the initialization of the basic property, and then calls the make method to initialize the real expression.
1.2.2 make initialization expression parsing
The logic of the whole make is quite long and may span two or three subheadings
- minimatch.js (reading notes: / minimatch.js/2_1_make.js)
// normalizes slashes. var slashSplit = /\/+/; Minimatch.prototype.debug = function () {}; Minimatch.prototype.make = make; /* Initialize Minimatch */ function make() { // don't do it more than once. if (this._made) return; var pattern = this.pattern; var options = this.options; // empty patterns and comments match nothing. // 1. The matching mode is comment = > if (!options.nocomment && pattern.charAt(0) === '#') { this.comment = true; return; } // 2. The matching mode is null = > if (!pattern) { this.empty = true; return; }
First, in the first part, check two situations that can end the parsing in advance:
- The expression is a comment
- The expression is empty
1.2.3 Stripping the leading !
Next, we first need to strip the leading ! characters from the expression
- minimatch.js (reading notes: / minimatch.js/2_1_make.js)
// step 1: figure out negation, etc. // 3. Cutting leader! this.parseNegate();
- minimatch.js (reading notes: / minimatch.js/2_2_parseNegate.js)
/* Strip the leading ! characters */
Minimatch.prototype.parseNegate = parseNegate;

/**
 * Counts the leading `!` characters of `this.pattern`, removes them from the
 * pattern and stores the resulting negation state in `this.negate`
 * (an odd number of `!` negates, an even number does not).
 *
 * Does nothing when `options.nonegate` is set.
 */
function parseNegate() {
  var pattern = this.pattern;
  var options = this.options;

  // Leading ! handling is suppressed entirely.
  if (options.nonegate) return;

  var negate = false;
  var negateOffset = 0;

  // Each leading ! flips the negation flag.
  for (var i = 0, l = pattern.length; i < l && pattern.charAt(i) === '!'; i++) {
    negate = !negate;
    negateOffset++;
  }

  // Drop the leading ! characters from the stored pattern.
  if (negateOffset) this.pattern = pattern.slice(negateOffset);
  this.negate = negate;
}
In essence, it counts how many leading ! characters there are, strips them, and then sets the this.negate flag (an odd count means the pattern is negated)
1.2.4 curly bracket expansion
Next, before actually starting to convert glob expressions, complete the brace expansion feature, and then initialize the debug output
- minimatch.js (reading notes: / minimatch.js/2_1_make.js)
// step 2: expand braces // 4. Curly bracket expansion = > [P1, P2, P3,...] var set = (this.globSet = this.braceExpand()); if (options.debug) this.debug = console.error; // debug output, the default is () = > {} this.debug(this.pattern, set);
- minimatch.js (reading notes: / minimatch.js/2_3_braceExpand.js)
Minimatch.prototype.braceExpand = braceExpand;

/**
 * Brace expansion: turns one pattern into the list of patterns it expands to.
 *
 * Works both as a Minimatch method (missing arguments are taken from the
 * instance) and as a plain function.
 *
 * @param {string} [pattern] - pattern to expand; defaults to `this.pattern`.
 * @param {Object} [options] - defaults to `this.options` on a Minimatch
 *   instance, otherwise `{}`. `options.nobrace` disables expansion.
 * @returns {string[]} the expanded patterns, or `[pattern]` when there is
 *   nothing to expand.
 * @throws {TypeError} when no pattern is available.
 */
function braceExpand(pattern, options) {
  // Fall back to the instance's own options / pattern when not passed in.
  if (!options) {
    if (this instanceof Minimatch) {
      options = this.options;
    } else {
      options = {};
    }
  }

  pattern = typeof pattern === 'undefined' ? this.pattern : pattern;

  if (typeof pattern === 'undefined') {
    throw new TypeError('undefined pattern');
  }

  // Shortcut: no need to expand when braces are disabled or the pattern has
  // no `{...}` pair. The regex matches exactly the same strings as /\{.*\}/
  // but cannot backtrack quadratically on inputs with many `{`
  // (the ReDoS reported as CVE-2022-3517).
  if (options.nobrace || !pattern.match(/\{(?:(?!\{).)*\}/)) {
    return [pattern];
  }

  return expand(pattern);
}
In essence, it delegates to the brace-expansion library (see also: source code analysis of brace-expansion)
1.2.5 segment & parse regular expression conversion
The next step is to segment by / and convert each segment into a regular expression
- minimatch.js (reading notes: / minimatch.js/2_1_make.js)
// step 3: now we have a set, so turn each one into a series of path-portion // matching patterns. // These will be regexps, except in the case of "**", which is // set to the GLOBSTAR object for globstar behavior, // and will not contain any / characters // 5. Press / separate = > [[P1], [P2], [P3],...] set = this.globParts = set.map(function (s) { return s.split(slashSplit); }); this.debug(this.pattern, set); // glob --> regexps // 6. Convert glob expression fragment to regexp set = set.map(function (s, si, set) { return s.map(this.parse, this); }, this); this.debug(this.pattern, set);
1.2.6 regular expression conversion: Part I - initialization status
The whole regular expression conversion is relatively long (about 400 lines). Let's divide it into several pieces
- minimatch.js (reading notes: / minimatch.js/2_4_parse.js)
The first part is about the initialization of transition state
var plTypes = { '!': { open: '(?:(?!(?:', close: '))[^/]*?)' }, // !(xxx) in glob => (?:(?!(?:xxx))[^/]*?) in reg '?': { open: '(?:', close: ')?' }, // ?(xxx) in glob => (?:xxx)? in reg '+': { open: '(?:', close: ')+' }, // +(xxx) in glob => (?:xxx)+ in reg '*': { open: '(?:', close: ')*' }, // *(xxx) in glob => (?:xxx)* in reg '@': { open: '(?:', close: ')' }, // @(xxx) in glob => (?:xxx)@ in reg }; // any single thing other than / // don't need to escape / when using new RegExp() var qmark = '[^/]'; // * => any number of characters var star = qmark + '*?'; // characters that need to be escaped in RegExp. var reSpecials = charSet('().*{}+?[]^$\\!'); // "abc" -> { a:true, b:true, c:true } function charSet(s) { return s.split('').reduce(function (set, c) { set[c] = true; return set; }, {}); } // parse a component of the expanded set. // At this point, no pattern may contain "/" in it // so we're going to return a 2d array, where each entry is the full // pattern, split on '/', and then turned into a regular expression. // A regexp is made at the end which joins each array with an // escaped /, and another full one which joins each regexp with |. // // Following the lead of Bash 4.1, note that "**" only has special meaning // when it is the *only* thing in a path portion. Otherwise, any series // of * is equivalent to a single *. Globstar behavior is enabled by // default, and can be disabled by setting options.noglobstar. Minimatch.prototype.parse = parse; var SUBPARSE = {}; /* Parse path fragment */ function parse(pattern, isSub) { // 0. The mode is limited to 64KB if (pattern.length > 1024 * 64) { throw new TypeError('pattern is too long'); } var options = this.options; // shortcuts // 1. Turn off GLOBSTAR mode if (!options.noglobstar && pattern === '**') return GLOBSTAR; if (pattern === '') return ''; var re = ''; var hasMagic = !!options.nocase; var escaping = false; // ? 
=> one single character var patternListStack = []; var negativeLists = []; var stateChar; var inClass = false; var reClassStart = -1; var classStart = -1; // . and .. never match anything that doesn't start with ., // even when options.dot is set. // 2. Handle the special starting character of glob string var patternStart = pattern.charAt(0) === '.' ? '' // anything : // not (start or / followed by . or .. followed by / or end) options.dot ? '(?!(?:^|\\/)\\.{1,2}(?:$|\\/))' : '(?!\\.)'; var self = this; // Clean up stateChar status characters function clearStateChar() { if (stateChar) { // we had some state-tracking character // that wasn't consumed by this pass. switch (stateChar) { case '*': re += star; hasMagic = true; break; case '?': re += qmark; hasMagic = true; break; default: re += '\\' + stateChar; break; } self.debug('clearStateChar %j %j', stateChar, re); stateChar = false; } }
1.2.7 regular expression conversion: Part II - loop parsing
- minimatch.js (reading notes: / minimatch.js/2_4_parse.js)
The next step is to loop through the characters of the expression
// 3. Loop processing expression for (var i = 0, len = pattern.length, c; i < len && (c = pattern.charAt(i)); i++) { this.debug('%s\t%s %s %j', pattern, i, re, c); // skip over any that are escaped. // 3.1 ignore special characters if (escaping && reSpecials[c]) { re += '\\' + c; escaping = false; continue; } // 3.2 processing status characters of extended glob switch (c) {} // switch } // for
1.2.7.1 status extension
For special characters, let's look at them separately. For the status extender, it is recorded to stateChar
case '/': // completely not allowed, even escaped. // Should already be path-split by now. return false; case '\\': clearStateChar(); escaping = true; continue; // the various stateChar values // for the "extglob" stuff. case '?': case '*': case '+': case '@': case '!': this.debug('%s\t%s %s %j <-- stateChar', pattern, i, re, c); // all of those are literals inside a class, except that // the glob [!a] means [^a] in regexp // [!a] in glob => [^a] in reg if (inClass) { this.debug(' in class'); if (c === '!' && i === classStart + 1) c = '^'; re += c; continue; } // if we already have a stateChar, then it means // that there was something like ** or +? in there. // Handle the stateChar, then proceed with this one. self.debug('call clearStateChar %j', stateChar); clearStateChar(); stateChar = c; // if extglob is disabled, then +(asdf|foo) isn't a thing. // just clear the statechar *now*, rather than even diving into // the patternList stuff. if (options.noext) clearStateChar(); continue;
1.2.7.2 grouping symbols
Next, for the grouping character (()), use the patternListStack stack to save the state
case '(': // In [] if (inClass) { re += '('; continue; } // No leading status characters if (!stateChar) { re += '\\('; continue; } patternListStack.push({ type: stateChar, start: i - 1, reStart: re.length, open: plTypes[stateChar].open, close: plTypes[stateChar].close, }); // negation is (?:(?!js)[^/]*) // Why don't you pick it up and use it re += stateChar === '!' ? '(?:(?!(?:' : '(?:'; this.debug('plType %j %j', stateChar, re); stateChar = false; continue; case ')': // In [] or without the first half (in the stack) if (inClass || !patternListStack.length) { re += '\\)'; continue; } clearStateChar(); hasMagic = true; var pl = patternListStack.pop(); // negation is (?:(?!js)[^/]*) // The others are (?:<pattern>)<type> re += pl.close; if (pl.type === '!') { negativeLists.push(pl); } pl.reEnd = re.length; continue;
1.2.7.3 or symbols
For the or operator (|), it is the or operator directly mapped to a regular expression
case '|': // [] medium, non () medium and escaping are escape characters if (inClass || !patternListStack.length || escaping) { re += '\\|'; escaping = false; continue; } clearStateChar(); re += '|'; continue;
1.2.7.4 class operators
For the class ([]) operator, use the inClass flag to save the state
// these are mostly the same in regexp and glob case '[': // swallow any state-tracking char before the [ clearStateChar(); // In [] if (inClass) { re += '\\' + c; continue; } // Top [] inClass = true; classStart = i; reClassStart = re.length; re += c; continue; case ']': // a right bracket shall lose its special // meaning and represent itself in // a bracket expression if it occurs // first in the list. -- POSIX.2 2.8.3.2 // There must be at least one character between [] if (i === classStart + 1 || !inClass) { re += '\\' + c; escaping = false; continue; } // handle the case where we left a class open. // "[z-a]" is valid, equivalent to "\[z-a\]" if (inClass) { // split where the last [ was, make sure we don't have // an invalid re. if so, re-walk the contents of the // would-be class to re-translate any characters that // were passed through as-is // TODO: It would probably be faster to determine this // without a try/catch and a new RegExp, but it's tricky // to do safely. For now, this is safe and works. var cs = pattern.substring(classStart + 1, i); try { RegExp('[' + cs + ']'); } catch (er) { // not a valid class! var sp = this.parse(cs, SUBPARSE); re = re.substr(0, reClassStart) + '\\[' + sp[0] + '\\]'; hasMagic = hasMagic || sp[1]; inClass = false; continue; } } // finish up the class. hasMagic = true; inClass = false; re += c; continue;
1.2.7.5 general characters
Finally, by default, we write regular expressions directly for general characters
default: // swallow any state char that wasn't consumed clearStateChar(); if (escaping) { // no need escaping = false; } else if (reSpecials[c] && !(c === '^' && inClass)) { // Escape all special characters re += '\\'; } re += c; } // switch } // for
1.2.8 regular expression conversion: Part III - residual processing
1.2.8.1 handling of unclosed conditions
Special treatment is required for the case that the paired symbols such as [], () are not closed
- minimatch.js (reading notes: / minimatch.js/2_4_parse.js)
Not closed for [] class
// handle the case where we left a class open. // "[abc" is valid, equivalent to "\[abc" // 4. Clean up the unclosed [] if (inClass) { // split where the last [ was, and escape it // this is a huge pita. We now have to re-walk // the contents of the would-be class to re-translate // any characters that were passed through as-is cs = pattern.substr(classStart + 1); sp = this.parse(cs, SUBPARSE); re = re.substr(0, reClassStart) + '\\[' + sp[0]; hasMagic = hasMagic || sp[1]; }
For () not closed
// handle the case where we had a +( thing at the *end* // of the pattern. // each pattern list stack adds 3 chars, and we need to go through // and escape any | chars that were passed through as-is for the regexp. // Go through and escape them, taking care not to double-escape any // | chars that were already escaped. // 5. Clean up those not closed+( for (pl = patternListStack.pop(); pl; pl = patternListStack.pop()) { var tail = re.slice(pl.reStart + pl.open.length); this.debug('setting tail', re, pl); // maybe some even number of \, then maybe 1 \, followed by a | tail = tail.replace(/((?:\\{2}){0,64})(\\?)\|/g, function (_, $1, $2) { if (!$2) { // the | isn't already escaped, so escape it. $2 = '\\'; } // need to escape all those slashes *again*, without escaping the // one that we need for escaping the | character. As it works out, // escaping an even number of slashes can be done by simply repeating // it exactly after itself. That's why this trick works. // // I am sorry that you have to see this. return $1 + $1 + $2 + '|'; }); this.debug('tail=%j\n %s', tail, tail, pl, re); var t = pl.type === '*' ? star : pl.type === '?' ? qmark : '\\' + pl.type; hasMagic = true; re = re.slice(0, pl.reStart) + t + '\\(' + tail; }
1.2.8.2 escape flag
In addition to the unclosed class, there is a part of character escape
- minimatch.js (reading notes: / minimatch.js/2_4_parse.js)
// handle trailing things that only matter at the very end. clearStateChar(); if (escaping) { // trailing \\ re += '\\\\'; } // only need to apply the nodot start if the re starts with // something that could conceivably capture a dot // 6. Add start header var addPatternStart = false; switch (re.charAt(0)) { case '.': case '[': case '(': addPatternStart = true; }
1.2.8.3 review of antisense expressions
In addition to grouping, we also need to post-process the negated expressions (!(...)) collected during parsing, rewrapping each one so that its negative look-ahead covers everything that follows it
- minimatch.js (reading notes: / minimatch.js/2_4_parse.js)
// Hack to work around lack of negative lookbehind in JS // A pattern like: *.!(x).!(y|z) needs to ensure that a name // like 'a.xyz.yz' doesn't match. So, the first negative // lookahead, has to look ALL the way ahead, to the end of // the pattern. // 7. Negative matching needs to look back for (var n = negativeLists.length - 1; n > -1; n--) { var nl = negativeLists[n]; var nlBefore = re.slice(0, nl.reStart); var nlFirst = re.slice(nl.reStart, nl.reEnd - 8); var nlLast = re.slice(nl.reEnd - 8, nl.reEnd); var nlAfter = re.slice(nl.reEnd); nlLast += nlAfter; // Handle nested stuff like *(*.js|!(*.json)), where open parens // mean that we should *not* include the ) in the bit that is considered // "after" the negated section. var openParensBefore = nlBefore.split('(').length - 1; var cleanAfter = nlAfter; for (i = 0; i < openParensBefore; i++) { cleanAfter = cleanAfter.replace(/\)[+*?]?/, ''); } nlAfter = cleanAfter; var dollar = ''; if (nlAfter === '' && isSub !== SUBPARSE) { dollar = '$'; } var newRe = nlBefore + nlFirst + nlAfter + dollar + nlLast; re = newRe; }
1.2.8.4 regular expression generation
Finally, there are a few trivial boundary conditions, and then the regular expression is generated
- minimatch.js (reading notes: / minimatch.js/2_4_parse.js)
// if the re is not "" at this point, then we need to make sure // it doesn't match against an empty path part. // Otherwise a/* will match a/, which it should not. // 8. Ensure that the non null mode does not match the empty string if (re !== '' && hasMagic) { re = '(?=.)' + re; } // 9. Add a pattern header according to addPatternStart if (addPatternStart) { re = patternStart + re; } // parsing just a piece of a larger pattern. if (isSub === SUBPARSE) { return [re, hasMagic]; } // skip the regexp for non-magical patterns // unescape anything in it, though, so that it'll be // an exact match against a file etc. // 10. For patterns that do not have any special matching if (!hasMagic) { return globUnescape(pattern); } // 11. Build regular expressions var flags = options.nocase ? 'i' : ''; // ignore case try { var regExp = new RegExp('^' + re + '$', flags); } catch (er) { // If it was an invalid regular expression, then it can't match // anything. This trick looks for a character after the end of // the string, which is of course impossible, except in multi-line // mode, but it's not a /m regex. return new RegExp('$.'); } regExp._glob = pattern; regExp._src = re; return regExp; }
1.2.9 regular expression group generation
See here to complete the regular expression generation
- minimatch.js (reading notes: / minimatch.js/2_1_make.js)
// filter out everything that didn't compile properly. // 7. Filter expressions that fail to match set = set.filter(function (s) { return s.indexOf(false) === -1; }); this.debug(this.pattern, set); this.set = set; }
Back in the make function, after segmentation and regex conversion our glob expression has become a two-dimensional array of regular expressions (the first dimension holds the expressions produced by brace expansion, the second holds the path fragments of each expression after splitting on /)
The whole make process determines the content of this.set
1.2.10 makeRe generate complete expression
After making, we can generate a complete regular expression based on set
- minimatch.js (reading notes: / minimatch.js/2_5_makeRe.js)
// ** when dots are allowed. Anything goes, except .. and . // not (^ or / followed by one or two dots followed by $ or /), // followed by anything, any number of times. var twoStarDot = '(?:(?!(?:\\/|^)(?:\\.{1,2})($|\\/)).)*?'; Minimatch.prototype.makeRe = makeRe; function makeRe() { // Manually initiate the creation of regexp if (this.regexp || this.regexp === false) return this.regexp; // at this point, this.set is a 2d array of partial // pattern strings, or "**". // // It's better to use .match(). This function shouldn't // be used, really, but it's pretty convenient sometimes, // when you just want to work with a regex. // 1. Ensure that this.set is a regular expression fragment after transformation var set = this.set; // Parsing failed if (!set.length) { this.regexp = false; return this.regexp; } var options = this.options; var twoStar = options.noglobstar ? star // '*' : options.dot ? twoStarDot // '**.' : twoStarNoDot; // '**' var flags = options.nocase ? 'i' : '';
The first part is mainly about the verification of some parameters. The core part is the following one
var re = set .map(function (pattern) { return pattern .map(function (p) { return p === GLOBSTAR ? twoStar // p === GLOBSTAR === {} : typeof p === 'string' ? regExpEscape(p) // p === string(plain text) : p._src; // p === regexp }) .join('\\/'); // s1/s2/s3/... }) .join('|'); // p1|p2|p3|...
For each expression, its fragments are joined with an escaped /, and the resulting expressions are then joined with |
Next, the final product is returned after a little modification
// must match entire pattern // ending in a * or ** will make it less strict. re = '^(?:' + re + ')$'; // Head and tail // can match anything, as long as it's not this. if (this.negate) re = '^(?!' + re + ').*$'; // Reverse try { // Building full regular expressions this.regexp = new RegExp(re, flags); } catch (ex) { // Build failed this.regexp = false; } return this.regexp; }
1.2.11 match string
After generating the regular expression, you can really match the actual target string
- minimatch.js (reading notes: / minimatch.js/2_6_match.js)
Start by preprocessing the target string
Minimatch.prototype.match = match; /* Match string */ function match(f, partial) { this.debug('match', f, this.pattern); // short-circuit in the case of busted things. // comments, etc. // 1. Matching notes if (this.comment) return false; // 2. Match empty string if (this.empty) return f === ''; // 3. Matching/ if (f === '/' && partial) return true; var options = this.options; // windows: need to use /, not \ // 4. Unified delimiter/ if (path.sep !== '/') { f = f.split(path.sep).join('/'); } // treat the test path as a set of pathparts. // 5. Press / split = > F = [] f = f.split(slashSplit); this.debug(this.pattern, 'split', f); // just ONE of the pattern sets in this.set needs to match // in order for it to be valid. If negating, then just one // match means that we have failed. // Either way, return on the first hit. // 6. Get the decomposed mode = > this. Set = [] var set = this.set; this.debug(this.pattern, 'set', set);
Next, the second part is a loop to check whether any expression can match the target string
// Find the basename of the path by looking for the last non-empty segment var filename; // basename var i; for (i = f.length - 1; i >= 0; i--) { // Back forward filename = f[i]; if (filename) break; } // 7. Test hit for (i = 0; i < set.length; i++) { var pattern = set[i]; var file = f; if (options.matchBase && pattern.length === 1) { file = [filename]; } var hit = this.matchOne(file, pattern, partial); if (hit) { if (options.flipNegate) return true; return !this.negate; } } // didn't get any hits. this is success if it's a negative // pattern, failure otherwise. // 8. Miss if (options.flipNegate) return false; return this.negate; }
The core here is the matchOne function, which checks whether the target path fragment matches a regular expression fragment
1.2.12 matchOne: matching a single pattern
matchOne is to check whether the target matches the current pattern
- minimatch.js (reading notes: / minimatch.js/2_7_matchOne.js)
The body of the matching loop is divided into three blocks. The first block handles pattern fragments that failed to parse
// set partial to true to test if, for example, // "/a/b" matches the start of "/*/b/*/d" // Partial means, if you run out of file before you run // out of pattern, then that's fine, as long as all // the parts match. /* Match expression fragment */ Minimatch.prototype.matchOne = function (file, pattern, partial) { var options = this.options; this.debug('matchOne', { this: this, file: file, pattern: pattern }); this.debug('matchOne', file.length, pattern.length); for (var fi = 0, pi = 0, fl = file.length, pl = pattern.length; fi < fl && pi < pl; fi++, pi++) { this.debug('matchOne loop'); var p = pattern[pi]; var f = file[fi]; this.debug(pattern, p, f); // should be impossible. // some invalid regexp stuff in the set. // 1. Pattern matching failed = > P = = = false if (p === false) return false;
The second part is to match the extended features of globstar
// 2. Matching** if (p === GLOBSTAR) { this.debug('GLOBSTAR', [pattern, p, f]); // "**" // a/**/b/**/c would match the following: // a/b/x/y/z/c // a/x/y/z/b/c // a/b/x/b/x/c // a/b/c // To do this, take the rest of the pattern after // the **, and see if it would match the file remainder. // If so, return success. // If not, the ** "swallows" a segment, and try again. // This is recursively awful. // // a/**/b/**/c matching a/b/x/y/z/c // - a matches a // - doublestar // - matchOne(b/x/y/z/c, b/**/c) // - b matches b // - doublestar // - matchOne(x/y/z/c, c) -> no // - matchOne(y/z/c, c) -> no // - matchOne(z/c, c) -> no // - matchOne(c, c) yes, hit var fr = fi; var pr = pi + 1; // 2.1 * * as tail if (pr === pl) { this.debug('** at the end'); // a ** at the end will just swallow the rest. // We have found a match. // however, it will not swallow /.x, unless // options.dot is set. // . and .. are *never* matched by **, for explosively // exponential reasons. for (; fi < fl; fi++) { // '* *' does not match '.'.. / '. xxx' if (file[fi] === '.' || file[fi] === '..' || (!options.dot && file[fi].charAt(0) === '.')) return false; } return true; } // ok, let's see if we can swallow whatever we can. // 2.2 greedy mode while (fr < fl) { var swallowee = file[fr]; this.debug('\nglobstar while', file, fr, pattern, pr, swallowee); // XXX remove this slice. Just pass the start index. if (this.matchOne(file.slice(fr), pattern.slice(pr), partial)) { // Recursive matching succeeded in the second half = > true this.debug('globstar found match!', fr, fl, swallowee); // found a match. return true; } else { // can't swallow "." or ".." ever. // can only swallow ".foo" when explicitly asked. // Encounter * * mismatch = > complete * * matching if (swallowee === '.' || swallowee === '..' || (!options.dot && swallowee.charAt(0) === '.')) { this.debug('dot detected!', file, fr, pattern, pr); break; } // ** swallows a segment, and continue. 
this.debug('globstar swallow a segment, and continue'); fr++; } } // no match was found. // However, in partial mode, we can't say this is necessarily over. // If there's more *pattern* left, then // The partial pattern matches the current best result if (partial) { // ran out of file this.debug('\n>>> no match, partial?', file, fr, pattern, pr); if (fr === fl) return true; } return false; }
The globstar feature is implemented to match as many directory fragments as possible
The third part is matching simple strings
// something other than ** // non-magic patterns just have to match exactly // patterns with magic have been turned into regexps. var hit; if (typeof p === 'string') { // 3. Match simple string if (options.nocase /* ignore case */) { hit = f.toLowerCase() === p.toLowerCase(); } else { hit = f === p; } this.debug('string match', p, f, hit); } else { // 4. Match regular expressions hit = f.match(p); this.debug('pattern match', p, f, hit); } if (!hit) return false; }
Finally, some boundary conditions are examined
// Note: ending in / means that we'll get a final "" // at the end of the pattern. This can only match a // corresponding "" at the end of the file. // If the file ends in /, then it can only match a // a pattern that ends in /, unless the pattern just // doesn't have any more for it. But, a/b/ should *not* // match "a/b/*", even though "" matches against the // [^/]*? pattern, except in partial mode, where it might // simply not be reached yet. // However, a/b/ should still satisfy a/* // now either we fell off the end of the pattern, or we're done. if (fi === fl && pi === pl) { // 5. Complete all fragment matching // ran out of pattern and filename at the same time. // an exact hit! return true; } else if (fi === fl) { // 6. Complete target path matching // ran out of file, but still had pattern left. // this is ok if we're doing the match as part of // a glob fs traversal. return partial; } else if (pi === pl) { // 7. Complete pattern matching = > target path ends with / // ran out of pattern, still have file left. // this is only acceptable if we're on the very last // empty segment of a file with a trailing slash. // a/* should match a/b/ var emptyFileEnd = fi === fl - 1 && file[fi] === ''; return emptyFileEnd; } // should be unreachable. // 7. Unknown path throw new Error('wtf?'); };
1.3 minimatch
Having gone through the complete implementation of the Minimatch core class, we can see that the minimatch function exported by default is really a proxy for Minimatch, so most of its methods are thin wrappers around the class
1.3.1 filter function
In addition to the minimatch function itself, the library provides a filter function, which builds a predicate for picking out the file strings that match
- minimatch.js (reading notes: / minimatch.js/1_1_filter.js)
minimatch.filter = filter;

/**
 * Builds a predicate suitable for `Array.prototype.filter` that tests each
 * element against `pattern` via `minimatch`.
 *
 * @param {string} pattern - glob pattern bound into the predicate.
 * @param {Object} [options] - options bound into the predicate; `{}` if falsy.
 * @returns {function(string, number, Array): *} the result of `minimatch`.
 */
function filter(pattern, options) {
  var boundOptions = options || {};

  return function (p, i, list) {
    var verdict = minimatch(p, pattern, boundOptions);
    return verdict;
  };
}
It returns a function that behaves like the match method; in other words, it uses currying to bind the pattern and options in advance
1.3.2 braceExpand curly bracket extension
The second function braceExpand is actually equivalent to the version of Minimatch
- minimatch.js (reading notes: / minimatch.js/1_2_braceExpand.js)
// Brace expansion:
// a{b,c}d -> abd acd
// a{b,}c -> abc ac
// a{0..3}d -> a0d a1d a2d a3d
// a{b,c{d,e}f}g -> abg acdfg acefg
// a{b,c}d{e,f}g -> abdeg abdfg acdeg acdfg
//
// Invalid sets are not expanded.
// a{2..}b -> a{2..}b
// a{b}c -> a{b}c

/**
 * Function-style entry point for brace expansion; forwards to `braceExpand`.
 *
 * @param {string} pattern - pattern to expand.
 * @param {Object} [options] - passed through unchanged.
 * @returns {string[]} whatever `braceExpand` returns.
 */
minimatch.braceExpand = function (pattern, options) {
  return braceExpand(pattern, options);
};
1.3.3 makeRe create regular expression
This one also mirrors the Minimatch version
- minimatch.js (reading notes: / minimatch.js/1_3_makeRe.js)
/**
 * Function-style counterpart of `Minimatch.prototype.makeRe`.
 * A fresh Minimatch instance is created per call, so no state is shared.
 *
 * @param {string} pattern - glob pattern.
 * @param {Object} [options] - matcher options; `{}` if falsy.
 * @returns {*} whatever the instance's `makeRe()` returns.
 */
minimatch.makeRe = function (pattern, options) {
  var matcher = new Minimatch(pattern, options || {});
  return matcher.makeRe();
};
1.3.4 match matching method
- minimatch.js (reading notes: / minimatch.js/1_4_match.js)
/**
 * Filters `list` down to the entries matched by `pattern`.
 *
 * @param {string[]} list - candidate strings; not modified.
 * @param {string} pattern - glob pattern.
 * @param {Object} [options] - matcher options; `{}` if falsy.
 * @returns {string[]} a new array with the matching entries. When nothing
 *   matches and `options.nonull` is set, the array contains `pattern` itself.
 */
minimatch.match = function (list, pattern, options) {
  var matcher = new Minimatch(pattern, options || {});

  var hits = list.filter(function (candidate) {
    return matcher.match(candidate);
  });

  // nonull: hand the pattern back instead of an empty result.
  if (matcher.options.nonull && hits.length === 0) {
    hits.push(pattern);
  }

  return hits;
};
1.4 default configuration
Like axios, in fact, many libraries have similar implementations, that is, they provide a defaults attribute or method to save some default configurations
1.4.1 Minimatch.defaults
- minimatch.js (reading notes: / minimatch.js/2_0_defaults.js)
/**
 * Class-level counterpart of `minimatch.defaults`.
 *
 * @param {Object} [def] - default options.
 * @returns {Function} `Minimatch` itself when `def` is falsy or has no own
 *   enumerable keys; otherwise the `Minimatch` property of the object
 *   returned by `minimatch.defaults(def)`.
 */
Minimatch.defaults = function (def) {
  var hasDefaults = Boolean(def) && Object.keys(def).length > 0;

  return hasDefaults ? minimatch.defaults(def).Minimatch : Minimatch;
};
Proxy to the minimatch method
1.4.2 minimatch.defaults
- minimatch.js (reading notes: / minimatch.js/1_0_defaults.js)
/**
 * Creates a variant of `minimatch` with default options baked in.
 *
 * @param {Object} [def] - default options, combined with the per-call
 *   options through `ext(def, options)`.
 * @returns {Function} `minimatch` itself when `def` is falsy or has no own
 *   enumerable keys; otherwise a wrapper function that also carries a
 *   `Minimatch` wrapper using the same defaults.
 */
minimatch.defaults = function (def) {
  if (!def || !Object.keys(def).length) return minimatch;

  var orig = minimatch;

  // `orig` is the matcher function itself. It has no `.minimatch` property,
  // so the wrapper must call `orig(...)`; `orig.minimatch(...)` would throw
  // "orig.minimatch is not a function" on every call.
  var m = function minimatch(p, pattern, options) {
    return orig(p, pattern, ext(def, options));
  };

  m.Minimatch = function Minimatch(pattern, options) {
    return new orig.Minimatch(pattern, ext(def, options));
  };

  return m;
};
The Minimatch library uses a return proxy to avoid polluting the global Minimatch class, but it is trivial
Other resources
Reference links
Title | Link |
---|---|
minimatch - npm | https://www.npmjs.com/package/minimatch |
minimatch - Github | https://github.com/isaacs/minimatch |
Bash Extended Globbing | https://www.linuxjournal.com/content/bash-extended-globbing |
glob (programming) - wikipedia | https://en.wikipedia.org/wiki/Glob_(programming) |
Reading notes reference
https://github.com/superfreeeee/Blog-code/tree/main/source_code_research/minimatch-3.0.4