Skip to content

Commit 5eceecd

Browse files
danez and doowb authored
Merge commit from fork
* fix: add option for maximum nested extglobs with default depth of 2 Also find other risky globs and treat them as literals * Update test/options.maxExtglobRecursion.js Co-authored-by: Brian Woodward <brian.woodward@gmail.com> * Fix bug for single character nested star-only extglobs * Move constant to constants.js and set to 0 by default --------- Co-authored-by: Brian Woodward <brian.woodward@gmail.com>
1 parent 0db7dd7 commit 5eceecd

File tree

5 files changed

+453
-0
lines changed

5 files changed

+453
-0
lines changed

.verb.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ The following options may be used with the main `picomatch()` function or any of
106106
| `literalBrackets` | `boolean` | `undefined` | When `true`, brackets in the glob pattern will be escaped so that only literal brackets will be matched. |
107107
| `matchBase` | `boolean` | `false` | Alias for `basename` |
108108
| `maxLength` | `number` | `65536` | Limit the max length of the input string. An error is thrown if the input string is longer than this value. |
109+
| `maxExtglobRecursion` | `number\|boolean` | `0` | Limit nested quantified extglobs and other risky repeated extglob forms. When the limit is exceeded, the extglob is treated as a literal string instead of being compiled to regex. Set to `false` to disable this safeguard. |
109110
| `nobrace` | `boolean` | `false` | Disable brace matching, so that `{a,b}` and `{1..3}` would be treated as literal characters. |
110111
| `nobracket` | `boolean` | `undefined` | Disable matching with regex brackets. |
111112
| `nocase` | `boolean` | `false` | Make matching case-insensitive. Equivalent to the regex `i` flag. Note that this option is overridden by the `flags` option. |
@@ -318,6 +319,13 @@ console.log(pm.isMatch('foo.bar', '!(foo).!(bar)')); // false
318319

319320
// supports nested extglobs
320321
console.log(pm.isMatch('foo.bar', '!(!(foo)).!(!(bar))')); // true
322+
323+
// risky quantified extglobs are treated literally by default
324+
console.log(pm.makeRe('+(a|aa)'));
325+
//=> /^(?:\+\(a\|aa\))$/
326+
327+
// increase the limit to allow a small amount of nested quantified extglobs
328+
console.log(pm.isMatch('aaa', '+(+(a))', { maxExtglobRecursion: 1 })); // true
321329
```
322330

323331
#### POSIX brackets

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ The following options may be used with the main `picomatch()` function or any of
345345
| `literalBrackets` | `boolean` | `undefined` | When `true`, brackets in the glob pattern will be escaped so that only literal brackets will be matched. |
346346
| `matchBase` | `boolean` | `false` | Alias for `basename` |
347347
| `maxLength` | `number` | `65536` | Limit the max length of the input string. An error is thrown if the input string is longer than this value. |
348+
| `maxExtglobRecursion` | `number\|boolean` | `0` | Limit nested quantified extglobs and other risky repeated extglob forms. When the limit is exceeded, the extglob is treated as a literal string instead of being compiled to regex. Set to `false` to disable this safeguard. |
348349
| `nobrace` | `boolean` | `false` | Disable brace matching, so that `{a,b}` and `{1..3}` would be treated as literal characters. |
349350
| `nobracket` | `boolean` | `undefined` | Disable matching with regex brackets. |
350351
| `nocase` | `boolean` | `false` | Make matching case-insensitive. Equivalent to the regex `i` flag. Note that this option is overridden by the `flags` option. |
@@ -554,6 +555,13 @@ console.log(pm.isMatch('foo.bar', '!(foo).!(bar)')); // false
554555
555556
// supports nested extglobs
556557
console.log(pm.isMatch('foo.bar', '!(!(foo)).!(!(bar))')); // true
558+
559+
// risky quantified extglobs are treated literally by default
560+
console.log(pm.makeRe('+(a|aa)'));
561+
//=> /^(?:\+\(a\|aa\))$/
562+
563+
// increase the limit to allow a small amount of nested quantified extglobs
564+
console.log(pm.isMatch('aaa', '+(+(a))', { maxExtglobRecursion: 1 })); // true
557565
```
558566
559567
#### POSIX brackets

lib/constants.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
const WIN_SLASH = '\\\\/';
44
const WIN_NO_SLASH = `[^${WIN_SLASH}]`;
55

6+
const DEFAULT_MAX_EXTGLOB_RECURSION = 0;
7+
68
/**
79
* Posix glob regex
810
*/
@@ -86,6 +88,7 @@ const POSIX_REGEX_SOURCE = {
8688
};
8789

8890
module.exports = {
91+
DEFAULT_MAX_EXTGLOB_RECURSION,
8992
MAX_LENGTH: 1024 * 64,
9093
POSIX_REGEX_SOURCE,
9194

lib/parse.js

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,277 @@ const syntaxError = (type, char) => {
4545
return `Missing ${type}: "${char}" - use "\\\\${char}" to match literal characters`;
4646
};
4747

48+
/**
 * Split an extglob body on `|` separators that sit at the top level, i.e.
 * outside of parentheses, square brackets and double quotes. Characters
 * preceded by a backslash are never treated as syntax. Every character of
 * the input other than the separators themselves is preserved verbatim.
 *
 * @param {String} input The text between the outer parens of an extglob.
 * @return {Array<String>} One entry per alternative; always at least one
 *   entry (an empty input yields `['']`).
 */

const splitTopLevel = input => {
  const segments = [];
  let current = '';
  let bracketDepth = 0;
  let parenDepth = 0;
  let inQuotes = false;
  let afterBackslash = false;

  for (const ch of input) {
    if (afterBackslash) {
      // The escaped character is copied through without being interpreted.
      afterBackslash = false;
    } else if (ch === '\\') {
      afterBackslash = true;
    } else if (ch === '"') {
      inQuotes = !inQuotes;
    } else if (!inQuotes) {
      if (ch === '[') {
        bracketDepth++;
      } else if (bracketDepth > 0) {
        // Inside a bracket expression only the closing bracket matters.
        if (ch === ']') {
          bracketDepth--;
        }
      } else if (ch === '(') {
        parenDepth++;
      } else if (parenDepth > 0) {
        // Inside nested parens a pipe belongs to the nested group.
        if (ch === ')') {
          parenDepth--;
        }
      } else if (ch === '|') {
        // Top-level separator: close the current segment and drop the pipe.
        segments.push(current);
        current = '';
        continue;
      }
    }

    current += ch;
  }

  segments.push(current);
  return segments;
};
99+
100+
/**
 * Check whether a branch is free of unescaped glob syntax. A branch is
 * "plain" when, ignoring every backslash-escaped character, it contains
 * none of `? * + @ ! ( ) [ ] { }`.
 *
 * @param {String} branch A single alternative from an extglob body.
 * @return {Boolean} `true` when no unescaped special character is present.
 */

const isPlainBranch = branch => {
  // Remove each backslash together with the character it escapes, pairing
  // left to right, so that only unescaped characters remain to be checked.
  const unescapedOnly = branch.replace(/\\[\s\S]/gu, '');
  return !/[?*+@!()[\]{}]/.test(unescapedOnly);
};
121+
122+
/**
 * Reduce a branch to the literal text it matches, when that is possible.
 * Surrounding whitespace is trimmed, an `@(...)` wrapper whose content has
 * no backslashes, parens, brackets, braces or pipes is unwrapped, and
 * backslash escapes are removed from the result.
 *
 * @param {String} branch A single alternative from an extglob body.
 * @return {String|undefined} The literal text, or `undefined` when the
 *   branch still contains unescaped glob syntax.
 */

const normalizeSimpleBranch = branch => {
  const simpleWrapper = /^@\([^\\()[\]{}|]+\)$/;
  let text = branch.trim();

  // Strip the `@(` prefix and `)` suffix for as long as the wrapper matches.
  while (simpleWrapper.test(text)) {
    text = text.slice(2, -1);
  }

  return isPlainBranch(text) ? text.replace(/\\(.)/g, '$1') : undefined;
};
141+
142+
/**
 * Detect alternations such as `a|aa` where two literal branches are both
 * made of one and the same repeated character. Two such branches always
 * overlap (the shorter is a prefix of the longer, or they are equal), which
 * gives a regex engine many equivalent ways to match the same input when
 * the alternation is repeated.
 *
 * Branches are compared by Unicode code point rather than by UTF-16 code
 * unit, so repeated characters outside the Basic Multilingual Plane
 * (e.g. emoji) are recognised as well.
 *
 * @param {Array<String>} branches Alternatives of an extglob body.
 * @return {Boolean} `true` when at least two branches repeat the same
 *   single character.
 */

const hasRepeatedCharPrefixOverlap = branches => {
  // Characters for which a "repeat of this character" branch was already seen.
  const seenChars = new Set();

  for (const branch of branches) {
    const literal = normalizeSimpleBranch(branch);

    // Skip branches that are not plain literals, and empty literals.
    if (!literal) {
      continue;
    }

    // Spread iterates by code point, keeping surrogate pairs intact.
    const points = [...literal];
    const first = points[0];

    if (!points.every(point => point === first)) {
      continue;
    }

    if (seenChars.has(first)) {
      return true;
    }

    seenChars.add(first);
  }

  return false;
};
163+
164+
/**
 * Parse a repeating extglob (`+(...)` or `*(...)`) located at the very
 * start of `pattern` and find its matching closing paren. Escaped
 * characters, double-quoted text and bracket expressions are skipped while
 * tracking paren depth.
 *
 * @param {String} pattern Text that may begin with a repeating extglob.
 * @param {Boolean} requireEnd When `true` (default), the closing paren must
 *   be the last character of `pattern`; otherwise trailing text is allowed.
 * @return {Object|undefined} `{ type, body, end }` where `type` is `'+'` or
 *   `'*'`, `body` is the text between the outer parens and `end` is the
 *   index of the closing paren. `undefined` when `pattern` does not start
 *   with a repeating extglob, is unterminated, or violates `requireEnd`.
 */

const parseRepeatedExtglob = (pattern, requireEnd = true) => {
  const kind = pattern[0];
  const isRepeatPrefix = kind === '+' || kind === '*';

  if (!isRepeatPrefix || pattern[1] !== '(') {
    return undefined;
  }

  const last = pattern.length - 1;
  let bracketDepth = 0;
  let parenDepth = 0;
  let inQuotes = false;
  let afterBackslash = false;

  // Scanning starts at the opening paren so that it raises the depth to 1.
  for (let pos = 1; pos <= last; pos++) {
    const ch = pattern[pos];

    if (afterBackslash) {
      afterBackslash = false;
    } else if (ch === '\\') {
      afterBackslash = true;
    } else if (ch === '"') {
      inQuotes = !inQuotes;
    } else if (!inQuotes) {
      if (ch === '[') {
        bracketDepth++;
      } else if (bracketDepth > 0) {
        // Parens inside a bracket expression do not affect nesting.
        if (ch === ']') {
          bracketDepth--;
        }
      } else if (ch === '(') {
        parenDepth++;
      } else if (ch === ')') {
        parenDepth--;

        if (parenDepth === 0) {
          if (requireEnd === true && pos !== last) {
            return undefined;
          }

          return { type: kind, body: pattern.slice(2, pos), end: pos };
        }
      }
    }
  }

  return undefined;
};
232+
233+
/**
 * Build a simplified regex source for a pattern that consists solely of
 * back-to-back star extglobs, each wrapping exactly one literal character,
 * e.g. `*(a)` or `*(a)*(b)`. One group becomes `a*`; several groups become
 * a character class followed by `*`, e.g. `[ab]*`.
 *
 * @param {String} pattern Candidate sequence of `*(x)` groups.
 * @return {String|undefined} The regex source, or `undefined` when any part
 *   of `pattern` is not a single-character star extglob, or when `pattern`
 *   is empty.
 */

const getStarExtglobSequenceOutput = pattern => {
  const letters = [];
  let rest = pattern;

  while (rest.length > 0) {
    const group = parseRepeatedExtglob(rest, false);

    if (!group || group.type !== '*') {
      return undefined;
    }

    const alternatives = splitTopLevel(group.body);

    if (alternatives.length !== 1) {
      return undefined;
    }

    const letter = normalizeSimpleBranch(alternatives[0].trim());

    if (!letter || letter.length !== 1) {
      return undefined;
    }

    letters.push(letter);

    // Continue right after the closing paren of the group just consumed.
    rest = rest.slice(group.end + 1);
  }

  if (letters.length === 0) {
    return undefined;
  }

  const escaped = letters.map(letter => utils.escapeRegex(letter));
  const source = escaped.length === 1 ? escaped[0] : `[${escaped.join('')}]`;

  return `${source}*`;
};
268+
269+
/**
 * Count how many repeating extglobs (`+(...)` / `*(...)`) directly wrap one
 * another, starting from the outside of `pattern`. Each level must span the
 * whole (trimmed) text of the level above it to be counted.
 *
 * @param {String} pattern Text to inspect, e.g. `+(+(a))`.
 * @return {Number} Nesting depth; `0` when `pattern` is not itself a
 *   repeating extglob.
 */

const repeatedExtglobRecursion = pattern => {
  let levels = 0;

  for (
    let current = parseRepeatedExtglob(pattern.trim());
    current;
    current = parseRepeatedExtglob(current.body.trim())
  ) {
    levels++;
  }

  return levels;
};
282+
283+
/**
 * Decide whether the body of a repeating extglob (`+(...)` / `*(...)`) is
 * risky to compile into a regex, and when possible offer an equivalent,
 * simpler regex source to use instead.
 *
 * A body is reported as risky when:
 *   - it has several alternatives and one of them is empty, consists only
 *     of `*` / `?`, or two of them repeat the same single character;
 *   - any alternative is a sequence of single-character star extglobs;
 *   - any alternative nests repeating extglobs deeper than the limit.
 *
 * `safeOutput` is only returned when it is equivalent to the WHOLE body,
 * because the caller substitutes it for the entire extglob. That is the
 * case when every alternative is a sequence of single-character star
 * extglobs: repeating `*(a)|*(b)` matches exactly the same strings as
 * `[ab]*`. If only some alternatives qualify, no `safeOutput` is returned,
 * since the output of one alternative would drop the others.
 *
 * @param {String} body Text between the outer parens of the extglob.
 * @param {Object} options Parser options; reads `maxExtglobRecursion`.
 *   `false` disables the analysis. A number sets the allowed nesting depth.
 *   Any other value (including `NaN`) selects the library default.
 * @return {Object} `{ risky: Boolean, safeOutput?: String }`.
 */

const analyzeRepeatedExtglob = (body, options) => {
  if (options.maxExtglobRecursion === false) {
    return { risky: false };
  }

  // `depth > NaN` is always false, so a NaN limit would silently switch the
  // depth check off; fall back to the default instead.
  const configured = options.maxExtglobRecursion;
  const max =
    typeof configured === 'number' && !Number.isNaN(configured)
      ? configured
      : constants.DEFAULT_MAX_EXTGLOB_RECURSION;

  const branches = splitTopLevel(body).map(branch => branch.trim());

  if (branches.length > 1) {
    if (
      branches.some(branch => branch === '') ||
      branches.some(branch => /^[*?]+$/.test(branch)) ||
      hasRepeatedCharPrefixOverlap(branches)
    ) {
      return { risky: true };
    }
  }

  const starOutputs = branches.map(branch => getStarExtglobSequenceOutput(branch));

  if (starOutputs.some(Boolean)) {
    if (!starOutputs.every(Boolean)) {
      // Mixed alternatives: still risky, but no single-branch replacement
      // can stand in for the whole extglob.
      return { risky: true };
    }

    // Every alternative is a star sequence, so their concatenation is one
    // too and yields the character class covering all alternatives.
    const safeOutput = branches.length === 1
      ? starOutputs[0]
      : getStarExtglobSequenceOutput(branches.join(''));

    return safeOutput ? { risky: true, safeOutput } : { risky: true };
  }

  if (branches.some(branch => repeatedExtglobRecursion(branch) > max)) {
    return { risky: true };
  }

  return { risky: false };
};
318+
48319
/**
49320
* Parse the given input string.
50321
* @param {String} input
@@ -225,6 +496,8 @@ const parse = (input, options) => {
225496
token.prev = prev;
226497
token.parens = state.parens;
227498
token.output = state.output;
499+
token.startIndex = state.index;
500+
token.tokensIndex = tokens.length;
228501
const output = (opts.capture ? '(' : '') + token.open;
229502

230503
increment('parens');
@@ -234,6 +507,34 @@ const parse = (input, options) => {
234507
};
235508

236509
const extglobClose = token => {
510+
const literal = input.slice(token.startIndex, state.index + 1);
511+
const body = input.slice(token.startIndex + 2, state.index);
512+
const analysis = analyzeRepeatedExtglob(body, opts);
513+
514+
if ((token.type === 'plus' || token.type === 'star') && analysis.risky) {
515+
const safeOutput = analysis.safeOutput
516+
? (token.output ? '' : ONE_CHAR) + (opts.capture ? `(${analysis.safeOutput})` : analysis.safeOutput)
517+
: undefined;
518+
const open = tokens[token.tokensIndex];
519+
520+
open.type = 'text';
521+
open.value = literal;
522+
open.output = safeOutput || utils.escapeRegex(literal);
523+
524+
for (let i = token.tokensIndex + 1; i < tokens.length; i++) {
525+
tokens[i].value = '';
526+
tokens[i].output = '';
527+
delete tokens[i].suffix;
528+
}
529+
530+
state.output = token.output + open.output;
531+
state.backtrack = true;
532+
533+
push({ type: 'paren', extglob: true, value, output: '' });
534+
decrement('parens');
535+
return;
536+
}
537+
237538
let output = token.close + (opts.capture ? ')' : '');
238539
let rest;
239540

0 commit comments

Comments
 (0)