/** A single lexical token: the name of the rule that produced it and the text it matched. */
type Token<T> = {
  type: T,
  string: string,
}

/** Configuration accepted by `lexer`. */
type LexerProps<T extends string> = {
  /**
   * Maps a rule name to the pattern that recognises it. Rules are tried in
   * property order and the first one matching at the current position wins.
   * Text matched by a rule named "_skip" is consumed but produces no token.
   */
  rules: {
    [key in T]: RegExp;
  },
  // postprocess?: (token: Token<T>) =>
}

/** `Object.entries` with the key type preserved instead of widened to `string`. */
const entries = <K extends string, V>(obj: {[key in K]: V}) => {
  return Object.entries(obj) as Array<[K, V]>;
}

/**
 * Builds a tokenizer from a set of named regular-expression rules.
 *
 * @param props - Lexer configuration; see `LexerProps`.
 * @returns A function that splits its input into tokens, in input order.
 *   That function throws an `Error` when no rule matches a non-empty prefix
 *   of the remaining input.
 */
const lexer = <T extends string>(props: LexerProps<T>) => {
  const {rules} = props;

  // Compile each rule once as a sticky ("y") regex. With lastIndex reset to 0
  // a sticky regex can only match at the very start of the string it is run
  // against, so a rule that does not apply fails immediately instead of
  // scanning the whole remaining input. The "g" flag is dropped because it is
  // meaningless for prefix matching.
  const compiled = entries(rules).map(([type, matcher]): [T, RegExp] => [
    type,
    new RegExp(matcher.source, matcher.flags.replace(/[gy]/g, "") + "y"),
  ]);

  return (string: string): Array<Token<T>> => {
    const tokens: Array<Token<T>> = [];
    let str = string;

    while (str.length) {
      let matched = false;

      for (const [type, matcher] of compiled) {
        matcher.lastIndex = 0; // a previous successful exec moved it
        const text = matcher.exec(str)?.[0];
        // Skip rules that did not match, and also rules that matched the
        // empty string: consuming nothing would make this loop spin forever.
        if (!text) {
          continue;
        }
        if (type !== "_skip") {
          tokens.push({type, string: text});
        }
        str = str.slice(text.length);
        matched = true;
        break;
      }

      if (!matched) {
        const offset = string.length - str.length;
        throw new Error(
          `Lexer error: no rule matches input at offset ${offset}: ` +
            JSON.stringify(str.slice(0, 20)),
        );
      }
    }

    return tokens;
  };
}

/** Example lexer: words, a handful of punctuation characters and double-quoted strings. */
const my_lexer = lexer({
  rules: {
    symbol: /\w+/,
    punctuation: /[\.,:;()[\]{}]/,
    string_literal: /"([^"\\]|\\.)*"/,
    _skip: /\s+/,
  }
});

console.log(my_lexer(`abc def { ghi } "string literal!" "with \\"escaped\\" quote marks"`));