Bonjour, je m'appelle Dmitry Karlovsky et, autrefois, j'utilisais moi aussi Perl pour le développement frontend. Regardez avec quel code concis on peut analyser, par exemple, une adresse e-mail :
/^(?:((?:[\w!#\$%&'\*\+\/=\?\^`\{\|\}~-]){1,}(?:\.(?:[\w!#\$%&'\*\+\/=\?\^`\{\|\}~-]){1,}){0,})|("(?:((?:(?:([\u{1}-\u{8}\u{b}\u{c}\u{e}-\u{1f}\u{21}\u{23}-\u{5b}\u{5d}-\u{7f}])|(\\[\u{1}-\u{9}\u{b}\u{c}\u{e}-\u{7f}]))){0,}))"))@(?:((?:[\w!#\$%&'\*\+\/=\?\^`\{\|\}~-]){1,}(?:\.(?:[\w!#\$%&'\*\+\/=\?\^`\{\|\}~-]){1,}){0,}))$/gsu
Ici, cependant, plusieurs erreurs se sont glissées. Mais ce n'est pas grave, nous les corrigerons dans la prochaine version !
Blagues à part

En grandissant, les expressions régulières perdent très vite en lisibilité. Ce n'est pas pour rien qu'il existe sur Internet des dizaines de services pour déboguer les expressions régulières. En voici quelques-uns :
- https://regex101.com/
- https://regexr.com/
- https://www.debuggex.com/
- https://extendsclass.com/regex-tester.html
, :
/(?<слово>(?<буква>\p{Script=Cyrillic})\p{Script=Cyrillic}+)/gimsu
, , . 5 :
/\t/
/\ci/
/\x09/
/\u0009/
/\u{9}/u
JS , ?
// The text contains `)`, which is inserted into the pattern below without escaping.
const text = 'lol;)'
// SyntaxError: Invalid regular expression: /^(lol;)){2}$/: Unmatched ')'
// The interpolated `)` closes the group early, leaving the template's own `)` unmatched.
const regexp = new RegExp( `^(${ text }){2}$` )
, , :
// Both patterns declare a named capture group called `type`.
const VISA = /(?<type>4)\d{12}(?:\d{3})?/
const MasterCard = /(?<type>5)[12345]\d{14}/
// Invalid regular expression: /(?<type>4)\d{12}(?:\d{3})?|(?<type>5)[12345]\d{14}/: Duplicate capture group name
// Concatenating the two sources puts both `type` groups into a single pattern.
const CardNumber = new RegExp( VISA.source + '|' + MasterCard.source )
, , , ! ?
JS. XRegExp:
, , , .
DSL, JS . PEG.js:
- .
- — .
- .
- IDE.
- 2 .
, . .
TypeScript $mol_regexp:
. - ..
- , .
const {
char_only, latin_only, decimal_only,
begin, tab, line_end, end,
repeat, repeat_greedy, from,
} = $mol_regexp
, NPM
// Import specifiers cannot be destructured in place (`import { a: { b } }` is a
// syntax error), so the `$mol_regexp` binding is imported first and the helpers
// are destructured from it afterwards.
import { $mol_regexp } from 'mol_regexp'

const {
	char_only, decimal_only,
	begin, tab, line_end,
	repeat, from,
} = $mol_regexp
// /4(?:\d){12,}?(?:(?:\d){3,}?){0,1}/gsu
const VISA = from([
'4',
repeat( decimal_only, 12 ),
[ repeat( decimal_only, 3 ) ],
])
// /5[12345](?:\d){14,}?/gsu
const MasterCard = from([
'5',
char_only( '12345' ),
repeat( decimal_only, 14 ),
])
:
- .
- .
- .
- . .
- ( ).
// /(?:(4(?:\d){12,}?(?:(?:\d){3,}?){0,1})|(5[12345](?:\d){14,}?))/gsu
const CardNumber = from({ VISA, MasterCard })
// /^(?:\t){0,}?(?:((?:(4(?:\d){12,}?(?:(?:\d){3,}?){0,1})|(5[12345](?:\d){14,}?))))(?:((?:\r){0,1}\n)|(\r))/gmsu
const CardRow = from(
[ begin, repeat( tab ), {CardNumber}, line_end ],
{ multiline: true },
)
const cards = `
3123456789012
4123456789012
551234567890123
5512345678901234
`
// Walk over every token that matching `CardRow` against `cards` produces and print it.
for( const token of cards.matchAll( CardRow ) ) {
// Tokens without `groups` are handled separately from recognised rows.
if( !token.groups ) {
// Skip tokens that consist only of whitespace.
if( !token[0].trim() ) continue
console.log( ' ', token[0].trim() )
continue
}
// Pick the label by whichever of the `VISA` / `MasterCard` groups is truthy.
const type = ''
|| token.groups.VISA && ' VISA'
|| token.groups.MasterCard && 'MasterCard'
console.log( type, token.groups.CardNumber )
}
, , . matchAll
, . $mol_regexp
. groups
. , , .
3123456789012 VISA 4123456789012 551234567890123 MasterCard 5512345678901234
:
const {
begin, end,
char_only, char_range,
latin_only, slash_back,
repeat_greedy, from,
} = $mol_regexp
// Characters allowed in an atom: `latin_only` plus the listed punctuation
const atom_char = char_only( latin_only, "!#$%&'*+/=?^`{|}~-" )
const atom = repeat_greedy( atom_char, 1 )
// An atom followed by any number of '.'-prefixed atoms
const dot_atom = from([ atom, repeat_greedy([ '.', atom ]) ])
// Characters allowed as-is inside a quoted name, given as code points and code point ranges
const name_letter = char_only(
char_range( 0x01, 0x08 ),
0x0b, 0x0c,
char_range( 0x0e, 0x1f ),
0x21,
char_range( 0x23, 0x5b ),
char_range( 0x5d, 0x7f ),
)
// Escaped pair: a backslash followed by one character from the listed ranges
const quoted_pair = from([
slash_back,
char_only(
char_range( 0x01, 0x09 ),
0x0b, 0x0c,
char_range( 0x0e, 0x7f ),
)
])
// Quoted name: a run of name letters or quoted pairs, wrapped in double quotes
const name = repeat_greedy({ name_letter, quoted_pair })
const quoted_name = from([ '"', {name}, '"' ])
// Local part: either a dot-atom or a quoted name; the domain is a dot-atom
const local_part = from({ dot_atom, quoted_name })
const domain = dot_atom
// Whole address: local part, '@' and domain, between `begin` and `end`
const mail = from([ begin, local_part, '@', {domain}, end ])
— . !
// SyntaxError: Wrong param: dot_atom=foo..bar
mail.generate({
dot_atom: 'foo..bar',
domain: 'example.org',
})
, … :
// foo.bar@example.org
mail.generate({
dot_atom: 'foo.bar',
domain: 'example.org',
})
:
// "foo..bar"@example.org
mail.generate({
name: 'foo..bar',
domain: 'example.org',
})
, "" /snjat-dvushku/s-remontom/v-vihino
. , :
const translit = char_only( latin_only, '-' )
const place = repeat_greedy( translit )
const action = from({ rent: 'snjat', buy: 'kupit' })
const repaired = from( 's-remontom' )
const rooms = from({
one_room: 'odnushku',
two_room: 'dvushku',
any_room: 'kvartiru',
})
const route = from([
begin,
'/', {action}, '-', {rooms},
[ '/', {repaired} ],
[ '/v-', {place} ],
end,
])
:
// `/snjat-dvushku/v-vihino`.matchAll(route).next().value.groups
{
action: "snjat",
rent: "snjat",
buy: "",
rooms: "dvushku",
one_room: "",
two_room: "dvushku",
any_room: "",
repaired: "",
place: "vihino",
}
, :
// /kupit-kvartiru/v-moskve
route.generate({
buy: true,
any_room: true,
repaired: false,
place: 'moskve',
})
true
, . false
, .
?
, , . 2 , . . groups
:
// time.source == "((\d{2}):(\d{2}))"
// time.groups == [ 'time', 'hours', 'minutes' ]
// Every capturing group is declared through an object key, so each one
// has a name in `time.groups`.
const time = from({
	time: [
		{ hours: repeat( decimal_only, 2 ) },
		':',
		{ minutes: repeat( decimal_only, 2 ) },
	],
})
, exec
- groups
:
{
time: '12:34',
hours: '12',
minutes: '34',
}
, , , , :
// time.source == "((\d{2}):(\d{2}))"
// time.groups == [ 'time', 'minutes' ]
// Counter-example: the raw regexp literal contributes a capturing group to
// `source` that has no entry in `time.groups`.
const time = wrong_from({
	time: [
		/(\d{2})/,
		':',
		{ minutes: repeat( decimal_only, 2 ) },
	],
})
{
time: '12:34',
hours: '34',
minutes: undefined,
}
, , "" "0", "1" . — , , :
new RegExp( '|' + regexp.source ).exec('').length - 1
, String..match
String..matchAll
exec
. , , Symbol.match
Symbol.matchAll
. :
/**
 * Yields every result that `exec` produces for `str`, scanning from index 0.
 *
 * The regexp's current `lastIndex` is saved before the scan and put back
 * afterwards, so iterating does not disturb the caller's matching position.
 *
 * @param str The string to scan.
 */
*[Symbol.matchAll] (str:string) {
	const index = this.lastIndex
	this.lastIndex = 0
	try {
		// NOTE(review): termination relies on `exec` advancing `lastIndex` on
		// every successful call — confirm against the `exec` used by this class.
		while ( this.lastIndex < str.length ) {
			const found = this.exec(str)
			if( !found ) break
			yield found
		}
	} finally {
		// Also runs when the consumer stops early (`break` / `return()`) or
		// `exec` throws, so `lastIndex` is restored in those cases too.
		this.lastIndex = index
	}
}
, , :
interface RegExpMatchArray { groups?: { [key: string]: string } }
, :
// Adds `match` / `matchAll` signatures whose result type is taken from the passed
// regexp's own `Symbol.match` / `Symbol.matchAll` method rather than being fixed.
interface String {
match< RE extends RegExp >( regexp: RE ): ReturnType<
RE[ typeof Symbol.match ]
>
matchAll< RE extends RegExp >( regexp: RE ): ReturnType<
RE[ typeof Symbol.matchAll ]
>
}
TypeScript groups
, - .
- $mol_regexp.
- — MarkedText: $hyoo_marked.
- MAM NPM.
- $mol, .
— , , , - ( ) .

