Comment améliorer la lisibilité des expressions régulières en PHP

Les expressions régulières sont un outil très puissant, mais l'idée reçue veut qu'une fois écrites, elles soient très difficiles à comprendre, si bien que leur maintenance n'a rien d'une expérience agréable. Vous trouverez ici des conseils pour les rendre plus lisibles.





PHP PCRE — PHP 7.3, PCRE2 — . PHP , , . PHP , ctype*, URL-, — . IDE , , , .





, , . . , - PHP ( PHP 7.3). , . , PHP, JavaScript , ES2018.





:





  • -;





  • ;





  • ;





  • ;





  • ;





  • .





-

— . -, — . :





/(foo|bar)/i
      
      



(foo|bar)



— , i



— , , /



— . /



, . , ~, !, @, #, $



. , , \



— . : {}, (), [], <>



, . , , , . - , . , , . (, ^, $,



, ), . , , , . , /



, — , , URL-. : 





preg_match('/^https:\/\/example.com\/path/i', $uri);
      
      



“#”, , :





preg_match('#^https://example.com/path#i', $uri);
      
      



- . . , .



, *



, +



, $



. , /Username: @[a-z\.0-9]/



“.” , . 





, , . , -



. , , , , .





, /[A-Z]/



, A Z. (/[A\-Z]/)



, — , A, Z . , , , . , /[AZ-]/



, /[A\-Z]/



, .





( , ), . , :





/Price: [0-9\-\$\.\+]+/
      
      







/Price: [0-9$.+-]+/
      
      



X



, ,   , . , , , , . — :





preg_match('/x\yz/X', ''); //  "y" — ,   —  
      
      



:





Warning: preg_match(): Compilation failed: unrecognized character follows \ at offset 2 in ... on line ...
      
      



, ()



, , ,   , , , .





, “Price: €24



”.





$pattern = '/Price: (£|€)(\d+)/';
$text    = 'Price: €24';
preg_match($pattern, $text, $matches);
      
      



2 , , ((£|€))



, — . , $matches



, ,  :





var_dump($matches);

array(3) {
  [0]=> string(12) "Price: €24"
  [1]=> string(3) "€"
  [2]=> string(2) "24"
}
      
      



, . , , ?:



. , , . , , (£|€)



, , : (?:£|€)



.





$pattern = '/Price: (?:£|€)(\d+)/';
$text    = 'Price: €24';
preg_match($pattern, $text, $matches);
var_dump($matches);
      
      



$matches



1 — :





array(2) {
  [0]=> string(12) "Price: €24"
  [1]=> string(2) "24"
}
      
      



, , , , , .





, . , , ,  





, , :





/Price: (?<currency>£|€)(?<price>\d+)/
      
      



, (?



, , . , (?<currency>£|€)



  — currency, (?<price>\d+)



price. , , — . , :





$pattern = '/Price: (?<currency>£|€)(?<price>\d+)/';
$text    = 'Price: €24';
preg_match($pattern, $text, $matches);
var_dump($matches);
      
      



:





array(5) {
 [0]=> string(12) "Price: €24"
["currency"]=> string(3) "€"
[1]=> string(3) "€"
["price"]=> string(2) "24"
[2]=> string(2) "24"
}
      
      



, $matches



, , .





 , , ["currency"]=> "€"



, [1]=> "€"



.





PHP , : 





Warning: preg_match(): Compilation failed: two named subpatterns have the same name (PCRE2_DUPNAMES not set) at offset ... in ... on line ....
      
      



, J



(UPD: , PHP 7.2.0, ?J



):





/Price: (?<currency>£|€)?(?<price>\d+)(?<currency>£|€)?/J
      
      



2 currency, , J



. , currency , . , , :





$pattern = '/Price: (?<currency>£|€)?(?<price>\d+)(?<currency>£|€)?/J';
$text    = 'Price: €24£';
preg_match($pattern, $text, $matches);
var_dump($matches);

array(6) {
  [0]=> string(14) "Price: €24£"
  ["currency"]=> string(2) "£"
  [1]=> string(3) "€"
  ["price"]=> string(2) "24"
  [2]=> string(2) "24"
  [3]=> string(2) "£"
}
      
      



, . , , PHP-, , . — . .





, :





$pattern  = '/Price: (?<currency>£|€)(?<price>\d+)/i';
      
      



:





$pattern  = '/Price: ';
$pattern .= '(?<currency>£|€)'; // Capture currency symbols £ or €
$pattern .= '(?<price>\d+)'; // Capture price without decimals.
$pattern .= '/i'; // Flags: Case-insensitive
      
      



. x



, , . , . :





/Price: (?<currency>£|€)(?<price>\d+)/i
      
      







/Price:  \s  (?<currency>£|€)  (?<price>\d+)  /ix
      
      



, , x



. , , , . , , \s



.





x



, #



, PHP . , . , :





/Price: (?<currency>£|€)(?<price>\d+)/i
      
      



:





/Price:           # Check for the label "Price:"
\s                # Ensure a white-space after.
(?<currency>£|€)  # Capture currency symbols £ or €
(?<price>\d+)     # Capture price without decimals.
/ix
      
      



PHP, Heredoc Nowdoc . , :





$pattern = <<<PATTERN
  /Price:           # Check for the label "Price:"
  \s                # Ensure a white-space after.
  (?<currency>£|€)  # Capture currency symbols £ or €
  (?<price>\d+)     # Capture price without decimals.
  # Flags after the closing delimiter: i = case-insensitive, x = extended
  /ix
PATTERN;

preg_match($pattern, 'Price: £42', $matches);

      
      



, , , . , — \d



,  , [0-9]



. \D



, — , [^0-9]



. , , , , , : 





/Number: [0-9][^0-9]/
      
      



:





/Number: \d\D/
      
      



, . :





  • \w



    — , , [A-Za-z0-9_]



    ,





 





/[A-Za-z0-9_]/
      
      



:





/\w/
      
      



  • [:xdigit:]



    — , [A-Fa-f0-9]



    ,









/[A-Fa-f0-9]/
      
      



:





/[[:xdigit:]]/
      
      



  • \s



    — ,  [ \t\r\n\v\f]



    ,









/[ \t\r\n\v\f]/
      
      







/\s/
      
      



/u



, , . \p{_}



, _



— . \p



"p" , \P{FOO}



, — , . , , , \p{Sc}



, , , , , . , : \p{Currency_Symbol}



, PHP.





:





$pattern = '/Price: \p{Sc}\d+/u';
      
      



:





$text = 'Price: ¥42';
      
      



, . , , , . , . , \p{Sinhala}



, \x{0D80}-\x{0DFF}



. , :





$pattern = '/[\x{0D80}-\x{0DFF}]/u';
      
      



, :





$pattern = '/\p{Sinhala}/u';
      
      



,





$text = 'පීඑච්පී.වොච්';
$contains_sinhala = preg_match($pattern, $text);
      
      



, , , !





P.S. — - . , .








All Articles