D7net
Home
Console
Upload
information
Create File
Create Folder
About
Tools
:
/
proc
/
self
/
root
/
opt
/
alt
/
ruby34
/
share
/
ri
/
system
/
Regexp
/
Filename :
cdesc-Regexp.ri
back
Copy
U:RDoc::NormalClass[iI"Regexp:ET@I"Object;To:RDoc::Markup::Document:@parts[o;;[ : @fileI"$ext/json/lib/json/add/regexp.rb;T:0@omit_headings_from_table_of_contents_below0o;;[�o:RDoc::Markup::Paragraph;[I"NA {regular expression}[https://en.wikipedia.org/wiki/Regular_expression] ;TI"Y(also called a _regexp_) is a <i>match pattern</i> (also simply called a _pattern_).;To:RDoc::Markup::BlankLine o;;[I"DA common notation for a regexp uses enclosing slash characters:;T@o:RDoc::Markup::Verbatim;[I"/foo/ ;T:@format0o;;[I"8A regexp may be applied to a <i>target string</i>; ;TI"SThe part of the string (if any) that matches the pattern is called a _match_, ;TI"%and may be said <i>to match</i>:;T@o; ;[ I"re = /red/ ;TI"Gre.match?('redirect') # => true # Match at beginning of target. ;TI"Are.match?('bored') # => true # Match at end of target. ;TI">re.match?('credit') # => true # Match within target. ;TI"3re.match?('foo') # => false # No match. ;T;0S:RDoc::Markup::Heading: leveli: textI"\Regexp Uses;T@o;;[I"A regexp may be used:;T@o:RDoc::Markup::List: @type:BULLET:@items[o:RDoc::Markup::ListItem:@label0;[ o;;[I"4To extract substrings based on a given pattern:;T@o; ;[I"(re = /foo/ # => /foo/ ;TI"5re.match('food') # => #<MatchData "foo"> ;TI"&re.match('good') # => nil ;T;0o;;[I"?See sections {Method match}[rdoc-ref:Regexp@Method+match] ;TI"6and {Operator =~}[rdoc-ref:Regexp@Operator+-3D~].;T@o;;0;[ o;;[I";To determine whether a string matches a given pattern:;T@o; ;[I"!re.match?('food') # => true ;TI""re.match?('good') # => false ;T;0o;;[I"BSee section {Method match?}[rdoc-ref:Regexp@Method+match-3F].;T@o;;0;[ o;;[I"OAs an argument for calls to certain methods in other classes and modules; ;TI"Fmost such methods accept an argument that may be either a string ;TI"(or the (much more powerful) regexp.;T@o;;[I"8See {Regexp Methods}[rdoc-ref:regexp/methods.rdoc].;T@S;;i;I"\Regexp Objects;T@o;;[I"A regexp object has:;T@o;;;;[ o;;0;[o;;[I"6A source; see {Sources}[rdoc-ref:Regexp@Sources].;T@o;;0;[o;;[I"7Several modes; see {Modes}[rdoc-ref:Regexp@Modes].;T@o;;0;[o;;[I"9A timeout; see {Timeouts}[rdoc-ref:Regexp@Timeouts].;T@o;;0;[o;;[I"=An encoding; see {Encodings}[rdoc-ref:Regexp@Encodings].;T@S;;i;I"Creating a \Regexp;T@o;;[I".A regular expression may be created with:;T@o;;;;[o;;0;[o;;[I"-A regexp literal using slash characters ;TI"L(see {Regexp Literals}[rdoc-ref:syntax/literals.rdoc@Regexp+Literals]):;T@o; ;[I"$# This is a very common usage. ;TI"/foo/ # => /foo/ ;T;0o;;0;[o;;[I""A <tt>%r</tt> regexp literal ;TI"W(see {%r: Regexp Literals}[rdoc-ref:syntax/literals.rdoc@25r-3A+Regexp+Literals]):;T@o; ;[I"6# Same delimiter character at beginning and end; ;TI"/# useful for avoiding escaping characters ;TI"2%r/name\/value pair/ # => /name\/value pair/ ;TI"2%r:name/value pair: # => /name\/value pair/ ;TI"2%r|name/value pair| # => /name\/value pair/ ;TI" ;TI"6# Certain "paired" characters can be delimiters. ;TI"%r[foo] # => /foo/ ;TI"%r{foo} # => /foo/ ;TI"%r(foo) # => /foo/ ;TI"%r<foo> # => /foo/ ;T;0o;;0;[o;;[I"\Method Regexp.new.;T@S;;i;I"\Method <tt>match</tt>;T@o;;[I"FEach of the methods Regexp#match, String#match, and Symbol#match ;TI"Greturns a MatchData object if a match was found, +nil+ otherwise; ;TI"Ieach also sets {global variables}[rdoc-ref:Regexp@Global+Variables]:;T@o; ;[I"1'food'.match(/foo/) # => #<MatchData "foo"> ;TI""'food'.match(/bar/) # => nil ;T;0S;;i;I"Operator <tt>=~</tt>;T@o;;[I"?Each of the operators Regexp#=~, String#=~, and Symbol#=~ ;TI"Freturns an integer offset if a match was found, +nil+ otherwise; ;TI"Ieach also sets {global variables}[rdoc-ref:Regexp@Global+Variables]:;T@o; ;[I"/bar/ =~ 'foo bar' # => 4 ;TI"'foo bar' =~ /bar/ # => 4 ;TI"!/baz/ =~ 'foo bar' # => nil ;T;0S;;i;I"\Method <tt>match?</tt>;T@o;;[I"IEach of the methods Regexp#match?, String#match?, and Symbol#match? ;TI"=returns +true+ if a match was found, +false+ otherwise; ;TI"Dnone sets {global variables}[rdoc-ref:Regexp@Global+Variables]:;T@o; ;[I"$'food'.match?(/foo/) # => true ;TI"%'food'.match?(/bar/) # => false ;T;0S;;i;I"Global Variables;T@o;;[I"GCertain regexp-oriented methods assign values to global variables:;T@o;;;;[o;;0;[o;;[I"G<tt>#match</tt>: see {Method match}[rdoc-ref:Regexp@Method+match].;To;;0;[o;;[I"D<tt>#=~</tt>: see {Operator =~}[rdoc-ref:Regexp@Operator+-3D~].;T@o;;[I"'The affected global variables are:;T@o;;;;[o;;0;[o;;[I"7<tt>$~</tt>: Returns a MatchData object, or +nil+.;To;;0;[o;;[I"C<tt>$&</tt>: Returns the matched part of the string, or +nil+.;To;;0;[o;;[I"T<tt>$`</tt>: Returns the part of the string to the left of the match, or +nil+.;To;;0;[o;;[I"U<tt>$'</tt>: Returns the part of the string to the right of the match, or +nil+.;To;;0;[o;;[I";<tt>$+</tt>: Returns the last group matched, or +nil+.;To;;0;[o;;[ I"F<tt>$1</tt>, <tt>$2</tt>, etc.: Returns the first, second, etc., ;TI"matched group, or +nil+. ;TI"/Note that <tt>$0</tt> is quite different; ;TI"<it returns the name of the currently executing program.;T@o;;[I"Examples:;T@o; ;[!I".# Matched string, but no matched groups. ;TI"$'foo bar bar baz'.match('bar') ;TI" $~ # => #<MatchData "bar"> ;TI"$& # => "bar" ;TI"$` # => "foo " ;TI"$' # => " bar baz" ;TI"$+ # => nil ;TI"$1 # => nil ;TI" ;TI"# Matched groups. ;TI"'/s(\w{2}).*(c)/.match('haystack') ;TI".$~ # => #<MatchData "stac" 1:"ta" 2:"c"> ;TI"$& # => "stac" ;TI"$` # => "hay" ;TI"$' # => "k" ;TI"$+ # => "c" ;TI"$1 # => "ta" ;TI"$2 # => "c" ;TI"$3 # => nil ;TI" ;TI"# No match. ;TI"'foo'.match('bar') ;TI"$~ # => nil ;TI"$& # => nil ;TI"$` # => nil ;TI"$' # => nil ;TI"$+ # => nil ;TI"$1 # => nil ;T;0o;;[I"?Note that Regexp#match?, String#match?, and Symbol#match? ;TI"!do not set global variables.;T@S;;i;I"Sources;T@o;;[I"PAs seen above, the simplest regexp uses a literal expression as its source:;T@o; ;[I"(re = /foo/ # => /foo/ ;TI"5re.match('food') # => #<MatchData "foo"> ;TI"&re.match('good') # => nil ;T;0o;;[I"5A rich collection of available _subexpressions_ ;TI"2gives the regexp great power and flexibility:;T@o;;;;[o;;0;[o;;[I"={Special characters}[rdoc-ref:Regexp@Special+Characters];To;;0;[o;;[I"7{Source literals}[rdoc-ref:Regexp@Source+Literals];To;;0;[o;;[I";{Character classes}[rdoc-ref:Regexp@Character+Classes];To;;0;[o;;[I"O{Shorthand character classes}[rdoc-ref:Regexp@Shorthand+Character+Classes];To;;0;[o;;[I"'{Anchors}[rdoc-ref:Regexp@Anchors];To;;0;[o;;[I"/{Alternation}[rdoc-ref:Regexp@Alternation];To;;0;[o;;[I"/{Quantifiers}[rdoc-ref:Regexp@Quantifiers];To;;0;[o;;[I"?{Groups and captures}[rdoc-ref:Regexp@Groups+and+Captures];To;;0;[o;;[I"'{Unicode}[rdoc-ref:Regexp@Unicode];To;;0;[o;;[I"K{POSIX Bracket Expressions}[rdoc-ref:Regexp@POSIX+Bracket+Expressions];To;;0;[o;;[I"){Comments}[rdoc-ref:Regexp@Comments];T@S;;i;I"Special Characters;T@o;;[I":\Regexp special characters, called _metacharacters_, ;TI"0have special meanings in certain contexts; ;TI"Bdepending on the context, these are sometimes metacharacters:;T@o; ;[I"#. ? - + * ^ \ | $ ( ) [ ] { } ;T;0o;;[I"=To match a metacharacter literally, backslash-escape it:;T@o; ;[ I"+# Matches one or more 'o' characters. ;TI"//o+/.match('foo') # => #<MatchData "oo"> ;TI"# Would match 'o+'. ;TI"!/o\+/.match('foo') # => nil ;T;0o;;[I"9To match a backslash literally, backslash-escape it:;T@o; ;[I"-/\./.match('\.') # => #<MatchData "."> ;TI"//\\./.match('\.') # => #<MatchData "\\."> ;T;0o;;[I"4Method Regexp.escape returns an escaped string:;T@o; ;[I"&Regexp.escape('.?-+*^\|$()[]{}') ;TI";# => "\\.\\?\\-\\+\\*\\^\\\\\\|\\$\\(\\)\\[\\]\\{\\}" ;T;0S;;i;I"Source Literals;T@o;;[I"EThe source literal largely behaves like a double-quoted string; ;TI"fsee {Double-Quoted String Literals}[rdoc-ref:syntax/literals.rdoc@Double-Quoted+String+Literals].;T@o;;[I"JIn particular, a source literal may contain interpolated expressions:;T@o; ;[ I""s = 'foo' # => "foo" ;TI""/#{s}/ # => /foo/ ;TI""/#{s.capitalize}/ # => /Foo/ ;TI" /#{2 + 2}/ # => /4/ ;T;0o;;[I"TThere are differences between an ordinary string literal and a source literal; ;TI"Tsee {Shorthand Character Classes}[rdoc-ref:Regexp@Shorthand+Character+Classes].;T@o;;;;[o;;0;[o;;[I"S<tt>\s</tt> in an ordinary string literal is equivalent to a space character; ;TI"Min a source literal, it's shorthand for matching a whitespace character.;To;;0;[o;;[I"OIn an ordinary string literal, these are (needlessly) escaped characters; ;TI"Nin a source literal, they are shorthands for various matching characters:;T@o; ;[I"\w \W \d \D \h \H \S \R ;T;0S;;i;I"Character Classes;T@o;;[I"?A <i>character class</i> is delimited by square brackets; ;TI"Vit specifies that certain characters match at a given point in the target string:;T@o; ;[ I"2# This character class will match any vowel. ;TI"re = /B[aeiou]rd/ ;TI"/re.match('Bird') # => #<MatchData "Bird"> ;TI"/re.match('Bard') # => #<MatchData "Bard"> ;TI"re.match('Byrd') # => nil ;T;0o;;[I"UA character class may contain hyphen characters to specify ranges of characters:;T@o; ;[ I"+# These regexps have the same effect. ;TI"3/[abcdef]/.match('foo') # => #<MatchData "f"> ;TI"3/[a-f]/.match('foo') # => #<MatchData "f"> ;TI"3/[a-cd-f]/.match('foo') # => #<MatchData "f"> ;T;0o;;[I"LWhen the first character of a character class is a caret (<tt>^</tt>), ;TI"[the sense of the class is inverted: it matches any character _except_ those specified.;T@o; ;[I"2/[^a-eg-z]/.match('f') # => #<MatchData "f"> ;T;0o;;[I"<A character class may contain another character class. ;TI"=By itself this isn't useful because <tt>[a-z[0-9]]</tt> ;TI"1describes the same set as <tt>[a-z0-9]</tt>.;T@o;;[I"GHowever, character classes also support the <tt>&&</tt> operator, ;TI"7which performs set intersection on its arguments. ;TI"(The two can be combined as follows:;T@o; ;[I"2/[a-w&&[^c-g]z]/ # ([a-w] AND ([^c-g] OR z)) ;T;0o;;[I"This is equivalent to:;T@o; ;[I"/[abh-w]/ ;T;0S;;i;I" Shorthand Character Classes;T@o;;[I"@Each of the following metacharacters serves as a shorthand ;TI"for a character class:;T@o;;;;[o;;0;[o;;[I":<tt>/./</tt>: Matches any character except a newline:;T@o; ;[I",/./.match('foo') # => #<MatchData "f"> ;TI"/./.match("\n") # => nil ;T;0o;;0;[o;;[I"@<tt>/./m</tt>: Matches any character, including a newline; ;TI":see {Multiline Mode}[rdoc-ref:Regexp@Multiline+Mode]:;T@o; ;[I"-/./m.match("\n") # => #<MatchData "\n"> ;T;0o;;0;[o;;[I"R<tt>/\w/</tt>: Matches a word character: equivalent to <tt>[a-zA-Z0-9_]</tt>:;T@o; ;[I"./\w/.match(' foo') # => #<MatchData "f"> ;TI"./\w/.match(' _') # => #<MatchData "_"> ;TI"!/\w/.match(' ') # => nil ;T;0o;;0;[o;;[I"W<tt>/\W/</tt>: Matches a non-word character: equivalent to <tt>[^a-zA-Z0-9_]</tt>:;T@o; ;[I"+/\W/.match(' ') # => #<MatchData " "> ;TI"/\W/.match('_') # => nil ;T;0o;;0;[o;;[I"L<tt>/\d/</tt>: Matches a digit character: equivalent to <tt>[0-9]</tt>:;T@o; ;[I"1/\d/.match('THX1138') # => #<MatchData "1"> ;TI"$/\d/.match('foo') # => nil ;T;0o;;0;[o;;[I"Q<tt>/\D/</tt>: Matches a non-digit character: equivalent to <tt>[^0-9]</tt>:;T@o; ;[I"2/\D/.match('123Jump!') # => #<MatchData "J"> ;TI"%/\D/.match('123') # => nil ;T;0o;;0;[o;;[I"U<tt>/\h/</tt>: Matches a hexdigit character: equivalent to <tt>[0-9a-fA-F]</tt>:;T@o; ;[I">/\h/.match('xyz fedcba9876543210') # => #<MatchData "f"> ;TI"1/\h/.match('xyz') # => nil ;T;0o;;0;[o;;[I"Z<tt>/\H/</tt>: Matches a non-hexdigit character: equivalent to <tt>[^0-9a-fA-F]</tt>:;T@o; ;[I"=/\H/.match('fedcba9876543210xyz') # => #<MatchData "x"> ;TI"0/\H/.match('fedcba9876543210') # => nil ;T;0o;;0;[o;;[I"[<tt>/\s/</tt>: Matches a whitespace character: equivalent to <tt>/[ \t\r\n\f\v]/</tt>:;T@o; ;[I"1/\s/.match('foo bar') # => #<MatchData " "> ;TI"$/\s/.match('foo') # => nil ;T;0o;;0;[o;;[I"`<tt>/\S/</tt>: Matches a non-whitespace character: equivalent to <tt>/[^ \t\r\n\f\v]/</tt>:;T@o; ;[I"9/\S/.match(" \t\r\n\f\v foo") # => #<MatchData "f"> ;TI",/\S/.match(" \t\r\n\f\v") # => nil ;T;0o;;0;[o;;[I"@<tt>/\R/</tt>: Matches a linebreak, platform-independently:;T@o; ;[ I"L/\R/.match("\r") # => #<MatchData "\r"> # Carriage return (CR) ;TI"D/\R/.match("\n") # => #<MatchData "\n"> # Newline (LF) ;TI"E/\R/.match("\f") # => #<MatchData "\f"> # Formfeed (FF) ;TI"I/\R/.match("\v") # => #<MatchData "\v"> # Vertical tab (VT) ;TI"</\R/.match("\r\n") # => #<MatchData "\r\n"> # CRLF ;TI"G/\R/.match("\u0085") # => #<MatchData "\u0085"> # Next line (NEL) ;TI"M/\R/.match("\u2028") # => #<MatchData "\u2028"> # Line separator (LSEP) ;TI"R/\R/.match("\u2029") # => #<MatchData "\u2029"> # Paragraph separator (PSEP) ;T;0S;;i;I"Anchors;T@o;;[I"LAn anchor is a metasequence that matches a zero-width position between ;TI"%characters in the target string.;T@o;;[I")For a subexpression with no anchor, ;TI"6matching may begin anywhere in the target string:;T@o; ;[I"9/real/.match('surrealist') # => #<MatchData "real"> ;T;0o;;[I")For a subexpression with an anchor, ;TI"/matching must begin at the matched anchor.;T@S;;i ;I"Boundary Anchors;T@o;;[I".Each of these anchors matches a boundary:;T@o;;;;[ o;;0;[o;;[I"1<tt>^</tt>: Matches the beginning of a line:;T@o; ;[I"6/^bar/.match("foo\nbar") # => #<MatchData "bar"> ;TI"'/^ar/.match("foo\nbar") # => nil ;T;0o;;0;[o;;[I"+<tt>$</tt>: Matches the end of a line:;T@o; ;[I"6/bar$/.match("foo\nbar") # => #<MatchData "bar"> ;TI"'/ba$/.match("foo\nbar") # => nil ;T;0o;;0;[o;;[I"6<tt>\A</tt>: Matches the beginning of the string:;T@o; ;[I"7/\Afoo/.match('foo bar') # => #<MatchData "foo"> ;TI"(/\Afoo/.match(' foo bar') # => nil ;T;0o;;0;[o;;[I"1<tt>\Z</tt>: Matches the end of the string; ;TI"+if string ends with a single newline, ;TI"/it matches just before the ending newline:;T@o; ;[ I":/foo\Z/.match('bar foo') # => #<MatchData "foo"> ;TI"+/foo\Z/.match('foo bar') # => nil ;TI":/foo\Z/.match("bar foo\n") # => #<MatchData "foo"> ;TI"+/foo\Z/.match("bar foo\n\n") # => nil ;T;0o;;0;[o;;[I"0<tt>\z</tt>: Matches the end of the string:;T@o; ;[I"8/foo\z/.match('bar foo') # => #<MatchData "foo"> ;TI")/foo\z/.match('foo bar') # => nil ;TI")/foo\z/.match("bar foo\n") # => nil ;T;0o;;0;[o;;[I"B<tt>\b</tt>: Matches word boundary when not inside brackets; ;TI">matches backspace (<tt>"0x08"</tt>) when inside brackets:;T@o; ;[I"6/foo\b/.match('foo bar') # => #<MatchData "foo"> ;TI"'/foo\b/.match('foobar') # => nil ;T;0o;;0;[o;;[I",<tt>\B</tt>: Matches non-word boundary:;T@o; ;[I"6/foo\B/.match('foobar') # => #<MatchData "foo"> ;TI"'/foo\B/.match('foo bar') # => nil ;T;0o;;0;[ o;;[I"2<tt>\G</tt>: Matches first matching position:;T@o;;[I"PIn methods like String#gsub and String#scan, it changes on each iteration. ;TI"}It initially matches the beginning of subject, and in each following iteration it matches where the last match finished.;T@o; ;[I"3" a b c".gsub(/ /, '_') # => "____a_b_c" ;TI"3" a b c".gsub(/\G /, '_') # => "____a b c" ;T;0o;;[I"3In methods like Regexp#match and String#match ;TI"Fthat take an optional offset, it matches where the search begins.;T@o; ;[I":"hello, world".match(/,/, 3) # => #<MatchData ","> ;TI"-"hello, world".match(/\G,/, 3) # => nil ;T;0S;;i ;I"Lookaround Anchors;T@o;;[I"Lookahead anchors:;T@o;;;;[o;;0;[o;;[I"7<tt>(?=_pat_)</tt>: Positive lookahead assertion: ;TI"8ensures that the following characters match _pat_, ;TI"Cbut doesn't include those characters in the matched substring.;T@o;;0;[o;;[I"7<tt>(?!_pat_)</tt>: Negative lookahead assertion: ;TI"Fensures that the following characters <i>do not</i> match _pat_, ;TI"Cbut doesn't include those characters in the matched substring.;T@o;;[I"Lookbehind anchors:;T@o;;;;[o;;0;[o;;[I"9<tt>(?<=_pat_)</tt>: Positive lookbehind assertion: ;TI"<ensures that the preceding characters match _pat_, but ;TI"?doesn't include those characters in the matched substring.;T@o;;0;[o;;[I"9<tt>(?<!_pat_)</tt>: Negative lookbehind assertion: ;TI"8ensures that the preceding characters do not match ;TI"J_pat_, but doesn't include those characters in the matched substring.;T@o;;[I"PThe pattern below uses positive lookahead and positive lookbehind to match ;TI"9text appearing in <tt><b></tt>...<tt></b></tt> tags ;TI"-without including the tags in the match:;T@o; ;[I"E/(?<=<b>)\w+(?=<\/b>)/.match("Fortune favors the <b>bold</b>.") ;TI"# => #<MatchData "bold"> ;T;0S;;i ;I"Match-Reset Anchor;T@o;;;;[o;;0;[ o;;[I"<tt>\K</tt>: Match reset: ;TI"Zthe matched content preceding <tt>\K</tt> in the regexp is excluded from the result. ;TI"BFor example, the following two regexps are almost equivalent:;T@o; ;[I"3/ab\Kc/.match('abc') # => #<MatchData "c"> ;TI"3/(?<=ab)c/.match('abc') # => #<MatchData "c"> ;T;0o;;[I"BThese match same string and <tt>$&</tt> equals <tt>'c'</tt>, ;TI"-while the matched position is different.;T@o;;[I"&As are the following two regexps:;T@o; ;[I"/(a)\K(b)\Kc/ ;TI"/(?<=(?<=(a))(b))c/ ;T;0S;;i;I"Alternation;T@o;;[I"PThe vertical bar metacharacter (<tt>|</tt>) may be used within parentheses ;TI"to express alternation: ;TI"Itwo or more subexpressions any of which may match the target string.;T@o;;[I"Two alternatives:;T@o; ;[I"re = /(a|b)/ ;TI"re.match('foo') # => nil ;TI"1re.match('bar') # => #<MatchData "b" 1:"b"> ;T;0o;;[I"Four alternatives:;T@o; ;[I"re = /(a|b|c|d)/ ;TI"4re.match('shazam') # => #<MatchData "a" 1:"a"> ;TI"4re.match('cold') # => #<MatchData "c" 1:"c"> ;T;0o;;[I"VEach alternative is a subexpression, and may be composed of other subexpressions:;T@o; ;[I"re = /([a-c]|[x-z])/ ;TI"1re.match('bar') # => #<MatchData "b" 1:"b"> ;TI"1re.match('ooz') # => #<MatchData "z" 1:"z"> ;T;0o;;[I"A\Method Regexp.union provides a convenient way to construct ;TI" a regexp with alternatives.;T@S;;i;I"Quantifiers;T@o;;[I"+A simple regexp matches one character:;T@o; ;[I"0/\w/.match('Hello') # => #<MatchData "H"> ;T;0o;;[I"NAn added _quantifier_ specifies how many matches are required or allowed:;T@o;;;;[o;;0;[o;;[I"-<tt>*</tt> - Matches zero or more times:;T@o; ;[I"/\w*/.match('') ;TI"# => #<MatchData ""> ;TI"/\w*/.match('x') ;TI"# => #<MatchData "x"> ;TI"/\w*/.match('xyz') ;TI"# => #<MatchData "yz"> ;T;0o;;0;[o;;[I",<tt>+</tt> - Matches one or more times:;T@o; ;[I"!/\w+/.match('') # => nil ;TI"./\w+/.match('x') # => #<MatchData "x"> ;TI"0/\w+/.match('xyz') # => #<MatchData "xyz"> ;T;0o;;0;[o;;[I",<tt>?</tt> - Matches zero or one times:;T@o; ;[I"-/\w?/.match('') # => #<MatchData ""> ;TI"./\w?/.match('x') # => #<MatchData "x"> ;TI"./\w?/.match('xyz') # => #<MatchData "x"> ;T;0o;;0;[o;;[I"9<tt>{</tt>_n_<tt>}</tt> - Matches exactly _n_ times:;T@o; ;[I"#/\w{2}/.match('') # => nil ;TI"#/\w{2}/.match('x') # => nil ;TI"1/\w{2}/.match('xyz') # => #<MatchData "xy"> ;T;0o;;0;[o;;[I"><tt>{</tt>_min_<tt>,}</tt> - Matches _min_ or more times:;T@o; ;[ I"$/\w{2,}/.match('') # => nil ;TI"$/\w{2,}/.match('x') # => nil ;TI"2/\w{2,}/.match('xy') # => #<MatchData "xy"> ;TI"3/\w{2,}/.match('xyz') # => #<MatchData "xyz"> ;T;0o;;0;[o;;[I"?<tt>{,</tt>_max_<tt>}</tt> - Matches _max_ or fewer times:;T@o; ;[I"0/\w{,2}/.match('') # => #<MatchData ""> ;TI"1/\w{,2}/.match('x') # => #<MatchData "x"> ;TI"2/\w{,2}/.match('xyz') # => #<MatchData "xy"> ;T;0o;;0;[o;;[I"0<tt>{</tt>_min_<tt>,</tt>_max_<tt>}</tt> - ;TI":Matches at least _min_ times and at most _max_ times:;T@o; ;[I"%/\w{1,2}/.match('') # => nil ;TI"2/\w{1,2}/.match('x') # => #<MatchData "x"> ;TI"3/\w{1,2}/.match('xyz') # => #<MatchData "xy"> ;T;0S;;i ;I")Greedy, Lazy, or Possessive Matching;T@o;;[I"<Quantifier matching may be greedy, lazy, or possessive:;T@o;;;;[o;;0;[o;;[ I"GIn _greedy_ matching, as many occurrences as possible are matched ;TI"8while still allowing the overall match to succeed. ;TI"=Greedy quantifiers: <tt>*</tt>, <tt>+</tt>, <tt>?</tt>, ;TI"*<tt>{min, max}</tt> and its variants.;To;;0;[o;;[I"HIn _lazy_ matching, the minimum number of occurrences are matched. ;TI">Lazy quantifiers: <tt>*?</tt>, <tt>+?</tt>, <tt>??</tt>, ;TI"+<tt>{min, max}?</tt> and its variants.;To;;0;[o;;[ I"PIn _possessive_ matching, once a match is found, there is no backtracking; ;TI"Gthat match is retained, even if it jeopardises the overall match. ;TI"DPossessive quantifiers: <tt>*+</tt>, <tt>++</tt>, <tt>?+</tt>. ;TI"YNote that <tt>{min, max}</tt> and its variants do _not_ support possessive matching.;T@o;;[I" More:;T@o;;;;[o;;0;[o;;[I")About greedy and lazy matching, see ;TI"{Choosing Minimal or Maximal Repetition}[https://doc.lagout.org/programmation/Regular%20Expressions/Regular%20Expressions%20Cookbook_%20Detailed%20Solutions%20in%20Eight%20Programming%20Languages%20%282nd%20ed.%29%20%5BGoyvaerts%20%26%20Levithan%202012-09-06%5D.pdf#tutorial-backtrack].;To;;0;[o;;[I"$About possessive matching, see ;TI"{Eliminate Needless Backtracking}[https://doc.lagout.org/programmation/Regular%20Expressions/Regular%20Expressions%20Cookbook_%20Detailed%20Solutions%20in%20Eight%20Programming%20Languages%20%282nd%20ed.%29%20%5BGoyvaerts%20%26%20Levithan%202012-09-06%5D.pdf#tutorial-backtrack].;T@S;;i;I"Groups and Captures;T@o;;[I"-A simple regexp has (at most) one match:;T@o; ;[ I"re = /\d\d\d\d-\d\d-\d\d/ ;TI"@re.match('1943-02-04') # => #<MatchData "1943-02-04"> ;TI"(re.match('1943-02-04').size # => 1 ;TI"*re.match('foo') # => nil ;T;0o;;[I"JAdding one or more pairs of parentheses, <tt>(_subexpression_)</tt>, ;TI"Hdefines _groups_, which may result in multiple matched substrings, ;TI"called _captures_:;T@o; ;[I"%re = /(\d\d\d\d)-(\d\d)-(\d\d)/ ;TI"Wre.match('1943-02-04') # => #<MatchData "1943-02-04" 1:"1943" 2:"02" 3:"04"> ;TI"(re.match('1943-02-04').size # => 4 ;T;0o;;[I"5The first capture is the entire matched string; ;TI"Cthe other captures are the matched substrings from the groups.;T@o;;[I"BA group may have a {quantifier}[rdoc-ref:Regexp@Quantifiers]:;T@o; ;[I"re = /July 4(th)?/ ;TI";re.match('July 4') # => #<MatchData "July 4" 1:nil> ;TI">re.match('July 4th') # => #<MatchData "July 4th" 1:"th"> ;TI" ;TI"re = /(foo)*/ ;TI"3re.match('') # => #<MatchData "" 1:nil> ;TI"8re.match('foo') # => #<MatchData "foo" 1:"foo"> ;TI";re.match('foofoo') # => #<MatchData "foofoo" 1:"foo"> ;TI" ;TI"re = /(foo)+/ ;TI"!re.match('') # => nil ;TI"8re.match('foo') # => #<MatchData "foo" 1:"foo"> ;TI";re.match('foofoo') # => #<MatchData "foofoo" 1:"foo"> ;T;0o;;[I"KThe returned \MatchData object gives access to the matched substrings:;T@o; ;[I"%re = /(\d\d\d\d)-(\d\d)-(\d\d)/ ;TI"!md = re.match('1943-02-04') ;TI";# => #<MatchData "1943-02-04" 1:"1943" 2:"02" 3:"04"> ;TI"md[0] # => "1943-02-04" ;TI"md[1] # => "1943" ;TI"md[2] # => "02" ;TI"md[3] # => "04" ;T;0S;;i ;I"Non-Capturing Groups;T@o;;[I"(A group may be made non-capturing; ;TI"Dit is still a group (and, for example, can have a quantifier), ;TI"Cbut its matching substring is not included among the captures.;T@o;;[I"LA non-capturing group begins with <tt>?:</tt> (inside the parentheses):;T@o; ;[I"# Don't capture the year. ;TI"'re = /(?:\d\d\d\d)-(\d\d)-(\d\d)/ ;TI"Nmd = re.match('1943-02-04') # => #<MatchData "1943-02-04" 1:"02" 2:"04"> ;T;0S;;i ;I"Backreferences;T@o;;[I"DA group match may also be referenced within the regexp itself; ;TI"2such a reference is called a +backreference+:;T@o; ;[I"</[csh](..) [csh]\1 in/.match('The cat sat in the hat') ;TI"+# => #<MatchData "cat sat in" 1:"at"> ;T;0o;;[I"AThis table shows how each subexpression in the regexp above ;TI".matches a substring in the target string:;T@o; ;[ I"I| Subexpression in Regexp | Matching Substring in Target String | ;TI"I|---------------------------|-------------------------------------| ;TI"I| First '[csh]' | Character 'c' | ;TI"I| '(..)' | First substring 'at' | ;TI"I| First space ' ' | First space character ' ' | ;TI"I| Second '[csh]' | Character 's' | ;TI"I| '\1' (backreference 'at') | Second substring 'at' | ;TI"I| ' in' | Substring ' in' | ;T;0o;;[I"/A regexp may contain any number of groups:;T@o;;;;[o;;0;[o;;[I""For a large number of groups:;T@o;;;;[o;;0;[o;;[I"OThe ordinary <tt>\\_n_</tt> notation applies only for _n_ in range (1..9).;To;;0;[o;;[I"KThe <tt>MatchData[_n_]</tt> notation applies for any non-negative _n_.;T@o;;0;[o;;[I"U<tt>\0</tt> is a special backreference, referring to the entire matched string; ;TI"2it may not be used within the regexp itself, ;TI"Mbut may be used outside it (for example, in a substitution method call):;T@o; ;[I"5'The cat sat in the hat'.gsub(/[csh]at/, '\0s') ;TI"&# => "The cats sats in the hats" ;T;0S;;i ;I"Named Captures;T@o;;[ I"@As seen above, a capture can be referred to by its number. ;TI"%A capture can also have a name, ;TI";prefixed as <tt>?<_name_></tt> or <tt>?'_name_'</tt>, ;TI"Oand the name (symbolized) may be used as an index in <tt>MatchData[]</tt>:;T@o; ;[I"<md = /\$(?<dollars>\d+)\.(?'cents'\d+)/.match("$3.67") ;TI"6# => #<MatchData "$3.67" dollars:"3" cents:"67"> ;TI"md[:dollars] # => "3" ;TI"md[:cents] # => "67" ;TI",# The capture numbers are still valid. ;TI"md[2] # => "67" ;T;0o;;[I"KWhen a regexp contains a named capture, there are no unnamed captures:;T@o; ;[I"//\$(?<dollars>\d+)\.(\d+)/.match("$3.67") ;TI"+# => #<MatchData "$3.67" dollars:"3"> ;T;0o;;[I"@A named group may be backreferenced as <tt>\k<_name_></tt>:;T@o; ;[I">/(?<vowel>[aeiou]).\k<vowel>.\k<vowel>/.match('ototomy') ;TI")# => #<MatchData "ototo" vowel:"o"> ;T;0o;;[I"AWhen (and only when) a regexp contains named capture groups ;TI"2and appears before the <tt>=~</tt> operator, ;TI"Vthe captured substrings are assigned to local variables with corresponding names:;T@o; ;[I"3/\$(?<dollars>\d+)\.(?<cents>\d+)/ =~ '$3.67' ;TI"dollars # => "3" ;TI"cents # => "67" ;T;0o;;[I"W\Method Regexp#named_captures returns a hash of the capture names and substrings; ;TI"?method Regexp#names returns an array of the capture names.;T@S;;i ;I"Atomic Grouping;T@o;;[I"MA group may be made _atomic_ with <tt>(?></tt>_subexpression_<tt>)</tt>.;T@o;;[ I"1This causes the subexpression to be matched ;TI"2independently of the rest of the expression, ;TI"Qso that the matched substring becomes fixed for the remainder of the match, ;TI"Runless the entire subexpression must be abandoned and subsequently revisited.;T@o;;[I"FIn this way _subexpression_ is treated as a non-divisible whole. ;TI"<Atomic grouping is typically used to optimise patterns ;TI"'to prevent needless backtracking .;T@o;;[I"'Example (without atomic grouping):;T@o; ;[I";/".*"/.match('"Quote"') # => #<MatchData "\"Quote\""> ;T;0o;;[I"Analysis:;T@o;;:NUMBER;[ o;;0;[o;;[I"UThe leading subexpression <tt>"</tt> in the pattern matches the first character ;TI"%<tt>"</tt> in the target string.;To;;0;[o;;[I"UThe next subexpression <tt>.*</tt> matches the next substring <tt>Quote“</tt> ;TI"+(including the trailing double-quote).;To;;0;[o;;[I"=Now there is nothing left in the target string to match ;TI";the trailing subexpression <tt>"</tt> in the pattern; ;TI"0this would cause the overall match to fail.;To;;0;[o;;[I"JThe matched substring is backtracked by one position: <tt>Quote</tt>.;To;;0;[o;;[I"TThe final subexpression <tt>"</tt> now matches the final substring <tt>"</tt>, ;TI"$and the overall match succeeds.;T@o;;[I"9If subexpression <tt>.*</tt> is grouped atomically, ;TI"?the backtracking is disabled, and the overall match fails:;T@o; ;[I"*/"(?>.*)"/.match('"Quote"') # => nil ;T;0o;;[I"-Atomic grouping can affect performance; ;TI"Jsee {Atomic Group}[https://www.regular-expressions.info/atomic.html].;T@S;;i ;I"Subexpression Calls;T@o;;[ I"ZAs seen above, a backreference number (<tt>\\_n_</tt>) or name (<tt>\k<_name_></tt>) ;TI"-gives access to a captured _substring_; ;TI"Dthe corresponding regexp _subexpression_ may also be accessed, ;TI"Ivia the number (<tt>\\g<i>n</i></tt>) or name (<tt>\g<_name_></tt>):;T@o; ;[I"3/\A(?<paren>\(\g<paren>*\))*\z/.match('(())') ;TI" # ^1 ;TI"# ^2 ;TI"# ^3 ;TI"# ^4 ;TI"# ^5 ;TI"# ^6 ;TI"# ^7 ;TI" # ^8 ;TI" # ^9 ;TI"%# ^10 ;T;0o;;[I"The pattern:;T@o;;;;[o;;0;[o;;[I"MMatches at the beginning of the string, i.e. before the first character.;To;;0;[o;;[I""Enters a named group +paren+.;To;;0;[o;;[I"=Matches the first character in the string, <tt>'('</tt>.;To;;0;[o;;[I"KCalls the +paren+ group again, i.e. recurses back to the second step.;To;;0;[o;;[I"!Re-enters the +paren+ group.;To;;0;[o;;[I">Matches the second character in the string, <tt>'('</tt>.;To;;0;[o;;[I",Attempts to call +paren+ a third time, ;TI"Jbut fails because doing so would prevent an overall successful match.;To;;0;[o;;[I">Matches the third character in the string, <tt>')'</tt>; ;TI"/marks the end of the second recursive call;To;;0;[o;;[I">Matches the fourth character in the string, <tt>')'</tt>.;To;;0;[o;;[I"#Matches the end of the string.;T@o;;[I"�See {Subexpression calls}[https://learnbyexample.github.io/Ruby_Regexp/groupings-and-backreferences.html?highlight=subexpression#subexpression-calls].;T@S;;i ;I"Conditionals;T@o;;[I"TThe conditional construct takes the form <tt>(?(_cond_)_yes_|_no_)</tt>, where:;T@o;;;;[o;;0;[o;;[I",_cond_ may be a capture number or name.;To;;0;[o;;[I"=The match to be applied is _yes_ if _cond_ is captured; ;TI"/otherwise the match to be applied is _no_.;To;;0;[o;;[I"2If not needed, <tt>|_no_</tt> may be omitted.;T@o;;[I"Examples:;T@o; ;[I"$re = /\A(foo)?(?(1)(T)|(F))\z/ ;TI"Cre.match('fooT') # => #<MatchData "fooT" 1:"foo" 2:"T" 3:nil> ;TI">re.match('F') # => #<MatchData "F" 1:nil 2:nil 3:"F"> ;TI"re.match('fooF') # => nil ;TI"re.match('T') # => nil ;TI" ;TI"2re = /\A(?<xyzzy>foo)?(?(<xyzzy>)(T)|(F))\z/ ;TI";re.match('fooT') # => #<MatchData "fooT" xyzzy:"foo"> ;TI"6re.match('F') # => #<MatchData "F" xyzzy:nil> ;TI"re.match('fooF') # => nil ;TI"re.match('T') # => nil ;T;0S;;i ;I"Absence Operator;T@o;;[I"wThe absence operator is a special group that matches anything which does _not_ match the contained subexpressions.;T@o; ;[I"?/(?~real)/.match('surrealist') # => #<MatchData "surrea"> ;TI"B/(?~real)ist/.match('surrealist') # => #<MatchData "ealist"> ;TI"3/sur(?~real)ist/.match('surrealist') # => nil ;T;0S;;i;I"Unicode;T@S;;i ;I"Unicode Properties;T@o;;[ I"GThe <tt>/\p{_property_name_}/</tt> construct (with lowercase +p+) ;TI"7matches characters using a Unicode property name, ;TI""much like a character class; ;TI"6property +Alpha+ specifies alphabetic characters:;T@o; ;[I"2/\p{Alpha}/.match('a') # => #<MatchData "a"> ;TI"%/\p{Alpha}/.match('1') # => nil ;T;0o;;[I" A property can be inverted ;TI"?by prefixing the name with a caret character (<tt>^</tt>):;T@o; ;[I"3/\p{^Alpha}/.match('1') # => #<MatchData "1"> ;TI"&/\p{^Alpha}/.match('a') # => nil ;T;0o;;[I"-Or by using <tt>\P</tt> (uppercase +P+):;T@o; ;[I"2/\P{Alpha}/.match('1') # => #<MatchData "1"> ;TI"%/\P{Alpha}/.match('a') # => nil ;T;0o;;[I"GSee {Unicode Properties}[rdoc-ref:regexp/unicode_properties.rdoc] ;TI"2for regexps based on the numerous properties.;T@o;;[I"KSome commonly-used properties correspond to POSIX bracket expressions:;T@o;;;;[o;;0;[o;;[I";<tt>/\p{Alnum}/</tt>: Alphabetic and numeric character;To;;0;[o;;[I"/<tt>/\p{Alpha}/</tt>: Alphabetic character;To;;0;[o;;[I"'<tt>/\p{Blank}/</tt>: Space or tab;To;;0;[o;;[I",<tt>/\p{Cntrl}/</tt>: Control character;To;;0;[o;;[I"!<tt>/\p{Digit}/</tt>: Digit ;TI"characters, and similar);To;;0;[o;;[I";<tt>/\p{Lower}/</tt>: Lowercase alphabetical character;To;;0;[o;;[I"T<tt>/\p{Print}/</tt>: Like <tt>\p{Graph}</tt>, but includes the space character;To;;0;[o;;[I"0<tt>/\p{Punct}/</tt>: Punctuation character;To;;0;[o;;[I"N<tt>/\p{Space}/</tt>: Whitespace character (<tt>[:blank:]</tt>, newline, ;TI"carriage return, etc.);To;;0;[o;;[I"1<tt>/\p{Upper}/</tt>: Uppercase alphabetical;To;;0;[o;;[I"S<tt>/\p{XDigit}/</tt>: Digit allowed in a hexadecimal number (i.e., 0-9a-fA-F);T@o;;[I""These are also commonly used:;T@o;;;;[o;;0;[o;;[I")<tt>/\p{Emoji}/</tt>: Unicode emoji.;To;;0;[o;;[I"_<tt>/\p{Graph}/</tt>: Characters excluding <tt>/\p{Cntrl}/</tt> and <tt>/\p{Space}/</tt>. ;TI"6Note that invisible characters under the Unicode ;TI"V{"Format"}[https://www.compart.com/en/unicode/category/Cf] category are included.;To;;0;[o;;[I"E<tt>/\p{Word}/</tt>: A member in one of these Unicode character ;TI"Fcategories (see below) or having one of these Unicode properties:;T@o;;;;[o;;0;[o;;[I"Unicode categories:;To;;;;[o;;0;[o;;[I"+Mark+ (+M+).;To;;0;[o;;[I"#<tt>Decimal Number</tt> (+Nd+);To;;0;[o;;[I"+<tt>Connector Punctuation</tt> (+Pc+).;T@o;;0;[o;;[I"Unicode properties:;To;;;;[o;;0;[o;;[I"+Alpha+;To;;0;[o;;[I"<tt>Join_Control</tt>;T@o;;0;[o;;[I"B<tt>/\p{ASCII}/</tt>: A character in the ASCII character set.;To;;0;[o;;[I"Q<tt>/\p{Any}/</tt>: Any Unicode character (including unassigned characters).;To;;0;[o;;[I"4<tt>/\p{Assigned}/</tt>: An assigned character.;T@S;;i ;I"!Unicode Character Categories;T@o;;[I"'A Unicode character category name:;T@o;;;;[o;;0;[o;;[I"9May be either its full name or its abbreviated name.;To;;0;[o;;[I"Is case-insensitive.;To;;0;[o;;[I"?Treats a space, a hyphen, and an underscore as equivalent.;T@o;;[I"Examples:;T@o; ;[ I"+/\p{lu}/ # => /\p{lu}/ ;TI"+/\p{LU}/ # => /\p{LU}/ ;TI"9/\p{Uppercase Letter}/ # => /\p{Uppercase Letter}/ ;TI"9/\p{Uppercase_Letter}/ # => /\p{Uppercase_Letter}/ ;TI"9/\p{UPPERCASE-LETTER}/ # => /\p{UPPERCASE-LETTER}/ ;T;0o;;[I"GBelow are the Unicode character category abbreviations and names. ;TI"BEnumerations of characters in each category are at the links.;T@o;;[I" Letters:;T@o;;;;[o;;0;[o;;[I"(+L+, +Letter+: +LC+, +Lm+, or +Lo+.;To;;0;[o;;[I"/+LC+, +Cased_Letter+: +Ll+, +Lt+, or +Lu+.;To;;0;[o;;[I"L{Lu, Lowercase_Letter}[https://www.compart.com/en/unicode/category/Ll].;To;;0;[o;;[I"K{Lu, Modifier_Letter}[https://www.compart.com/en/unicode/category/Lm].;To;;0;[o;;[I"H{Lu, Other_Letter}[https://www.compart.com/en/unicode/category/Lo].;To;;0;[o;;[I"L{Lu, Titlecase_Letter}[https://www.compart.com/en/unicode/category/Lt].;To;;0;[o;;[I"L{Lu, Uppercase_Letter}[https://www.compart.com/en/unicode/category/Lu].;T@o;;[I"Marks:;T@o;;;;[ o;;0;[o;;[I"&+M+, +Mark+: +Mc+, +Me+, or +Mn+.;To;;0;[o;;[I"H{Mc, Spacing_Mark}[https://www.compart.com/en/unicode/category/Mc].;To;;0;[o;;[I"J{Me, Enclosing_Mark}[https://www.compart.com/en/unicode/category/Me].;To;;0;[o;;[I"K{Mn, Nonapacing_Mark}[https://www.compart.com/en/unicode/category/Mn].;T@o;;[I" Numbers:;T@o;;;;[ o;;0;[o;;[I"(+N+, +Number+: +Nd+, +Nl+, or +No+.;To;;0;[o;;[I"J{Nd, Decimal_Number}[https://www.compart.com/en/unicode/category/Nd].;To;;0;[o;;[I"I{Nl, Letter_Number}[https://www.compart.com/en/unicode/category/Nl].;To;;0;[o;;[I"H{No, Other_Number}[https://www.compart.com/en/unicode/category/No].;T@o;;[I"Punctuation:;T@o;;;;[o;;0;[o;;[I"E+P+, +Punctuation+: +Pc+, +Pd+, +Pe+, +Pf+, +Pi+, +Po+, or +Ps+.;To;;0;[o;;[I"Q{Pc, Connector_Punctuation}[https://www.compart.com/en/unicode/category/Pc].;To;;0;[o;;[I"L{Pd, Dash_Punctuation}[https://www.compart.com/en/unicode/category/Pd].;To;;0;[o;;[I"M{Pe, Close_Punctuation}[https://www.compart.com/en/unicode/category/Pe].;To;;0;[o;;[I"M{Pf, Final_Punctuation}[https://www.compart.com/en/unicode/category/Pf].;To;;0;[o;;[I"O{Pi, Initial_Punctuation}[https://www.compart.com/en/unicode/category/Pi].;To;;0;[o;;[I"M{Po, Other_Punctuation}[https://www.compart.com/en/unicode/category/Po].;To;;0;[o;;[I"L{Ps, Open_Punctuation}[https://www.compart.com/en/unicode/category/Ps].;T@o;;0;[o;;[I".+S+, +Symbol+: +Sc+, +Sk+, +Sm+, or +So+.;To;;0;[o;;[I"K{Sc, Currency_Symbol}[https://www.compart.com/en/unicode/category/Sc].;To;;0;[o;;[I"K{Sk, Modifier_Symbol}[https://www.compart.com/en/unicode/category/Sk].;To;;0;[o;;[I"G{Sm, Math_Symbol}[https://www.compart.com/en/unicode/category/Sm].;To;;0;[o;;[I"H{So, Other_Symbol}[https://www.compart.com/en/unicode/category/So].;T@o;;0;[o;;[I"++Z+, +Separator+: +Zl+, +Zp+, or +Zs+.;To;;0;[o;;[I"J{Zl, Line_Separator}[https://www.compart.com/en/unicode/category/Zl].;To;;0;[o;;[I"O{Zp, Paragraph_Separator}[https://www.compart.com/en/unicode/category/Zp].;To;;0;[o;;[I"K{Zs, Space_Separator}[https://www.compart.com/en/unicode/category/Zs].;T@o;;0;[o;;[I"3+C+, +Other+: +Cc+, +Cf+, +Cn+, +Co+, or +Cs+.;To;;0;[o;;[I"C{Cc, Control}[https://www.compart.com/en/unicode/category/Cc].;To;;0;[o;;[I"B{Cf, Format}[https://www.compart.com/en/unicode/category/Cf].;To;;0;[o;;[I"F{Cn, Unassigned}[https://www.compart.com/en/unicode/category/Cn].;To;;0;[o;;[I"G{Co, Private_Use}[https://www.compart.com/en/unicode/category/Co].;To;;0;[o;;[I"E{Cs, Surrogate}[https://www.compart.com/en/unicode/category/Cs].;T@S;;i ;I"Unicode Scripts and Blocks;T@o;;[I"&Among the Unicode properties are:;T@o;;;;[o;;0;[o;;[I"H{Unicode scripts}[https://en.wikipedia.org/wiki/Script_(Unicode)]; ;TI"Nsee {supported scripts}[https://www.unicode.org/standard/supported.html].;To;;0;[o;;[I"D{Unicode blocks}[https://en.wikipedia.org/wiki/Unicode_block]; ;TI"Nsee {supported blocks}[http://www.unicode.org/Public/UNIDATA/Blocks.txt].;T@S;;i;I"POSIX Bracket Expressions;T@o;;[I"MA POSIX <i>bracket expression</i> is also similar to a character class. ;TI"DThese expressions provide a portable alternative to the above, ;TI"Awith the added benefit of encompassing non-ASCII characters:;T@o;;;;[o;;0;[o;;[I"E<tt>/\d/</tt> matches only ASCII decimal digits +0+ through +9+.;To;;0;[o;;[I"A<tt>/[[:digit:]]/</tt> matches any character in the Unicode ;TI".<tt>Decimal Number</tt> (+Nd+) category; ;TI"see below.;T@o;;[I"#The POSIX bracket expressions:;T@o;;;;[o;;0;[o;;[I"g<tt>/[[:digit:]]/</tt>: Matches a {Unicode digit}[https://www.compart.com/en/unicode/category/Nd]:;T@o; ;[I":/[[:digit:]]/.match('9') # => #<MatchData "9"> ;TI":/[[:digit:]]/.match("\u1fbf9") # => #<MatchData "9"> ;T;0o;;0;[o;;[I"O<tt>/[[:xdigit:]]/</tt>: Matches a digit allowed in a hexadecimal number; ;TI"(equivalent to <tt>[0-9a-fA-F]</tt>.;T@o;;0;[o;;[I"r<tt>/[[:upper:]]/</tt>: Matches a {Unicode uppercase letter}[https://www.compart.com/en/unicode/category/Lu]:;T@o; ;[I"9/[[:upper:]]/.match('A') # => #<MatchData "A"> ;TI":/[[:upper:]]/.match("\u00c6") # => #<MatchData "Æ"> ;T;0o;;0;[o;;[I"r<tt>/[[:lower:]]/</tt>: Matches a {Unicode lowercase letter}[https://www.compart.com/en/unicode/category/Ll]:;T@o; ;[I"9/[[:lower:]]/.match('a') # => #<MatchData "a"> ;TI":/[[:lower:]]/.match("\u01fd") # => #<MatchData "ǽ"> ;T;0o;;0;[o;;[I"V<tt>/[[:alpha:]]/</tt>: Matches <tt>/[[:upper:]]/</tt> or <tt>/[[:lower:]]/</tt>.;T@o;;0;[o;;[I"V<tt>/[[:alnum:]]/</tt>: Matches <tt>/[[:alpha:]]/</tt> or <tt>/[[:digit:]]/</tt>.;T@o;;0;[o;;[I"o<tt>/[[:space:]]/</tt>: Matches {Unicode space character}[https://www.compart.com/en/unicode/category/Zs]:;T@o; ;[I"9/[[:space:]]/.match(' ') # => #<MatchData " "> ;TI";/[[:space:]]/.match("\u2005") # => #<MatchData " "> ;T;0o;;0;[o;;[I"M<tt>/[[:blank:]]/</tt>: Matches <tt>/[[:space:]]/</tt> or tab character:;T@o; ;[I"9/[[:blank:]]/.match(' ') # => #<MatchData " "> ;TI";/[[:blank:]]/.match("\u2005") # => #<MatchData " "> ;TI":/[[:blank:]]/.match("\t") # => #<MatchData "\t"> ;T;0o;;0;[o;;[I"q<tt>/[[:cntrl:]]/</tt>: Matches {Unicode control character}[https://www.compart.com/en/unicode/category/Cc]:;T@o; ;[I">/[[:cntrl:]]/.match("\u0000") # => #<MatchData "\u0000"> ;TI">/[[:cntrl:]]/.match("\u009f") # => #<MatchData "\u009F"> ;T;0o;;0;[o;;[I"3<tt>/[[:graph:]]/</tt>: Matches any character ;TI"=except <tt>/[[:space:]]/</tt> or <tt>/[[:cntrl:]]/</tt>.;T@o;;0;[o;;[I"O<tt>/[[:print:]]/</tt>: Matches <tt>/[[:graph:]]/</tt> or space character.;T@o;;0;[o;;[I"y<tt>/[[:punct:]]/</tt>: Matches any (Unicode punctuation character}[https://www.compart.com/en/unicode/category/Po]:;T@o;;[I">Ruby also supports these (non-POSIX) bracket expressions:;T@o;;;;[o;;0;[o;;[I"L<tt>/[[:ascii:]]/</tt>: Matches a character in the ASCII character set.;To;;0;[o;;[I"R<tt>/[[:word:]]/</tt>: Matches a character in one of these Unicode character ;TI":categories or having one of these Unicode properties:;T@o;;;;[o;;0;[o;;[I"Unicode categories:;To;;;;[o;;0;[o;;[I"+Mark+ (+M+).;To;;0;[o;;[I"#<tt>Decimal Number</tt> (+Nd+);To;;0;[o;;[I"+<tt>Connector Punctuation</tt> (+Pc+).;T@o;;0;[o;;[I"Unicode properties:;To;;;;[o;;0;[o;;[I"+Alpha+;To;;0;[o;;[I"<tt>Join_Control</tt>;T@S;;i;I" Comments;T@o;;[ I"3A comment may be included in a regexp pattern ;TI":using the <tt>(?#</tt>_comment_<tt>)</tt> construct, ;TI";where _comment_ is a substring that is to be ignored. ;TI"1arbitrary text ignored by the regexp engine:;T@o; ;[I"F/foo(?#Ignore me)bar/.match('foobar') # => #<MatchData "foobar"> ;T;0o;;[I"CThe comment may not include an unescaped terminator character.;T@o;;[I"=See also {Extended Mode}[rdoc-ref:Regexp@Extended+Mode].;T@S;;i;I" Modes;T@o;;[I"8Each of these modifiers sets a mode for the regexp:;T@o;;;;[ o;;0;[o;;[I"%+i+: <tt>/_pattern_/i</tt> sets ;TI"D{Case-Insensitive Mode}[rdoc-ref:Regexp@Case-Insensitive+Mode].;To;;0;[o;;[I"%+m+: <tt>/_pattern_/m</tt> sets ;TI"6{Multiline Mode}[rdoc-ref:Regexp@Multiline+Mode].;To;;0;[o;;[I"%+x+: <tt>/_pattern_/x</tt> sets ;TI"4{Extended Mode}[rdoc-ref:Regexp@Extended+Mode].;To;;0;[o;;[I"%+o+: <tt>/_pattern_/o</tt> sets ;TI">{Interpolation Mode}[rdoc-ref:Regexp@Interpolation+Mode].;T@o;;[I"/Any, all, or none of these may be applied.;T@o;;[I"BModifiers +i+, +m+, and +x+ may be applied to subexpressions:;T@o;;;;[ o;;0;[o;;[I"J<tt>(?_modifier_)</tt> turns the mode "on" for ensuing subexpressions;To;;0;[o;;[I"L<tt>(?-_modifier_)</tt> turns the mode "off" for ensuing subexpressions;To;;0;[o;;[I"V<tt>(?_modifier_:_subexp_)</tt> turns the mode "on" for _subexp_ within the group;To;;0;[o;;[I"X<tt>(?-_modifier_:_subexp_)</tt> turns the mode "off" for _subexp_ within the group;T@o;;[I" Example:;T@o; ;[I"re = /(?i)te(?-i)st/ ;TI"/re.match('test') # => #<MatchData "test"> ;TI"/re.match('TEst') # => #<MatchData "TEst"> ;TI"re.match('TEST') # => nil ;TI"re.match('teST') # => nil ;TI" ;TI"re = /t(?i:e)st/ ;TI"/re.match('test') # => #<MatchData "test"> ;TI"/re.match('tEst') # => #<MatchData "tEst"> ;TI"re.match('tEST') # => nil ;T;0o;;[I"C\Method Regexp#options returns an integer whose value showing ;TI"Qthe settings for case-insensitivity mode, multiline mode, and extended mode.;T@S;;i;I"Case-Insensitive Mode;T@o;;[I",By default, a regexp is case-sensitive:;T@o; ;[I""/foo/.match('FOO') # => nil ;T;0o;;[I"0Modifier +i+ enables case-insensitive mode:;T@o; ;[I"/foo/i.match('FOO') ;TI"# => #<MatchData "FOO"> ;T;0o;;[I"K\Method Regexp#casefold? returns whether the mode is case-insensitive.;T@S;;i;I"Multiline Mode;T@o;;[I"LThe multiline-mode in Ruby is what is commonly called a "dot-all mode":;T@o;;;;[o;;0;[o;;[I"TWithout the +m+ modifier, the subexpression <tt>.</tt> does not match newlines:;T@o; ;[I"#/a.c/.match("a\nc") # => nil ;T;0o;;0;[o;;[I"&With the modifier, it does match:;T@o; ;[I"3/a.c/m.match("a\nc") # => #<MatchData "a\nc"> ;T;0o;;[I"eUnlike other languages, the modifier +m+ does not affect the anchors <tt>^</tt> and <tt>$</tt>. ;TI";These anchors always match at line-boundaries in Ruby.;T@S;;i;I"Extended Mode;T@o;;[I":Modifier +x+ enables extended mode, which means that:;T@o;;;;[o;;0;[o;;[I"9Literal white space in the pattern is to be ignored.;To;;0;[o;;[I"SCharacter <tt>#</tt> marks the remainder of its containing line as a comment, ;TI"7which is also to be ignored for matching purposes.;T@o;;[I";In extended mode, whitespace and comments may be used ;TI"&to form a self-documented regexp.;T@o;;[I"?Regexp not in extended mode (matches some Roman numerals):;T@o; ;[I"Jpattern = '^M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$' ;TI"re = /#{pattern}/ ;TI"Mre.match('MCMXLIII') # => #<MatchData "MCMXLIII" 1:"CM" 2:"XL" 3:"III"> ;T;0o;;[I"Regexp in extended mode:;T@o; ;[I"pattern = <<-EOT ;TI"1 ^ # beginning of string ;TI"3 M{0,3} # thousands - 0 to 3 Ms ;TI"O (CM|CD|D?C{0,3}) # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 Cs), ;TI"N # or 500-800 (D, followed by 0 to 3 Cs) ;TI"H (XC|XL|L?X{0,3}) # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 Xs), ;TI"H # or 50-80 (L, followed by 0 to 3 Xs) ;TI"E (IX|IV|V?I{0,3}) # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 Is), ;TI"F # or 5-8 (V, followed by 0 to 3 Is) ;TI"+ $ # end of string ;TI" EOT ;TI"re = /#{pattern}/x ;TI"Mre.match('MCMXLIII') # => #<MatchData "MCMXLIII" 1:"CM" 2:"XL" 3:"III"> ;T;0S;;i;I"Interpolation Mode;T@o;;[I"QModifier +o+ means that the first time a literal regexp with interpolations ;TI"is encountered, ;TI"Nthe generated Regexp object is saved and used for all future evaluations ;TI"of that literal regexp. ;TI">Without modifier +o+, the generated Regexp is not saved, ;TI"Lso each evaluation of the literal regexp generates a new Regexp object.;T@o;;[I"Without modifier +o+:;T@o; ;[ I"-def letters; sleep 5; /[A-Z][a-z]/; end ;TI"words = %w[abc def xyz] ;TI"start = Time.now ;TI":words.each {|word| word.match(/\A[#{letters}]+\z/) } ;TI"&Time.now - start # => 15.0174892 ;T;0o;;[I"With modifier +o+:;T@o; ;[I"start = Time.now ;TI";words.each {|word| word.match(/\A[#{letters}]+\z/o) } ;TI"%Time.now - start # => 5.0010866 ;T;0o;;[I"CNote that if the literal regexp does not have interpolations, ;TI"%the +o+ behavior is the default.;T@S;;i;I"Encodings;T@o;;[I"NBy default, a regexp with only US-ASCII characters has US-ASCII encoding:;T@o; ;[I"re = /foo/ ;TI"2re.source.encoding # => #<Encoding:US-ASCII> ;TI"2re.encoding # => #<Encoding:US-ASCII> ;T;0o;;[I"=A regular expression containing non-US-ASCII characters ;TI",is assumed to use the source encoding. ;TI"@This can be overridden with one of the following modifiers.;T@o;;;;[ o;;0;[o;;[I"I<tt>/_pat_/n</tt>: US-ASCII if only containing US-ASCII characters, ;TI"otherwise ASCII-8BIT:;T@o; ;[I"3/foo/n.encoding # => #<Encoding:US-ASCII> ;TI"5/foo\xff/n.encoding # => #<Encoding:ASCII-8BIT> ;TI"3/foo\x7f/n.encoding # => #<Encoding:US-ASCII> ;T;0o;;0;[o;;[I"<tt>/_pat_/u</tt>: UTF-8;T@o; ;[I",/foo/u.encoding # => #<Encoding:UTF-8> ;T;0o;;0;[o;;[I"<tt>/_pat_/e</tt>: EUC-JP;T@o; ;[I"-/foo/e.encoding # => #<Encoding:EUC-JP> ;T;0o;;0;[o;;[I"#<tt>/_pat_/s</tt>: Windows-31J;T@o; ;[I"2/foo/s.encoding # => #<Encoding:Windows-31J> ;T;0o;;[I"AA regexp can be matched against a target string when either:;T@o;;;;[o;;0;[o;;[I"!They have the same encoding.;To;;0;[o;;[ I">The regexp's encoding is a fixed encoding and the string ;TI"%contains only ASCII characters. ;TI">Method Regexp#fixed_encoding? returns whether the regexp ;TI"!has a <i>fixed</i> encoding.;T@o;;[I"?If a match between incompatible encodings is attempted an ;TI"?<tt>Encoding::CompatibilityError</tt> exception is raised.;T@o;;[I" Example:;T@o; ;[ I"6re = eval("# encoding: ISO-8859-1\n/foo\\xff?/") ;TI"=re.encoding # => #<Encoding:ISO-8859-1> ;TI"(re =~ "foo".encode("UTF-8") # => 0 ;TI"Gre =~ "foo\u0100" # Raises Encoding::CompatibilityError ;T;0o;;[I"MThe encoding may be explicitly fixed by including Regexp::FIXEDENCODING ;TI"+in the second argument for Regexp.new:;T@o; ;[I"(# Regexp with encoding ISO-8859-1. ;TI"Nre = Regexp.new("a".force_encoding('iso-8859-1'), Regexp::FIXEDENCODING) ;TI".re.encoding # => #<Encoding:ISO-8859-1> ;TI"*# Target string with encoding UTF-8. ;TI"s = "a\u3042" ;TI")s.encoding # => #<Encoding:UTF-8> ;TI"9re.match(s) # Raises Encoding::CompatibilityError. ;T;0S;;i;I" Timeouts;T@o;;[I"PWhen either a regexp source or a target string comes from untrusted input, ;TI"?malicious values could become a denial-of-service attack; ;TI"<to prevent such an attack, it is wise to set a timeout.;T@o;;[I"$\Regexp has two timeout values:;T@o;;;;[o;;0;[o;;[I"QA class default timeout, used for a regexp whose instance timeout is +nil+; ;TI"Othis default is initially +nil+, and may be set by method Regexp.timeout=:;T@o; ;[I"Regexp.timeout # => nil ;TI"Regexp.timeout = 3.0 ;TI"Regexp.timeout # => 3.0 ;T;0o;;0;[o;;[I"OAn instance timeout, which defaults to +nil+ and may be set in Regexp.new:;T@o; ;[I"*re = Regexp.new('foo', timeout: 5.0) ;TI"re.timeout # => 5.0 ;T;0o;;[I"RWhen regexp.timeout is +nil+, the timeout "falls through" to Regexp.timeout; ;TI"Fwhen regexp.timeout is non-+nil+, that value controls timing out:;T@o; ;[ I"S| regexp.timeout Value | Regexp.timeout Value | Result | ;TI"S|----------------------|----------------------|-----------------------------| ;TI"S| nil | nil | Never times out. | ;TI"S| nil | Float | Times out in Float seconds. | ;TI"S| Float | Any | Times out in Float seconds. | ;T;0S;;i;I"Optimization;T@o;;[I":For certain values of the pattern and target string, ;TI"Ymatching time can grow polynomially or exponentially in relation to the input size; ;TI"�the potential vulnerability arising from this is the {regular expression denial-of-service}[https://en.wikipedia.org/wiki/ReDoS] (ReDoS) attack.;T@o;;[I"J\Regexp matching can apply an optimization to prevent ReDoS attacks. ;TI"lWhen the optimization is applied, matching time increases linearly (not polynomially or exponentially) ;TI"Gin relation to the input size, and a ReDoS attach is not possible.;T@o;;[I"FThis optimization is applied if the pattern meets these criteria:;T@o;;;;[ o;;0;[o;;[I"No backreferences.;To;;0;[o;;[I"No subexpression calls.;To;;0;[o;;[I"3No nested lookaround anchors or atomic groups.;To;;0;[o;;[I"GNo nested quantifiers with counting (i.e. no nested <tt>{n}</tt>, ;TI"O<tt>{min,}</tt>, <tt>{,max}</tt>, or <tt>{min,max}</tt> style quantifiers);T@o;;[I"`You can use method Regexp.linear_time? to determine whether a pattern meets these criteria:;T@o; ;[I"-Regexp.linear_time?(/a*/) # => true ;TI"-Regexp.linear_time?('a*') # => true ;TI".Regexp.linear_time?(/(a*)\1/) # => false ;T;0o;;[I"UHowever, an untrusted source may not be safe even if the method returns +true+, ;TI"[because the optimization uses memoization (which may invoke large memory consumption).;T@S;;i;I"References;T@o;;[I"Read (online PDF books):;T@o;;;;[o;;0;[o;;[I"�{Mastering Regular Expressions}[https://ia902508.us.archive.org/10/items/allitebooks-02/Mastering%20Regular%20Expressions%2C%203rd%20Edition.pdf] ;TI"by Jeffrey E.F. Friedl.;To;;0;[o;;[I"{Regular Expressions Cookbook}[https://doc.lagout.org/programmation/Regular%20Expressions/Regular%20Expressions%20Cookbook_%20Detailed%20Solutions%20in%20Eight%20Programming%20Languages%20%282nd%20ed.%29%20%5BGoyvaerts%20%26%20Levithan%202012-09-06%5D.pdf] ;TI"(by Jan Goyvaerts & Steven Levithan.;T@o;;[I"/Explore, test (interactive online editor):;T@o;;;;[o;;0;[o;;[I"%{Rubular}[https://rubular.com/].;T; I" re.c;T; 0; 0; 0[ [ U:RDoc::Constant[i I"NOENCODING;TI"Regexp::NOENCODING;T:public0o;;[o;;[I"&see Regexp.options and Regexp.new;T@; @, ; 0I",ext/psych/lib/psych/visitors/to_ruby.rb;T@cRDoc::NormalClass0U;[i I"IGNORECASE;TI"Regexp::IGNORECASE;T;0o;;[o;;[I"&see Regexp.options and Regexp.new;T@; @, ; 0@, @@9 0U;[i I" EXTENDED;TI"Regexp::EXTENDED;T;0o;;[o;;[I"&see Regexp.options and Regexp.new;T@; @, ; 0@, @@9 0U;[i I"MULTILINE;TI"Regexp::MULTILINE;T;0o;;[o;;[I"&see Regexp.options and Regexp.new;T@; @, ; 0@, @@9 0U;[i I"FIXEDENCODING;TI"Regexp::FIXEDENCODING;T;0o;;[o;;[I"&see Regexp.options and Regexp.new;T@; @, ; 0@, @@9 0[ [[I" class;T[[;[[I"compile;TI" re.c;T[I"escape;T@g [I"json_create;TI"$ext/json/lib/json/add/regexp.rb;T[I"last_match;T@g [I"linear_time?;T@g [I"new;T@g [I" quote;T@g [I"timeout;T@g [I" timeout=;T@g [I"try_convert;T@g [I" union;T@g [:protected[ [:private[ [I" instance;T[[;[[I"==;T@g [I"===;T@g [I"=~;T@g [I"~;T@g [I"as_json;T@l [I"casefold?;T@g [I" encoding;T@g [I" eql?;T@g [I"fixed_encoding?;T@g [I" hash;T@g [I"inspect;T@g [I" match;T@g [I"match?;T@g [I"named_captures;T@g [I" names;T@g [I"options;T@g [I"source;T@g [I"timeout;T@g [I"to_json;T@l [I" to_s;T@g [;[ [;[ [ [U:RDoc::Context::Section[i 0o;;[ ; 0; 0[@ @8 I"lib/optparse.rb;TI"lib/prism/node_ext.rb;TI"0lib/rubygems/commands/dependency_command.rb;TI"lib/uri/rfc2396_parser.rb;T@, @, cRDoc::TopLevel