D7net
Home
Console
Upload
information
Create File
Create Folder
About
Tools
:
/
opt
/
alt
/
ruby34
/
share
/
ri
/
system
/
Filename :
page-encodings_rdoc.ri
back
Copy
U:RDoc::TopLevel[ i I"encodings.rdoc:ETcRDoc::Parser::Simpleo:RDoc::Markup::Document:@parts[�S:RDoc::Markup::Heading: leveli: textI"Encodings;To:RDoc::Markup::BlankLine S; ; i;I"The Basics;T@ o:RDoc::Markup::Paragraph;[I"OA {character encoding}[https://en.wikipedia.org/wiki/Character_encoding], ;TI"9often shortened to _encoding_, is a mapping between:;T@ o:RDoc::Markup::List: @type:BULLET:@items[o:RDoc::Markup::ListItem:@label0;[o; ;[I"HA sequence of 8-bit bytes (each byte in the range <tt>0..255</tt>).;To;;0;[o; ;[I",Characters in a specific character set.;T@ o; ;[I"9Some character sets contain only 1-byte characters; ;TI"^{US-ASCII}[https://en.wikipedia.org/wiki/ASCII], for example, has 256 1-byte characters. ;TI"WThis string, encoded in US-ASCII, has six characters that are stored as six bytes:;T@ o:RDoc::Markup::Verbatim;[I"4s = 'Hello!'.encode('US-ASCII') # => "Hello!" ;TI"@s.encoding # => #<Encoding:US-ASCII> ;TI"Hs.bytes # => [72, 101, 108, 108, 111, 33] ;T:@format0o; ;[ I"8Other encodings may involve multi-byte characters. ;TI"@{UTF-8}[https://en.wikipedia.org/wiki/UTF-8], for example, ;TI"Sencodes more than one million characters, encoding each in one to four bytes. ;TI"KThe lowest-valued of these characters correspond to ASCII characters, ;TI""and so are 1-byte characters:;T@ o;;[I" s = 'Hello!' # => "Hello!" ;TI"4s.bytes # => [72, 101, 108, 108, 111, 33] ;T;0o; ;[I"?Other characters, such as the Euro symbol, are multi-byte:;T@ o;;[I"s = "\u20ac" # => "€" ;TI"'s.bytes # => [226, 130, 172] ;T;0S; ; i;I"The \Encoding \Class;T@ S; ; i;I"\Encoding Objects;T@ o; ;[I"ARuby encodings are defined by constants in class \Encoding. ;TI"NThere can be only one instance of \Encoding for each of these constants. ;TI"Y\Method Encoding.list returns an array of \Encoding objects (one for each constant):;T@ o;;[ I"(Encoding.list.size # => 103 ;TI"-Encoding.list.first.class # => Encoding ;TI"Encoding.list.take(3) ;TI"L# => [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>, #<Encoding:US-ASCII>] ;T;0S; ; i;I"Names and Aliases;T@ o; ;[I"<\Method Encoding#name returns the name of an \Encoding:;T@ o;;[I"2Encoding::ASCII_8BIT.name # => "ASCII-8BIT" ;TI"3Encoding::WINDOWS_31J.name # => "Windows-31J" ;T;0o; ;[I"3An \Encoding object has zero or more aliases; ;TI"Pmethod Encoding#names returns an array containing the name and all aliases:;T@ o;;[ I" Encoding::ASCII_8BIT.names ;TI"## => ["ASCII-8BIT", "BINARY"] ;TI"!Encoding::WINDOWS_31J.names ;TI"A#=> ["Windows-31J", "CP932", "csWindows31J", "SJIS", "PCK"] ;T;0o; ;[I"E\Method Encoding.aliases returns a hash of all alias/name pairs:;T@ o;;[I"#Encoding.aliases.size # => 71 ;TI"Encoding.aliases.take(3) ;TI"O# => [["BINARY", "ASCII-8BIT"], ["CP437", "IBM437"], ["CP720", "IBM720"]] ;T;0o; ;[I"W\Method Encoding.name_list returns an array of all the encoding names and aliases:;T@ o;;[I"&Encoding.name_list.size # => 175 ;TI" Encoding.name_list.take(3) ;TI".# => ["ASCII-8BIT", "UTF-8", "US-ASCII"] ;T;0o; ;[I"A\Method +name_list+ returns more entries than method +list+ ;TI":because it includes both the names and their aliases.;T@ o; ;[I"Y\Method Encoding.find returns the \Encoding for a given name or alias, if it exists:;T@ o;;[I"?Encoding.find("US-ASCII") # => #<Encoding:US-ASCII> ;TI"3Encoding.find("US-ASCII").class # => Encoding ;T;0S; ; i;I"Default Encodings;T@ o; ;[I"D\Method Encoding.find, above, also returns a default \Encoding ;TI"%for each of these special names:;T@ o;;;;[ o;;0;[o; ;[I"0+external+: the default external \Encoding:;T@ o;;[I"6Encoding.find("external") # => #<Encoding:UTF-8> ;T;0o;;0;[o; ;[I"?+internal+: the default internal \Encoding (may be +nil+):;T@ o;;[I"(Encoding.find("internal") # => nil ;T;0o;;0;[o; ;[I"G+locale+: the default \Encoding for a string from the environment:;T@ o;;[I"=Encoding.find("locale") # => #<Encoding:UTF-8> # Linux ;TI"?Encoding.find("locale") # => #<Encoding:IBM437> # Windows ;T;0o;;0;[o; ;[I"J+filesystem+: the default \Encoding for a string from the filesystem:;T@ o;;[I"8Encoding.find("filesystem") # => #<Encoding:UTF-8> ;T;0o; ;[I"N\Method Encoding.default_external returns the default external \Encoding:;T@ o;;[I"6Encoding.default_external # => #<Encoding:UTF-8> ;T;0o; ;[I"8\Method Encoding.default_external= sets that value:;T@ o;;[I"<Encoding.default_external = 'US-ASCII' # => "US-ASCII" ;TI"FEncoding.default_external # => #<Encoding:US-ASCII> ;T;0o; ;[I"N\Method Encoding.default_internal returns the default internal \Encoding:;T@ o;;[I"(Encoding.default_internal # => nil ;T;0o; ;[I"L\Method Encoding.default_internal= sets the default internal \Encoding:;T@ o;;[I"<Encoding.default_internal = 'US-ASCII' # => "US-ASCII" ;TI"FEncoding.default_internal # => #<Encoding:US-ASCII> ;T;0S; ; i;I"Compatible Encodings;T@ o; ;[I"\\Method Encoding.compatible? returns whether two given objects are encoding-compatible ;TI"2(that is, whether they can be concatenated); ;TI"Preturns the \Encoding of the concatenated string, or +nil+ if incompatible:;T@ o;;[I"!rus = "\u{442 435 441 442}" ;TI"eng = 'text' ;TI";Encoding.compatible?(rus, eng) # => #<Encoding:UTF-8> ;TI" ;TI"Bs0 = "\xa1\xa1".force_encoding('iso-8859-1') # => "\xA1\xA1" ;TI"Bs1 = "\xa1\xa1".force_encoding('euc-jp') # => "\x{A1A1}" ;TI";Encoding.compatible?(s0, s1) # => nil ;T;0S; ; i;I"\String \Encoding;T@ o; ;[I"RA Ruby String object has an encoding that is an instance of class \Encoding. ;TI"=The encoding may be retrieved by method String#encoding.;T@ o; ;[I"GThe default encoding for a string literal is the script encoding; ;TI"Dsee {Script Encoding}[rdoc-ref:encodings.rdoc@Script+Encoding].;T@ o;;[I")'s'.encoding # => #<Encoding:UTF-8> ;T;0o; ;[I"IThe default encoding for a string created with method String.new is:;T@ o;;;;[o;;0;[o; ;[I"@For a \String object argument, the encoding of that string.;To;;0;[o; ;[I"0For a string literal, the script encoding; ;TI"Dsee {Script Encoding}[rdoc-ref:encodings.rdoc@Script+Encoding].;T@ o; ;[I"3In either case, any encoding may be specified:;T@ o;;[ I";s = String.new(encoding: 'UTF-8') # => "" ;TI"Js.encoding # => #<Encoding:UTF-8> ;TI">s = String.new('foo', encoding: 'ASCII-8BIT') # => "foo" ;TI"Os.encoding # => #<Encoding:ASCII-8BIT> ;T;0o; ;[I".The encoding for a string may be changed:;T@ o;;[ I"4s = "R\xC3\xA9sum\xC3\xA9" # => "Résumé" ;TI";s.encoding # => #<Encoding:UTF-8> ;TI"@s.force_encoding('ISO-8859-1') # => "R\xC3\xA9sum\xC3\xA9" ;TI"@s.encoding # => #<Encoding:ISO-8859-1> ;T;0o; ;[I"NChanging the assigned encoding does not alter the content of the string; ;TI">it changes only the way the content is to be interpreted:;T@ o;;[I";s # => "R\xC3\xA9sum\xC3\xA9" ;TI"/s.force_encoding('UTF-8') # => "Résumé" ;T;0o; ;[I"9The actual content of a string may also be altered; ;TI"=see {Transcoding a String}[#label-Transcoding+a+String].;T@ o; ;[I"/Here are a couple of useful query methods:;T@ o;;[I":s = "abc".force_encoding("UTF-8") # => "abc" ;TI"9s.ascii_only? # => true ;TI"=s = "abc\u{6666}".force_encoding("UTF-8") # => "abc晦" ;TI":s.ascii_only? # => false ;TI" ;TI"6s = "\xc2\xa1".force_encoding("UTF-8") # => "¡" ;TI"6s.valid_encoding? # => true ;TI"8s = "\xc2".force_encoding("UTF-8") # => "\xC2" ;TI"7s.valid_encoding? # => false ;T;0S; ; i;I""\Symbol and \Regexp Encodings;T@ o; ;[I"JThe string stored in a Symbol or Regexp object also has an encoding; ;TI"Pthe encoding may be retrieved by method Symbol#encoding or Regexp#encoding.;T@ o; ;[I"1The default encoding for these, however, is:;T@ o;;;;[o;;0;[o; ;[I".US-ASCII, if all characters are US-ASCII.;To;;0;[o; ;[I"%The script encoding, otherwise; ;TI"Dsee (Script Encoding)[rdoc-ref:encodings.rdoc@Script+Encoding].;T@ S; ; i;I"Filesystem \Encoding;T@ o; ;[I"WThe filesystem encoding is the default \Encoding for a string from the filesystem:;T@ o;;[I"8Encoding.find("filesystem") # => #<Encoding:UTF-8> ;T;0S; ; i;I"Locale \Encoding;T@ o; ;[I"TThe locale encoding is the default encoding for a string from the environment, ;TI"$other than from the filesystem:;T@ o;;[I"5Encoding.find('locale') # => #<Encoding:IBM437> ;T;0S; ; i;I"Stream Encodings;T@ o; ;[I"WCertain stream objects can have two encodings; these objects include instances of:;T@ o;;;;[ o;;0;[o; ;[I"IO.;To;;0;[o; ;[I" File.;To;;0;[o; ;[I" ARGF.;To;;0;[o; ;[I"StringIO.;T@ o; ;[I"The two encodings are:;T@ o;;;;[o;;0;[o; ;[I"KAn _external_ _encoding_, which identifies the encoding of the stream.;To;;0;[o; ;[I"KAn _internal_ _encoding_, which (if not +nil+) specifies the encoding ;TI";to be used for the string constructed from the stream.;T@ S; ; i;I"External \Encoding;T@ o; ;[I"SThe external encoding, which is an \Encoding object, specifies how bytes read ;TI"9from the stream are to be interpreted as characters.;T@ o; ;[I"&The default external encoding is:;T@ o;;;;[o;;0;[o; ;[I"UTF-8 for a text stream.;To;;0;[o; ;[I"$ASCII-8BIT for a binary stream.;T@ o; ;[I"TThe default external encoding is returned by method Encoding.default_external, ;TI"and may be set by:;T@ o;;;;[o;;0;[o; ;[I"KRuby command-line options <tt>--external_encoding</tt> or <tt>-E</tt>.;T@ o; ;[I"]You can also set the default external encoding using method Encoding.default_external=, ;TI"Rbut doing so may cause problems; strings created before and after the change ;TI"$may have a different encodings.;T@ o; ;[I"EFor an \IO or \File object, the external encoding may be set by:;T@ o;;;;[o;;0;[o; ;[I"QOpen options +external_encoding+ or +encoding+, when the object is created; ;TI"2see {Open Options}[rdoc-ref:IO@Open+Options].;T@ o; ;[I"XFor an \IO, \File, \ARGF, or \StringIO object, the external encoding may be set by:;T@ o;;;;[o;;0;[o; ;[I"I\Methods +set_encoding+ or (except for \ARGF) +set_encoding_by_bom+.;T@ S; ; i;I"Internal \Encoding;T@ o; ;[ I"CThe internal encoding, which is an \Encoding object or +nil+, ;TI"3specifies how characters read from the stream ;TI"Aare to be converted to characters in the internal encoding; ;TI"Uthose characters become a string whose encoding is set to the internal encoding.;T@ o; ;[I"=The default internal encoding is +nil+ (no conversion). ;TI"9It is returned by method Encoding.default_internal, ;TI"and may be set by:;T@ o;;;;[o;;0;[o; ;[I"KRuby command-line options <tt>--internal_encoding</tt> or <tt>-E</tt>.;T@ o; ;[I"]You can also set the default internal encoding using method Encoding.default_internal=, ;TI"Rbut doing so may cause problems; strings created before and after the change ;TI"$may have a different encodings.;T@ o; ;[I"EFor an \IO or \File object, the internal encoding may be set by:;T@ o;;;;[o;;0;[o; ;[I"QOpen options +internal_encoding+ or +encoding+, when the object is created; ;TI"2see {Open Options}[rdoc-ref:IO@Open+Options].;T@ o; ;[I"XFor an \IO, \File, \ARGF, or \StringIO object, the internal encoding may be set by:;T@ o;;;;[o;;0;[o; ;[I"\Method +set_encoding+.;T@ S; ; i;I"Script \Encoding;T@ o; ;[I"DA Ruby script has a script encoding, which may be retrieved by:;T@ o;;[I")__ENCODING__ # => #<Encoding:UTF-8> ;T;0o; ;[ I"+The default script encoding is UTF-8; ;TI"Ia Ruby source file may set its script encoding with a magic comment ;TI"Yon the first line of the file (or second line, if there is a shebang on the first). ;TI"?The comment must contain the word +coding+ or +encoding+, ;TI"?followed by a colon, space and the Encoding name or alias:;T@ o;;[I"# encoding: ISO-8859-1 ;TI"-__ENCODING__ #=> #<Encoding:ISO-8859-1> ;T;0S; ; i;I"Transcoding;T@ o; ;[I"G_Transcoding_ is the process of changing a sequence of characters ;TI""from one encoding to another.;T@ o; ;[I"9As far as possible, the characters remain the same, ;TI"2but the bytes that represent them may change.;T@ o; ;[I"XThe handling for characters that cannot be represented in the destination encoding ;TI"+may be specified by @Encoding+Options.;T@ S; ; i;I"Transcoding a \String;T@ o; ;[I"/Each of these methods transcodes a string:;T@ o;;;;[o;;0;[o; ;[I"8String#encode: Transcodes +self+ into a new string ;TI".according to given encodings and options.;To;;0;[o; ;[I"HString#encode!: Like String#encode, but transcodes +self+ in place.;To;;0;[o; ;[I"7String#scrub: Transcodes +self+ into a new string ;TI"Tby replacing invalid byte sequences with a given or default replacement string.;To;;0;[o; ;[I"FString#scrub!: Like String#scrub, but transcodes +self+ in place.;To;;0;[o; ;[I"CString#unicode_normalize: Transcodes +self+ into a new string ;TI"(according to Unicode normalization.;To;;0;[o; ;[I"?String#unicode_normalize!: Like String#unicode_normalize, ;TI"$but transcodes +self+ in place.;T@ S; ; i;I"Transcoding a Stream;T@ o; ;[I"3Each of these methods may transcode a stream; ;TI"Gwhether it does so depends on the external and internal encodings:;T@ o;;;;[ o;;0;[o; ;[I"?IO.foreach: Yields each line of given stream to the block.;To;;0;[o; ;[I"XIO.new: Creates and returns a new \IO object for the given integer file descriptor.;To;;0;[o; ;[I"'IO.open: Creates a new \IO object.;To;;0;[o; ;[I"HIO.pipe: Creates a connected pair of reader and writer \IO objects.;To;;0;[o; ;[I"CIO.popen: Creates an \IO object to interact with a subprocess.;To;;0;[o; ;[I"SIO.read: Returns a string with all or a subset of bytes from the given stream.;To;;0;[o; ;[I"ZIO.readlines: Returns an array of strings, which are the lines from the given stream.;To;;0;[o; ;[I"9IO.write: Writes a given string to the given stream.;T@ o; ;[I"HThis example writes a string to a file, encoding it as ISO-8859-1, ;TI"Athen reads the file into a new string, encoding it as UTF-8:;T@ o;;[I"s = "R\u00E9sum\u00E9" ;TI"path = 't.tmp' ;TI"ext_enc = 'ISO-8859-1' ;TI"int_enc = 'UTF-8' ;TI" ;TI"5File.write(path, s, external_encoding: ext_enc) ;TI"#raw_text = File.binread(path) ;TI" ;TI"_transcoded_text = File.read(path, external_encoding: ext_enc, internal_encoding: int_enc) ;TI" ;TI"p raw_text ;TI"p transcoded_text ;T;0o; ;[I"Output:;T@ o;;[I""R\xE9sum\xE9" ;TI""Résumé" ;T;0S; ; i;I"\Encoding Options;T@ o; ;[I"WA number of methods in the Ruby core accept keyword arguments as encoding options.;T@ o; ;[ I"QSome of the options specify or utilize a _replacement_ _string_, to be used ;TI"(in certain transcoding operations. ;TI"GA replacement string may be in any encoding that can be converted ;TI"/to the encoding of the destination string.;T@ o; ;[I"8These keyword-value pairs specify encoding options:;T@ o;;;;[o;;0;[o; ;[I""For an invalid byte sequence:;T@ o;;;;[o;;0;[o; ;[I"7<tt>:invalid: nil</tt> (default): Raise exception.;To;;0;[o; ;[I"E<tt>:invalid: :replace</tt>: Replace each invalid byte sequence ;TI"!with the replacement string.;T@ o; ;[I"Examples:;T@ o;;[I"s = "\x80foo\x80" ;TI"Is.encode('ISO-8859-3') # Raises Encoding::InvalidByteSequenceError. ;TI"<s.encode('ISO-8859-3', invalid: :replace) # => "?foo?" ;T;0o;;0;[o; ;[I" For an undefined character:;T@ o;;;;[o;;0;[o; ;[I"5<tt>:undef: nil</tt> (default): Raise exception.;To;;0;[o; ;[I"A<tt>:undef: :replace</tt>: Replace each undefined character ;TI"!with the replacement string.;T@ o; ;[I"Examples:;T@ o;;[I"s = "\x80foo\x80" ;TI"W"\x80".encode('UTF-8', 'ASCII-8BIT') # Raises Encoding::UndefinedConversionError. ;TI"Gs.encode('UTF-8', 'ASCII-8BIT', undef: :replace) # => "�foo�" ;T;0o;;0;[o; ;[I"Replacement string:;T@ o;;;;[o;;0;[o; ;[I"P<tt>:replace: nil</tt> (default): Set replacement string to default value: ;TI"N<tt>"\uFFFD"</tt> ("�") for a Unicode encoding, <tt>'?'</tt> otherwise.;To;;0;[o; ;[I"Z<tt>:replace: _some_string_</tt>: Set replacement string to the given +some_string+; ;TI"overrides +:fallback+.;T@ o; ;[I"Examples:;T@ o;;[I"s = "\xA5foo\xA5" ;TI"9options = {:undef => :replace, :replace => 'xyzzy'} ;TI"Es.encode('UTF-8', 'ISO-8859-3', **options) # => "xyzzyfooxyzzy" ;T;0o;;0;[ o; ;[I"Replacement fallback:;T@ o; ;[I"#One of these may be specified:;T@ o;;;;[ o;;0;[o; ;[I"@<tt>:fallback: nil</tt> (default): No replacement fallback.;To;;0;[o; ;[I"S<tt>:fallback: _hash_like_object_</tt>: Set replacement fallback to the given ;TI"R+hash_like_object+; the replacement string is <tt>_hash_like_object_[X]</tt>.;To;;0;[o; ;[I"I<tt>:fallback: _method_</tt>: Set replacement fallback to the given ;TI">+method+; the replacement string is <tt>_method_(X)</tt>.;To;;0;[o; ;[I"G<tt>:fallback: _proc_</tt>: Set replacement fallback to the given ;TI":+proc+; the replacement string is <tt>_proc_[X]</tt>.;T@ o; ;[I"Examples:;T@ o;;[I"s = "\u3042foo\u3043" ;TI" ;TI""hash = {"\u3042" => 'xyzzy'} ;TI"hash.default = 'XYZZY' ;TI"<s.encode('ASCII', fallback: hash) # => "xyzzyfooXYZZY" ;TI" ;TI")def (fallback = "U+%.4X").escape(x) ;TI" self % x.unpack("U") ;TI" end ;TI"U"\u{3042}".encode("US-ASCII", fallback: fallback.method(:escape)) # => "U+3042" ;TI" ;TI">proc = Proc.new {|x| x == "\u3042" ? 'xyzzy' : 'XYZZY' } ;TI"<s.encode('ASCII', fallback: proc) # => "XYZZYfooXYZZY" ;T;0o;;0;[ o; ;[I"XML entities:;T@ o; ;[I"#One of these may be specified:;T@ o;;;;[o;;0;[o; ;[I"@<tt>:xml: nil</tt> (default): No handling for XML entities.;To;;0;[o; ;[ I"5<tt>:xml: :text</tt>: Treat source text as XML; ;TI"&replace each undefined character ;TI"Awith its upper-case hexdecimal numeric character reference, ;TI"except that:;T@ o;;;;[o;;0;[o; ;[I"0<tt>&</tt> is replaced with <tt>&</tt>.;To;;0;[o; ;[I"/<tt><</tt> is replaced with <tt><</tt>.;To;;0;[o; ;[I"/<tt>></tt> is replaced with <tt>></tt>.;T@ o;;0;[o; ;[ I"E<tt>:xml: :attr</tt>: Treat source text as XML attribute value; ;TI"&replace each undefined character ;TI"Awith its upper-case hexdecimal numeric character reference, ;TI"except that:;T@ o;;;;[ o;;0;[o; ;[I"GThe replacement string <tt>r</tt> is double-quoted (<tt>"r"</tt>).;To;;0;[o; ;[I"AEach embedded double-quote is replaced with <tt>"</tt>.;To;;0;[o; ;[I"0<tt>&</tt> is replaced with <tt>&</tt>.;To;;0;[o; ;[I"/<tt><</tt> is replaced with <tt><</tt>.;To;;0;[o; ;[I"/<tt>></tt> is replaced with <tt>></tt>.;T@ o; ;[I"Examples:;T@ o;;[I""s = 'foo"<&>"bar' + "\u3042" ;TI"Js.encode('ASCII', xml: :text) # => "foo\"<&>\"barあ" ;TI"Vs.encode('ASCII', xml: :attr) # => "\"foo"<&>"barあ\"" ;T;0o;;0;[ o; ;[I"Newlines:;T@ o; ;[I"#One of these may be specified:;T@ o;;;;[o;;0;[o; ;[I"R<tt>:cr_newline: true</tt>: Replace each line-feed character (<tt>"\n"</tt>) ;TI"6with a carriage-return character (<tt>"\r"</tt>).;To;;0;[o; ;[I"T<tt>:crlf_newline: true</tt>: Replace each line-feed character (<tt>"\n"</tt>) ;TI"?with a carriage-return/line-feed string (<tt>"\r\n"</tt>).;To;;0;[o; ;[I"E<tt>:universal_newline: true</tt>: Replace each carriage-return ;TI"Icharacter (<tt>"\r"</tt>) and each carriage-return/line-feed string ;TI"B(<tt>"\r\n"</tt>) with a line-feed character (<tt>"\n"</tt>).;T@ o; ;[I"Examples:;T@ o;;[ I"Bs = "\n \r \r\n" # => "\n \r \r\n" ;TI"Bs.encode('ASCII', cr_newline: true) # => "\r \r \r\r" ;TI"Fs.encode('ASCII', crlf_newline: true) # => "\r\n \r \r\r\n" ;TI"?s.encode('ASCII', universal_newline: true) # => "\n \n \n";T;0: @file@:0@omit_headings_from_table_of_contents_below0