@@ -154,22 +154,41 @@ def self.parse_literal(input, **options)
154154 end
155155 end
156156
# Lookup table used by self.unescape: maps the escaped spelling of each
# character in ESCAPE_CHARS (its #inspect form without the surrounding
# quotes) back to the character itself. Computed once at load time so the
# spellings are not rebuilt on every call.
ESCAPE_CHARS_ESCAPED = ESCAPE_CHARS.map { |char|
  [char.inspect[1...-1], char]
}.to_h.freeze

# Single pattern matching any of the escaped spellings above.
ESCAPE_CHARS_ESCAPED_REGEXP = Regexp.union(ESCAPE_CHARS_ESCAPED.keys).freeze
164+
##
# Decodes the N-Triples escape sequences found in `string`.
#
# Character escapes (the keys of `ESCAPE_CHARS_ESCAPED`) and `\uXXXX` /
# `\UXXXXXXXX` code point escapes (`UCHAR`) are decoded in one left-to-right
# pass, so the output of one escape is never re-read as the start of
# another: an escaped backslash followed by `u0041` stays a backslash
# followed by `u0041` instead of collapsing into `A`.
#
# The input is never mutated. When it is already tagged UTF-8 and contains
# no escape sequence, the very same object is returned, which avoids an
# allocation on the common path.
#
# @param  [String] string
# @return [String] the decoded string, tagged as UTF-8
# @see    http://www.w3.org/TR/rdf-testcases/#ntrip_strings
# @see    http://blog.grayproductions.net/articles/understanding_m17n
# @see    http://yehudakatz.com/2010/05/17/encodings-unabridged/
def self.unescape(string)
  # Re-tag a private copy only when the encoding actually differs.
  unless string.encoding == Encoding::UTF_8
    string = string.dup.force_encoding(Encoding::UTF_8)
  end

  # Combined pattern, built on first use and cached on the receiver.
  # The character-escape alternative has no capture groups, so groups
  # 1 and 2 still belong to the two UCHAR alternatives.
  pattern = (@unescape_pattern ||= Regexp.union(ESCAPE_CHARS_ESCAPED_REGEXP, UCHAR))

  # Fast path: nothing to decode, hand the string back without copying.
  return string unless pattern.match?(string)

  # Non-destructive gsub produces the single required copy.
  string.gsub(pattern) do |sequence|
    hex = Regexp.last_match(1) || Regexp.last_match(2)
    hex ? [hex.hex].pack('U*') : ESCAPE_CHARS_ESCAPED.fetch(sequence)
  end
end
0 commit comments