11# coding: utf-8
2+ # frozen_string_literal: true
23require 'cgi'
34
45module RDF
@@ -116,7 +117,17 @@ class URI
116117 # Note: not all reserved characters need to be escaped in SPARQL/Turtle, but they must be unescaped when encountered
117118 PN_ESCAPE_CHARS = /[~\. !\$ &'\( \) \* \+ ,;=\/ \? \# @%]/ . freeze
118119 PN_ESCAPES = /\\ #{ Regexp . union ( PN_ESCAPE_CHARS , /[\- _]/ ) } / . freeze
119-
120+
121+ # For URI encoding
122+ ENCODE_USER = Regexp . compile ( "[^#{ IUNRESERVED } #{ SUB_DELIMS } ]" ) . freeze
123+ ENCODE_PASSWORD = Regexp . compile ( "[^#{ IUNRESERVED } #{ SUB_DELIMS } ]" ) . freeze
124+ ENCODE_ISEGMENT = Regexp . compile ( "[^#{ IPCHAR } ]" ) . freeze
125+ ENCODE_ISEGMENT_NC = Regexp . compile ( "[^#{ IUNRESERVED } |#{ PCT_ENCODED } |[#{ SUB_DELIMS } ]|@]" ) . freeze
126+ ENCODE_IQUERY = Regexp . compile ( "[^#{ IQUERY } ]" ) . freeze
127+ ENCODE_IFRAGMENT = Regexp . compile ( "[^#{ IFRAGMENT } ]" ) . freeze
128+ ENCODE_PORT = Regexp . compile ( '[^\d]' ) . freeze
129+ ENCODE_IHOST = Regexp . compile ( "(?:#{ IP_literal } )|(?:#{ IREG_NAME } )" ) . freeze
130+
120131 ##
121132 # Cache size may be set through {RDF.config} using `uri_cache_size`.
122133 #
@@ -170,7 +181,7 @@ def self.parse(str)
170181 # @return [String] normalized path
171182 # @see http://tools.ietf.org/html/rfc3986#section-5.2.4
172183 def self . normalize_path ( path )
173- output , input = "" , path . to_s
184+ output , input = String . new , path . to_s
174185 if input . encoding != Encoding ::ASCII_8BIT
175186 input = input . dup . force_encoding ( Encoding ::ASCII_8BIT )
176187 end
@@ -353,7 +364,7 @@ def length
353364 # @return [Boolean] `true` or `false`
354365 # @since 0.3.9
355366 def valid?
356- RDF ::URI ::IRI . match ( to_s ) || false
367+ RDF ::URI ::IRI . match? ( to_s ) || false
357368 end
358369
359370 ##
@@ -920,7 +931,7 @@ def scheme=(value)
920931 # Return normalized version of scheme, if any
921932 # @return [String]
922933 def normalized_scheme
923- normalize_segment ( scheme . strip , SCHEME , true ) if scheme
934+ scheme . strip . downcase if scheme
924935 end
925936
926937 ##
@@ -946,7 +957,7 @@ def user=(value)
946957 # Normalized version of user
947958 # @return [String]
948959 def normalized_user
949- URI . encode ( CGI . unescape ( user ) , /[^ #{ IUNRESERVED } | #{ SUB_DELIMS } ]/ ) . force_encoding ( Encoding ::UTF_8 ) if user
960+ URI . encode ( CGI . unescape ( user ) , ENCODE_USER ) . force_encoding ( Encoding ::UTF_8 ) if user
950961 end
951962
952963 ##
@@ -972,7 +983,7 @@ def password=(value)
972983 # Normalized version of password
973984 # @return [String]
974985 def normalized_password
975- URI . encode ( CGI . unescape ( password ) , /[^ #{ IUNRESERVED } | #{ SUB_DELIMS } ]/ ) . force_encoding ( Encoding ::UTF_8 ) if password
986+ URI . encode ( CGI . unescape ( password ) , ENCODE_PASSWORD ) . force_encoding ( Encoding ::UTF_8 ) if password
976987 end
977988
978989 HOST_FROM_AUTHORITY_RE = /(?:[^@]+@)?([^:]+)(?::.*)?$/ . freeze
@@ -1000,7 +1011,7 @@ def host=(value)
10001011 # @return [String]
10011012 def normalized_host
10021013 # Remove trailing '.' characters
1003- normalize_segment ( host , IHOST , true ) . chomp ( '.' ) if host
1014+ host . sub ( / \. *$/ , '' ) . downcase if host
10041015 end
10051016
10061017 PORT_FROM_AUTHORITY_RE = /:(\d +)$/ . freeze
@@ -1028,12 +1039,8 @@ def port=(value)
10281039 # @return [String]
10291040 def normalized_port
10301041 if port
1031- np = normalize_segment ( port . to_s , PORT )
1032- if PORT_MAPPING [ normalized_scheme ] == np . to_i
1033- nil
1034- else
1035- np . to_i
1036- end
1042+ np = port . to_i
1043+ PORT_MAPPING [ normalized_scheme ] != np ? np : nil
10371044 end
10381045 end
10391046
@@ -1069,25 +1076,25 @@ def normalized_path
10691076 norm_segs = case
10701077 when authority
10711078 # ipath-abempty
1072- segments . map { |s | normalize_segment ( s , ISEGMENT ) }
1079+ segments . map { |s | normalize_segment ( s , ENCODE_ISEGMENT ) }
10731080 when segments [ 0 ] . nil?
10741081 # ipath-absolute
10751082 res = [ nil ]
1076- res << normalize_segment ( segments [ 1 ] , ISEGMENT_NZ ) if segments . length > 1
1077- res += segments [ 2 ..-1 ] . map { |s | normalize_segment ( s , ISEGMENT ) } if segments . length > 2
1083+ res << normalize_segment ( segments [ 1 ] , ENCODE_ISEGMENT ) if segments . length > 1
1084+ res += segments [ 2 ..-1 ] . map { |s | normalize_segment ( s , ENCODE_ISEGMENT ) } if segments . length > 2
10781085 res
10791086 when segments [ 0 ] . to_s . index ( ':' )
10801087 # ipath-noscheme
10811088 res = [ ]
1082- res << normalize_segment ( segments [ 0 ] , ISEGMENT_NZ_NC )
1083- res += segments [ 1 ..-1 ] . map { |s | normalize_segment ( s , ISEGMENT ) } if segments . length > 1
1089+ res << normalize_segment ( segments [ 0 ] , ENCODE_ISEGMENT_NC )
1090+ res += segments [ 1 ..-1 ] . map { |s | normalize_segment ( s , ENCODE_ISEGMENT ) } if segments . length > 1
10841091 res
10851092 when segments [ 0 ]
10861093 # ipath-rootless
10871094 # ipath-noscheme
10881095 res = [ ]
1089- res << normalize_segment ( segments [ 0 ] , ISEGMENT_NZ )
1090- res += segments [ 1 ..-1 ] . map { |s | normalize_segment ( s , ISEGMENT ) } if segments . length > 1
1096+ res << normalize_segment ( segments [ 0 ] , ENCODE_ISEGMENT )
1097+ res += segments [ 1 ..-1 ] . map { |s | normalize_segment ( s , ENCODE_ISEGMENT ) } if segments . length > 1
10911098 res
10921099 else
10931100 # Should be empty
@@ -1096,7 +1103,7 @@ def normalized_path
10961103
10971104 res = self . class . normalize_path ( norm_segs . join ( "/" ) )
10981105 # Special rules for specific protocols having empty paths
1099- normalize_segment ( res . empty? ? ( %w( http https ftp tftp ) . include? ( normalized_scheme ) ? '/' : "" ) : res , IHIER_PART )
1106+ res = ( res . empty? && %w( http https ftp tftp ) . include? ( normalized_scheme ) ) ? '/' : res
11001107 end
11011108
11021109 ##
@@ -1120,7 +1127,7 @@ def query=(value)
11201127 # Normalized version of query
11211128 # @return [String]
11221129 def normalized_query
1123- normalize_segment ( query , IQUERY ) if query
1130+ normalize_segment ( query , ENCODE_IQUERY ) if query
11241131 end
11251132
11261133 ##
@@ -1144,7 +1151,7 @@ def fragment=(value)
11441151 # Normalized version of fragment
11451152 # @return [String]
11461153 def normalized_fragment
1147- normalize_segment ( fragment , IFRAGMENT ) if fragment
1154+ normalize_segment ( fragment , ENCODE_IFRAGMENT ) if fragment
11481155 end
11491156
11501157 ##
@@ -1274,15 +1281,15 @@ def query_values=(value)
12741281 self . query = case value
12751282 when Array , Hash
12761283 value . map do |( k , v ) |
1277- k = normalize_segment ( k . to_s , UNRESERVED )
1284+ k = normalize_segment ( k . to_s , /[^A-Za-z0-9 \. _~-]/ )
12781285 if v . nil?
12791286 k
12801287 else
12811288 Array ( v ) . map do |vv |
12821289 if vv === TrueClass
12831290 k
12841291 else
1285- "#{ k } =#{ normalize_segment ( vv . to_s , UNRESERVED ) } "
1292+ "#{ k } =#{ normalize_segment ( vv . to_s , /[^A-Za-z0-9 \. _~-]/ ) } "
12861293 end
12871294 end . join ( "&" )
12881295 end
@@ -1331,15 +1338,15 @@ def self._load(data)
13311338 # Normalize a segment using a character range
13321339 #
13331340 # @param [String] value
1334- # @param [Regexp] expr
1341+ # @param [Regexp] expr matches characters to be encoded
13351342 # @param [Boolean] downcase
13361343 # @return [String]
13371344 def normalize_segment ( value , expr , downcase = false )
13381345 if value
13391346 value = value . dup . force_encoding ( Encoding ::UTF_8 )
13401347 decoded = CGI . unescape ( value )
13411348 decoded . downcase! if downcase
1342- URI . encode ( decoded , /[^(?: #{ expr } )]/ ) . force_encoding ( Encoding ::UTF_8 )
1349+ URI . encode ( decoded , expr ) . force_encoding ( Encoding ::UTF_8 )
13431350 end
13441351 end
13451352
@@ -1364,7 +1371,7 @@ def format_authority
13641371 def self . encode ( str , expr )
13651372 str . gsub ( expr ) do
13661373 us = $&
1367- tmp = ''
1374+ tmp = String . new
13681375 us . each_byte do |uc |
13691376 tmp << sprintf ( '%%%02X' , uc )
13701377 end
0 commit comments