@@ -29,27 +29,27 @@ class URI
2929 include RDF ::Resource
3030
3131 # IRI components
32- UCSCHAR = Regexp . compile ( <<-EOS . gsub ( / \s +/ , '' ) )
33- [ \\ u00A0-\\ uD7FF]|[ \\ uF900-\\ uFDCF]|[ \\ uFDF0-\\ uFFEF]|
34- [ \\ u{10000}-\\ u{1FFFD}]|[ \\ u{20000}-\\ u{2FFFD}]|[ \\ u{30000}-\\ u{3FFFD}]|
35- [ \\ u{40000}-\\ u{4FFFD}]|[ \\ u{50000}-\\ u{5FFFD}]|[ \\ u{60000}-\\ u{6FFFD}]|
36- [ \\ u{70000}-\\ u{7FFFD}]|[ \\ u{80000}-\\ u{8FFFD}]|[ \\ u{90000}-\\ u{9FFFD}]|
37- [ \\ u{A0000}-\\ u{AFFFD}]|[ \\ u{B0000}-\\ u{BFFFD}]|[ \\ u{C0000}-\\ u{CFFFD}]|
38- [ \\ u{D0000}-\\ u{DFFFD}]|[ \\ u{E1000}-\\ u{EFFFD}]
39- EOS
40- IPRIVATE = Regexp . compile ( "[\\ uE000-\\ uF8FF]|[ \\ u{F0000}-\\ u{FFFFD}]|[ \\ u100000 -\\ u10FFFD ]" ) . freeze
32+ UCSCHAR = %(
33+ \\ u00A0-\\ uD7FF\\ uF900-\\ uFDCF\\ uFDF0-\\ uFFEF
34+ \\ u{10000}-\\ u{1FFFD}\\ u{20000}-\\ u{2FFFD}\\ u{30000}-\\ u{3FFFD}
35+ \\ u{40000}-\\ u{4FFFD}\\ u{50000}-\\ u{5FFFD}\\ u{60000}-\\ u{6FFFD}
36+ \\ u{70000}-\\ u{7FFFD}\\ u{80000}-\\ u{8FFFD}\\ u{90000}-\\ u{9FFFD}
37+ \\ u{A0000}-\\ u{AFFFD}\\ u{B0000}-\\ u{BFFFD}\\ u{C0000}-\\ u{CFFFD}
38+ \\ u{D0000}-\\ u{DFFFD}\\ u{E1000}-\\ u{EFFFD}
39+ ) . gsub ( / \s +/ , '' )
40+ IPRIVATE = Regexp . compile ( "[\\ uE000-\\ uF8FF\\ u{F0000}-\\ u{FFFFD}\\ u{100000} -\\ u{10FFFD} ]" ) . freeze
4141 SCHEME = Regexp.compile("[A-Za-z](?:[A-Za-z0-9+.-])*").freeze # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ); "-" is placed last so "+-." is not parsed as the range "+".."." (which would also admit ",")
4242 PORT = Regexp . compile ( "[0-9]*" ) . freeze
4343 IP_literal = Regexp . compile ( "\\ [[0-9A-Fa-f:\\ .]*\\ ]" ) . freeze # Simplified, no IPvFuture
4444 PCT_ENCODED = Regexp . compile ( "%[0-9A-Fa-f][0-9A-Fa-f]" ) . freeze
45- GEN_DELIMS = Regexp . compile ( " [:/\\ ? \\ # \\ [ \\ ]@]" ) . freeze
46- SUB_DELIMS = Regexp . compile ( " [!\\ $&'\\ ( \\ ) \\ * \\ +,;=]" ) . freeze
47- RESERVED = Regexp . compile ( "(?: #{ GEN_DELIMS } | #{ SUB_DELIMS } )" ) . freeze
45+ GEN_DELIMS = Regexp . compile ( %q{ [:/\?\#\[\ ]@]} ) . freeze
46+ SUB_DELIMS = Regexp . compile ( %q{ [!\$&'\(\)\*\ +,;=]} ) . freeze
47+ RESERVED = Regexp . union ( GEN_DELIMS , SUB_DELIMS ) . freeze
4848 UNRESERVED = Regexp . compile ( "[A-Za-z0-9\. _~-]" ) . freeze
4949
50- IUNRESERVED = Regexp . compile ( "[A-Za-z0-9 \. _~-]| #{ UCSCHAR } " ) . freeze
50+ IUNRESERVED = Regexp . union ( UNRESERVED , Regexp . compile ( "[#{ UCSCHAR } ]" ) ) . freeze
5151
52- IPCHAR = Regexp . compile ( "(?: #{ IUNRESERVED } | #{ PCT_ENCODED } | #{ SUB_DELIMS } | :|@)" ) . freeze
52+ IPCHAR = Regexp.union(IUNRESERVED, PCT_ENCODED, SUB_DELIMS, /[:@]/).freeze # ipchar adds only ":" and "@"; inside a character class "|" is a literal character, not alternation, so it must not appear here
5353
5454 IQUERY = Regexp . compile ( "(?:#{ IPCHAR } |#{ IPRIVATE } |/|\\ ?)*" ) . freeze
5555
@@ -66,7 +66,7 @@ class URI
6666 IPATH_EMPTY = Regexp . compile ( "" ) . freeze
6767
6868 IREG_NAME = Regexp . compile ( "(?:(?:#{ IUNRESERVED } )|(?:#{ PCT_ENCODED } )|(?:#{ SUB_DELIMS } ))*" ) . freeze
69- IHOST = Regexp . compile ( "(?: #{ IP_literal } )|(?: #{ IREG_NAME } )" ) . freeze
69+ IHOST = Regexp . union ( IP_literal , IREG_NAME ) . freeze
7070 IUSERINFO = Regexp . compile ( "(?:(?:#{ IUNRESERVED } )|(?:#{ PCT_ENCODED } )|(?:#{ SUB_DELIMS } )|:)*" ) . freeze
7171 IAUTHORITY = Regexp . compile ( "(?:#{ IUSERINFO } @)?#{ IHOST } (?::#{ PORT } )?" ) . freeze
7272
@@ -119,14 +119,18 @@ class URI
119119 PN_ESCAPES = /\\ #{ Regexp . union ( PN_ESCAPE_CHARS , /[\- _]/ ) } / . freeze
120120
121121 # For URI encoding
122- ENCODE_USER = Regexp . compile ( "[^#{ IUNRESERVED } #{ SUB_DELIMS } ]" ) . freeze
123- ENCODE_PASSWORD = Regexp . compile ( "[^#{ IUNRESERVED } #{ SUB_DELIMS } ]" ) . freeze
124- ENCODE_ISEGMENT = Regexp . compile ( "[^#{ IPCHAR } ]" ) . freeze
125- ENCODE_ISEGMENT_NC = Regexp . compile ( "[^#{ IUNRESERVED } |#{ PCT_ENCODED } |[#{ SUB_DELIMS } ]|@]" ) . freeze
126- ENCODE_IQUERY = Regexp . compile ( "[^#{ IQUERY } ]" ) . freeze
127- ENCODE_IFRAGMENT = Regexp . compile ( "[^#{ IFRAGMENT } ]" ) . freeze
128- ENCODE_PORT = Regexp . compile ( '[^\d]' ) . freeze
129- ENCODE_IHOST = Regexp . compile ( "(?:#{ IP_literal } )|(?:#{ IREG_NAME } )" ) . freeze
122+ # iuserinfo = *( iunreserved / pct-encoded / sub-delims / ":" )
123+ ENCODE_USER =
124+ ENCODE_PASSWORD = Regexp . compile ( "[^A-Za-z0-9\. _~#{ UCSCHAR } !$&'\( \) \* \+ ,;=:-]" ) . freeze
125+ # isegment = *ipchar
126+ # ipchar = iunreserved / pct-encoded / sub-delims / ":" / "@"
127+ ENCODE_ISEGMENT = Regexp . compile ( "[^A-Za-z0-9\. _~#{ UCSCHAR } !$&'\( \) \* \+ ,;=:-]" ) . freeze
128+ # isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims / "@" )
129+ ENCODE_ISEGMENT_NC = Regexp . compile ( "[^A-Za-z0-9\. _~#{ UCSCHAR } !$&'\( \) \* \+ ,;=-]" ) . freeze
130+ # iquery = *( ipchar / iprivate / "/" / "?" )
131+ ENCODE_IQUERY = Regexp . compile ( "[^A-Za-z0-9\. _~#{ UCSCHAR } \\ uE000-\\ uF8FF\\ u{F0000}-\\ u{FFFFD}\\ u{100000}-\\ u{10FFFD}/?=]" ) . freeze
132+ # ifragment = *( ipchar / "/" / "?" )
133+ ENCODE_IFRAGMENT = Regexp . compile ( "[^A-Za-z0-9\. _~#{ UCSCHAR } /?]" ) . freeze
130134
131135 ##
132136 # Cache size may be set through {RDF.config} using `uri_cache_size`.
@@ -1071,6 +1075,12 @@ def path=(value)
10711075 # Normalized version of path
10721076 # @return [String]
10731077 def normalized_path
1078+ if normalized_scheme == "urn"
1079+ # Special-case URI. Normalize the NID component only
1080+ nid , p = path . to_s . split ( ':' , 2 )
1081+ return "#{ nid . downcase } :#{ p } "
1082+ end
1083+
10741084 segments = path . to_s . split ( '/' , -1 ) # preserve null segments
10751085
10761086 norm_segs = case
@@ -1103,7 +1113,7 @@ def normalized_path
11031113
11041114 res = self . class . normalize_path ( norm_segs . join ( "/" ) )
11051115 # Special rules for specific protocols having empty paths
1106- res = ( res . empty? && %w( http https ftp tftp ) . include? ( normalized_scheme ) ) ? '/' : res
1116+ ( res . empty? && %w( http https ftp tftp ) . include? ( normalized_scheme ) ) ? '/' : res
11071117 end
11081118
11091119 ##
0 commit comments