Skip to content

Commit f859e03

Browse files
committed
Change N-Triples literal output encoding to limit the number of ECHAR escapes used based on [Canonical form of N-Triples](https://www.w3.org/TR/n-triples/#canonical-ntriples):
> Within STRING_LITERAL_QUOTE, only the characters `U+0022`, `U+005C`, `U+000A`, `U+000D` are encoded using `ECHAR`. `ECHAR` **must not** be used for characters that are allowed directly in STRING_LITERAL_QUOTE.
1 parent e753d0c commit f859e03

2 files changed

Lines changed: 13 additions & 14 deletions

File tree

lib/rdf/ntriples/writer.rb

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ def self.escape_unicode(u, encoding)
116116
# sequences, otherwise, assume the test-cases escape sequences. Otherwise,
117117
# the N-Triples recommendation includes `\b` and `\f` escape sequences.
118118
#
119+
# Within STRING_LITERAL_QUOTE, only the characters `U+0022`, `U+005C`, `U+000A`, `U+000D` are encoded using `ECHAR`. `ECHAR` must not be used for characters that are allowed directly in STRING_LITERAL_QUOTE.
120+
#
119121
# @param [Integer, #ord] u
120122
# @return [String]
121123
# @raise [ArgumentError] if `u` is not a valid Unicode codepoint
@@ -124,15 +126,10 @@ def self.escape_unicode(u, encoding)
124126
def self.escape_ascii(u, encoding)
125127
case (u = u.ord)
126128
when (0x00..0x07) then escape_utf16(u)
127-
when (0x08) then (encoding && encoding == Encoding::ASCII ? escape_utf16(u) : "\\b")
128-
when (0x09) then "\\t"
129129
when (0x0A) then "\\n"
130-
when (0x0B) then escape_utf16(u)
131-
when (0x0C) then (encoding && encoding == Encoding::ASCII ? escape_utf16(u) : "\\f")
132130
when (0x0D) then "\\r"
133131
when (0x0E..0x1F) then escape_utf16(u)
134132
when (0x22) then "\\\""
135-
when (0x27) then "\\'"
136133
when (0x5C) then "\\\\"
137134
when (0x7F) then escape_utf16(u)
138135
when (0x00..0x7F) then u.chr

spec/ntriples_spec.rb

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -672,16 +672,18 @@
672672

673673
# @see http://www.w3.org/TR/rdf-testcases/#ntrip_strings
674674
it "should correctly escape ASCII characters (#x0-#x7F)" do
675-
(0x00..0x08).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
676-
expect(writer.escape(0x09.chr, encoding)).to eq "\\t"
675+
(0x00..0x07).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
676+
expect(writer.escape(0x08.chr, encoding)).to eq "\b"
677+
expect(writer.escape(0x09.chr, encoding)).to eq "\t"
677678
expect(writer.escape(0x0A.chr, encoding)).to eq "\\n"
678-
(0x0B..0x0C).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
679+
expect(writer.escape(0x0B.chr, encoding)).to eq "\v"
680+
expect(writer.escape(0x0C.chr, encoding)).to eq "\f"
679681
expect(writer.escape(0x0D.chr, encoding)).to eq "\\r"
680682
(0x0E..0x1F).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
681683
(0x20..0x21).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
682684
expect(writer.escape(0x22.chr, encoding)).to eq "\\\""
683685
(0x23..0x26).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
684-
expect(writer.escape(0x27.chr, encoding)).to eq "\\'"
686+
expect(writer.escape(0x27.chr, encoding)).to eq "'"
685687
(0x28..0x5B).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
686688
expect(writer.escape(0x5C.chr, encoding)).to eq "\\\\"
687689
(0x5D..0x7E).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
@@ -733,17 +735,17 @@
733735
# @see http://www.w3.org/TR/rdf-testcases/#ntrip_strings
734736
it "should correctly escape ASCII characters (#x0-#x7F)" do
735737
(0x00..0x07).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
736-
expect(writer.escape(0x08.chr, encoding)).to eq (encoding ? "\\b" : "\\u0008")
737-
expect(writer.escape(0x09.chr, encoding)).to eq "\\t"
738+
expect(writer.escape(0x08.chr, encoding)).to eq "\b"
739+
expect(writer.escape(0x09.chr, encoding)).to eq "\t"
738740
expect(writer.escape(0x0A.chr, encoding)).to eq "\\n"
739-
(0x0B..0x0B).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
740-
expect(writer.escape(0x0C.chr, encoding)).to eq (encoding ? "\\f" : "\\u000C")
741+
expect(writer.escape(0x0B.chr, encoding)).to eq "\v"
742+
expect(writer.escape(0x0C.chr, encoding)).to eq "\f"
741743
expect(writer.escape(0x0D.chr, encoding)).to eq "\\r"
742744
(0x0E..0x1F).each { |u| expect(writer.escape(u.chr, encoding)).to eq "\\u#{u.to_s(16).upcase.rjust(4, '0')}" }
743745
(0x20..0x21).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
744746
expect(writer.escape(0x22.chr, encoding)).to eq "\\\""
745747
(0x23..0x26).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
746-
expect(writer.escape(0x27.chr, encoding)).to eq "\\'"
748+
expect(writer.escape(0x27.chr, encoding)).to eq "'"
747749
(0x28..0x5B).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }
748750
expect(writer.escape(0x5C.chr, encoding)).to eq "\\\\"
749751
(0x5D..0x7E).each { |u| expect(writer.escape(u.chr, encoding)).to eq u.chr }

0 commit comments

Comments
 (0)