Finish 3.0.4

gkellogg · gkellogg · commit 4a7a92d43879 · 2018-09-20T10:22:58.000-07:00
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-3.0.3
+3.0.4
diff --git a/lib/rdf.rb b/lib/rdf.rb
@@ -4,6 +4,7 @@
 require 'time'
 
 require 'rdf/version'
+require 'rdf/extensions'
 
 module RDF
   # RDF mixins
@@ -197,7 +198,7 @@ def self.StrictVocabulary(prefix)
   # @return [#to_s] property
   # @return [URI]
   def self.[](property)
-    property.to_s =~ %r{_\d+} ? RDF::URI("#{to_uri}#{property}") : RDF::RDFV[property]
+    property.to_s.match?(%r{_\d+}) ? RDF::URI("#{to_uri}#{property}") : RDF::RDFV[property]
   end
 
   ##
@@ -227,7 +228,7 @@ def self.respond_to?(method, include_all = false)
   def self.method_missing(property, *args, &block)
     if args.empty?
       # Special-case rdf:_n for all integers
-      RDF_N_REGEXP.match(property) ? RDF::URI("#{to_uri}#{property}") : RDF::RDFV.send(property)
+      RDF_N_REGEXP.match?(property) ? RDF::URI("#{to_uri}#{property}") : RDF::RDFV.send(property)
     else
       super
     end
diff --git a/lib/rdf/extensions.rb b/lib/rdf/extensions.rb
@@ -0,0 +1,22 @@
+##
+# Compatibility monkeypatches for core classes: Regexp#match? and String#match? (native since Ruby 2.4).
+# Implementation taken from MIT-licensed https://github.com/marcandre/backports
+#
+
+# https://github.com/marcandre/backports/blob/master/lib/backports/2.4.0/regexp/match.rb
+unless Regexp.method_defined? :match?
+  class Regexp
+    def match?(*args)
+      !match(*args).nil?
+    end
+  end
+end
+
+# https://github.com/marcandre/backports/blob/master/lib/backports/2.4.0/string/match.rb
+unless String.method_defined? :match?
+  class String
+    def match?(*args)
+      !match(*args).nil?
+    end
+  end
+end
diff --git a/lib/rdf/model/list.rb b/lib/rdf/model/list.rb
@@ -100,11 +100,12 @@ def list?
 
     ##
     # Validate the list ensuring that
+    # * each node is referenced exactly once (except for the head, which may have no reference)
     # * rdf:rest values are all BNodes are nil
     # * each subject has exactly one value for `rdf:first` and
     #   `rdf:rest`.
     # * The value of `rdf:rest` must be either a BNode or `rdf:nil`.
-    # * All other properties are ignored.
+    # * only the list head may have properties other than `rdf:first`, `rdf:rest` and `rdf:type`
     # @return [Boolean]
     def valid?
       li = subject
@@ -123,12 +124,25 @@ def valid?
             rest = st.object
             return false unless rest.node? || rest == RDF.nil
             rests += 1
+          when RDF.type
+          else
+            # Only the list head may have any other properties
+            return false unless li == subject
           end
         end
         return false unless firsts == 1 && rests == 1
         li = rest
       end
-      true
+
+      # All elements other than the head must be referenced exactly once
+      return list_nodes.all? do |li|
+        refs = @graph.query(object: li).count
+        case refs
+        when 0 then li == subject
+        when 1 then true
+        else        false
+        end
+      end
     end
 
     # @!attribute [r] subject
diff --git a/lib/rdf/model/literal.rb b/lib/rdf/model/literal.rb
@@ -363,7 +363,7 @@ def valid?
       return false if language? && language.to_s !~ /^[a-zA-Z]+(-[a-zA-Z0-9]+)*$/
       return false if datatype? && datatype.invalid?
       grammar = self.class.const_get(:GRAMMAR) rescue nil
-      grammar.nil? || !!(value =~ grammar)
+      grammar.nil? || value.match?(grammar)
     end
 
     ##
diff --git a/lib/rdf/model/uri.rb b/lib/rdf/model/uri.rb
@@ -1006,7 +1006,7 @@ def path
     def path=(value)
       if value
         # Always lead with a slash
-        value = "/#{value}" if host && value.to_s =~ /^[^\/]/
+        value = "/#{value}" if host && value.to_s.match?(/^[^\/]/)
         object[:path] = value.to_s.force_encoding(Encoding::UTF_8)
       else
         object[:path] = nil
diff --git a/lib/rdf/nquads.rb b/lib/rdf/nquads.rb
@@ -36,7 +36,7 @@ class Format < RDF::Format
       # @param [String] sample Beginning several bytes (about 1K) of input.
       # @return [Boolean]
       def self.detect(sample)
-        !!sample.match(%r(
+        sample.match?(%r(
           (?:\s*(?:<[^>]*>) | (?:_:\w+))                          # Subject
           \s*
           (?:\s*<[^>]*>)                                          # Predicate
@@ -46,8 +46,8 @@ def self.detect(sample)
           (?:\s*(?:<[^>]*>) | (?:_:\w+))                          # Graph Name
           \s*\.
         )x) && !(
-          sample.match(%r(@(base|prefix|keywords)|\{)) ||         # Not Turtle/N3/TriG
-          sample.match(%r(<(html|rdf))i)                          # Not HTML or XML
+          sample.match?(%r(@(base|prefix|keywords)|\{)) ||         # Not Turtle/N3/TriG
+          sample.match?(%r(<(html|rdf))i)                          # Not HTML or XML
         )
       end
 
diff --git a/lib/rdf/ntriples/format.rb b/lib/rdf/ntriples/format.rb
@@ -32,16 +32,16 @@ class Format < RDF::Format
     # @param [String] sample Beginning several bytes (about 1K) of input.
     # @return [Boolean]
     def self.detect(sample)
-      !!sample.match(%r(
+      sample.match?(%r(
         (?:(?:<[^>]*>) | (?:_:\w+))                             # Subject
         \s*
         (?:<[^>]*>)                                             # Predicate
         \s*
         (?:(?:<[^>]*>) | (?:_:\w+) | (?:"[^"\n]*"(?:^^|@\S+)?)) # Object
         \s*\.
       )x) && !(
-        sample.match(%r(@(base|prefix|keywords)|\{)) ||         # Not Turtle/N3/TriG
-        sample.match(%r(<(html|rdf))i)                          # Not HTML or XML
+        sample.match?(%r(@(base|prefix|keywords)|\{)) ||         # Not Turtle/N3/TriG
+        sample.match?(%r(<(html|rdf))i)                          # Not HTML or XML
       ) && !RDF::NQuads::Format.detect(sample)
     end
 
diff --git a/lib/rdf/ntriples/reader.rb b/lib/rdf/ntriples/reader.rb
@@ -154,22 +154,41 @@ def self.parse_literal(input, **options)
       end
     end
 
+    # Cache constants to optimize decoding of escape sequences in self.unescape
+    ESCAPE_CHARS_ESCAPED = ESCAPE_CHARS.each_with_object({}) do |escape, memo|
+      memo[escape.inspect[1...-1]] = escape
+    end.freeze
+    ESCAPE_CHARS_ESCAPED_REGEXP = Regexp.union(
+      ESCAPE_CHARS_ESCAPED.keys
+    ).freeze
+
     ##
     # @param  [String] string
     # @return [String]
     # @see    http://www.w3.org/TR/rdf-testcases/#ntrip_strings
     # @see    http://blog.grayproductions.net/articles/understanding_m17n
     # @see    http://yehudakatz.com/2010/05/17/encodings-unabridged/
     def self.unescape(string)
-      string = string.dup.force_encoding(Encoding::UTF_8)
+      # Note: avoiding copying the input string when no unescaping is needed greatly reduces
+      # allocations and processing time; a UTF-8 input without escapes is returned as-is (same object).
+      unless string.encoding == Encoding::UTF_8
+        string = string.dup.force_encoding(Encoding::UTF_8)
+      end
+
+      has_escape_chars = ESCAPE_CHARS_ESCAPED_REGEXP.match?(string)
+      has_uchar = UCHAR.match?(string)
 
-      # Decode \t|\n|\r|\"|\\ character escapes:
-      ESCAPE_CHARS.each { |escape| string.gsub!(escape.inspect[1...-1], escape) }
+      string = string.dup if has_escape_chars || has_uchar
+
+      # Decode \t|\n|\r|\"|\\ character escapes using Regexp:
+      string.gsub!(ESCAPE_CHARS_ESCAPED_REGEXP) do
+        ESCAPE_CHARS_ESCAPED.fetch($~[0])
+      end if has_escape_chars
 
       # Decode \uXXXX and \UXXXXXXXX code points:
       string.gsub!(UCHAR) do
         [($1 || $2).hex].pack('U*')
-      end
+      end if has_uchar
 
       string
     end
diff --git a/lib/rdf/ntriples/writer.rb b/lib/rdf/ntriples/writer.rb
@@ -56,7 +56,7 @@ class Writer < RDF::Writer
     # @see    http://www.w3.org/TR/rdf-testcases/#ntrip_strings
     def self.escape(string, encoding = nil)
       ret = case
-        when string =~ ESCAPE_PLAIN # a shortcut for the simple case
+        when string.match?(ESCAPE_PLAIN) # a shortcut for the simple case
           string
         when string.ascii_only?
           StringIO.open do |buffer|
@@ -164,7 +164,7 @@ def self.escape_utf32(u)
     # @return [String]
     # @raise  [ArgumentError] if `value` is not an `RDF::Statement` or `RDF::Term`
     def self.serialize(value)
-      writer = self.new
+      writer = (@serialize_writer_memo ||= self.new)
       case value
         when nil then nil
         when FalseClass then value.to_s
@@ -256,7 +256,7 @@ def format_node(node, unique_bnodes: false, **options)
     def format_uri(uri, **options)
       string = uri.to_s
       iriref = case
-        when string =~ ESCAPE_PLAIN_U # a shortcut for the simple case
+        when string.match?(ESCAPE_PLAIN_U) # a shortcut for the simple case
           string
         when string.ascii_only? || (encoding && encoding != Encoding::ASCII)
           StringIO.open do |buffer|
diff --git a/lib/rdf/query/solution.rb b/lib/rdf/query/solution.rb
@@ -24,7 +24,7 @@ class Solution
     # Undefine all superfluous instance methods:
     undef_method(*instance_methods.
                   map(&:to_s).
-                  select {|m| m =~ /^\w+$/}.
+                  select {|m| m.match?(/^\w+$/)}.
                   reject {|m| %w(object_id dup instance_eval inspect to_s private_methods class should should_not pretty_print).include?(m) || m[0,2] == '__'}.
                   map(&:to_sym))
 
diff --git a/lib/rdf/reader.rb b/lib/rdf/reader.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 module RDF
   ##
   # The base class for RDF parsers.
@@ -610,7 +611,8 @@ def current_line
     def readline
       @line = @line_rest || @input.readline
       @line, @line_rest = @line.split("\r", 2)
-      @line = @line.to_s.chomp
+      @line = String.new if @line.nil? # not frozen
+      @line.chomp!
       begin
         @line.encode!(encoding)
       rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError, Encoding::ConverterNotFoundError
diff --git a/lib/rdf/util/file.rb b/lib/rdf/util/file.rb
@@ -290,7 +290,7 @@ def self.open_file(filename_or_url, proxy: nil, headers: {}, verify_none: false,
       filename_or_url = $1 if filename_or_url.to_s.match(/^file:(.*)$/)
       remote_document = nil
 
-      if filename_or_url.to_s =~ /^https?/
+      if filename_or_url.to_s.match?(/^https?/)
         base_uri = filename_or_url.to_s
 
         remote_document = self.http_adapter(!!options[:use_net_http]).
diff --git a/lib/rdf/util/logger.rb b/lib/rdf/util/logger.rb
@@ -1,5 +1,6 @@
 # coding: utf-8
 require 'logger'
+require 'delegate'
 
 module RDF; module Util
   ##
@@ -8,6 +9,9 @@ module RDF; module Util
   # Modules must provide `@logger`, which returns an instance of `Logger`, or something responding to `#<<`. Logger may also be specified using an `@options` hash containing a `:logger` entry.
   # @since 2.0.0
   module Logger
+    # The IOWrapper class is used to store per-logger state while wrapping an IO such as $stderr.
+    IOWrapper = DelegateClass(IO)
+
     ##
     # Logger instance, found using `options[:logger]`, `@logger`, or `@options[:logger]`
     # @param [Hash{Symbol => Object}] options
@@ -18,7 +22,7 @@ def logger(**options)
       logger = @options[:logger] if logger.nil? && @options
       if logger.nil?
         # Unless otherwise specified, use $stderr
-        logger = (@options || options)[:logger] = $stderr
+        logger = (@options || options)[:logger] = IOWrapper.new($stderr)
 
         # Reset log_statistics so that it's not inherited across different instances
         logger.log_statistics.clear if logger.respond_to?(:log_statistics)
diff --git a/lib/rdf/vocabulary.rb b/lib/rdf/vocabulary.rb
@@ -620,7 +620,7 @@ def props; @properties ||= {}; end
     # Undefine all superfluous instance methods:
     undef_method(*instance_methods.
                   map(&:to_s).
-                  select {|m| m =~ /^\w+$/}.
+                  select {|m| m.match?(/^\w+$/)}.
                   reject {|m| %w(object_id dup instance_eval inspect to_s class send public_send).include?(m) || m[0,2] == '__'}.
                   map(&:to_sym))
 
@@ -892,7 +892,7 @@ def dup
       # @since 0.3.9
       def valid?
         # Validate relative to RFC3987
-        node? || RDF::URI::IRI.match(to_s) || false
+        node? || RDF::URI::IRI.match?(to_s) || false
       end
 
       ##
@@ -927,7 +927,7 @@ def restriction?
       # Is this neither a class, property or datatype term?
       # @return [Boolean]
       def other?
-        Array(self.type).none? {|t| t.to_s =~ /(Class|Property|Datatype|Restriction)/}
+        Array(self.type).none? {|t| t.to_s.match?(/(Class|Property|Datatype|Restriction)/)}
       end
 
       ##
diff --git a/spec/model_list_spec.rb b/spec/model_list_spec.rb
@@ -121,6 +121,7 @@
         n = RDF::Node.new
         graph.insert(RDF::Statement(n, RDF.first, "foo"))
         graph.insert(RDF::Statement(n, RDF.rest, RDF.nil))
+        RDF::List.new(subject: n, graph: graph).valid?
         expect(RDF::List.new(subject: n, graph: graph)).to be_valid
       end
 
@@ -163,9 +164,17 @@
           RDF::Statement(:node2, RDF.first, "b"),
           RDF::Statement(:node2, RDF.rest, RDF.nil),
         ],
-        "list with other properties within" => [
+        "list node types rdf:List" => [
+          RDF::Statement(:node1, RDF.first, "a"),
+          RDF::Statement(:node1, RDF.rest, :node2),
+          RDF::Statement(:node2, RDF.first, "b"),
+          RDF::Statement(:node2, RDF.rest, RDF.nil),
+          RDF::Statement(:node2, RDF.type, RDF.List),
+        ],
+        "list node types owl:Class" => [
           RDF::Statement(:node1, RDF.first, "a"),
           RDF::Statement(:node1, RDF.rest, :node2),
+          RDF::Statement(:node1, RDF.type, RDF::OWL.Class),
           RDF::Statement(:node2, RDF.first, "b"),
           RDF::Statement(:node2, RDF.rest, RDF.nil),
           RDF::Statement(:node2, RDF.type, RDF::OWL.Class),
@@ -206,6 +215,32 @@
           RDF::Statement(:node2, RDF.first, "b"),
           RDF::Statement(:node2, RDF.rest, :node1),
         ],
+        "list with other properties within" => [
+          RDF::Statement(:node1, RDF.first, "a"),
+          RDF::Statement(:node1, RDF.rest, :node2),
+          RDF::Statement(:node2, RDF.first, "b"),
+          RDF::Statement(:node2, RDF.rest, RDF.nil),
+          RDF::Statement(:node2, RDF::RDFS.label, "bar"),
+        ],
+        "list without rdf:nil" => [
+          RDF::Statement(:node1, RDF.first, "a"),
+          RDF::Statement(:node1, RDF.rest, :node2),
+          RDF::Statement(:node2, RDF.first, "b"),
+        ],
+        "list URI rdf:rest" => [
+          RDF::Statement(:node1, RDF.first, "a"),
+          RDF::Statement(:node1, RDF.rest, RDF::URI("node2")),
+          RDF::Statement(RDF::URI("node2"), RDF.first, "b"),
+          RDF::Statement(RDF::URI("node2"), RDF.rest, RDF.nil),
+        ],
+        "list extra internal references" => [
+          RDF::Statement(:node1, RDF.first, "a"),
+          RDF::Statement(:node1, RDF.rest, :node2),
+          RDF::Statement(:node2, RDF.first, "b"),
+          RDF::Statement(:node2, RDF.rest, RDF.nil),
+          RDF::Statement(:node3, RDF.first, "c"),
+          RDF::Statement(:node3, RDF.rest, :node2),
+        ],
       }.each do |name, list|
         it name do
           if list.is_a?(Array)
diff --git a/spec/ntriples_spec.rb b/spec/ntriples_spec.rb
@@ -852,6 +852,27 @@
       include_examples "c14n", st, result
     end
   end
+
+  context "logging behavior when dumping invalid statements multiple times in a row" do
+    before do
+      allow($stderr).to receive(:write)
+    end
+
+    it "raises each time an invalid statement is dumped (not only the first time)" do
+      g = RDF::Graph.new
+      g.from_ntriples('<http://rubygems.org/gems/rdf/resource/0cb45b70-4c37-4270-9955-350c636496fc> <http://rubygems.org/gems/rdf/ontology/xxx/1.1#testDate> "2018-06-01T16:30:00Z"^^<http://www.w3.org/2001/XMLSchema#date> .')
+
+      errors = (1..5).map do |_|
+        begin
+          g.dump(:ntriples)
+          'noraise'
+        rescue RDF::WriterError
+          'raise'
+        end
+      end
+      expect(errors).to eq(['raise'] * 5)
+    end
+  end
 end
 
 describe RDF::NTriples do
diff --git a/spec/util_logger_spec.rb b/spec/util_logger_spec.rb