@@ -32,32 +32,32 @@ def test_trafilatura_extractor(self):
3232 self .assertEqual (isinstance (result , ExtractionResult ), True )
3333 self .assertEqual (result .success in [True , False ], True )
3434
35- # def test_magic_html_extractor(self):
36- # # 测试 Magic HTML 抽取器
37- # try:
38- # extractor = ExtractorFactory.create("magic-html")
39- # html_content = """
40- # <html>
41- # <body>
42- # <h1 cc-select="true">Python编程教程</h1>
43- # <p cc-select="true">这是一个Python基础教程,展示如何定义函数。</p>
44- # <pre cc-select="true"><code>def greet(name):
45- # ""问候函数""
46- # return f"Hello, {name}!"
47- #
48- # # 使用示例
49- # result = greet("World")
50- # print(result)</code></pre>
51- # <p cc-select="true">这个函数可以用来问候任何人。</p>
52- # </body>
53- # </html>
54- # """
55- # result = extractor.extract(html_content)
56- # self.assertEqual(isinstance(result, ExtractionResult), True)
57- # self.assertEqual(result.success in [True, False], True)
58- # except ValueError as e:
59- # # 如果抽取器未注册,跳过测试
60- # self.skipTest(f"Magic HTML 抽取器未注册: {e}")
def test_magic_html_extractor(self):
    """Smoke-test the "magic-html" extractor on a small annotated HTML page.

    The extractor is obtained from ``ExtractorFactory.create("magic-html")``.
    If that call raises ``ValueError`` the extractor is treated as not
    registered and the test is skipped. Otherwise the page is extracted and
    the test checks that the result is an ``ExtractionResult`` whose
    ``success`` attribute is either ``True`` or ``False``.
    """
    # Guard only the factory lookup: a ValueError raised later by extract()
    # is a real failure and must not be reported as "not registered".
    try:
        extractor = ExtractorFactory.create("magic-html")
    except ValueError as e:
        # Skip the test if the extractor is not registered.
        self.skipTest(f"Magic HTML 抽取器未注册: {e}")

    # Sample page: heading, paragraphs and a code block, each tagged with
    # cc-select="true".
    html_content = """
<html>
<body>
<h1 cc-select="true">Python编程教程</h1>
<p cc-select="true">这是一个Python基础教程,展示如何定义函数。</p>
<pre cc-select="true"><code>def greet(name):
""问候函数""
return f"Hello, {name}!"

# 使用示例
result = greet("World")
print(result)</code></pre>
<p cc-select="true">这个函数可以用来问候任何人。</p>
</body>
</html>
"""
    result = extractor.extract(html_content)

    # Dedicated assertions give clearer failure messages than comparing a
    # boolean expression against True.
    self.assertIsInstance(result, ExtractionResult)
    self.assertIn(result.success, [True, False])
6161
6262 def test_resiliparse_extractor (self ):
6363 # 测试 Resiliparse 抽取器
0 commit comments