Skip to content

Commit b9e0d6e

Browse files
author
Mario Hros
committed
add optional list support
1 parent a58537e commit b9e0d6e

2 files changed

Lines changed: 24 additions & 2 deletions

File tree

html2text.go

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ var numericEntityRE = regexp.MustCompile(`(?i)^#(x?[a-f0-9]+)$`)
2424
type options struct { // conversion settings, mutated by Option functions
2525
lbr string // line-break sequence written to the output between blocks and list items
2626
linksInnerText bool // presumably set by WithLinksInnerText; its effect is not visible in this diff (TODO confirm)
27+
listSupport bool // when true, each <li> item is written as a line break followed by "- "
2728
}
2829

2930
func newOptions() *options {
@@ -51,6 +52,13 @@ func WithLinksInnerText() Option {
5152
}
5253
}
5354

55+
// WithListSupport returns an Option that enables list formatting: every <li> item (inside <ul> or <ol>) starts on a new line prefixed with "- ".
56+
func WithListSupport() Option {
57+
return func(o *options) {
58+
o.listSupport = true
59+
}
60+
}
61+
5462
func parseHTMLEntity(entName string) (string, bool) {
5563
if r, ok := entity[entName]; ok {
5664
return string(r), true
@@ -231,10 +239,14 @@ func HTML2TextWithOptions(html string, reqOpts ...Option) string {
231239
tag := html[tagStart:i]
232240
tagNameLowercase := strings.ToLower(tag)
233241

234-
if tagNameLowercase == "/ul" {
242+
if tagNameLowercase == "/ul" || tagNameLowercase == "/ol" {
235243
outBuf.WriteString(opts.lbr)
236244
} else if tagNameLowercase == "li" || tagNameLowercase == "li/" {
237-
outBuf.WriteString(opts.lbr)
245+
if opts.listSupport {
246+
outBuf.WriteString(opts.lbr + "- ")
247+
} else {
248+
outBuf.WriteString(opts.lbr)
249+
}
238250
} else if headersRE.MatchString(tagNameLowercase) {
239251
if canPrintNewline {
240252
outBuf.WriteString(opts.lbr + opts.lbr)

html2text_test.go

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,7 @@ func TestHTML2Text(t *testing.T) {
102102
So(HTML2Text(``), ShouldEqual, "")
103103
So(HTML2Text(`<html><head><title>Good</title></head><body>x</body>`), ShouldEqual, "x")
104104
So(HTML2Text(`<html><head href="foo"><title>Good</title></head><body>x</body>`), ShouldEqual, "x")
105+
So(HTML2Text(`<htMl><hEad><titLe>Good</Title></head><boDy>x</Body>`), ShouldEqual, "x")
105106
So(HTML2Text(`we are not <script type="javascript"></script>interested in scripts`),
106107
ShouldEqual, "we are not interested in scripts")
107108
})
@@ -122,6 +123,15 @@ func TestHTML2Text(t *testing.T) {
122123
So(HTML2TextWithOptions(`<p>two</p><p>paragraphs</p>`), ShouldEqual, "two\r\n\r\nparagraphs")
123124
})
124125

126+
Convey("No list support by default (original behavior)", func() {
127+
So(HTML2Text(`list of items<ul><li>One</li><li>Two</li><li>Three</li></ul>`), ShouldEqual, "list of items\r\nOne\r\nTwo\r\nThree\r\n")
128+
})
129+
130+
Convey("Optional list support", func() {
131+
So(HTML2TextWithOptions(`list of items<ul><li>One</li><li>Two</li><li>Three</li></ul>`, WithListSupport()), ShouldEqual, "list of items\r\n- One\r\n- Two\r\n- Three\r\n")
132+
So(HTML2TextWithOptions(`list of items<ol><li>One</li><li>Two</li><li>Three</li></ol>`, WithListSupport()), ShouldEqual, "list of items\r\n- One\r\n- Two\r\n- Three\r\n")
133+
})
134+
125135
Convey("Custom HTML Tags", func() {
126136
So(HTML2Text(`<aa>hello</aa>`), ShouldEqual, "hello")
127137
So(HTML2Text(`<aa >hello</aa>`), ShouldEqual, "hello")

0 commit comments

Comments
 (0)