Skip to content

Commit c09b98f

Browse files
authored
Merge pull request #4 from TheBookPeople/headings
Add support for headings
2 parents 6555c3d + dfcd0de commit c09b98f

2 files changed

Lines changed: 22 additions & 1 deletion

File tree

html2text.go

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ import (
99
// badTagnamesRE matches tag text whose name is exactly head, script, style or
// a: the name must be followed by end of input or by whitespace (the start of
// the attribute list). The pattern is case-sensitive.
//
// The boundary has to be `\s+` rather than `\s*`: an alternative that can match
// the empty string accepts every tag that merely starts with one of these
// names, such as "article", "abbr", "address" or "header".
var badTagnamesRE = regexp.MustCompile(`^(head|script|style|a)($|\s+)`)
1010
// linkTagRE finds a quoted href attribute value that appears after an "a"
// somewhere earlier in the text. Submatch 1 is the value including its quotes;
// submatch 2 holds the contents when the value is single-quoted and submatch 3
// when it is double-quoted.
// NOTE(review): the pattern is unanchored, so it also matches text that merely
// contains an "a" before href (e.g. `base href="..."`) — confirm callers only
// apply it to anchor tags.
var linkTagRE = regexp.MustCompile(`a.*href=('([^']*?)'|"([^"]*?)")`)
1111
// badLinkHrefRE matches any text that contains "#" or "javascript:".
// NOTE(review): presumably used to reject fragment-only and script hrefs; since
// the pattern is unanchored, a URL with a fragment part (e.g. "page.html#top")
// matches as well — confirm that is intended.
var badLinkHrefRE = regexp.MustCompile(`#|javascript:`)
12+
// headersRE matches the text of an opening or closing heading tag, h1 through
// h6. Submatch 1 is "/" for a closing tag and empty otherwise. The pattern is
// case-sensitive.
//
// The heading level must be followed by end of input, whitespace (attributes)
// or "/" (self-closing form). Without that boundary, names that only start
// with a heading name, such as "h10" or the custom element "h1-card", would be
// treated as headings.
var headersRE = regexp.MustCompile(`^(/)?h[1-6]($|[\s/])`)
1213

1314
func parseHTMLEntity(entName string) (string, bool) {
1415
entName = strings.ToLower(entName)
@@ -160,7 +161,12 @@ func HTML2Text(html string) string {
160161
shouldOutput = true
161162
tagName := strings.ToLower(html[tagStart:i])
162163

163-
if tagName == "br" || tagName == "br/" {
164+
if headersRE.MatchString(tagName) {
165+
if canPrintNewline {
166+
outBuf.WriteString("\r\n\r\n")
167+
}
168+
canPrintNewline = false
169+
} else if tagName == "br" || tagName == "br/" {
164170
// new line
165171
outBuf.WriteString("\r\n")
166172
} else if tagName == "p" || tagName == "/p" {

html2text_test.go

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,21 @@ func TestHTML2Text(t *testing.T) {
2626
So(HTML2Text(`<p>two</p><p>paragraphs</p>`), ShouldEqual, "two\r\nparagraphs")
2727
})
2828

29+
Convey("Headings", func() {
30+
So(HTML2Text("<h1>First</h1>main text"), ShouldEqual, "First\r\n\r\nmain text")
31+
So(HTML2Text("First<h2>Second</h2>next section"), ShouldEqual, "First\r\n\r\nSecond\r\n\r\nnext section")
32+
So(HTML2Text("<h2>Second</h2>next section"), ShouldEqual, "Second\r\n\r\nnext section")
33+
So(HTML2Text("Second<h3>Third</h3>next section"), ShouldEqual, "Second\r\n\r\nThird\r\n\r\nnext section")
34+
So(HTML2Text("<h3>Third</h3>next section"), ShouldEqual, "Third\r\n\r\nnext section")
35+
So(HTML2Text("Third<h4>Fourth</h4>next section"), ShouldEqual, "Third\r\n\r\nFourth\r\n\r\nnext section")
36+
So(HTML2Text("<h4>Fourth</h4>next section"), ShouldEqual, "Fourth\r\n\r\nnext section")
37+
So(HTML2Text("Fourth<h5>Fifth</h5>next section"), ShouldEqual, "Fourth\r\n\r\nFifth\r\n\r\nnext section")
38+
So(HTML2Text("<h5>Fifth</h5>next section"), ShouldEqual, "Fifth\r\n\r\nnext section")
39+
So(HTML2Text("Fifth<h6>Sixth</h6>next section"), ShouldEqual, "Fifth\r\n\r\nSixth\r\n\r\nnext section")
40+
So(HTML2Text("<h6>Sixth</h6>next section"), ShouldEqual, "Sixth\r\n\r\nnext section")
41+
So(HTML2Text("<h7>Not Header</h7>next section"), ShouldEqual, "Not Headernext section")
42+
})
43+
2944
Convey("HTML entities", func() {
3045
So(HTML2Text(`two&nbsp;&nbsp;spaces`), ShouldEqual, "two spaces")
3146
So(HTML2Text(`&copy; 2017 K3A`), ShouldEqual, "© 2017 K3A")

0 commit comments

Comments
 (0)