diff options
Diffstat (limited to 'gopls/internal/lsp/source/comment.go')
-rw-r--r-- | gopls/internal/lsp/source/comment.go | 384 |
1 files changed, 384 insertions, 0 deletions
diff --git a/gopls/internal/lsp/source/comment.go b/gopls/internal/lsp/source/comment.go new file mode 100644 index 000000000..beed328ae --- /dev/null +++ b/gopls/internal/lsp/source/comment.go @@ -0,0 +1,384 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !go1.19 +// +build !go1.19 + +package source + +import ( + "bytes" + "io" + "regexp" + "strings" + "unicode" + "unicode/utf8" +) + +// CommentToMarkdown converts comment text to formatted markdown. +// The comment was prepared by DocReader, +// so it is known not to have leading, trailing blank lines +// nor to have trailing spaces at the end of lines. +// The comment markers have already been removed. +// +// Each line is converted into a markdown line and empty lines are just converted to +// newlines. Heading are prefixed with `### ` to make it a markdown heading. +// +// A span of indented lines retains a 4 space prefix block, with the common indent +// prefix removed unless empty, in which case it will be converted to a newline. +// +// URLs in the comment text are converted into links. +func CommentToMarkdown(text string, _ *Options) string { + buf := &bytes.Buffer{} + commentToMarkdown(buf, text) + return buf.String() +} + +var ( + mdNewline = []byte("\n") + mdHeader = []byte("### ") + mdIndent = []byte(" ") + mdLinkStart = []byte("[") + mdLinkDiv = []byte("](") + mdLinkEnd = []byte(")") +) + +func commentToMarkdown(w io.Writer, text string) { + blocks := blocks(text) + for i, b := range blocks { + switch b.op { + case opPara: + for _, line := range b.lines { + emphasize(w, line, true) + } + case opHead: + // The header block can consist of only one line. + // However, check the number of lines, just in case. + if len(b.lines) == 0 { + // Skip this block. + continue + } + header := b.lines[0] + + w.Write(mdHeader) + commentEscape(w, header, true) + // Header doesn't end with \n unlike the lines of other blocks. + w.Write(mdNewline) + case opPre: + for _, line := range b.lines { + if isBlank(line) { + w.Write(mdNewline) + continue + } + w.Write(mdIndent) + w.Write([]byte(line)) + } + } + + if i < len(blocks)-1 { + w.Write(mdNewline) + } + } +} + +const ( + ulquo = "“" + urquo = "”" +) + +var ( + markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`) + + unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo) +) + +// commentEscape escapes comment text for markdown. If nice is set, +// also turn double ` and ' into “ and ”. +func commentEscape(w io.Writer, text string, nice bool) { + if nice { + text = convertQuotes(text) + } + text = escapeRegex(text) + w.Write([]byte(text)) +} + +func convertQuotes(text string) string { + return unicodeQuoteReplacer.Replace(text) +} + +func escapeRegex(text string) string { + return markdownEscape.ReplaceAllString(text, `\$1`) +} + +func emphasize(w io.Writer, line string, nice bool) { + for { + m := matchRx.FindStringSubmatchIndex(line) + if m == nil { + break + } + // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx) + + // write text before match + commentEscape(w, line[0:m[0]], nice) + + // adjust match for URLs + match := line[m[0]:m[1]] + if strings.Contains(match, "://") { + m0, m1 := m[0], m[1] + for _, s := range []string{"()", "{}", "[]"} { + open, close := s[:1], s[1:] // E.g., "(" and ")" + // require opening parentheses before closing parentheses (#22285) + if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) { + m1 = m0 + i + match = line[m0:m1] + } + // require balanced pairs of parentheses (#5043) + for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ { + m1 = strings.LastIndexAny(line[:m1], s) + match = line[m0:m1] + } + } + if m1 != m[1] { + // redo matching with shortened line for correct indices + m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)]) + } + } + + // Following code has been modified from go/doc since words is always + // nil. All html formatting has also been transformed into markdown formatting + + // analyze match + url := "" + if m[2] >= 0 { + url = match + } + + // write match + if len(url) > 0 { + w.Write(mdLinkStart) + } + + commentEscape(w, match, nice) + + if len(url) > 0 { + w.Write(mdLinkDiv) + w.Write([]byte(urlReplacer.Replace(url))) + w.Write(mdLinkEnd) + } + + // advance + line = line[m[1]:] + } + commentEscape(w, line, nice) +} + +// Everything from here on is a copy of go/doc/comment.go + +const ( + // Regexp for Go identifiers + identRx = `[\pL_][\pL_0-9]*` + + // Regexp for URLs + // Match parens, and check later for balance - see #5043, #22285 + // Match .,:;?! within path, but not at end - see #18139, #16565 + // This excludes some rare yet valid urls ending in common punctuation + // in order to allow sentences ending in URLs. + + // protocol (required) e.g. http + protoPart = `(https?|ftp|file|gopher|mailto|nntp)` + // host (required) e.g. www.example.com or [::1]:8080 + hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)` + // path+query+fragment (optional) e.g. /path/index.html?q=foo#bar + pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*` + + urlRx = protoPart + `://` + hostPart + pathPart +) + +var ( + matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`) + urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`) +) + +func indentLen(s string) int { + i := 0 + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++ + } + return i +} + +func isBlank(s string) bool { + return len(s) == 0 || (len(s) == 1 && s[0] == '\n') +} + +func commonPrefix(a, b string) string { + i := 0 + for i < len(a) && i < len(b) && a[i] == b[i] { + i++ + } + return a[0:i] +} + +func unindent(block []string) { + if len(block) == 0 { + return + } + + // compute maximum common white prefix + prefix := block[0][0:indentLen(block[0])] + for _, line := range block { + if !isBlank(line) { + prefix = commonPrefix(prefix, line) + } + } + n := len(prefix) + + // remove + for i, line := range block { + if !isBlank(line) { + block[i] = line[n:] + } + } +} + +// heading returns the trimmed line if it passes as a section heading; +// otherwise it returns the empty string. +func heading(line string) string { + line = strings.TrimSpace(line) + if len(line) == 0 { + return "" + } + + // a heading must start with an uppercase letter + r, _ := utf8.DecodeRuneInString(line) + if !unicode.IsLetter(r) || !unicode.IsUpper(r) { + return "" + } + + // it must end in a letter or digit: + r, _ = utf8.DecodeLastRuneInString(line) + if !unicode.IsLetter(r) && !unicode.IsDigit(r) { + return "" + } + + // exclude lines with illegal characters. we allow "()," + if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") { + return "" + } + + // allow "'" for possessive "'s" only + for b := line; ; { + i := strings.IndexRune(b, '\'') + if i < 0 { + break + } + if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') { + return "" // not followed by "s " + } + b = b[i+2:] + } + + // allow "." when followed by non-space + for b := line; ; { + i := strings.IndexRune(b, '.') + if i < 0 { + break + } + if i+1 >= len(b) || b[i+1] == ' ' { + return "" // not followed by non-space + } + b = b[i+1:] + } + + return line +} + +type op int + +const ( + opPara op = iota + opHead + opPre +) + +type block struct { + op op + lines []string +} + +func blocks(text string) []block { + var ( + out []block + para []string + + lastWasBlank = false + lastWasHeading = false + ) + + close := func() { + if para != nil { + out = append(out, block{opPara, para}) + para = nil + } + } + + lines := strings.SplitAfter(text, "\n") + unindent(lines) + for i := 0; i < len(lines); { + line := lines[i] + if isBlank(line) { + // close paragraph + close() + i++ + lastWasBlank = true + continue + } + if indentLen(line) > 0 { + // close paragraph + close() + + // count indented or blank lines + j := i + 1 + for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { + j++ + } + // but not trailing blank lines + for j > i && isBlank(lines[j-1]) { + j-- + } + pre := lines[i:j] + i = j + + unindent(pre) + + // put those lines in a pre block + out = append(out, block{opPre, pre}) + lastWasHeading = false + continue + } + + if lastWasBlank && !lastWasHeading && i+2 < len(lines) && + isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 { + // current line is non-blank, surrounded by blank lines + // and the next non-blank line is not indented: this + // might be a heading. + if head := heading(line); head != "" { + close() + out = append(out, block{opHead, []string{head}}) + i += 2 + lastWasHeading = true + continue + } + } + + // open paragraph + lastWasBlank = false + lastWasHeading = false + para = append(para, lines[i]) + i++ + } + close() + + return out +} |