about | summary | refs | log | tree | commit | diff
path: root/markdown/extensions/codehilite.py
diff options
context:
space:
mode:
Diffstat (limited to 'markdown/extensions/codehilite.py')
-rw-r--r-- markdown/extensions/codehilite.py 400
1 files changed, 253 insertions, 147 deletions
diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py
index c5d496b..a54ba21 100644
--- a/markdown/extensions/codehilite.py
+++ b/markdown/extensions/codehilite.py
@@ -1,156 +1,216 @@
-#!/usr/bin/python
-
"""
CodeHilite Extension for Python-Markdown
========================================
Adds code/syntax highlighting to standard Python-Markdown code blocks.
-Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/).
+See <https://Python-Markdown.github.io/extensions/code_hilite>
+for documentation.
+
+Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/).
+
+All changes Copyright 2008-2014 The Python Markdown Project
-Project website: <http://www.freewisdom.org/project/python-markdown/CodeHilite>
-Contact: markdown@freewisdom.org
-
-License: BSD (see ../docs/LICENSE for details)
-
-Dependencies:
-* [Python 2.3+](http://python.org/)
-* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)
-* [Pygments](http://pygments.org/)
+License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
-import markdown
+from . import Extension
+from ..treeprocessors import Treeprocessor
+from ..util import parseBoolValue
+
+try: # pragma: no cover
+ from pygments import highlight
+ from pygments.lexers import get_lexer_by_name, guess_lexer
+ from pygments.formatters import get_formatter_by_name
+ from pygments.util import ClassNotFound
+ pygments = True
+except ImportError: # pragma: no cover
+ pygments = False
+
-# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------
+def parse_hl_lines(expr):
+ """Support our syntax for emphasizing certain lines of code.
-try:
- TAB_LENGTH = markdown.TAB_LENGTH
-except AttributeError:
- TAB_LENGTH = 4
+ expr should be like '1 2' to emphasize lines 1 and 2 of a code block.
+ Returns a list of ints, the line numbers to emphasize.
+ """
+ if not expr:
+ return []
+
+ try:
+ return list(map(int, expr.split()))
+ except ValueError: # pragma: no cover
+ return []
# ------------------ The Main CodeHilite Class ----------------------
class CodeHilite:
"""
- Determine language of source code, and pass it into the pygments hilighter.
+ Determine language of source code, and pass it on to the Pygments highlighter.
+
+ Usage:
+ code = CodeHilite(src=some_code, lang='python')
+ html = code.hilite()
- Basic Usage:
- >>> code = CodeHilite(src = 'some text')
- >>> html = code.hilite()
-
+ Arguments:
* src: Source string or any object with a .readline attribute.
-
- * linenos: (Boolen) Turn line numbering 'on' or 'off' (off by default).
-
- * css_class: Set class name of wrapper div ('codehilite' by default).
-
- Low Level Usage:
- >>> code = CodeHilite()
- >>> code.src = 'some text' # String or anything with a .readline attr.
- >>> code.linenos = True # True or False; Turns line numbering on or of.
- >>> html = code.hilite()
-
+
+ * lang: String name of Pygments lexer to use for highlighting. Default: `None`.
+
+ * guess_lang: Auto-detect which lexer to use. Ignored if `lang` is set to a valid
+ value. Default: `True`.
+
+ * use_pygments: Pass code to pygments for code highlighting. If `False`, the code is
+ instead wrapped for highlighting by a JavaScript library. Default: `True`.
+
+ * pygments_formatter: The name of a Pygments formatter or a formatter class used for
+ highlighting the code blocks. Default: `html`.
+
+ * linenums: An alias to Pygments `linenos` formatter option. Default: `None`.
+
+ * css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.
+
+ * lang_prefix: Prefix prepended to the language. Default: "language-".
+
+ Other Options:
+ Any other options are accepted and passed on to the lexer and formatter. Therefore,
+ valid options include any options which are accepted by the `html` formatter or
+ whichever lexer the code's language uses. Note that most lexers do not have any
+ options. However, a few have very useful options, such as PHP's `startinline` option.
+ Any invalid options are ignored without error.
+
+ Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter
+ Lexer Options: https://pygments.org/docs/lexers/
+
+ Additionally, when Pygments is enabled, the code's language is passed to the
+ formatter as an extra option `lang_str`, whose value is `{lang_prefix}{lang}`.
+ This option has no effect on Pygments' built-in formatters.
+
+ Advanced Usage:
+ code = CodeHilite(
+ src = some_code,
+ lang = 'php',
+ startinline = True, # Lexer option. Snippet does not start with `<?php`.
+ linenostart = 42, # Formatter option. Snippet starts on line 42.
+ hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50.
+ linenos = 'inline' # Formatter option. Avoid alignment problems.
+ )
+ html = code.hilite()
+
"""
- def __init__(self, src=None, linenos=False, css_class="codehilite"):
+ def __init__(self, src, **options):
self.src = src
- self.lang = None
- self.linenos = linenos
- self.css_class = css_class
-
- def hilite(self):
+ self.lang = options.pop('lang', None)
+ self.guess_lang = options.pop('guess_lang', True)
+ self.use_pygments = options.pop('use_pygments', True)
+ self.lang_prefix = options.pop('lang_prefix', 'language-')
+ self.pygments_formatter = options.pop('pygments_formatter', 'html')
+
+ if 'linenos' not in options:
+ options['linenos'] = options.pop('linenums', None)
+ if 'cssclass' not in options:
+ options['cssclass'] = options.pop('css_class', 'codehilite')
+ if 'wrapcode' not in options:
+ # Override pygments default
+ options['wrapcode'] = True
+ # Disallow use of `full` option
+ options['full'] = False
+
+ self.options = options
+
+ def hilite(self, shebang=True):
"""
- Pass code to the [Pygments](http://pygments.pocoo.org/) highliter with
- optional line numbers. The output should then be styled with css to
- your liking. No styles are applied by default - only styling hooks
- (i.e.: <span class="k">).
+ Pass code to the [Pygments](http://pygments.pocoo.org/) highlighter with
+ optional line numbers. The output should then be styled with CSS to
+ your liking. No styles are applied by default - only styling hooks
+ (e.g., <span class="k">).
returns : A string of html.
-
+
"""
self.src = self.src.strip('\n')
-
- self._getLang()
-
- try:
- from pygments import highlight
- from pygments.lexers import get_lexer_by_name, guess_lexer, \
- TextLexer
- from pygments.formatters import HtmlFormatter
- except ImportError:
- # just escape and pass through
- txt = self._escape(self.src)
- if self.linenos:
- txt = self._number(txt)
- else :
- txt = '<div class="%s"><pre>%s</pre></div>\n'% \
- (self.css_class, txt)
- return txt
- else:
+
+ if self.lang is None and shebang:
+ self._parseHeader()
+
+ if pygments and self.use_pygments:
try:
- lexer = get_lexer_by_name(self.lang)
+ lexer = get_lexer_by_name(self.lang, **self.options)
except ValueError:
try:
- lexer = guess_lexer(self.src)
- except ValueError:
- lexer = TextLexer()
- formatter = HtmlFormatter(linenos=self.linenos,
- cssclass=self.css_class)
+ if self.guess_lang:
+ lexer = guess_lexer(self.src, **self.options)
+ else:
+ lexer = get_lexer_by_name('text', **self.options)
+ except ValueError: # pragma: no cover
+ lexer = get_lexer_by_name('text', **self.options)
+ if not self.lang:
+ # Use the guessed lexer's language instead
+ self.lang = lexer.aliases[0]
+ lang_str = f'{self.lang_prefix}{self.lang}'
+ if isinstance(self.pygments_formatter, str):
+ try:
+ formatter = get_formatter_by_name(self.pygments_formatter, **self.options)
+ except ClassNotFound:
+ formatter = get_formatter_by_name('html', **self.options)
+ else:
+ formatter = self.pygments_formatter(lang_str=lang_str, **self.options)
return highlight(self.src, lexer, formatter)
-
- def _escape(self, txt):
- """ basic html escaping """
- txt = txt.replace('&', '&amp;')
- txt = txt.replace('<', '&lt;')
- txt = txt.replace('>', '&gt;')
- txt = txt.replace('"', '&quot;')
- return txt
-
- def _number(self, txt):
- """ Use <ol> for line numbering """
- # Fix Whitespace
- txt = txt.replace('\t', ' '*TAB_LENGTH)
- txt = txt.replace(" "*4, "&nbsp; &nbsp; ")
- txt = txt.replace(" "*3, "&nbsp; &nbsp;")
- txt = txt.replace(" "*2, "&nbsp; ")
-
- # Add line numbers
- lines = txt.splitlines()
- txt = '<div class="codehilite"><pre><ol>\n'
- for line in lines:
- txt += '\t<li>%s</li>\n'% line
- txt += '</ol></pre></div>\n'
- return txt
-
-
- def _getLang(self):
- """
- Determines language of a code block from shebang lines and whether said
- line should be removed or left in place. If the sheband line contains a
- path (even a single /) then it is assumed to be a real shebang lines and
- left alone. However, if no path is given (e.i.: #!python or :::python)
- then it is assumed to be a mock shebang for language identifitation of a
- code fragment and removed from the code block prior to processing for
- code highlighting. When a mock shebang (e.i: #!python) is found, line
- numbering is turned on. When colons are found in place of a shebang
- (e.i.: :::python), line numbering is left in the current state - off
- by default.
-
+ else:
+ # just escape and build markup usable by JS highlighting libs
+ txt = self.src.replace('&', '&amp;')
+ txt = txt.replace('<', '&lt;')
+ txt = txt.replace('>', '&gt;')
+ txt = txt.replace('"', '&quot;')
+ classes = []
+ if self.lang:
+ classes.append('{}{}'.format(self.lang_prefix, self.lang))
+ if self.options['linenos']:
+ classes.append('linenums')
+ class_str = ''
+ if classes:
+ class_str = ' class="{}"'.format(' '.join(classes))
+ return '<pre class="{}"><code{}>{}\n</code></pre>\n'.format(
+ self.options['cssclass'],
+ class_str,
+ txt
+ )
+
+ def _parseHeader(self):
+ """
+ Determines the language of a code block from its shebang line and whether
+ that line should be removed or left in place. If the shebang line
+ contains a path (even a single /) then it is assumed to be a real
+ shebang line and left alone. However, if no path is given
+ (e.g., #!python or :::python) then it is assumed to be a mock shebang
+ for language identification of a code fragment and removed from the
+ code block prior to processing for code highlighting. When a mock
+ shebang (e.g., #!python) is found, line numbering is turned on unless it
+ was already set. When colons are found in place of a shebang (e.g., :::python),
+ line numbering is left in the current state - off by default.
+
+ Also parses optional list of highlight lines, like:
+
+ :::python hl_lines="1 3"
"""
import re
-
- #split text into lines
+
+ # split text into lines
lines = self.src.split("\n")
- #pull first line to examine
+ # pull first line to examine
fl = lines.pop(0)
-
+
c = re.compile(r'''
- (?:(?:::+)|(?P<shebang>[#]!)) # Shebang or 2 or more colons.
- (?P<path>(?:/\w+)*[/ ])? # Zero or 1 path
- (?P<lang>[\w+-]*) # The language
+ (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons
+ (?P<path>(?:/\w+)*[/ ])? # Zero or 1 path
+ (?P<lang>[\w#.+-]*) # The language
+ \s* # Arbitrary whitespace
+ # Optional highlight lines, single- or double-quote-delimited
+ (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?
''', re.VERBOSE)
# search first line for shebang
m = c.search(fl)
@@ -158,67 +218,113 @@ class CodeHilite:
# we have a match
try:
self.lang = m.group('lang').lower()
- except IndexError:
+ except IndexError: # pragma: no cover
self.lang = None
if m.group('path'):
# path exists - restore first line
lines.insert(0, fl)
- if m.group('shebang'):
- # shebang exists - use line numbers
- self.linenos = True
+ if self.options['linenos'] is None and m.group('shebang'):
+ # linenos was not set explicitly and a shebang exists - use line numbers
+ self.options['linenos'] = True
+
+ self.options['hl_lines'] = parse_hl_lines(m.group('hl_lines'))
else:
# No match
lines.insert(0, fl)
-
- self.src = "\n".join(lines).strip("\n")
+ self.src = "\n".join(lines).strip("\n")
# ------------------ The Markdown Extension -------------------------------
-class HiliteTreeprocessor(markdown.treeprocessors.Treeprocessor):
- """ Hilight source code in code blocks. """
+
+
+class HiliteTreeprocessor(Treeprocessor):
+ """ Highlight source code in code blocks. """
+
+ def code_unescape(self, text):
+ """Unescape code."""
+ text = text.replace("&lt;", "<")
+ text = text.replace("&gt;", ">")
+ # Escaped '&' should be replaced at the end to avoid
+ # conflicting with < and >.
+ text = text.replace("&amp;", "&")
+ return text
def run(self, root):
""" Find code blocks and store in htmlStash. """
- blocks = root.getiterator('pre')
+ blocks = root.iter('pre')
for block in blocks:
- children = block.getchildren()
- if len(children) == 1 and children[0].tag == 'code':
- code = CodeHilite(children[0].text,
- linenos=self.config['force_linenos'][0],
- css_class=self.config['css_class'][0])
- placeholder = self.markdown.htmlStash.store(code.hilite(),
- safe=True)
+ if len(block) == 1 and block[0].tag == 'code':
+ local_config = self.config.copy()
+ code = CodeHilite(
+ self.code_unescape(block[0].text),
+ tab_length=self.md.tab_length,
+ style=local_config.pop('pygments_style', 'default'),
+ **local_config
+ )
+ placeholder = self.md.htmlStash.store(code.hilite())
# Clear codeblock in etree instance
block.clear()
- # Change to p element which will later
+ # Change to p element which will later
# be removed when inserting raw html
block.tag = 'p'
block.text = placeholder
-class CodeHiliteExtension(markdown.Extension):
- """ Add source code hilighting to markdown codeblocks. """
+class CodeHiliteExtension(Extension):
+ """ Add source code highlighting to markdown codeblocks. """
- def __init__(self, configs):
+ def __init__(self, **kwargs):
# define default configs
self.config = {
- 'force_linenos' : [False, "Force line numbers - Default: False"],
- 'css_class' : ["codehilite",
- "Set class name for wrapper <div> - Default: codehilite"],
+ 'linenums': [None,
+ "Use lines numbers. True|table|inline=yes, False=no, None=auto"],
+ 'guess_lang': [True,
+ "Automatic language detection - Default: True"],
+ 'css_class': ["codehilite",
+ "Set class name for wrapper <div> - "
+ "Default: codehilite"],
+ 'pygments_style': ['default',
+ 'Pygments HTML Formatter Style '
+ '(Colorscheme) - Default: default'],
+ 'noclasses': [False,
+ 'Use inline styles instead of CSS classes - '
+ 'Default false'],
+ 'use_pygments': [True,
+ 'Use Pygments to Highlight code blocks. '
+ 'Disable if using a JavaScript library. '
+ 'Default: True'],
+ 'lang_prefix': [
+ 'language-',
+ 'Prefix prepended to the language when use_pygments is false. Default: "language-"'
+ ],
+ 'pygments_formatter': ['html',
+ 'Use a specific formatter for Pygments highlighting.'
+ 'Default: "html"',
+ ],
}
-
- # Override defaults with user settings
- for key, value in configs:
- self.setConfig(key, value)
- def extendMarkdown(self, md, md_globals):
+ for key, value in kwargs.items():
+ if key in self.config:
+ self.setConfig(key, value)
+ else:
+ # manually set unknown keywords.
+ if isinstance(value, str):
+ try:
+ # Attempt to parse str as a bool value
+ value = parseBoolValue(value, preserve_none=True)
+ except ValueError:
+ pass # Assume it's not a bool value. Use as-is.
+ self.config[key] = [value, '']
+
+ def extendMarkdown(self, md):
""" Add HilitePostprocessor to Markdown instance. """
hiliter = HiliteTreeprocessor(md)
- hiliter.config = self.config
- md.treeprocessors.add("hilite", hiliter, "_begin")
+ hiliter.config = self.getConfigs()
+ md.treeprocessors.register(hiliter, 'hilite', 30)
+ md.registerExtension(self)
-def makeExtension(configs={}):
- return CodeHiliteExtension(configs=configs)
+def makeExtension(**kwargs): # pragma: no cover
+ return CodeHiliteExtension(**kwargs)