# -*- coding: utf-8 -*-
"""
Functions for parsing comments in markup
"""
from __future__ import unicode_literals
import math
import textwrap
import re
from cmakelang import common
# Matches comment strings like ``# TODO(josh):`` or ``# NOTE(josh):``, i.e.
# optional whitespace, an upper-case tag (letters and underscores), a
# non-empty parenthesized owner and a colon. The pattern itself does not
# consume a leading ``#``.
NOTE_REGEX = re.compile(r'^\s*[A-Z_]+\([^)]+\):.*')
# Matches comment lines that are clearly meant to separate sections or
# headers. The meaning of this regex is "a line that, after optional leading
# whitespace, starts with three punctuation characters (neither word
# characters nor whitespace) and ends with three such characters". Anything
# may appear in between, and the two runs cannot overlap, so at least six
# characters are required.
RULER_PATTERN = r'^\s*[^\w\s]{3}.*[^\w\s]{3}$'
RULER_REGEX = re.compile(RULER_PATTERN)
# Matches lines that start a bulleted list: indentation (group 1), a ``*`` or
# ``-`` marker (group 2), then a space and the item text (group 3, which
# keeps that leading space)
BULLET_REGEX = re.compile(r'^(\s*)([\*-])( .+)$')
# Matches lines that start an enumerated (numbered) list: indentation
# (group 1), one or more digits, a ``.`` or ``:`` delimiter (group 2), then a
# space and the item text (group 3, which keeps that leading space)
ENUM_REGEX = re.compile(r'^(\s*)\d+([.:])( .+)$')
# Matches a verbatim fence: optional whitespace, a run of three or more
# backtick/tilde characters (group 1), and whatever follows on the line
# (group 2)
FENCE_PATTERN = r'^\s*([`~]{3}[`~]*)(.*)$'
FENCE_REGEX = re.compile(FENCE_PATTERN)
# Kinds of markup item. ``parse()`` tags every ``CommentItem`` it emits with
# one of these values and (except for FENCE) also uses them as its parser
# state.
# NOTE(review): the ``CommentType`` class itself is defined outside this part
# of the file; the integer arguments are presumably unique ids -- confirm
# against the class definition.
CommentType.PARAGRAPH = CommentType(0)  # ordinary text lines
CommentType.BULLET_LIST = CommentType(1)  # list opened by BULLET_REGEX
CommentType.ENUM_LIST = CommentType(2)  # list opened by ENUM_REGEX
CommentType.NOTE = CommentType(3)  # text opened by a NOTE_REGEX line
CommentType.RULER = CommentType(4)  # a single line matching the ruler pattern
CommentType.SEPARATOR = CommentType(5)  # a run of empty lines
CommentType.FENCE = CommentType(6)  # the fence line opening/closing verbatim
CommentType.VERBATIM = CommentType(7)  # the lines between two fences
def parse(lines, config=None):
    """
    Parse comment lines. Returns objects of different formatable entities

    The lines are consumed in order by a small state machine which groups
    them into ``CommentItem`` objects: fences and the verbatim text between
    them, separators (runs of empty lines), rulers, bulleted and enumerated
    lists, notes and plain paragraphs.

    :param lines: iterable of strings, one per comment line. Presumably the
      comment marker has already been removed by the caller (the patterns
      used here do not match a leading ``#``) -- confirm against the caller.
    :param config: optional configuration object. When given,
      ``config.markup.fence_pattern`` and ``config.markup.ruler_pattern`` are
      compiled and used in place of the module defaults ``FENCE_REGEX`` and
      ``RULER_REGEX``.
    :returns: list of ``CommentItem`` in the order they were encountered.
    """
    # pylint: disable=too-many-statements
    obj_list = []
    state = None
    # Matches further entries of the list currently being parsed; only set
    # while ``state`` is BULLET_LIST or ENUM_LIST.
    bullet_regex = None

    if config is None:
        fence_re = FENCE_REGEX
        ruler_re = RULER_REGEX
    else:
        fence_re = re.compile(config.markup.fence_pattern)
        ruler_re = re.compile(config.markup.ruler_pattern)

    for line in lines:
        fence_match = fence_re.match(line)
        if fence_match:
            fence = CommentItem(CommentType.FENCE)
            fence.lines.append(fence_match.group(1).strip())
            obj_list.append(fence)
            # Whatever trails the fence marker is processed as if it were a
            # line of its own.
            line = fence_match.group(2).strip()

            # A fence toggles verbatim mode: it closes an open verbatim
            # section or opens a new one.
            if state == CommentType.VERBATIM:
                state = None
            else:
                obj_list.append(CommentItem(CommentType.VERBATIM))
                state = CommentType.VERBATIM

            if not line:
                continue

        if state == CommentType.VERBATIM:
            # Verbatim text is kept as-is, except that a single leading
            # space is dropped.
            if line.startswith(' '):
                line = line[1:]
            obj_list[-1].lines.append(line)
            continue

        if not line:
            # Consecutive empty lines collapse into a single separator
            if state is not CommentType.SEPARATOR:
                obj_list.append(CommentItem(CommentType.SEPARATOR))
                state = CommentType.SEPARATOR
            continue

        # Rulers are recognized in every non-verbatim state, so none of the
        # branches below needs to test for them again.
        if ruler_re.match(line):
            ruler = CommentItem(CommentType.RULER)
            ruler.lines.append(line.strip())
            obj_list.append(ruler)
            state = CommentType.RULER
            continue

        if state in (None, CommentType.SEPARATOR, CommentType.RULER):
            # Nothing is open, so this line starts a new item
            match = BULLET_REGEX.match(line)
            if match:
                indent_str, bullet_punctuation, text = match.groups()
                item = CommentItem(CommentType.BULLET_LIST)
                # NOTE(review): unlike the other branches, the first bullet
                # entry is stored unstripped (it keeps its leading space).
                # Left unchanged because the consumers of these items are
                # not visible here.
                item.lines.append(text)
                item.indent = len(indent_str)
                obj_list.append(item)
                state = CommentType.BULLET_LIST
                # Further entries must use the same indentation and marker.
                # The marker may be ``*``, so it has to be escaped.
                bullet_regex = re.compile('^{}{}( .*)$'.format(
                    re.escape(indent_str), re.escape(bullet_punctuation)))
                continue

            match = ENUM_REGEX.match(line)
            if match:
                indent_str, bullet_punctuation, text = match.groups()
                item = CommentItem(CommentType.ENUM_LIST)
                item.lines.append(text.strip())
                item.indent = len(indent_str)
                obj_list.append(item)
                state = CommentType.ENUM_LIST
                # TODO(josh) We want to match lines with either the same
                # number of spaces or with the colon in the same column
                # The delimiter may be ``.``; unescaped it would match any
                # character and lines such as ``10 apples`` would be
                # mistaken for a new list entry.
                bullet_regex = re.compile(r'^{}\d+{}( .*)$'.format(
                    re.escape(indent_str), re.escape(bullet_punctuation)))
                continue

            if NOTE_REGEX.match(line):
                item = CommentItem(CommentType.NOTE)
                item.lines.append(line.strip())
                obj_list.append(item)
                state = CommentType.NOTE
                continue

            item = CommentItem(CommentType.PARAGRAPH)
            item.lines.append(line.strip())
            obj_list.append(item)
            state = CommentType.PARAGRAPH

        elif state in (CommentType.PARAGRAPH, CommentType.NOTE):
            # A new note tag closes the running paragraph/note; any other
            # line continues it.
            if NOTE_REGEX.match(line):
                obj_list.append(CommentItem(CommentType.NOTE))
                state = CommentType.NOTE
            obj_list[-1].lines.append(line.strip())

        elif state in (CommentType.BULLET_LIST, CommentType.ENUM_LIST):
            match = bullet_regex.match(line)
            if match:
                # Another entry of the same list
                obj_list[-1].lines.append(match.group(1).strip())
            else:
                # Continuation text is joined to the most recent entry
                obj_list[-1].lines[-1] += '\n' + line.strip()

    return obj_list
def is_hashruler(item):
    """
    Return true if the markup item is a hash ruler, i.e.::

        ###########################
        # Like this ^^^ or this vvv
        ###########################

    :param item: markup item exposing ``kind`` and ``lines``
    :returns: True only when the item is a RULER with exactly one line and
      that line consists solely of ``#`` characters.
    """
    if item.kind != CommentType.RULER:
        return False
    if len(item.lines) != 1:
        return False
    # Stripping '#' from both ends leaves nothing only when the whole line
    # is made of hashes.
    return not item.lines[0].strip('#')
# Shared keyword options; every key is the name of a
# ``textwrap.TextWrapper`` attribute. Presumably passed to the textwrap calls
# made elsewhere in this module -- the call sites are not visible here.
COMMON_KWARGS = {
    'expand_tabs': True,
    'replace_whitespace': True,
    'drop_whitespace': True,
    # Never split inside a word or at a hyphen
    'break_long_words': False,
    'break_on_hyphens': False,
}