""" ============== smartypants.py ============== ---------------------------- SmartyPants ported to Python ---------------------------- Ported by `Chad Miller`_ Copyright (c) 2004 Chad Miller original `SmartyPants`_ by `John Gruber`_ Copyright (c) 2003 John Gruber Synopsis ======== A smart-quotes plugin for Pyblosxom_. The original "SmartyPants" is a free web publishing plug-in for Movable Type, Blosxom, and BBEdit that easily translates plain ASCII punctuation characters into "smart" typographic punctuation HTML entities. This software, *smartypants.py*, endeavours to be a functional port of SmartyPants to Python, for use with Pyblosxom_. Description =========== SmartyPants can perform the following transformations: - Straight quotes ( " and ' ) into "curly" quote HTML entities - Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities - Dashes (``--`` and ``---``) into en- and em-dash entities - Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity This means you can write, edit, and save your posts using plain old ASCII straight quotes, plain dashes, and plain dots, but your published posts (and final HTML output) will appear with smart quotes, em-dashes, and proper ellipses. SmartyPants does not modify characters within
``<pre>``, ``<code>``, ``<kbd>``,
``<math>`` or ``<script>`` tag blocks.

He said, "'Quoted' words in a larger quote."

str = re.sub(r""""'(?=\w)""", """“‘""", str) str = re.sub(r"""'"(?=\w)""", """‘“""", str) # Special case for decade abbreviations (the '80s): str = re.sub(r"""(?=\d{2}s)""", r"""’/""", str) close_class = r"""[^\ \t\r\n\[\{\(\-]""" dec_dashes = r"""–|—""" # Get most opening single quotes: opening_single_quotes_regex = re.compile(r""" ( \s | # a whitespace char, or   | # a non-breaking space entity, or -- | # dashes, or &[mn]dash; | # named dash entities %s | # or decimal entities &\#x201[34]; # or hex ) ' # the quote (?=\w) # followed by a word character """ % (dec_dashes,), re.VERBOSE) str = opening_single_quotes_regex.sub(r"""\1‘""", str) closing_single_quotes_regex = re.compile(r""" (%s) ' (?=\s | s\b) """ % (close_class,), re.VERBOSE) str = closing_single_quotes_regex.sub(r"""\1’""", str) closing_single_quotes_regex = re.compile(r""" (%s) ' (\s | s\b) """ % (close_class,), re.VERBOSE) str = closing_single_quotes_regex.sub(r"""\1’\2""", str) # Any remaining single quotes should be opening ones: re.sub(r"""'""", r"""‘""", str) # Get most opening double quotes: opening_double_quotes_regex = re.compile(r""" ( \s | # a whitespace char, or   | # a non-breaking space entity, or -- | # dashes, or &[mn]dash; | # named dash entities %s | # or decimal entities &\#x201[34]; # or hex ) " # the quote (?=\w) # followed by a word character """ % (dec_dashes,), re.VERBOSE) str = opening_double_quotes_regex.sub(r"""\1“""", str) # Double closing quotes: closing_double_quotes_regex = re.compile(r""" #(%s)? # character that indicates the quote should be closing " (?=\s) """ % (close_class,), re.VERBOSE) str = closing_double_quotes_regex.sub(r"""”""", str) closing_double_quotes_regex = re.compile(r""" (%s) # character that indicates the quote should be closing " """ % (close_class,), re.VERBOSE) str = closing_double_quotes_regex.sub(r"""\1”""", str) # Any remaining quotes should be opening ones. 
str = re.sub(r'"', r"""“""", str) return str def educateBackticks(str): """ Parameter: String. Returns: The string, with ``backticks'' -style double quotes translated into HTML curly quote entities. Example input: ``Isn't this fun?'' Example output: “Isn't this fun?” """ str = re.sub(r"""``""", r"""“""", str) str = re.sub(r"""''""", r"""”""", str) return str def educateSingleBackticks(str): """ Parameter: String. Returns: The string, with `backticks' -style single quotes translated into HTML curly quote entities. Example input: `Isn't this fun?' Example output: ‘Isn’t this fun?’ """ str = re.sub(r"""`""", r"""‘""", str) str = re.sub(r"""'""", r"""’""", str) return str def educateDashes(str): """ Parameter: String. Returns: The string, with each instance of "--" translated to an em-dash HTML entity. """ str = re.sub(r"""--""", r"""—""", str) return str def educateDashesOldSchool(str): """ Parameter: String. Returns: The string, with each instance of "--" translated to an en-dash HTML entity, and each "---" translated to an em-dash HTML entity. """ str = re.sub(r"""---""", r"""—""", str) # em str = re.sub(r"""--""", r"""/–""", str) # en return str def educateDashesOldSchoolInverted(str): """ Parameter: String. Returns: The string, with each instance of "--" translated to an em-dash HTML entity, and each "---" translated to an en-dash HTML entity. Two reasons why: First, unlike the en- and em-dash syntax supported by EducateDashesOldSchool(), it's compatible with existing entries written before SmartyPants 1.1, back when "--" was only used for em-dashes. Second, em-dashes are more common than en-dashes, and so it sort of makes sense that the shortcut should be shorter to type. (Thanks to Aaron Swartz for the idea.) """ str = re.sub(r"""---""", r"""–""", str) # em str = re.sub(r"""--""", r"""/—""", str) # en return str def educateEllipses(str): """ Parameter: String. Returns: The string, with each instance of "..." translated to an ellipsis HTML entity. 
Example input: Huh...? Example output: Huh…? """ str = re.sub(r"""\.\.\.""", r"""…""", str) str = re.sub(r"""\. \. \.""", r"""…""", str) return str def stupefyEntities(str): """ Parameter: String. Returns: The string, with each SmartyPants HTML entity translated to its ASCII counterpart. Example input: “Hello — world.” Example output: "Hello -- world." """ str = re.sub(r"""–""", r"""-""", str) # en-dash str = re.sub(r"""—""", r"""--""", str) # em-dash str = re.sub(r"""‘""", r"""'""", str) # open single quote str = re.sub(r"""’""", r"""'""", str) # close single quote str = re.sub(r"""“""", r'''"''', str) # open double quote str = re.sub(r"""”""", r'''"''', str) # close double quote str = re.sub(r"""…""", r"""...""", str)# ellipsis return str def processEscapes(str): """ Parameter: String. Returns: The string, with after processing the following backslash escape sequences. This is useful if you want to force a "dumb" quote or other character to appear. Escape Value ------ ----- \\ \ \" " \' ' \. . \- - \` ` """ str = re.sub(r"""\\\\""", r"""\""", str) str = re.sub(r'''\\"''', r""""""", str) str = re.sub(r"""\\'""", r"""'""", str) str = re.sub(r"""\\\.""", r""".""", str) str = re.sub(r"""\\-""", r"""-""", str) str = re.sub(r"""\\`""", r"""`""", str) return str def _tokenize(str): """ Parameter: String containing HTML markup. Returns: Reference to an array of the tokens comprising the input string. Each token is either a tag (possibly with nested, tags contained therein, such as , or a run of text between tags. Each element of the array is a two-element array; the first is either 'tag' or 'text'; the second is the actual value. Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin. """ pos = 0 length = len(str) tokens = [] depth = 6 nested_tags = string.join(['(?:<(?:[^<>]',] * depth, "|") + (')*>)' * depth) #match = r"""(?: ) | # comments # (?: <\? .*? 
\?> ) | # directives # %s # nested tags """ % (nested_tags,) tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""") token_match = tag_soup.search(str) previous_end = 0 while token_match is not None: if token_match.group(1) != "": tokens.append(['text', token_match.group(1)]) tokens.append(['tag', token_match.group(2)]) previous_end = token_match.end() token_match = tag_soup.search(str, token_match.end()) if previous_end < len(str): tokens.append(['text', str[previous_end:]]) return tokens if __name__ == "__main__": import locale try: locale.setlocale(locale.LC_ALL, '') except: pass from docutils.core import publish_string docstring_html = publish_string(__doc__, writer_name='html') print docstring_html __author__ = "Chad Miller " __version__ = "1.5_1.1 Sun, 14 Mar 2004 14:38:28 -0500" __url__ = "http://wiki.chad.org/SmartyPantsPy" __description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom"