mirror of https://github.com/sgoudham/Enso-Bot.git
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
86 lines
2.8 KiB
Python
86 lines
2.8 KiB
Python
5 years ago
|
"""This module implements the WordSub class, modelled after a recipe
|
||
|
in "Python Cookbook" (Recipe 3.14, "Replacing Multiple Patterns in a
|
||
|
Single Pass" by Xavier Defrang).
|
||
|
|
||
|
Usage:
|
||
|
Use this class like a dictionary to add before/after pairs:
|
||
|
> subber = WordSub()
|
||
|
> subber["before"] = "after"
|
||
|
> subber["begin"] = "end"
|
||
|
Use the sub() method to perform the substitution:
|
||
|
> print( subber.sub("before we begin") )
|
||
|
after we end
|
||
|
All matching is intelligently case-insensitive:
|
||
|
> print( subber.sub("Before we BEGIN") )
|
||
|
After we END
|
||
|
The 'before' words must be complete words -- no prefixes.
|
||
|
The following example illustrates this point:
|
||
|
> subber["he"] = "she"
|
||
|
> print( subber.sub("he says he'd like to help her") )
|
||
|
she says she'd like to help her
|
||
|
Note that "he" and "he'd" were replaced, but "help" and "her" were
|
||
|
not.
|
||
|
"""
|
||
|
|
||
|
from __future__ import print_function
|
||
|
|
||
|
|
||
|
# 'dict' objects weren't available to subclass from until version 2.2.
|
||
|
# Get around this by importing UserDict.UserDict if the built-in dict
|
||
|
# object isn't available.
|
||
|
# Probe for the built-in ``dict`` type. Only a NameError means it is missing;
# catching anything broader would also swallow KeyboardInterrupt/SystemExit.
try:
    dict
except NameError:
    from UserDict import UserDict as dict
|
||
|
|
||
|
import re
|
||
|
import string
|
||
|
try:
|
||
|
from ConfigParser import ConfigParser
|
||
|
except ImportError:
|
||
|
from configparser import ConfigParser
|
||
|
|
||
|
class WordSub(dict):
    """All-in-one multiple-string-substitution class.

    Behaves like a dictionary of before/after pairs. Every assignment
    stores three case variants of the pair (lower, Capitalised, UPPER),
    and sub() replaces every whole-word occurrence of a stored key in a
    single regex pass.
    """

    def _wordToRegex(self, word):
        """Return a regex pattern string matching `word` as a whole word.

        The word is escaped so regex metacharacters in it are matched
        literally, then wrapped in word-boundary assertions.
        """
        # The previous implementation branched on whether the word starts
        # and ends with a letter, but both branches produced this same
        # pattern, so the branch was dead weight.
        return r"\b%s\b" % re.escape(word)

    def _update_regex(self):
        """Rebuild the combined regex from the current dictionary keys.

        The result is one alternation of all keys; it is cached in
        self._regex and the dirty flag is cleared.
        """
        self._regex = re.compile("|".join(map(self._wordToRegex, self.keys())))
        self._regexIsDirty = False

    def __init__(self, defaults=None):
        """Initialize the object and populate it from `defaults`.

        defaults -- optional mapping of before/after pairs. Each pair is
        routed through __setitem__, so all three case variants are stored.
        """
        super(WordSub, self).__init__()
        self._regex = None
        self._regexIsDirty = True
        # None instead of a shared ``{}`` default avoids the
        # mutable-default-argument pitfall.
        if defaults is not None:
            for k, v in defaults.items():
                self[k] = v

    def __call__(self, match):
        """Handler invoked for each regex match; returns the replacement."""
        return self[match.group(0)]

    def __setitem__(self, i, y):
        """Store the pair `i` -> `y` in lower, Capitalised and UPPER case."""
        self._regexIsDirty = True
        # Name the class explicitly: super(type(self), self) resolves to
        # this very method again when called on a subclass instance and
        # recurses until the stack overflows.
        store = super(WordSub, self).__setitem__
        # For each entry the user adds, we actually add three entries:
        store(i.lower(), y.lower())                      # key = value
        store(string.capwords(i), string.capwords(y))    # Key = Value
        store(i.upper(), y.upper())                      # KEY = VALUE

    def sub(self, text):
        """Translate `text` and return the modified text."""
        # With no keys the joined pattern is the empty string, which matches
        # at every position and makes __call__ raise KeyError for "".
        if not self:
            return text
        if self._regexIsDirty:
            self._update_regex()
        return self._regex.sub(self, text)
|
||
|
|