#!/usr/bin/env python3
# -*- coding: utf-8 -*-

'''
ponysay - Ponysay, cowsay reimplementation for ponies

Copyright (C) 2012, 2013  Erkin Batu Altunbaş et al.


This program is free software. It comes without any warranty, to
the extent permitted by applicable law. You can redistribute it
and/or modify it under the terms of the Do What The Fuck You Want
To Public License, Version 2, as published by Sam Hocevar. See
http://sam.zoy.org/wtfpl/COPYING for more details.
'''
from common import *



class SpelloCorrecter(): # Naïvely and quickly ported and adapted from optimised Java, may not be the nicest, or even fast, Python code
    '''
    Class used for correcting spellos and typos.
    
    Note that a correctly spelled word is not recognised as correct any faster
    than a misspelled word is corrected: every lookup scans the dictionary.
    It is also limited to words of size 0 to 127 (inclusive).
    '''
    
    def __init__(self, directories, ending):
        '''
        Constructor
        
        @param  directories:list<str>  List of directories that contains the file names with the correct spelling
        @param  ending:str             The file name ending of the correctly spelled file names, this is removed for the name
        '''
        # Substitution costs for letters that are easily confused, looked up as
        # weights[typed letter][correct letter]; any pair not listed costs 1.
        self.weights = {'k' : {'c' : 0.25,  'g' : 0.75,  'q' : 0.125},
                        'c' : {'k' : 0.25,  'g' : 0.75,  's' : 0.5,  'z' : 0.5,  'q' : 0.125},
                        's' : {'z' : 0.25,  'c' : 0.5},
                        'z' : {'s' : 0.25,  'c' : 0.5},
                        'g' : {'k' : 0.75,  'c' : 0.75,  'q' : 0.9},
                        'o' : {'u' : 0.5},
                        'u' : {'o' : 0.5,   'v' : 0.75,  'w' : 0.5},
                        'b' : {'v' : 0.75},
                        'v' : {'b' : 0.75,  'w' : 0.5,   'u' : 0.7},
                        'w' : {'v' : 0.5,   'u' : 0.5},
                        'q' : {'c' : 0.125, 'k' : 0.125, 'g' : 0.9}}
        
        self.corrections = None
        # The dictionary is filled from the back towards the front; the last
        # slot permanently holds the empty word and `dictionaryEnd` is the index
        # of the most recently added word.
        self.dictionary = [None] * 513
        # reusable[i] is the length of the common prefix of dictionary[i] and dictionary[i + 1]
        self.reusable = [0] * 512
        self.dictionaryEnd = 512
        self.closestDistance = 0
        
        # Edit distance matrix, M[y][x] is the distance between the first `y`
        # letters of the correct word and the first `x` letters of the typed word.
        # Row 0 and column 0 are the fixed boundaries and are never overwritten.
        self.M = [[y] + [0] * 127 for y in range(128)]
        self.M[0] = list(range(128))
        
        previous = ''
        self.dictionary[-1] = previous
        
        # `os` and `endswith` are not defined in this file; they are expected
        # to be provided by the star import from `common`.
        for directory in directories:
            for filename in os.listdir(directory):
                if (not endswith(filename, ending)) or (len(filename) - len(ending) > 127):
                    continue
                # Not `filename[:-len(ending)]`, that is the empty string when `ending` is empty
                proper = filename[:len(filename) - len(ending)]
                
                if self.dictionaryEnd == 0:
                    # Out of room at the front: prepend as many empty slots as there are entries
                    self.dictionaryEnd = len(self.dictionary)
                    self.reusable = [0] * self.dictionaryEnd + self.reusable
                    self.dictionary = [None] * self.dictionaryEnd + self.dictionary
                
                self.dictionaryEnd -= 1
                self.dictionary[self.dictionaryEnd] = proper
                
                self.reusable[self.dictionaryEnd] = self.__commonPrefix(previous, proper, len(previous))
                previous = proper
        # NOTE: the words are kept in the order `os.listdir` returned them,
        # the sorting pass below has been left disabled.
        #part = self.dictionary[self.dictionaryEnd : len(self.dictionary) - 1]
        #part.sort()
        #self.dictionary[self.dictionaryEnd : len(self.dictionary) - 1] = part
        #
        #index = len(self.dictionary) - 1
        #while index >= self.dictionaryEnd:
        #    proper = self.dictionary[index]
        #    prevCommon = min(len(previous), len(proper))
        #    for i in range(0, prevCommon):
        #        if previous[i] != proper[i]:
        #            prevCommon = i
        #            break
        #    previous = proper
        #    self.reusable[self.dictionaryEnd] = prevCommon
        #    index -= 1;
    
    
    @staticmethod
    def __commonPrefix(a, b, limit):
        '''
        Gets the length of the common prefix of two words
        
        @param   a:str      The first word
        @param   b:str      The second word
        @param   limit:int  The maximum number of letters to compare
        @return  :int       The number of leading letters, at most `limit`, that are equal in `a` and `b`
        '''
        limit = min(limit, len(a), len(b))
        for i in range(limit):
            if a[i] != b[i]:
                return i
        return limit
    
    
    def correct(self, used):
        '''
        Finds the closest correctly spelled words
        
        @param   used:str                                 The word to correct
        @return  (words, distance):(list<str>, int|float)  A list of the closest spellings and their weighted distance,
                                                          words longer than 127 letters are returned as is with distance 0,
                                                          and with an empty dictionary the list is empty and the distance
                                                          is 0x7FFFFFFF
        '''
        if len(used) > 127:
            return ([used], 0)
        
        self.__correct(used)
        return (self.corrections, self.closestDistance)
    
    
    def __correct(self, used):
        '''
        Finds the closest correctly spelled words, the result is
        stored in `self.corrections` and `self.closestDistance`
        
        @param  used:str  The word to correct, it must satisfy all restrictions
        '''
        self.closestDistance = 0x7FFFFFFF
        # Start from an empty list so that an empty dictionary yields no
        # suggestions rather than `None` or the result of an earlier call
        self.corrections = []
        usedLen = len(used)
        
        # `previous` is the last word the matrix was filled in for and
        # `validRows` is how many of its leading rows are known to be complete
        previous = self.dictionary[-1]
        validRows = 0
        
        d = len(self.dictionary) - 1
        while d > self.dictionaryEnd:
            d -= 1
            proper = self.dictionary[d]
            properLen = len(proper)
            
            # The difference in length is a lower bound for the distance
            if abs(properLen - usedLen) > self.closestDistance:
                continue
            
            # Rows that belong to a prefix shared with the previously
            # measured word are still valid and do not need to be recomputed
            if previous == self.dictionary[d + 1]:
                prevCommon = self.reusable[d]
            else:
                prevCommon = self.__commonPrefix(previous, proper, validRows)
            y0 = min(prevCommon, validRows)
            
            # Every column is always computed (x0 = 0), skipping leading columns
            # would leave cells from earlier words in the rows that follow
            distance = self.__distance(proper, y0, properLen, used, 0, usedLen)
            
            if self.closestDistance > distance:
                self.closestDistance = distance
                self.corrections = [proper]
            elif self.closestDistance == distance:
                self.corrections.append(proper)
            
            previous = proper
            if distance >= 0x7FFFFF00:
                # Aborted early, the low byte tells in which row
                validRows = distance & 255
            else:
                validRows = properLen
    
    
    def __distance(self, proper, y0, yn, used, x0, xn):
        '''
        Calculate the distance between a correct word and an incorrect word
        
        Rows 0 to `y0` (inclusive) and column `x0` of `self.M` must already be
        valid for the two words, rows `y0` + 1 and up are overwritten
        
        @param   proper:str  The correct word
        @param   y0:int      The offset for `proper`
        @param   yn:int      The length, before applying `y0`, of `proper`
        @param   used:str    The incorrect word
        @param   x0:int      The offset for `used`
        @param   xn:int      The length, before applying `x0`, of `used`
        @return  :int|float  The distance between the words, or `0x7FFFFF00 | y` if the calculation was
                             aborted at row `y` because the word cannot beat `self.closestDistance`
        '''
        weights = self.weights
        my = self.M[y0]
        for y in range(y0, yn):
            p = proper[y]
            myy = self.M[y + 1]
            # Smallest value in the new row, boundary column included
            best = myy[x0]
            for x in range(x0, xn):
                u = used[x]
                change = my[x]
                if p == u:
                    # Equal letters, the diagonal is carried over at no cost
                    cell = change
                else:
                    cw = 1
                    if u in weights:
                        if p in weights[u]:
                            cw = weights[u][p]
                    remove = myy[x]
                    add = my[x + 1]
                    cell = min(cw + change, 1 + min(remove, add))
                myy[x + 1] = cell
                if best > cell:
                    best = cell
            
            # The final distance cannot be smaller than the smallest value in any row
            if best > self.closestDistance:
                return 0x7FFFFF00 | y
            my = myy
        return my[xn]