store in database

Signed-off-by: Mattias Andrée <maandree@operamail.com>
This commit is contained in:
Mattias Andrée 2013-08-17 19:18:49 +02:00
parent ed03ac4cb2
commit d150fc9643

View file

@ -3,30 +3,31 @@
import os import os
import sys import sys
import dbm
allponies = {} allponies = {}
ponies = os.listdir('ponyquotes') ponies = os.listdir('ponyquotes')
for pony in allponies: for pony in ponies:
parts = pony.split('.') parts = pony.split('.')
if len(parts) == 2: if len(parts) == 2:
name = parts[1] name = parts[0]
index = parts[2] index = parts[1]
if len(name) * len(index) > 0: if len(name) * len(index) > 0:
if len(index.strip('0123456789')) == 0: if len(index.strip('0123456789')) == 0:
if name not in allponies: if name not in allponies:
allponies[name] = set([]) allponies[name] = set([])
allponies[name].add(index) allponies[name].add(int(index))
for pony in allponies.keys(): for pony in allponies.keys():
count = max(allponies[pony]) + 1 count = max(allponies[pony]) + 1
if len(allponies[pony]) != count: if len(allponies[pony]) != count:
print('Index error on quotes for %s' % pony, file = sys.stderr) print('Index error on quotes for %s' % pony, file = sys.stderr)
sys.exit(1) sys.exit(1)
allponies[pony] = count allponies[pony] = str(count)
lines = None lines = None
while open('ponyquotes/ponies', 'rb') as file: with open('ponyquotes/ponies', 'rb') as file:
lines = file.read() lines = file.read()
lines = lines.decode('utf-8', 'error').split('\n') lines = lines.decode('utf-8', 'error').split('\n')
@ -40,6 +41,8 @@ for line in lines:
ponies = line.split('+') ponies = line.split('+')
master = ponies[0] master = ponies[0]
if master not in allponies:
continue
count = allponies[master] count = allponies[master]
by_master[master] = [count] + ponies by_master[master] = [count] + ponies
@ -48,3 +51,13 @@ for line in lines:
by_file[pony] = [] by_file[pony] = []
by_file[pony] += [count, master] by_file[pony] += [count, master]
db = dbm.open('by-master', 'n')
for key in by_master:
db[key] = ' '.join(by_master[key])
db.close()
db = dbm.open('by-file', 'n')
for key in by_file:
db[key] = ' '.join(by_file[key])
db.close()