Ale*_*x K 8 python optimization letter
最近在面试时,我遇到了以下问题:
编写一个能够在命令行上运行的 Python 脚本.
它应该在命令行中使用两个单词(或者如果您愿意,可以选择查询用户通过控制台提供两个单词).
鉴于这两个词:a.确保它们长度相等b.确保它们都是您下载的英语有效单词词典中的单词.
如果是这样,请计算是否可以通过以下一系列步骤从第一个单词到达第二个单词:a.一次只能更改一个字母;b.每次更改字母后,得到的单词也必须存在于词典中;c.不能添加或删除字母.
如果可以从第一个单词到达第二个单词,则脚本应该打印出从一个单词到另一个单词的一条最短路径.
您可以使用 /usr/share/dict/words 作为您的单词词典.
我的解决方案包括使用广度优先搜索来找到两个单词之间的最短路径.但显然这不足以得到这份工作:(
你们能知道我做错了什么吗?非常感谢.
import collections
import functools
import re
def time_func(func):
    '''Decorate ``func`` so the duration of its most recent call is recorded.

    After each call the elapsed wall-clock time, in seconds, is stored on the
    returned wrapper as the attribute ``time_taken``.  The wrapper forwards
    all positional and keyword arguments and returns whatever ``func``
    returns; exceptions raised by ``func`` propagate unchanged.
    '''
    import time

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        try:
            return func(*args, **kwargs)
        finally:
            # Set in ``finally`` so the attribute exists even when ``func``
            # raises; otherwise a later read of ``time_taken`` would fail
            # with AttributeError (or report a stale value).
            wrapper.time_taken = time.time() - start

    return wrapper
class OneLetterGame:
    '''Finds a shortest chain of dictionary words between two words.

    Consecutive words in the chain have the same length and differ in
    exactly one letter.  Neighbours are found by scanning every loaded word,
    so each lookup costs time proportional to the dictionary size.
    '''

    def __init__(self, dict_path):
        '''Remember the dictionary location; no file is read yet.

        dict_path -- path of a text file with one word per line.
        '''
        self.dict_path = dict_path
        # Lower-cased dictionary words, filled in by read_dict().
        self.words = set()

    def run(self, start_word, end_word):
        '''Runs the one letter game with the given start and end words.

        Loads the dictionary words of the right length, searches for a
        shortest path and prints either the path or a "no path" message,
        together with the time the search took.

        Raises AssertionError if the words differ in length or if either of
        them is not in the dictionary.
        '''
        # Raised explicitly instead of through ``assert`` so the check is
        # not stripped by ``python -O``; the exception type is the one the
        # command-line entry point catches.
        if len(start_word) != len(end_word):
            raise AssertionError(
                'Start word and end word must of the same length.')
        self.read_dict(len(start_word))
        path = self.shortest_path(start_word, end_word)
        elapsed = self.shortest_path.time_taken
        if not path:
            print('There is no path between %s and %s (took %.2f sec.)' % (
                start_word, end_word, elapsed))
        else:
            print('The shortest path (found in %.2f sec.) is:\n=> %s' % (
                elapsed, ' -- '.join(path)))

    def _bfs(self, start):
        '''Implementation of breadth first search as a generator.

        Yields ``(parent, node)`` pairs in breadth-first order, starting
        with ``(None, start)``.  The part of the graph to explore is
        obtained on demand from get_neighbours(), and every word is queued
        at most once.
        '''
        queue = collections.deque([(None, start)])
        seen = set([start])
        while queue:
            parent, node = queue.popleft()
            yield parent, node
            fresh = set(self.get_neighbours(node)) - seen
            # Grow the visited set in place rather than rebuilding it on
            # every step.
            seen |= fresh
            queue.extend((node, child) for child in fresh)

    @time_func
    def shortest_path(self, start, end):
        '''Returns the shortest path from start to end using bfs.

        The result is a list of words beginning with ``start`` and ending
        with ``end``, or None when ``end`` cannot be reached.

        Raises AssertionError if either word is not in ``self.words``.
        '''
        if start not in self.words:
            raise AssertionError('Start word not in dictionnary.')
        if end not in self.words:
            raise AssertionError('End word not in dictionnary.')
        # Maps each visited word to the path leading to it; the None entry
        # is the empty path that precedes the start word.
        paths = {None: []}
        for parent, child in self._bfs(start):
            paths[child] = paths[parent] + [child]
            if child == end:
                return paths[child]
        return None

    def get_neighbours(self, word):
        '''Gets every word one letter away from the a given word.

        Returns a list of the loaded words that match ``word`` (ignoring
        case) everywhere except in one position, where any word character
        is accepted.  We do not keep these words in memory because bfs
        accesses a given vertex only once.
        '''
        if not word:
            return []
        # One alternative per position.  The literal parts are escaped so
        # that characters such as '.' in a word are not read as regex
        # syntax.
        alternatives = [
            '^' + re.escape(word[:i]) + r'\w' + re.escape(word[i + 1:]) + '$'
            for i in range(len(word))]
        # Compiled once per call instead of being looked up again for every
        # dictionary word.
        pattern = re.compile('|'.join(alternatives), re.I | re.U)
        return [w for w in self.words if w != word and pattern.match(w)]

    def read_dict(self, size):
        '''Loads every word of a specific size from the dictionnary into memory.

        Each line is decoded as latin-1, stripped of surrounding whitespace
        and lower-cased before its length is compared with ``size``.
        '''
        # Binary mode plus an explicit decode behaves the same on Python 2
        # and Python 3; ``with`` guarantees the file is closed.
        with open(self.dict_path, 'rb') as handle:
            for raw in handle:
                word = raw.decode('latin-1').strip().lower()
                if len(word) == size:
                    self.words.add(word)
if __name__ == '__main__':
    import sys
    # Exactly two words are expected.  Letting a third argument through
    # would only make run() fail with a TypeError.
    if len(sys.argv) != 3:
        print('Usage: python one_letter_game.py start_word end_word')
    else:
        g = OneLetterGame(dict_path='/usr/share/dict/words')
        try:
            g.run(*sys.argv[1:])
        except AssertionError as e:
            # Validation failures are reported as a plain message instead
            # of a traceback.
            print(e)
Run Code Online (Sandbox Code Playgroud)
谢谢你所有的好答案.我认为真正让我得到的事实是我每次都会在字典中迭代所有单词以考虑可能的单词邻居.相反,我可以使用Duncan和Matt Anderson指出的倒排索引.A*方法肯定也有帮助.非常感谢,现在我知道我做错了什么.
这是倒排索引的相同代码:
import collections
import functools
import re
def time_func(func):
    '''Decorate ``func`` so the duration of its most recent call is recorded.

    After each call the elapsed wall-clock time, in seconds, is stored on the
    returned wrapper as the attribute ``time_taken``.  The wrapper forwards
    all positional and keyword arguments and returns whatever ``func``
    returns; exceptions raised by ``func`` propagate unchanged.
    '''
    import time

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        try:
            return func(*args, **kwargs)
        finally:
            # Set in ``finally`` so the attribute exists even when ``func``
            # raises; otherwise a later read of ``time_taken`` would fail
            # with AttributeError (or report a stale value).
            wrapper.time_taken = time.time() - start

    return wrapper
class OneLetterGame:
    '''Finds a shortest chain of dictionary words between two words.

    Consecutive words in the chain have the same length and differ in
    exactly one letter.  Words are stored in an inverted index keyed by
    wildcard patterns ('cat' is filed under '*at', 'c*t' and 'ca*'), so the
    neighbours of a word are found by looking up its own patterns.
    '''

    def __init__(self, dict_path):
        '''Remember the dictionary location; no file is read yet.

        dict_path -- path of a text file with one word per line.
        '''
        self.dict_path = dict_path
        # Inverted index: wildcard pattern -> list of words matching it.
        self.words = {}

    def run(self, start_word, end_word):
        '''Runs the one letter game with the given start and end words.

        Loads the dictionary words of the right length, searches for a
        shortest path and prints either the path or a "no path" message,
        together with the time the search took.

        Raises AssertionError if the words differ in length or if either of
        them is not in the dictionary.
        '''
        # Raised explicitly instead of through ``assert`` so the check is
        # not stripped by ``python -O``; the exception type is the one the
        # command-line entry point catches.
        if len(start_word) != len(end_word):
            raise AssertionError(
                'Start word and end word must of the same length.')
        self.read_dict(len(start_word))
        path = self.shortest_path(start_word, end_word)
        elapsed = self.shortest_path.time_taken
        if not path:
            print('There is no path between %s and %s (took %.2f sec.)' % (
                start_word, end_word, elapsed))
        else:
            print('The shortest path (found in %.2f sec.) is:\n=> %s' % (
                elapsed, ' -- '.join(path)))

    def _bfs(self, start):
        '''Implementation of breadth first search as a generator.

        Yields ``(parent, node)`` pairs in breadth-first order, starting
        with ``(None, start)``.  The part of the graph to explore is
        obtained on demand from get_neighbours(), and every word is queued
        at most once.
        '''
        queue = collections.deque([(None, start)])
        seen = set([start])
        while queue:
            parent, node = queue.popleft()
            yield parent, node
            fresh = set(self.get_neighbours(node)) - seen
            # Grow the visited set in place rather than rebuilding it on
            # every step.
            seen |= fresh
            queue.extend((node, child) for child in fresh)

    @time_func
    def shortest_path(self, start, end):
        '''Returns the shortest path from start to end using bfs.

        The result is a list of words beginning with ``start`` and ending
        with ``end``, or None when ``end`` cannot be reached.

        Raises AssertionError if either word is not in the dictionary.
        '''
        if not self.in_dictionnary(start):
            raise AssertionError('Start word not in dictionnary.')
        if not self.in_dictionnary(end):
            raise AssertionError('End word not in dictionnary.')
        # Maps each visited word to the path leading to it; the None entry
        # is the empty path that precedes the start word.
        paths = {None: []}
        for parent, child in self._bfs(start):
            paths[child] = paths[parent] + [child]
            if child == end:
                return paths[child]
        return None

    def in_dictionnary(self, word):
        '''Tell whether ``word`` itself was loaded by read_dict().

        read_dict() files every word under each of its wildcard patterns,
        so checking the bucket of the first pattern is sufficient.  Testing
        only that a pattern exists is not: '*at' exists as soon as 'cat' is
        loaded, which would wrongly accept 'xat'.
        '''
        first_step = next(iter(self.get_steps(word)), None)
        if first_step is None:
            # The empty word has no pattern and is never indexed.
            return False
        return word in self.words.get(first_step, ())

    def get_neighbours(self, word):
        '''Gets every word one letter away from the a given word.

        Yields the indexed words that share a wildcard pattern with
        ``word``, leaving out ``word`` itself.  Patterns missing from the
        index contribute nothing.
        '''
        for step in self.get_steps(word):
            for neighbour in self.words.get(step, ()):
                if neighbour != word:
                    yield neighbour

    def get_steps(self, word):
        '''Return a generator of the wildcard patterns of ``word``.

        Pattern ``i`` is ``word`` with the letter at position ``i`` replaced
        by '*'.
        '''
        return (word[:i] + '*' + word[i + 1:] for i in range(len(word)))

    def read_dict(self, size):
        '''Loads every word of a specific size from the dictionnary into an inverted index.

        Each line is decoded as latin-1, stripped of surrounding whitespace
        and lower-cased before its length is compared with ``size``.
        '''
        # Binary mode plus an explicit decode behaves the same on Python 2
        # and Python 3; ``with`` guarantees the file is closed.
        with open(self.dict_path, 'rb') as handle:
            for raw in handle:
                word = raw.decode('latin-1').strip().lower()
                if len(word) != size:
                    continue
                for step in self.get_steps(word):
                    self.words.setdefault(step, []).append(word)
if __name__ == '__main__':
    import sys
    # Exactly two words are expected.  Letting a third argument through
    # would only make run() fail with a TypeError.
    if len(sys.argv) != 3:
        print('Usage: python one_letter_game.py start_word end_word')
    else:
        g = OneLetterGame(dict_path='/usr/share/dict/words')
        try:
            g.run(*sys.argv[1:])
        except AssertionError as e:
            # Validation failures are reported as a plain message instead
            # of a traceback.
            print(e)
Run Code Online (Sandbox Code Playgroud)
和时间比较:
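% python one_letter_game_old.py happy hello
The shortest path (found in 91.57 sec.) is:
=> happy -- harpy -- harps -- harts -- halts -- halls -- hells -- hello
% python one_letter_game.py happy hello
The shortest path (found in 1.71 sec.) is:
=> happy -- harpy -- harps -- harts -- halts -- halls -- hells -- hello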
Mat*_*son 10
我不会说你的解决方案是错的,但它有点慢.有两个原因.
广度优先搜索将访问所有长度比所需路径短的路径,以及所需的长度路径,然后才能给出答案.理想情况下,最佳优先搜索(A*)将跳过大多数不相关的路径.
每次查找邻居时,您都要遍历词典中的每一个单词,检查它是否是候选邻居.正如 Duncan 建议的那样,您可以预先建立一个数据结构,直接查出邻居,而不是逐个搜索.
这是您的问题的另一种解决方案:
import collections
import heapq
import time
def distance(start, end):
    '''Count the positions at which ``start`` and ``end`` differ.

    Every index of ``start`` is compared with the same index of ``end``.
    Characters of ``end`` beyond the length of ``start`` are not looked at,
    and an ``end`` shorter than ``start`` raises IndexError.
    '''
    return sum(1 for index, letter in enumerate(start) if letter != end[index])
class SearchHeap(object):
    '''Priority queue of partial paths for a best-first word search.

    Entries come back in order of ``len(path) + distance``, i.e. the words
    already on the path plus the caller's estimate of what remains.  Ties
    are broken by the smaller ``distance``.

    Ordering by the estimate alone and keeping only the first path pushed
    for a word is greedy: the search can commit to a long detour and return
    a path that is not the shortest.  A word is therefore accepted again
    whenever it is reached by a strictly shorter path.
    '''

    def __init__(self):
        # Every word that has ever been accepted by push().
        self.on_heap = set()
        # heapq-managed list of (priority, (distance, steps), word, path).
        self.heap = []
        # Length of the shortest path pushed so far for each word.
        self._shortest_seen = {}

    def push(self, distance, word, path):
        '''Queue ``path`` (a sequence ending in ``word``) for later expansion.

        distance -- estimated number of steps still needed from ``word``.

        The call is ignored when ``word`` has already been pushed with a
        path that is no longer than this one.
        '''
        steps = len(path)
        known = self._shortest_seen.get(word)
        if known is not None and known <= steps:
            return
        self._shortest_seen[word] = steps
        self.on_heap.add(word)
        # An older, longer entry for the same word may remain queued.  It
        # has a larger priority, so it is popped after this one, and
        # re-expanding it is harmless because pushes from it are rejected
        # by the length check above.
        heapq.heappush(
            self.heap, (steps + distance, (distance, steps), word, path))

    def __len__(self):
        '''Return the number of queued entries.'''
        return len(self.heap)

    def pop(self):
        '''Remove and return the best entry as ``((distance, steps), word, path)``.

        Raises IndexError when the queue is empty.
        '''
        # Drop the leading priority so callers get the same three-field
        # tuple shape as before.
        return heapq.heappop(self.heap)[1:]
class OneLetterGame(object):
    '''Searches for a chain of dictionary words between two words.

    Consecutive words in the chain have the same length and differ in
    exactly one letter.  Words are indexed by wildcard buckets ('cat' is
    filed under '*at', 'c*t' and 'ca*') and candidates are explored through
    a SearchHeap, ranked with distance().
    '''

    # Bucket -> list of words; rebuilt at the start of every run().
    _word_data = None

    def __init__(self, dict_path):
        '''Remember the dictionary location; no file is read yet.

        dict_path -- path of a text file with one word per line.
        '''
        self.dict_path = dict_path

    def run(self, start_word, end_word):
        '''Load the dictionary, search for a path and print the outcome.

        Prints a message and returns early when the words differ in length
        or when either of them is missing from the dictionary.  Otherwise
        prints the search time, the path found (or a "no path" message) and
        the total run time.
        '''
        start_time = time.time()
        self._word_data = collections.defaultdict(list)
        if len(start_word) != len(end_word):
            print('words of different length; no path')
            return

        found_start, found_end = self._load_words(start_word, end_word)
        if not found_start:
            print('start word %r not found in dictionary' % start_word)
            return
        if not found_end:
            print('end word %r not found in dictionary' % end_word)
            return

        search_start_time = time.time()
        path = self._shortest_path(start_word, end_word)
        search_time = time.time() - search_start_time
        print('search time was %.4f seconds' % search_time)

        if path:
            print(path)
        else:
            print('no path found from %r to %r' % (start_word, end_word))

        run_time = time.time() - start_time
        print('total run time was %.4f seconds' % run_time)

    def _load_words(self, start_word, end_word):
        '''Index every dictionary word as long as ``start_word``.

        Lines are stripped of surrounding whitespace and compared as they
        are (no case folding).  Returns a pair of booleans telling whether
        ``start_word`` and ``end_word`` were seen in the file.
        '''
        found_start, found_end = False, False
        length = len(start_word)
        with open(self.dict_path) as words:
            for line in words:
                word = line.strip()
                if len(word) != length:
                    continue
                if word == start_word:
                    found_start = True
                if word == end_word:
                    found_end = True
                for bucket in self._buckets_for(word):
                    self._word_data[bucket].append(word)
        return found_start, found_end

    def _shortest_path(self, start_word, end_word):
        '''Return a path from ``start_word`` to ``end_word`` as a tuple.

        Candidates are expanded in the order SearchHeap hands them back.
        An empty tuple is returned when ``end_word`` is never reached.
        '''
        heap = SearchHeap()
        heap.push(distance(start_word, end_word), start_word, (start_word,))
        while len(heap):
            dist, word, path = heap.pop()
            if word == end_word:
                return path
            for neighbor in self._neighbors_of(word):
                heap.push(
                    distance(neighbor, end_word),
                    neighbor,
                    path + (neighbor,))
        return ()

    def _buckets_for(self, word):
        '''Return the wildcard buckets of ``word`` as a list.

        Bucket ``pos`` is ``word`` with the letter at ``pos`` replaced by
        '*'.
        '''
        return [word[:pos] + '*' + word[pos + 1:]
                for pos in range(len(word))]

    def _neighbors_of(self, word):
        '''Yield the indexed words that differ from ``word`` in one letter.

        ``word`` itself is left out, although it sits in every one of its
        own buckets.
        '''
        for bucket in self._buckets_for(word):
            # ``get`` avoids inserting empty buckets into the defaultdict.
            # The loop variable has its own name so the ``word`` parameter
            # is not overwritten while it is still being compared.
            for candidate in self._word_data.get(bucket, ()):
                if candidate != word:
                    yield candidate
if __name__ == '__main__':
    import sys
    # Exactly two words are expected.  Letting a third argument through
    # would only make run() fail with a TypeError.
    if len(sys.argv) != 3:
        print('Usage: python one_letter_game.py start_word end_word')
    else:
        matt = OneLetterGame(dict_path='/usr/share/dict/words')
        matt.run(*sys.argv[1:])
Run Code Online (Sandbox Code Playgroud)
并且,时间比较:
% python /tmp/one_letter_alex.py canoe happy
The shortest path (found in 51.98 sec.) is:
=> canoe -- canon -- caxon -- taxon -- taxor -- taxer -- taper -- paper -- papey -- pappy -- happy
% python /tmp/one_letter_matt.py canoe happy
search time was 0.0020 seconds
('canoe', 'canon', 'caxon', 'taxon', 'taxor', 'taxer', 'taper', 'paper', 'papey', 'pappy', 'happy')
total run time was 0.2416 seconds
Run Code Online (Sandbox Code Playgroud)