hangman游戏

2023-08-27 01:54| 来源: 网络整理| 查看: 265

挑战是编写一个算法来玩 Hangman（刽子手）游戏。你的算法应当优于我们提供的 baseline 算法，理想情况下准确率超过 50%。我们已在这封电子邮件中附上训练字典文件，以及一个 Jupyter 笔记本模板，其中演示了如何编写、执行并提交你的算法。

游戏规则：给定单词长度，让你逐个字母地猜这个单词；猜对的字母会保留在对应位置上，每猜错一个字母就用掉一条命，每个单词共有六条命。

模型背景，已知游戏规则，还有一个单词字典作为训练集。

这个游戏有两种思路：一种比较依赖 GPU 硬件，即用 RNN 的方法学习字典内的信息来做 NLP 预测；另一种是基于游戏规则，从训练字典中获得一些统计信息，再利用这些先验信息来预测字典外的单词。由于硬件限制，我用的是后者：以单词长度、已猜对字母所在的位置、尚未猜出的字母所在的位置等作为特征进行训练。

# my code
"""Hangman solver driven by masked character n-gram statistics.

A training dictionary is turned into a table that maps a partially masked
n-gram (for example ``"a_c"``) to the letters observed behind the mask.
During a game the current pattern is scanned with a sliding window, the
table is consulted for every window, and the letter with the largest
accumulated score is guessed next.
"""
from collections import Counter, defaultdict
from itertools import combinations

full_dictionary_location = "words_250000_train.txt"

# Last-resort guessing order (approximate English letter frequency), used
# only when neither the n-gram table nor the word-length prior can propose
# a letter that has not been guessed yet.
_FALLBACK_LETTER_ORDER = "etaoinshrdlcumwfgypbvkjxqz"


def update_counter(lm, data, order):
    """Count which character follows every space-free ``order``-gram of ``data``.

    Args:
        lm: mapping of history -> Counter (e.g. ``defaultdict(Counter)``);
            it is updated in place.
        data: training text in which words are separated by single spaces.
        order: length of the history window.

    Returns:
        The same ``lm`` object, for convenience.
    """
    for start in range(len(data) - order):
        history = data[start:start + order]
        char = data[start + order]
        # Windows that touch a word boundary carry no in-word information.
        if ' ' not in history and char != ' ':
            lm[history][char] += 1
    return lm


def train_char_lm(dic, order=3):
    """Build the masked n-gram table from a space-separated word string.

    For every ``order``-long window that contains neither a space nor an
    underscore, each way of masking between 1 and ``order - 1`` positions
    with ``'_'`` becomes a history; the most common masked letter (first
    one on ties) is counted as the letter to guess for that history.

    Args:
        dic: training words joined by single spaces.
        order: window length.

    Returns:
        dict mapping masked history -> list of ``(letter, probability)``
        pairs whose probabilities sum to 1 for each history.
    """
    lm = defaultdict(Counter)
    data = dic

    # ``+ 1`` so that the final window of the text is included as well;
    # without it the last gram of the last word was silently dropped.
    for start in range(len(data) - order + 1):
        gram = data[start:start + order]
        if ' ' in gram or '_' in gram:
            continue
        # From order-1 masked positions down to a single masked position.
        for n_masked in range(order - 1, 0, -1):
            for masked in combinations(range(len(gram)), n_masked):
                history = "".join(
                    '_' if idx in masked else ch for idx, ch in enumerate(gram)
                )
                hidden = "".join(gram[idx] for idx in masked)
                target = Counter(hidden).most_common(1)[0][0]
                lm[history][target] += 1

    def normalize(counter):
        total = float(sum(counter.values()))
        return [(c, cnt / total) for c, cnt in counter.items()]

    return {hist: normalize(chars) for hist, chars in lm.items()}


def check_guessed(sorted_letter_count, guessed_letters):
    """Return the first ``(letter, score)`` pair that is still guessable.

    Args:
        sorted_letter_count: iterable of ``(letter, score)`` pairs, best first.
        guessed_letters: letters that have already been tried.

    Returns:
        The first pair whose letter is not ``'_'`` and has not been guessed,
        or ``None`` when no such pair exists.
    """
    for letter, instance_prob in sorted_letter_count:
        # Value comparison: ``is not '_'`` relied on string interning.
        if letter != '_' and letter not in guessed_letters:
            return (letter, instance_prob)
    return None


def update_candidate(candidate_list, curr_guess):
    """Add the score of ``curr_guess`` to ``candidate_list`` (in place).

    Args:
        candidate_list: dict mapping letter -> accumulated score.
        curr_guess: ``(letter, score)`` pair or ``None`` (ignored).

    Returns:
        The same ``candidate_list`` object.
    """
    if curr_guess is not None:
        letter, score = curr_guess[0], curr_guess[1]
        if letter in candidate_list:
            candidate_list[letter] += score
        else:
            candidate_list[letter] = score
    return candidate_list


def generate_letter(full_dict, guess, lm, d, guessed_letters, order=3):
    """Choose the next letter to guess for the pattern ``guess``.

    Args:
        full_dict: unused; kept so existing callers keep working.
        guess: current pattern without separators, ``'_'`` for unknown
            positions (e.g. ``"c__"``).
        lm: table produced by ``train_char_lm``.
        d: mapping of word length -> list of training words of that length.
        guessed_letters: letters already tried in this game.
        order: window length the table was trained with.

    Returns:
        A single letter that is not in ``guessed_letters``.

    Raises:
        ValueError: if every fallback letter has already been guessed.
    """
    candidate_list = {}
    print(guess)

    def length_prior():
        # Most frequent unguessed letter among training words of this length.
        # ``get`` avoids inserting keys into a defaultdict and also works
        # with a plain dict that lacks this length.
        letters = Counter("".join(d.get(len(guess), [])))
        return check_guessed(letters.most_common(), guessed_letters)

    # The first guess is based on letter frequency over all words that have
    # the same length as the target word.
    if guess and set(guess) == {'_'}:
        update_candidate(candidate_list, length_prior())

    # Slide a window over the pattern and collect the best unguessed letter
    # the table proposes for each window it knows.
    for start in range(len(guess)):
        stem = guess[start:start + order]
        if stem in lm:
            ranked = sorted(lm[stem], key=lambda item: item[1], reverse=True)
            update_candidate(candidate_list, check_guessed(ranked, guessed_letters))

    # No window was found in the table: fall back to the length prior.
    if not candidate_list:
        update_candidate(candidate_list, length_prior())

    # Still nothing (no words of this length, or all of their letters have
    # been tried): use a fixed order instead of calling max() on an empty dict.
    if not candidate_list:
        for letter in _FALLBACK_LETTER_ORDER:
            if letter not in guessed_letters:
                return letter
        raise ValueError("no unguessed letter left to propose")

    return max(candidate_list, key=candidate_list.get)


def play(answer, lm, d, order, nTrials=6):
    """Play one game of hangman against ``answer``.

    Args:
        answer: target word in spaced form, every letter followed by one
            space (``"c a t "``), as built by ``" ".join(word) + " "``.
        lm: table produced by ``train_char_lm``.
        d: mapping of word length -> list of training words.
        order: window length the table was trained with.
        nTrials: number of wrong guesses allowed.

    Returns:
        ``(guess, flag)`` where ``guess`` is the final pattern in the same
        spaced form as ``answer`` and ``flag`` is True when the word was
        completed before running out of lives.
    """
    guess = "_ " * (len(answer) // 2)
    guess_clean = guess[::2].replace(" ", "")
    guessed_letters = []
    errors = 0
    count = 0
    flag = False

    while errors < nTrials:
        # generate_letter ignores its first argument, so no module-level
        # dictionary is needed here.
        c = generate_letter(None, guess_clean, lm, d, guessed_letters, order)
        guessed_letters.append(c)

        positions = [pos for pos, char in enumerate(answer) if char == c]
        if positions:
            for j in positions:
                guess = guess[:j] + c + guess[j + 1:]
        else:
            errors += 1

        # Refresh the compact pattern before logging so that the printed
        # state matches the printed error count.
        guess_clean = guess[::2]
        print("-------------")
        print(count, errors, c, ': ', guess_clean)
        print("-------------")

        if '_' not in guess_clean:
            flag = True
            break
        count += 1

    return guess, flag


def build_dictionary(dictionary_file_location):
    """Read the dictionary file and return its lines as a list of words."""
    with open(dictionary_file_location, "r") as text_file:
        return text_file.read().splitlines()


def main():
    """Train on 95% of the dictionary and report accuracy on the other 5%."""
    # NOTE(review): the original snippet calls train_test_split without
    # importing it; it is assumed to be scikit-learn's -- confirm.
    from sklearn.model_selection import train_test_split

    full_dictionary = build_dictionary(full_dictionary_location)

    # 95% train and 5% test
    full_dic, answers = train_test_split(full_dictionary, test_size=0.05)
    lm = train_char_lm(" ".join(full_dic), 5)

    d = defaultdict(list)
    for word in full_dic:
        d[len(word)].append(word)

    N = len(answers)
    success = 0
    for answer in answers:
        answer = " ".join(answer) + " "
        res, flag = play(answer, lm, d, order=5)
        if flag:
            success += 1
            print("success!, the answer is " + res)
        else:
            print("failed!, the answer is " + answer + " the guess is " + res)

    # Guard against an empty test split.
    acc = success / (N * 1.0) * 100 if N else 0.0
    print("success rate is %0.2f%%" % acc)  # here about 51%


if __name__ == "__main__":
    main()

# With this method the success rate can be raised to roughly 50% when
# playing with six lives. The above was a hands-on problem from a Trex
# coding test; I will write it up in more detail when I have time.

【本文地址】

hangman游戏

hangman游戏

今日新闻

推荐新闻