python成语接龙代码_#python#成语接龙(一)
本来想写一个机器人成语接龙的,但太复杂了,所以先弄一个可以找到所有能“一招制敌”的成语的程序,也就是没办法再被接龙的成语。
第一步是找一个带拼音的成语字库,网上找了半天,没找到合适的,后来发现搜狗输入法自带成语词库,就拿来解析了。
搜狗的字库是 scel 格式,内容包括拼音表和字库两部分,它们的结构如下所示:
解析代码如下 [par_scel.py]:为了避开烦人的字符编码问题,用的是 python 3。
增加了 print,方便跟踪结果。
# encoding=utf8
# python 3
import struct
import sys

# Length in bytes of the marker that precedes the pinyin table; the table's
# records start this many bytes after the marker's offset.
PIN_YIN_TAG_LEN = 4
def get_hanzi_offt(buff):
    """Return the offset of the word (hanzi) table inside a .scel buffer.

    The byte at index 4 selects between the two layouts this parser knows:
    ``0x44`` maps to offset ``0x2628`` and ``0x45`` maps to ``0x26c4``.

    Args:
        buff: Raw bytes of the .scel file.

    Returns:
        The word-table offset, or ``None`` when the marker byte is not one of
        the known values or the buffer is too short to contain it.
    """
    # A truncated file is reported as "unsupported" rather than crashing
    # with IndexError, so the caller's format check can handle it.
    if len(buff) <= 4:
        return None
    return {0x44: 0x2628, 0x45: 0x26c4}.get(buff[4])
def get_pinyin_offt(buff):
    """Return the offset of the pinyin table inside a .scel buffer.

    The table is expected at the fixed offset ``0x1540`` and must begin with
    the 4-byte marker ``9d 01 00 00``.

    Args:
        buff: Raw bytes of the .scel file.

    Returns:
        ``0x1540`` when the marker is present at that offset, otherwise
        ``None`` (including when the buffer is too short).
    """
    tag = b"\x9d\x01\x00\x00"
    offt = 0x1540
    # Slicing past the end yields a short/empty slice, so a truncated buffer
    # simply fails the comparison instead of raising.
    return offt if buff[offt:offt + len(tag)] == tag else None
def get_piyin_table(buff, py_offt, hz_offt):
    """Parse the pinyin table that lies between ``py_offt`` and ``hz_offt``.

    The region starts ``PIN_YIN_TAG_LEN`` bytes after ``py_offt`` (skipping
    the marker) and ends at ``hz_offt``. It is a sequence of records:
    a 2-byte index, a 2-byte length in bytes, then that many bytes of
    UTF-16LE text. Each record is also printed for tracing.

    Args:
        buff: Raw bytes of the .scel file.
        py_offt: Offset of the pinyin table marker.
        hz_offt: Offset of the word table (end of the pinyin table).

    Returns:
        A dict mapping each record's index to its decoded pinyin string.

    Raises:
        struct.error: If the region ends in the middle of a record header.
    """
    py_table = {}
    data = buff[py_offt + PIN_YIN_TAG_LEN:hz_offt]
    pos = 0
    while pos < len(data):
        # Record header: index, then byte length of the pinyin text.
        # '<' pins little-endian so parsing does not depend on the host CPU.
        index, length = struct.unpack_from('<HH', data, pos)
        pos += 4
        # Pinyin text
        py = data[pos:pos + length].decode('UTF-16LE')
        pos += length
        py_table[index] = py
        print(index, length, py)
    return py_table
def get_hanzi_and_pinyin(buff, hz_offt, py_table):
    """Parse the word table of a .scel buffer into words with their pinyin.

    The table starts at ``hz_offt`` and runs to the end of ``buff``. It is a
    sequence of homophone groups. Each group has a 2-byte word count, a
    2-byte byte length of a pinyin index list, and the index list itself
    (2-byte indexes into ``py_table``). The words follow. Each word has a
    2-byte byte length, the UTF-16LE text, a 2-byte extension length, and
    the extension data, whose first two bytes are read as the word's
    sequence number. Group and word fields are printed for tracing.

    Args:
        buff: Raw bytes of the .scel file.
        hz_offt: Offset of the word table inside ``buff``.
        py_table: Mapping of pinyin index to pinyin string.

    Returns:
        A dict mapping each word's sequence number to a ``(word, pinyin)``
        tuple, where ``pinyin`` is the group's syllables joined with ``'.'``.
        Words that share a sequence number overwrite earlier ones.

    Raises:
        struct.error: If the table is truncated in the middle of a field.
        KeyError: If an index is missing from ``py_table``.
    """

    def _get_py(data, py_table):
        """Join the syllables referenced by a packed index list with '.'."""
        return '.'.join(
            py_table[index] for (index,) in struct.iter_unpack('<H', data)
        )

    hz_all = {}
    hz_table = buff[hz_offt:]
    pos = 0
    while pos < len(hz_table):
        # Group header: number of homophones, then byte length of the
        # pinyin index list. '<' pins little-endian regardless of host CPU.
        same, py_table_len = struct.unpack_from('<HH', hz_table, pos)
        pos += 4
        # Pinyin index list
        py = _get_py(hz_table[pos:pos + py_table_len], py_table)
        pos += py_table_len
        print(same, py_table_len, py)
        # Words of this group
        for _ in range(same):
            # Byte length of the word
            (hz_len,) = struct.unpack_from('<H', hz_table, pos)
            pos += 2
            # The word itself
            word = hz_table[pos:pos + hz_len].decode('UTF-16LE')
            pos += hz_len
            # Extension length, followed by the extension data whose first
            # two bytes are the sequence number; the rest is skipped.
            ext_len, count = struct.unpack_from('<HH', hz_table, pos)
            pos += 2 + ext_len
            hz_all[count] = (word, py)
            print(hz_len, word, ext_len, count)
    return hz_all
def scel_to_txt(scel, out):
    """Convert a Sogou .scel dictionary into a plain-text word list.

    Reads the whole .scel file, locates the pinyin and word tables, parses
    them, and writes one ``"<word> <pinyin>"`` line per entry to ``out``.

    Args:
        scel: Path of the .scel file to read.
        out: Path of the text file to write (UTF-8).

    Raises:
        SystemExit: With status 1 when either table offset cannot be
            determined, after printing a diagnostic message.
    """
    with open(scel, "rb") as src:
        buff = src.read()

    hz_offt = get_hanzi_offt(buff)
    py_offt = get_pinyin_offt(buff)
    if not hz_offt or not py_offt:
        print("scel format changed, not support now "
              "[hz_offt = {} py_offt = {}]!".format(hz_offt, py_offt))
        sys.exit(1)

    py_table = get_piyin_table(buff, py_offt, hz_offt)
    hz_py_all = get_hanzi_and_pinyin(buff, hz_offt, py_table)

    # The output is opened only after parsing succeeded, so an unsupported
    # input no longer leaves an empty/truncated file behind. The encoding is
    # explicit because the locale default cannot represent Chinese text on
    # every platform.
    with open(out, "w", encoding="utf-8") as dst:
        dst.writelines(
            word + ' ' + py + '\n' for word, py in hz_py_all.values()
        )
if __name__ == "__main__":
    # Script entry point: convert the idiom dictionary that sits next to the
    # script into a text file. The file names use the ordinary CJK character
    # U+5927 rather than the look-alike Kangxi radical, which would not match
    # a normally named file on disk.
    scel_to_txt("成语大全.scel", "成语大全.txt")
生成的结果截图如下: