|
@@ -9,7 +9,7 @@ end_token = 'E'
|
|
|
|
|
|
def process_poem(file_name):
|
|
|
# 诗集
|
|
|
- poem = []
|
|
|
+ poems = []
|
|
|
with open(file_name, "r", encoding='utf-8', ) as f:
|
|
|
for line in f.readlines():
|
|
|
try:
|
|
@@ -21,13 +21,13 @@ def process_poem(file_name):
|
|
|
if len(content) < 5 or len(content) > 79:
|
|
|
continue
|
|
|
content = start_token + content + end_token
|
|
|
- poem.append(content)
|
|
|
+ poems.append(content)
|
|
|
except ValueError as e:
|
|
|
pass
|
|
|
- poem = sorted(poem, key=lambda l: len(line))
|
|
|
+ poems = sorted(poems, key=lambda l: len(line))
|
|
|
|
|
|
all_words = []
|
|
|
- for poem in poem:
|
|
|
+ for poem in poems:
|
|
|
all_words += [word for word in poem]
|
|
|
counter = collections.Counter(all_words)
|
|
|
count_pairs = sorted(counter.items(), key=lambda x: -x[1])
|