#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@Contact :   liuyuqi.gov@msn.cn
@Time    :   2023/03/08 23:05:51
@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
@Desc    :   Preprocess b.csv: take the first 500 popular English words shorter than 5 characters
'''

import pandas as pd

if __name__=='__main__':
    with open("data/b.csv", "r", encoding="utf-8") as file:
        res=file.readlines()
        res=[x.strip().lower() for x in res]
        res = pd.Series(res).drop_duplicates()
        data = pd.DataFrame(res, columns=["name"])
        data['strlen'] = data['name'].str.len()
        # data.sort_values(by='strlen', inplace=True)
        data=data[data['strlen'] < 5 ]
        data["name"][:500].to_csv("res3.csv", index=False, header=None)