rmTag.sh 169 B

12
  # Convert a GBK-encoded news dump to UTF-8 and keep only the lines that
  # contain the literal text "<content>", writing them to a corpus file.
  #
  # Usage: rmTag.sh [input_file] [output_file]
  #
  # Arguments:
  #   $1 - input file to convert (default: news_tensite_xml.dat)
  #   $2 - output file to write  (default: corpus.txt)
  # Outputs:
  #   Writes the matching lines to the output file; diagnostics go to stderr.
  # Returns:
  #   1 if the input file is not readable (the output file is left untouched);
  #   otherwise the exit status of grep (0 if at least one line matched,
  #   1 if none did).
  extract_content() {
    local input="${1:-news_tensite_xml.dat}"
    local output="${2:-corpus.txt}"

    # Check the input first so a missing file does not truncate an existing
    # output file through the '>' redirection below.
    if [ ! -r "$input" ]; then
      printf 'rmTag: cannot read input file: %s\n' "$input" >&2
      return 1
    fi

    # iconv -c omits characters that cannot be converted from the output.
    # The input is fed on stdin (no extra 'cat' process), and grep -F matches
    # "<content>" as a fixed string rather than as a regular expression.
    iconv -f gbk -t utf-8 -c < "$input" | grep -F '<content>' > "$output"
  }

  extract_content "$@"