Divide an English sentence written without spaces into words
wordsegmentation.py
# -*- coding: utf-8 -*-
# Usage example for the WordSegment class of the wordsegmentation module:
# segment() is given a string written without spaces and the result is
# printed.  The comment under each call shows the expected output.
#
# print is called with parentheses and a single argument, so the same lines
# run under both Python 2 and Python 3 (the bare `print x` statement form is
# a SyntaxError in Python 3).
from wordsegmentation import WordSegment

ws = WordSegment()

# Plain lowercase input.
print(ws.segment('universityofwashington'))
# ['university', 'of', 'washington']

print(ws.segment('thisisatest'))
# ['this', 'is', 'a', 'test']

# Mixed case, digits and punctuation in the input.
print(ws.segment('thisisanURLcontaining123345and-&**^&butitstillworks'))
# ['this', 'is', 'an', 'url', 'containing', '123345', 'and', '-&**^&', 'but', 'it', 'still', 'works']

# Longer CamelCase input.
print(ws.segment('NoMatterHowLongThisTextisTextThisTextMightBe'))
# ['no', 'matter', 'how', 'long', 'this', 'text', 'is', 'text', 'this', 'text', 'might', 'be']
Recommended Posts