# Python: File Stemming dengan Sastrawi
# Revision as of 11:17, 30 October 2018 by Onnowpurbo (talk | contribs) (Created page with " import sys, getopt import argparse import os,nltk,os.path,re,string import argparse import Sastrawi from nltk.stem.porter import PorterStemmer from Sastrawi.Stemmer.S...")
import sys, getopt
import argparse
import os,nltk,os.path,re,string
import argparse
import Sastrawi
from nltk.stem.porter import PorterStemmer
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
def parse_args(argv=None):
    """Parse the command-line options of the stemming script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what existing zero-argument callers get.

    Returns:
        An ``argparse.Namespace`` with the attributes ``infile`` and
        ``outfile``; each is the empty string when its option is omitted.
    """
    parser = argparse.ArgumentParser()
    # The original read ``default=,`` (a syntax error): the empty-string
    # literal had been lost, so it is restored here as the default.
    parser.add_argument('-i', '--infile', default='', help='input filename')
    parser.add_argument('-o', '--outfile', default='', help='output filename')
    return parser.parse_args(argv)
def hanya_huruf(input):
    """Return True when *input* consists only of ASCII letters.

    ("hanya huruf" is Indonesian for "letters only".)

    Args:
        input: The string to test.

    Returns:
        True if the string is non-empty and every character is in
        ``a-z`` or ``A-Z``; False otherwise (including the empty string).
    """
    # ``\Z`` anchors at the true end of the string. The previous ``$`` also
    # matched just before a trailing newline, so "abc\n" was wrongly accepted.
    return re.match(r'[a-zA-Z]+\Z', input) is not None
def main():
    """Stem the words of the input file and write the result to the output file.

    The input file (``--infile``) is read whole and split on whitespace.
    A token is kept only if ``hanya_huruf`` accepts it, its length is between
    2 and 19 characters, and it is not the literal ``'Iing'``. Each kept
    token is lower-cased and stemmed with the Sastrawi stemmer; the stem is
    written to the output file (``--outfile``) followed by a single space,
    unless it appears in NLTK's English or Indonesian stop-word lists.
    """
    args = parse_args()

    # ``with`` closes the file even if reading raises.
    with open(args.infile, "r") as source:
        tokens = source.read().split()

    stemmer = StemmerFactory().create_stemmer()

    # Load both stop-word lists once into a set. The original re-read the
    # lists and scanned them linearly for every single word.
    stop_words = set(nltk.corpus.stopwords.words('english'))
    stop_words.update(nltk.corpus.stopwords.words('indonesian'))

    with open(args.outfile, "w") as sink:
        for token in tokens:
            # NOTE(review): tokens with attached punctuation (e.g. "makan,")
            # are rejected here by the letters-only check rather than being
            # cleaned up; confirm that dropping them is intended.
            if not (hanya_huruf(token) and 1 < len(token) < 20
                    and token != 'Iing'):
                continue
            # No punctuation stripping is needed at this point: the token is
            # already known to contain letters only.
            stem = stemmer.stem(token.lower())
            if stem not in stop_words:
                sink.write(stem + " ")
# Run the pipeline only when executed as a script, so that importing this
# module does not parse arguments or touch any files.
if __name__ == "__main__":
    main()