python - n-grams with Naive Bayes classifier Error -
I am experimenting with Python NLTK text classification. Here is the code example I am practicing with: http://www.laurentluce.com/posts/twitter-sentiment-analysis-using-python-and-nltk/
Here is my code:
from nltk import bigrams nltk.probability import eleprobdist, freqdist nltk import naivebayesclassifier collections import defaultdict train_samples = {} file ('data/positive.txt', 'rt') f: line in f.readlines(): train_samples[line] = 'pos' file ('data/negative.txt', 'rt') d: line in d.readlines(): train_samples[line] = 'neg' f = open("data/test.txt", "r") test_samples = f.readlines() # error in code # def bigramreturner(text): # tweetstring = text.lower() # bigramfeaturevector = {} # item in bigrams(tweetstring.split()): # bigramfeaturevector.append(' '.join(item)) # return bigramfeaturevector # updated code stack overflow comment def bigramreturner (tweetstring): tweetstring = tweetstring.lower() #comment line since function not defined #tweetstring = removepunctuation (tweetstring) bigramfeaturevector = [] item in nltk.unigrams(tweetstring.split()): bigramfeaturevector.append(' '.join(item)) return bigramfeaturevector def get_labeled_features(samples): word_freqs = {} text, label in train_samples.items(): tokens = text.split() token in tokens: if token not in word_freqs: word_freqs[token] = {'pos': 0, 'neg': 0} word_freqs[token][label] += 1 return word_freqs def get_label_probdist(labeled_features): label_fd = freqdist() item, counts in labeled_features.items(): label in ['neg', 'pos']: if counts[label] > 0: label_fd.inc(label) label_probdist = eleprobdist(label_fd) return label_probdist def get_feature_probdist(labeled_features): feature_freqdist = defaultdict(freqdist) feature_values = defaultdict(set) num_samples = len(train_samples) / 2 token, counts in labeled_features.items(): label in ['neg', 'pos']: feature_freqdist[label, token].inc(true, count=counts[label]) feature_freqdist[label, token].inc(none, num_samples - counts[label]) feature_values[token].add(none) feature_values[token].add(true) item in feature_freqdist.items(): print item[0], item[1] feature_probdist = {} ((label, fname), freqdist) in feature_freqdist.items(): probdist = 
eleprobdist(freqdist, bins=len(feature_values[fname])) feature_probdist[label, fname] = probdist return feature_probdist labeled_features = get_labeled_features(train_samples) label_probdist = get_label_probdist(labeled_features) feature_probdist = get_feature_probdist(labeled_features) classifier = naivebayesclassifier(label_probdist, feature_probdist) sample in test_samples: print "%s | %s" % (sample, classifier.classify(bigramreturner(sample)))
But when I run the code, I get the following error:
traceback (most recent call last): file "naive_bigram_1.py", line 87, in <module> print "%s | %s" % (sample, classifier.classify(bigramreturner(sample))) file "naive_bigram_1.py", line 30, in bigramreturner tweetstring = removepunctuation (tweetstring) nameerror: global name 'removepunctuation' not defined
I saw a similar question about a different error; here is the updated code from it: n-grams with Naive Bayes classifier
You're calling the function removepunctuation, which
hasn't been defined previously:
def bigramreturner (tweetstring): tweetstring = tweetstring.lower() tweetstring = removepunctuation (tweetstring) ....
I also noticed that you put spaces between function names and their parameter lists. Avoid that: it's not idiomatic Python, and it can cause problems (like the function being evaluated as an object instead of being called).
Comments
Post a Comment