python - n-grams with Naive Bayes classifier Error -

I am experimenting with Python NLTK text classification. Here is the code example I am practicing with:

Here is the code:

from nltk import bigrams nltk.probability import eleprobdist, freqdist nltk import naivebayesclassifier collections import defaultdict  train_samples = {}  file ('data/positive.txt', 'rt') f:    line in f.readlines():        train_samples[line] = 'pos'  file ('data/negative.txt', 'rt') d:    line in d.readlines():        train_samples[line] = 'neg'  f = open("data/test.txt", "r") test_samples = f.readlines()  # error in code # def bigramreturner(text): #    tweetstring = text.lower() #    bigramfeaturevector = {} #    item in bigrams(tweetstring.split()): #        bigramfeaturevector.append(' '.join(item)) #    return bigramfeaturevector  # updated code stack overflow comment  def bigramreturner (tweetstring):     tweetstring = tweetstring.lower()     #comment line since function not defined     #tweetstring = removepunctuation (tweetstring)     bigramfeaturevector = []     item in nltk.unigrams(tweetstring.split()):         bigramfeaturevector.append(' '.join(item))     return bigramfeaturevector  def get_labeled_features(samples):     word_freqs = {}     text, label in train_samples.items():         tokens = text.split()         token in tokens:             if token not in word_freqs:                 word_freqs[token] = {'pos': 0, 'neg': 0}             word_freqs[token][label] += 1     return word_freqs   def get_label_probdist(labeled_features):     label_fd = freqdist()     item, counts in labeled_features.items():         label in ['neg', 'pos']:             if counts[label] > 0:            label_probdist = eleprobdist(label_fd)     return label_probdist   def get_feature_probdist(labeled_features):     feature_freqdist = defaultdict(freqdist)     feature_values = defaultdict(set)     num_samples = len(train_samples) / 2     token, counts in labeled_features.items():         label in ['neg', 'pos']:             feature_freqdist[label, token].inc(true, count=counts[label])             feature_freqdist[label, token].inc(none, num_samples - counts[label])         
    feature_values[token].add(none)             feature_values[token].add(true)     item in feature_freqdist.items():         print item[0], item[1]     feature_probdist = {}     ((label, fname), freqdist) in feature_freqdist.items():         probdist = eleprobdist(freqdist, bins=len(feature_values[fname]))         feature_probdist[label, fname] = probdist     return feature_probdist    labeled_features = get_labeled_features(train_samples)  label_probdist = get_label_probdist(labeled_features)  feature_probdist = get_feature_probdist(labeled_features)  classifier = naivebayesclassifier(label_probdist, feature_probdist)   sample in test_samples:     print "%s | %s" % (sample, classifier.classify(bigramreturner(sample))) 

But when I run the code, I get the following error:

traceback (most recent call last):   file "", line 87, in <module>     print "%s | %s" % (sample, classifier.classify(bigramreturner(sample)))   file "", line 30, in bigramreturner     tweetstring = removepunctuation (tweetstring) nameerror: global name 'removepunctuation' not defined 

I saw a similar question about a different error, and here I have updated the code from it: n-grams with Naive Bayes classifier.

You're calling a function, removepunctuation, that hasn't been defined previously:

def bigramreturner (tweetstring):     tweetstring = tweetstring.lower()     tweetstring = removepunctuation (tweetstring)     .... 

I also noticed that you put spaces between function names and their parameter lists. Avoid that: it's not idiomatic Python, and it can cause problems (like the function being evaluated as an object instead of being called).


Popular posts from this blog

java.util.scanner - How to read and add only numbers to array from a text file -

php - Add the correct number of days for each month -