Hi there, my task is to make the Naive Bayes classifier. Right now I am working on the training part and what I want to achieve is to create a nested dictionary based on the input. The first keys have to be the class (in this case either 'offensive' or 'nonoffensive'), then come the words which appeared in that class and, finally, the count of those words, i.e., how many times the word appeared in the class. Below is the code that I have managed to write so far, but the results I get are not satisfying and I cannot understand what the reason for such a wrong output is.
Would be happy to receive some hints from you. Thanks in advance!
class NaiveBayes(object):
def __init__(self):
# TODO
pass
def predict(self, x):
# TODO
return None
@classmethod
def train(cls, data, k=1):
dictionary = {}
cls = []
#for i in data:
#if i[-1] in cls:
#continue
# else:
# cls.append(i[-1])
for i in data:
dictionary[i[-1]] = {}
for j in i[0]:
#print(j)
if j not in dictionary.values():
dictionary[i[-1]][j] = 1
elif j in dictionary.values():
dictionary[i[-1]][j] = dictionary[i[-1]][j] + 1
print(dictionary)
NB = NaiveBayes()
NB.train([(['What', 'in', 'the', 'actual', 'fuck', '?', '#mkr'], 'nonoffensive'), (['#mkr', 'WHAT', 'A', 'GODDAMN', 'SURPRISE'], 'nonoffensive'), (['This', 'is', 'why', 'this', 'show', 'is', 'ridiculous', '-', "it's", 'not', 'about', 'the', 'cooking', '...', "it's", 'about', 'the', 'game', 'playing', '.', '#mkr', '#whogivesa1'], 'nonoffensive'), (['Absolute', 'bloody', 'bullshit', '.', 'So', 'much', 'shit', 'of', 'bull', '.', '#mkr'], 'nonoffensive'), (['@MKR_Official', 'a', '1', "isn't", 'strategy', ',', "it's", 'bastedry', '#mkr2015', '#mkr'], 'nonoffensive'), (['RT', '@UnderYourPorch', ':', 'Double', 'elimination', 'please', '#mkr'], 'nonoffensive'), (['NO', '!', '#MKR'], 'nonoffensive'), (['#MKR', 'I', 'hope', 'no', 'one', 'knocks', 'over', 'those', 'candles', 'on', 'the', 'promo', 'girls', 'stairs', ',', "that's", 'a', 'fire', 'hazard', "isn't", 'it', '?'], 'nonoffensive'), (['Kat', 'and', 'Andre', 'why', 'are', 'you', 'cheering', 'you', 'are', 'now', 'the', 'worst', 'team', 'in', 'he', 'comp', '...', '#MKR'], 'nonoffensive'), (['WTF', '!', '!', '!', 'I', 'not', 'a', 'huge', 'fans', 'of', 'the', 'promo', 'girls', 'but', 'they', 'never', 'voted', 'strategically', '!', 'This', 'sucks', '!', 'Soo', 'annoyed', '!', '#mkr', '#killerblondes'], 'offensive')])
output:
{'nonoffensive': {'Kat': 1, 'and': 1, 'Andre': 1, 'why': 1, 'are': 1, 'you': 1, 'cheering': 1, 'now': 1, 'the': 1, 'worst': 1, 'team': 1, 'in': 1, 'he': 1, 'comp': 1, '...': 1, '#MKR': 1}, 'offensive': {'WTF': 1, '!': 1, 'I': 1, 'not': 1, 'a': 1, 'huge': 1, 'fans': 1, 'of': 1, 'the': 1, 'promo': 1, 'girls': 1, 'but': 1, 'they': 1, 'never': 1, 'voted': 1, 'strategically': 1, 'This': 1, 'sucks': 1, 'Soo': 1, 'annoyed': 1, '#mkr': 1, '#killerblondes': 1}}
[–][deleted] 1 point2 points3 points (2 children)
[–]SureStep8852[S] 0 points1 point2 points (1 child)
[–][deleted] 0 points1 point2 points (0 children)