a error while executing this Decision tree algorithm code NameError name rows is not defined

0 votes
training_data = [

    ["Green",3, 'Mango'],

    ['Yellow',3,'Mango'],

    ['Red',1, 'Grape'],

    ['Red',1, 'Grape'],

    ['Yellow',3,'Lemon']

]

header = ['color','diameter', 'label']

def unique_vals(rows, col):

    """ Find the unique values for a column in a dataset"""

    return set([row[col] for row in rows])

def class_counts(rows):

    """ Counts the number of each type of example in a dataset"""

    count = {}

    for row in rows:

        label = row[-1]

        if label not in counts:

            counts[label] = 0

        counts[label] += 1

    return counts

def is_numeric(value):

    """ test if a value is numeric or not"""

    return isinstance(value, int) or isinstance(value, float)

class Question: #a question is used to paratition a dataset

    def __init__(self , column, value):

        self.column = column

        self.value  = value

    

    def match(self, example):

        # compare the feature value in the example to the feature value in the question

        val = example[self.column]

        if is_numeric(val):

            return value >= self.value

        else:

            return val == self.value

    def __repr__(self):

        # this is just the helper method to print the question in a readable format

        condition = "=="

        if is_numeric(self.value):

            condition =">="

        return " Is %s %s %s?" % (

            header[self.column], condition, str(self.value))

def paratition(rows, question):

    """ Partitions in a dataset"""

    true_rows, false_rows = [], []

    for row in rows:

        if question.match(row):

            true_rows.append(row)

        else:

            false_rows.append(row)

    return true_rows, false_rows

def gini(rows):

    counts = class_counts(rows)

    impurity = 1

    for lbl in counts:

        prob_of_lbl = counts[lbl] / float(len(rows))

        impurity -= prob_of_lbl**2

    return impurity

def info_gain(left, right, current_uncertainity):

    p = float(len(left))/ (len(left)+ len(right))

    return current_uncertainity - p * gini(left) - (1-p) * gini(right)

def find_the_split(best):

    """ find the best ques to ask by inerating over every feature value and caclulating the information gain"""

    best_gain = 0 # keep track of the best information gain

    best_question = None

    current_uncertainity = gini(rows)

    n_features = len(rows[0])- 1 # no of columns

    for col in range(n_features):

        values = set([row[col] for row in rows])

        for val in values: # for each value

            question = Question(col, val)

            # try spliting the dataset

            true_rows, false_rows = paratition(rows ,question)

            if len(true_rows)== 0 or len(false_rows) == 0:

                continue

            #calculate the IG from the split

            gain = info_gain(true_rows, false_rows, current_uncertainity)

            if gain >= best_gain:

                best, best_question = gain, question

    return best_gain, best_question

class Leaf:

    def __init__(self, rows):

        self.predictions = class_counts(rows)

class Decision_Node:

    def __init__(self, question, true_branch, false_branch):

        self.question = question

        self.true_branch = true_branch

        self.false_branch = false_branch

def build_tree(rows):

    """build the tree"""

    gain, question = find_the_split(rows)

    if gain == 0:

        return Leaf(rows)

    true_rows, false_rows = paratition(rows, question)

    true_branch = build_tree(true_rows)

    false_branch = build_tree(false_rows)

def print_tree(node, spacing=""):

    """World's most elegant tree printing function"""

    if isinstance(node, Leaf):

        print(spacing+ "Predict" , node.paratitions)

        return

    # PRINT THE QUESTION AT THIS NODE

    print(spacing + str(node.question))\

    

    print(spacing +'--> True:')

    print_tree(node.true_branch, spacing + "  ")

    print(spacing +'--> False:')

    print_tree(node.false_branch, spacing + "  ")

def classify(row, node):

    if isinstance(node, Leaf):

        return node.predictions

    if node.question.match(row):

        return classify(row, node.true_branch)

    else:

        return classify(row, node.false_branch)

def print_leaf(counts):

    """ print the predictions at leaf"""

    total = sum(counts.values())* 1.0

    probs = {}

    for lbl in counts.keys():

        probs[lbl]= str(int(counts[lbl]/ total * 100)) + "%s"

    return probs

if __name__ == "__main__":

    my_tree = build_tree(training_data)

    print_tree(my_tree)

    #Evaluate

    testing_data = [

    ["Green",3, 'Mango'],

    ['Yellow',3,'Mango'],

    ['Red',1, 'Grape'],

    ['Red',1, 'Grape'],

    ['Yellow',3,'Lemon']

]

for row in testing_data:

    print("Actual: %s, Predicted: %s"%

    (row[-1], print_leaf(classify(row, my_tree))))
Feb 13, 2021 in Python by Piyush

edited Mar 4 31 views

No answer to this question. Be the first to respond.

Your answer

Your name to display (optional):
Privacy: Your email address will only be used for sending these notifications.
webinar REGISTER FOR FREE WEBINAR X
REGISTER NOW
webinar_success Thank you for registering Join Edureka Meetup community for 100+ Free Webinars each month JOIN MEETUP GROUP