I m trying to run this code but I got this error ZeroDivisionError float division by zero in this q values action2 summ len delays history action2

number_of_actions = 5

max_value = 2
Prev_max_value = 0
costs = [0.0 for i in range(number_of_actions)]
rewards = [0.0 for i in range(number_of_actions)]
action_counts = [0 for i in range(number_of_actions)]
q_values = [0.0 for i in range(number_of_actions)]
max_val_updated = False
delays_history = [[] for i in range(number_of_actions)]

for i in range(10):

    if i < number_of_actions:
        action = i

    else:
        action = q_values.index(max(q_values))

    # for action in range(number_of_actions):
    action_counts[action] = action_counts[action] + 1
    delay = random.randint(0, 20)
    costs[action] = delay / max_value
    rewards[action] = rewards[action] + (1 - costs[action])
    q_values[action] = rewards[action] / float(action_counts[action])
    delays_history[action].append(delay)
    print(f"First Q_values: {q_values}")

    if delay > max_value:
        Prev_max_value = max_value
        max_value = delay
        max_val_updated = True

        # delay = random.randint(0,20)
        costs = [0.0 for i in range(number_of_actions)]
        for action2 in range(number_of_actions):
            summ = 0.0
            for d in delays_history[action2]:
                cost = d / max_value
                summ += 1 - cost
                costs[action2] = costs[action2] + cost
            q_values[action2] = summ / len(delays_history[action2])
            rewards[action2] = summ
        second_qvalues = q_values.copy()
        print(f"Second Q_values: {q_values}")
        for action2 in range(number_of_actions):
            N = len(delays_history[action2])
            q_values[action2] = (N - ((N - (q_values[action2] * N)) * Prev_max_value) / float(max_value)) / float(N)
        third_qvalues = q_values.copy()
        print(f"Third Q_values: {q_values}")
        print()

No answer to this question. Be the first to respond.