Revision history [back]

I have written a code that takes a file list and trains the neural network:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural network:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural network:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        img_input_array #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    img_input_array #img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural ~~network:~~network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    #img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here is the file list format, where the filename comes first, followed by the class number:

dove.png 0
flowers.png 1
peacock.png 2
statue.png 3

Here is the code:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    #img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here is the file list format, where the filename comes first, followed by the class number:

dove.png 0
flowers.png 1
peacock.png 2
statue.png 3

I recommend that you create such a file yourself, and then pseudorandomly shuffle the file list.

Here is the code:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    #img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here are the files: image description

Here is the file list format, where the filename comes first, followed by the class number:

dove.png 0
flowers.png 1
peacock.png 2
statue.png 3

I recommend that you create such a file yourself, and then pseudorandomly shuffle the file list.

Here is the code:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    #img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here are the files: image description

Here is the file list format, where the filename comes first, followed by the class number:

dove.png 0
flowers.png 1
peacock.png 2
statue.png 3

I recommend that you create such a file yourself, and then pseudorandomly shuffle the file list.

It would be way faster in C++.

Here is the code:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    #img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here are the files: image description

Here is the file list format, where the filename comes first, followed by the class number:

dove.png 0
flowers.png 1
peacock.png 2
statue.png 3

I recommend that you create such a file yourself, and then pseudorandomly shuffle the file list.

~~It would be way faster in C++.~~Here is the code to generate the file list:

import os

file = open("meta/classes.txt", "r") 

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

rootDir = "Images/"
for dirName, subdirList, fileList in os.walk(rootDir):
    classification_string = dirName.split("/")[1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):

        if(classifications[i] == classification_string):
            class_id = i
            break

    for fname in fileList:
        print("%s%s %s" % (dirName, fname, class_id))

Here is the main code:

import cv2
import numpy as np
import math
import random
 
def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    #img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here are the files: image description

Here is the file list format, where the filename comes first, followed by the class number:

dove.png 0
flowers.png 1
peacock.png 2
statue.png 3

I recommend that you create such a file yourself, and then pseudorandomly shuffle the file list.

Here is the code to generate the file list:

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory


import os

file = open("meta/classes.txt", "r") 

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

rootDir = "Images/"
os.getcwd()
rootDir = rootDir + "\\Images\\"
for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("\\");
    classification_string = dirName.split("/")[1]
s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
         if(classifications[i] == classification_string):
classification_string):            
            class_id = i
            break

    for fname in fileList:
        print("%s%s print("%s\\%s %s" % (dirName, fname, class_id))

Here is the main code:

import cv2
import numpy as np
import math
import random

def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    #img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here are the files: image description

Here is the file list format, where the filename comes first, followed by the class number:

dove.png 0
flowers.png 1
peacock.png 2
statue.png 3

I recommend that you create such a file yourself, and then pseudorandomly shuffle the file list.

Here is the code to generate the file list:

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory


import os

file = open("meta/classes.txt", "r") 

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

rootDir = os.getcwd()
rootDir = rootDir + "\\Images\\"
for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("\\");
    classification_string = s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
        if(classifications[i] == classification_string):            
            class_id = i
            break

    for fname in fileList:
        print("%s\\%s %s" % (dirName, fname, class_id))

Here is the main code:

import cv2
import numpy as np
import math
import random

 def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])
sample_img = cv2.resize(sample_img, (64, 64))

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        print(filenames[j])

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = cv2.resize(img_input_array, (64, 64))
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = cv2.resize(img_input_array, (64, 64)) 
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    #img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here ~~are the files:~~

Here is the file list format, where the filename comes first, followed by the class number:

dove.png 0
flowers.png 1
peacock.png 2
statue.png 3

I recommend that you create such a file yourself, and then pseudorandomly shuffle the file list.

Here is the code to generate the file list:

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory


import os

file = open("meta/classes.txt", "r") 

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

rootDir = os.getcwd()
rootDir = rootDir + "\\Images\\"
for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("\\");
    classification_string = s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
        if(classifications[i] == classification_string):            
            class_id = i
            break

    for fname in fileList:
        print("%s\\%s %s" % (dirName, fname, class_id))

Here is the main ~~code:~~code, that reads in the file list:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])
sample_img = cv2.resize(sample_img, (64, 64))

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        print(filenames[j])

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = cv2.resize(img_input_array, (64, 64))
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = cv2.resize(img_input_array, (64, 64)) 
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    #img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here is the code to generate the file list:

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory


import os

file = open("meta/classes.txt", "r") 

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

rootDir #rootDir = os.getcwd()
rootDir = rootDir + "\\Images\\"
"Images\\"
for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("\\");
    classification_string = s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
        if(classifications[i] == classification_string):            
            class_id = i
            break

    for fname in fileList:
        print("%s\\%s %s" % (dirName, fname, class_id))

Here is the main code, that reads in the file list:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])
sample_img = cv2.resize(sample_img, (64, 64))

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        print(filenames[j])

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = cv2.resize(img_input_array, (64, 64))
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = cv2.resize(img_input_array, (64, 64)) 
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    #img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here is the code to generate the file list:

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory


import os

file = open("meta/classes.txt", "r") 

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

#rootDir = os.getcwd()
rootDir = "Images\\"
for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("\\");
    classification_string = s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
        if(classifications[i] == classification_string):            
            class_id = i
            break

    for fname in fileList:
        print("%s\\%s %s" % (dirName, fname, class_id))

Here is the main code, that reads in the file list:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])
sample_img = cv2.resize(sample_img, (64, 64))

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        print(filenames[j])

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = cv2.resize(img_input_array, (64, 64))
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read  image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = cv2.resize(img_input_array, (64, 64)) 
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Add noise to input image
    #img_input_array = add_noise(img_input_array, 0.1)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    print(ann.predict(img_input_array))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here is the code to generate the file list:

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory


import os

file = open("meta/classes.txt", "r") 

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

#rootDir = os.getcwd()
rootDir = "Images\\"
"Images/"
for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("\\");
dirName.split("/");
    classification_string = s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
        if(classifications[i] == classification_string):            
            class_id = i
            break

    for fname in fileList:
        print("%s\\%s print("%s/%s %s" % (dirName, fname, class_id))

Here is the main code, that reads in the file list:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])
sample_img = cv2.resize(sample_img, (64, 64))

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        print(filenames[j])

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = cv2.resize(img_input_array, (64, 64))
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here is the code to generate the file list:

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory


import os

file = open("meta/classes.txt", "r") 

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

rootDir = "Images/"
for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("/");
    classification_string = s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
        if(classifications[i] == classification_string):            
            class_id = i
            break

    for fname in fileList:
        print("%s/%s %s" % (dirName, fname, class_id))

Here is the main code, that reads in the file list:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


def get_int_for_bits(src_bits):

    answer = 0
    shifted = 1

    for i in range(0, len(src_bits)):

        if 1.0 == src_bits[len(src_bits) - i - 1]:
            answer += shifted

        shifted = shifted << 1

    return answer



# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])
sample_img = cv2.resize(sample_img, (64, 64))

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
10):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        print(filenames[j])

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = cv2.resize(img_input_array, (64, 64))
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here is the code to generate the file list:

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory


import os

file = open("meta/classes.txt", "r") 

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

rootDir = "Images/"
for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("/");
    classification_string = s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
        if(classifications[i] == classification_string):            
            class_id = i
            break

    for fname in fileList:
        print("%s/%s %s" % (dirName, fname, class_id))

Here is the main code, that reads in the file list:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


def get_int_for_bits(src_bits):

    answer = 0
    shifted = 1

    for i in range(0, len(src_bits)):

        if 1.0 == src_bits[len(src_bits) - i - 1]:
            answer += shifted

        shifted = shifted << 1

    return answer



# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


def snapto(position):

    if position < 0:
        position = 0

    if position > 1:
        position = 1

    # round
    return math.floor(0.5 + position)


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])
sample_img = cv2.resize(sample_img, (64, 64))

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
#ann.setBackpropMomentumScale(0.00001)
ann.setBackpropMomentumScale(1.0)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 10):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        print(filenames[j])
#print(filenames[j])

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = cv2.resize(img_input_array, (64, 64))
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)



error_count = 0
ok_count = 0

# For each file
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = cv2.resize(img_input_array, (64, 64))
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = snapto(img_input_array[j])
        #print(img_input_array[j])

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    prediction = ann.predict(img_input_array)

    # snap to 0 or 1
    for j in range(0, len(prediction[1][0])):
        prediction[1][0][j] = snapto(prediction[1][0][j])

    # if the classifications are not a match, then there is error
    if int(classifications[i]) != get_int_for_bits(prediction[1][0]):
        error_count += 1
    else:
        ok_count += 1


print(float(ok_count) / float(error_count + ok_count))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here is the code to generate the file list:

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory


import os

file = open("meta/classes.txt", "r") 

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

rootDir = "Images/"
for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("/");
    classification_string = s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
        if(classifications[i] == classification_string):            
            class_id = i
            break

    for fname in fileList:
        print("%s/%s %s" % (dirName, fname, class_id))

Here is the main code, that reads in the file list:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


def get_int_for_bits(src_bits):

    answer = 0
    shifted = 1

    for i in range(0, len(src_bits)):

        if 1.0 == src_bits[len(src_bits) - i - 1]:
            answer += shifted

        shifted = shifted << 1

    return answer



# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


def snapto(position):

    if position < 0:
        position = 0

    if position > 1:
        position = 1

    # round
    return math.floor(0.5 + position)


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])
sample_img = cv2.resize(sample_img, (64, 64))

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
ann.setBackpropMomentumScale(1.0)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 10):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        #print(filenames[j])

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = cv2.resize(img_input_array, (64, 64))
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)



error_count = 0
ok_count = 0

# For each file
file in the test data (replace with your own filenames/classifications)
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = cv2.resize(img_input_array, (64, 64))
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = snapto(img_input_array[j])
        #print(img_input_array[j])

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    prediction = ann.predict(img_input_array)

    # snap to 0 or 1
    for j in range(0, len(prediction[1][0])):
        prediction[1][0][j] = snapto(prediction[1][0][j])

    # if the classifications are not a match, then there is error
    if int(classifications[i]) != get_int_for_bits(prediction[1][0]):
        error_count += 1
    else:
        ok_count += 1


print(float(ok_count) / float(error_count + ok_count))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here is the code to generate the file list:

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory


import os

file = open("meta/classes.txt", "r") 

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

rootDir = "Images/"
for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("/");
    classification_string = s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
        if(classifications[i] == classification_string):            
            class_id = i
            break

    for fname in fileList:
        print("%s/%s %s" % (dirName, fname, class_id))

Here is the main code, that reads in the file list:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


def get_int_for_bits(src_bits):

    answer = 0
    shifted = 1

    for i in range(0, len(src_bits)):

        if 1.0 == src_bits[len(src_bits) - i - 1]:
            answer += shifted

        shifted = shifted << 1

    return answer



# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


def snapto(position):

    if position < 0:
        position = 0

    if position > 1:
        position = 1

    # round
    return math.floor(0.5 + position)


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])
sample_img = cv2.resize(sample_img, (64, 64))

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
ann.setBackpropMomentumScale(1.0)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 10):
1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        #print(filenames[j])

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = cv2.resize(img_input_array, (64, 64))
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Add noise to input image
        #img_input_array = add_noise(img_input_array, 0.1)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)



error_count = 0
ok_count = 0

# For each file in the test data (replace with your own filenames/classifications)
for i in range(0, len(filenames)):
    print(filenames[i])

    # Read image from file
    img_input_array = cv2.imread(filenames[i])
    img_input_array = cv2.resize(img_input_array, (64, 64))
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = snapto(img_input_array[j])
        #print(img_input_array[j])

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    prediction = ann.predict(img_input_array)

    # snap prediction to 0 or 1
    for j in range(0, len(prediction[1][0])):
        prediction[1][0][j] = snapto(prediction[1][0][j])

    # if the classifications are not a match, then there is error
    if int(classifications[i]) != get_int_for_bits(prediction[1][0]):
        error_count += 1
    else:
        ok_count += 1


print(float(ok_count) / float(error_count + ok_count))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters -- it's like voodoo magic.

Here is the code to generate the file list:

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory


import os

file = open("meta/classes.txt", "r") 

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

rootDir = "Images/"
for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("/");
    classification_string = s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
        if(classifications[i] == classification_string):            
            class_id = i
            break

    for fname in fileList:
        print("%s/%s %s" % (dirName, fname, class_id))

Here is the main code, that reads in the file list:

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


def get_int_for_bits(src_bits):

    answer = 0
    shifted = 1

    for i in range(0, len(src_bits)):

        if 1.0 == src_bits[len(src_bits) - i - 1]:
            answer += shifted

        shifted = shifted << 1

    return answer



# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


def snapto(position):

    if position < 0:
        position = 0

    if position > 1:
        position = 1

    # round
    return math.floor(0.5 + position)


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])
sample_img = cv2.resize(sample_img, (64, 64))

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
ann.setBackpropMomentumScale(1.0)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        #print(filenames[j])

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = cv2.resize(img_input_array, (64, 64))
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)






test_file = open("test_files.txt", "r") 

test_filenames = []
test_classifications = []

for line in test_file:
    test_filenames.append(line.split(" ")[0])
    test_classifications.append(int(line.split(" ")[1]))

error_count = 0
ok_count = 0

# For each file in the test data (replace with your own filenames/classifications)
for i in range(0, len(filenames)):
    print(filenames[i])
len(test_filenames)):
    print(test_filenames[i])

    # Read image from file
    img_input_array = cv2.imread(filenames[i])
cv2.imread(test_filenames[i])
    img_input_array = cv2.resize(img_input_array, (64, 64))
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    prediction = ann.predict(img_input_array)

    # snap prediction to 0 or 1
    for j in range(0, len(prediction[1][0])):
        prediction[1][0][j] = snapto(prediction[1][0][j])

    # if the classifications are not a match, then there is error
    if int(classifications[i]) int(test_classifications[i]) != get_int_for_bits(prediction[1][0]):
        error_count += 1
    else:
        ok_count += 1


print(float(ok_count) / float(error_count + ok_count))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization ~~parameters -- it's like voodoo magic.~~parameters.

Here is the code to generate ~~the~~ a file list:

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory


import os

file = open("meta/classes.txt", "r") 

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

rootDir = "Images/"
for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("/");
    classification_string = s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
        if(classifications[i] == classification_string):            
            class_id = i
            break

    for fname in fileList:
        print("%s/%s %s" % (dirName, fname, class_id))

Here is the main code, that reads in the file ~~list:~~lists (one for training data and one for test data):

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


def get_int_for_bits(src_bits):

    answer = 0
    shifted = 1

    for i in range(0, len(src_bits)):

        if 1.0 == src_bits[len(src_bits) - i - 1]:
            answer += shifted

        shifted = shifted << 1

    return answer



# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


def snapto(position):

    if position < 0:
        position = 0

    if position > 1:
        position = 1

    # round
    return math.floor(0.5 + position)


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])
sample_img = cv2.resize(sample_img, (64, 64))

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
ann.setBackpropMomentumScale(1.0)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        #print(filenames[j])

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = cv2.resize(img_input_array, (64, 64))
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)






test_file = open("test_files.txt", "r") 

test_filenames = []
test_classifications = []

for line in test_file:
    test_filenames.append(line.split(" ")[0])
    test_classifications.append(int(line.split(" ")[1]))

error_count = 0
ok_count = 0

# For each file in the test data (replace with your own filenames/classifications)
for i in range(0, len(test_filenames)):
    print(test_filenames[i])

    # Read image from file
    img_input_array = cv2.imread(test_filenames[i])
    img_input_array = cv2.resize(img_input_array, (64, 64))
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    prediction = ann.predict(img_input_array)

    # snap prediction to 0 or 1
    for j in range(0, len(prediction[1][0])):
        prediction[1][0][j] = snapto(prediction[1][0][j])

    # if the classifications are not a match, then there is error
    if int(test_classifications[i]) != get_int_for_bits(prediction[1][0]):
        error_count += 1
    else:
        ok_count += 1


print(float(ok_count) / float(error_count + ok_count))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters.

Here is the code to generate a the two file ~~list:~~lists (training data and testing data):

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory


import os

file = open("meta/classes.txt", "r") 
"r")
training_file = open("files.txt", "w")
test_file = open("test_files.txt", "w")

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

rootDir = "Images/"
 for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("/");
    classification_string = s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
        if(classifications[i] == classification_string):            
            class_id = i
            break

    filenames_classifications = []

    for fname in fileList:
        print("%s/%s %s" filenames_classifications.append("%s/%s %s\n" % (dirName, fname, class_id))

    # Use 80% of the data for training
    cutoff = 0.8*float(len(filenames_classifications))

    for i in range(0, len(filenames_classifications)):
        if i < cutoff:
            training_file.write(filenames_classifications[i])    
        else:
            test_file.write(filenames_classifications[i])

Here is the main code, that reads in the file lists (one for training data and one for test data):

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


def get_int_for_bits(src_bits):

    answer = 0
    shifted = 1

    for i in range(0, len(src_bits)):

        if 1.0 == src_bits[len(src_bits) - i - 1]:
            answer += shifted

        shifted = shifted << 1

    return answer



# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


def snapto(position):

    if position < 0:
        position = 0

    if position > 1:
        position = 1

    # round
    return math.floor(0.5 + position)


# Read file list
file = open("files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])
sample_img = cv2.resize(sample_img, (64, 64))

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
ann.setBackpropMomentumScale(1.0)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        #print(filenames[j])

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = cv2.resize(img_input_array, (64, 64))
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)






test_file = open("test_files.txt", "r") 

test_filenames = []
test_classifications = []

for line in test_file:
    test_filenames.append(line.split(" ")[0])
    test_classifications.append(int(line.split(" ")[1]))

error_count = 0
ok_count = 0

# For each file in the test data (replace with your own filenames/classifications)
for i in range(0, len(test_filenames)):
    print(test_filenames[i])

    # Read image from file
    img_input_array = cv2.imread(test_filenames[i])
    img_input_array = cv2.resize(img_input_array, (64, 64))
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    prediction = ann.predict(img_input_array)

    # snap prediction to 0 or 1
    for j in range(0, len(prediction[1][0])):
        prediction[1][0][j] = snapto(prediction[1][0][j])

    # if the classifications are not a match, then there is error
    if int(test_classifications[i]) != get_int_for_bits(prediction[1][0]):
        error_count += 1
    else:
        ok_count += 1


print(float(ok_count) / float(error_count + ok_count))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters.

Here is the code to generate the two file lists (training data and testing data):

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries
# https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory

 import os

file = open("meta/classes.txt", "r")
training_file = open("files.txt", open("training_files.txt", "w")
test_file = open("test_files.txt", "w")

classifications = []

for line in file:
    classification_string = line.split("\n")[0]
    classifications.append(classification_string)

rootDir = "Images/"

for dirName, subdirList, fileList in os.walk(rootDir):

    s = dirName.split("/");
    classification_string = s[len(s) - 1]
    classification_string = classification_string.split("\n")[0]

    class_id = 0

    for i in range(0, len(classifications)):
        if(classifications[i] == classification_string):            
            class_id = i
            break

    filenames_classifications = []

    for fname in fileList:
        filenames_classifications.append("%s/%s %s\n" % (dirName, fname, class_id))

    # Use 80% of the data for training
    cutoff = 0.8*float(len(filenames_classifications))

    for i in range(0, len(filenames_classifications)):
        if i < cutoff:
            training_file.write(filenames_classifications[i])    
        else:
            test_file.write(filenames_classifications[i])

Here is the main code, that reads in the file lists (one for training data and one for test data):

import cv2
import numpy as np
import math
import random



def add_noise(img_input_array, scale):
    for i in range(0, img_input_array.shape[0]):
        noise = float(random.randint(0, 255))   
        noise = noise / 255.0
        img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale)

        if img_input_array[i] < 0.0:
            img_input_array[i] = 0.0

        if img_input_array[i] > 1.0:
            img_input_array[i] = 1.0

    return img_input_array


def get_int_for_bits(src_bits):

    answer = 0
    shifted = 1

    for i in range(0, len(src_bits)):

        if 1.0 == src_bits[len(src_bits) - i - 1]:
            answer += shifted

        shifted = shifted << 1

    return answer



# A function that takes an integer and gives the bit numpy array
def get_bits_for_int(src_min_bits, src_number):
    bits = bin(src_number)[2:]

    a = np.array([])

    for i in range(0, len(bits)):
        a = np.append(a, float(bits[i]))

    num_bits = len(a)
    needed_bits = 0

    if num_bits < src_min_bits:
        needed_bits = src_min_bits - num_bits

    for i in range(0, needed_bits):
        a = np.insert(a, 0, 0.0)

    return a


def snapto(position):

    if position < 0:
        position = 0

    if position > 1:
        position = 1

    # round
    return math.floor(0.5 + position)


# Read file list
file = open("files.txt", open("training_files.txt", "r") 

filenames = []
classifications = []

for line in file:
    filenames.append(line.split(" ")[0])
    classifications.append(int(line.split(" ")[1]))

# Get the maximum classification number
max_class = 0

for i in range(0, len(classifications)):
    if classifications[i] > max_class:
        max_class = classifications[i]

num_classes = max_class + 1

# Get minimum number of bits needed to encode num_classes distinct classes
num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0))

# Get image and ANN parameters
sample_img = cv2.imread(filenames[0])
sample_img = cv2.resize(sample_img, (64, 64))

img_rows = sample_img.shape[0]
img_cols = sample_img.shape[1]
channels_per_pixel = 3

num_input_neurons = int(img_rows*img_cols*channels_per_pixel)
num_output_neurons = int(num_bits_needed)
num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons)))

ann = cv2.ml.ANN_MLP_create()
ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64))
ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 ))
ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001)
ann.setBackpropMomentumScale(1.0)
ann.setBackpropWeightScale(0.00001)

# Read image from file
img_input_array = sample_img.flatten()
img_input_array = img_input_array.astype(np.float32)

# Normalize all pixels from [0, 255] to [0, 1]
for i in range(0, img_input_array.shape[0]):
    img_input_array[i] = float(img_input_array[i]) / float(255)

# Get output image
img_output_array = get_bits_for_int(num_output_neurons, classifications[0])
img_output_array = img_output_array.astype(np.float32)

# Make both images have 1 row, many columns
img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

# Train the network
img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)

# For each training iteration
for i in range(0, 1000):
10):
    print(i)

    # For each file
    for j in range(0, len(filenames)):

        #print(filenames[j])

        # Read image from file
        img_input_array = cv2.imread(filenames[j])
        img_input_array = cv2.resize(img_input_array, (64, 64))
        img_input_array = img_input_array.flatten()
        img_input_array = img_input_array.astype(np.float32)

        # Normalize all pixels from [0, 255] to [0, 1]
        for k in range(0, img_input_array.shape[0]):
            img_input_array[k] = float(img_input_array[k]) / float(255)

        # Get output image
        img_output_array = get_bits_for_int(num_output_neurons, classifications[j])
        img_output_array = img_output_array.astype(np.float32)

        # Make both images have 1 row, many columns
        img_input_array = img_input_array.reshape(1, img_input_array.shape[0])
        img_output_array = img_output_array.reshape(1, img_output_array.shape[0])

        # Train the network
        img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array)
        ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE)






test_file = open("test_files.txt", "r") 

test_filenames = []
test_classifications = []

for line in test_file:
    test_filenames.append(line.split(" ")[0])
    test_classifications.append(int(line.split(" ")[1]))

error_count = 0
ok_count = 0

# For each file in the test data (replace with your own filenames/classifications)
for i in range(0, len(test_filenames)):
    print(test_filenames[i])

    # Read image from file
    img_input_array = cv2.imread(test_filenames[i])
    img_input_array = cv2.resize(img_input_array, (64, 64))
    img_input_array = img_input_array.flatten()
    img_input_array = img_input_array.astype(np.float32)

    # Normalize all pixels from [0, 255] to [0, 1]
    for j in range(0, img_input_array.shape[0]):
        img_input_array[j] = float(img_input_array[j]) / float(255)

    # Make input image have 1 row, many columns
    img_input_array = img_input_array.reshape(1, img_input_array.shape[0])

    # Ask the network to classify the image
    prediction = ann.predict(img_input_array)

    # snap prediction to 0 or 1
    for j in range(0, len(prediction[1][0])):
        prediction[1][0][j] = snapto(prediction[1][0][j])

    # if the classifications are not a match, then there is error
    if int(test_classifications[i]) != get_int_for_bits(prediction[1][0]):
        error_count += 1
    else:
        ok_count += 1


print(float(ok_count) / float(error_count + ok_count))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization ~~parameters.~~parameters:

~~Here is~~ https://github.com/sjhalayka/python_opencv_image_classification

See get_files.py for the code to generate the two file lists (training data and testing ~~data):~~data).

# https://docs.python.org/3/tutorial/datastructures.html#dictionaries # https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory import os file = open("meta/classes.txt", "r") training_file = open("training_files.txt", "w") test_file = open("test_files.txt", "w") classifications = []
See ann_image.py for line in file: classification_string = line.split("\n")[0] classifications.append(classification_string) rootDir = "Images/" for dirName, subdirList, fileList in os.walk(rootDir): s = dirName.split("/"); classification_string = s[len(s) - 1] classification_string = classification_string.split("\n")[0] class_id = 0 for i in range(0, len(classifications)): if(classifications[i] == classification_string): class_id = i break filenames_classifications = [] for fname in fileList: filenames_classifications.append("%s/%s %s\n" % (dirName, fname, class_id)) # Use 80% of the data for training cutoff = 0.8*float(len(filenames_classifications)) for i in range(0, len(filenames_classifications)): if i < cutoff: training_file.write(filenames_classifications[i]) else: test_file.write(filenames_classifications[i])

Here is the main ~~code,~~ code that reads in the file lists (one for training data and one for test ~~data):~~data).

import cv2 import numpy as np import math import random def add_noise(img_input_array, scale): for i in range(0, img_input_array.shape[0]): noise = float(random.randint(0, 255)) noise = noise / 255.0 img_input_array[i] = (img_input_array[i] + noise*scale) / (1.0 + scale) if img_input_array[i] < 0.0: img_input_array[i] = 0.0 if img_input_array[i] > 1.0: img_input_array[i] = 1.0 return img_input_array def get_int_for_bits(src_bits): answer = 0 shifted = 1 for i in range(0, len(src_bits)): if 1.0 == src_bits[len(src_bits) - i - 1]: answer += shifted shifted = shifted << 1 return answer # A function that takes an integer and gives the bit numpy array def get_bits_for_int(src_min_bits, src_number): bits = bin(src_number)[2:] a = np.array([]) for i in range(0, len(bits)): a = np.append(a, float(bits[i])) num_bits = len(a) needed_bits = 0 if num_bits < src_min_bits: needed_bits = src_min_bits - num_bits for i in range(0, needed_bits): a = np.insert(a, 0, 0.0) return a def snapto(position): if position < 0: position = 0 if position > 1: position = 1 # round return math.floor(0.5 + position) # Read file list file = open("training_files.txt", "r") filenames = [] classifications = [] for line in file: filenames.append(line.split(" ")[0]) classifications.append(int(line.split(" ")[1])) # Get the maximum classification number max_class = 0 for i in range(0, len(classifications)): if classifications[i] > max_class: max_class = classifications[i] num_classes = max_class + 1 # Get minimum number of bits needed to encode num_classes distinct classes num_bits_needed = math.ceil(math.log(num_classes)/math.log(2.0)) # Get image and ANN parameters sample_img = cv2.imread(filenames[0]) sample_img = cv2.resize(sample_img, (64, 64)) img_rows = sample_img.shape[0] img_cols = sample_img.shape[1] channels_per_pixel = 3 num_input_neurons = int(img_rows*img_cols*channels_per_pixel) num_output_neurons = int(num_bits_needed) num_hidden_neurons = int(math.floor(math.sqrt(num_input_neurons*num_output_neurons))) ann = cv2.ml.ANN_MLP_create() ann.setLayerSizes(np.array([num_input_neurons, num_hidden_neurons, num_output_neurons], dtype=np.int64)) ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM) ann.setTermCriteria((cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 1, 0.000001 )) ann.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.001) ann.setBackpropMomentumScale(1.0) ann.setBackpropWeightScale(0.00001) # Read image from file img_input_array = sample_img.flatten() img_input_array = img_input_array.astype(np.float32) # Normalize all pixels from [0, 255] to [0, 1] for i in range(0, img_input_array.shape[0]): img_input_array[i] = float(img_input_array[i]) / float(255) # Get output image img_output_array = get_bits_for_int(num_output_neurons, classifications[0]) img_output_array = img_output_array.astype(np.float32) # Make both images have 1 row, many columns img_input_array = img_input_array.reshape(1, img_input_array.shape[0]) img_output_array = img_output_array.reshape(1, img_output_array.shape[0]) # Train the network img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array) ann.train(img_td, cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE) # For each training iteration for i in range(0, 10): print(i) # For each file for j in range(0, len(filenames)): #print(filenames[j]) # Read image from file img_input_array = cv2.imread(filenames[j]) img_input_array = cv2.resize(img_input_array, (64, 64)) img_input_array = img_input_array.flatten() img_input_array = img_input_array.astype(np.float32) # Normalize all pixels from [0, 255] to [0, 1] for k in range(0, img_input_array.shape[0]): img_input_array[k] = float(img_input_array[k]) / float(255) # Get output image img_output_array = get_bits_for_int(num_output_neurons, classifications[j]) img_output_array = img_output_array.astype(np.float32) # Make both images have 1 row, many columns img_input_array = img_input_array.reshape(1, img_input_array.shape[0]) img_output_array = img_output_array.reshape(1, img_output_array.shape[0]) # Train the network img_td = cv2.ml.TrainData_create(img_input_array, cv2.ml.ROW_SAMPLE, img_output_array) ann.train(img_td, cv2.ml.ANN_MLP_UPDATE_WEIGHTS | cv2.ml.ANN_MLP_NO_INPUT_SCALE | cv2.ml.ANN_MLP_NO_OUTPUT_SCALE) test_file = open("test_files.txt", "r") test_filenames = [] test_classifications = [] for line in test_file: test_filenames.append(line.split(" ")[0]) test_classifications.append(int(line.split(" ")[1])) error_count = 0 ok_count = 0 # For each file in the test data (replace with your own filenames/classifications) for i in range(0, len(test_filenames)): print(test_filenames[i]) # Read image from file img_input_array = cv2.imread(test_filenames[i]) img_input_array = cv2.resize(img_input_array, (64, 64)) img_input_array = img_input_array.flatten() img_input_array = img_input_array.astype(np.float32) # Normalize all pixels from [0, 255] to [0, 1] for j in range(0, img_input_array.shape[0]): img_input_array[j] = float(img_input_array[j]) / float(255) # Make input image have 1 row, many columns img_input_array = img_input_array.reshape(1, img_input_array.shape[0]) # Ask the network to classify the image prediction = ann.predict(img_input_array) # snap prediction to 0 or 1 for j in range(0, len(prediction[1][0])): prediction[1][0][j] = snapto(prediction[1][0][j]) # if the classifications are not a match, then there is error if int(test_classifications[i]) != get_int_for_bits(prediction[1][0]): error_count += 1 else: ok_count += 1 print(float(ok_count) / float(error_count + ok_count))

I have written a code that takes a file list and trains the neural network. Be careful when changing any of the network initialization parameters:

https://github.com/sjhalayka/python_opencv_image_classification

See get_files.py for the code to generate the two file lists (training data and testing data).

See ann_image.py for the main code that reads in the file lists ~~(one for training data~~ and ~~one for test data).~~trains / tests the network.