Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

CascadeClassifier out of space python

Here is my code:

def load_cascades():
    """Build a classifier for every ``cascade.xml`` found under ``data/logos``.

    Walks the directory tree at ``rooted('data/logos')`` and returns a list
    of ``cv2.CascadeClassifier`` objects, one per file named exactly
    ``cascade.xml``, in the order ``os.walk`` yields them.
    """
    wanted_name = 'cascade.xml'
    return [
        cv2.CascadeClassifier(os.path.join(folder, entry))
        for folder, _subdirs, entries in os.walk(rooted('data/logos'))
        for entry in entries
        if entry == wanted_name
    ]


# Build the classifiers once, at import time; get_heuristics() iterates over
# this module-level list on every call instead of reloading the XML files.
cascades = load_cascades()


def get_heuristics(pair):
    """Compute the logo heuristics for one ``(url, image)`` sample.

    Args:
        pair: Indexable whose item 0 is the URL and whose item 1 is the
            image file name handed to ``cv2.imread``.

    Returns:
        dict with two keys: ``'matches_any_logos'`` (True when at least one
        classifier in the module-level ``cascades`` list reports a
        detection) and ``'matches_corresponding_urls'``.
    """
    # NOTE(review): ``url`` is not used by the code shown here; presumably it
    # feeds the ``matches_corresponding_urls`` computation -- confirm.
    url = pair[0]
    image = pair[1]

    # Read and decode the image once (flag 0 = grayscale) rather than once
    # per cascade: the frame is identical for every classifier, so re-reading
    # it inside the loop only multiplies I/O and buffer allocations.
    frame = cv2.imread(image, 0)

    # any() stops at the first classifier that finds something, so the
    # remaining detectors are not run once the answer is already True.
    # len() is used instead of truthiness because a non-empty result may be
    # a NumPy array, whose truth value is ambiguous.
    matches_any_logos = any(
        len(
            cascade.detectMultiScale(
                image=frame,
                minNeighbors=5,
                minSize=(25, 25),
            )
        ) > 0
        for cascade in cascades
    )

    # NOTE(review): ``matches_corresponding_urls`` is never assigned in this
    # function as shown; unless a module-level name exists, the return below
    # raises NameError. Left untouched because its logic is not visible here.
    return {
        'matches_any_logos': matches_any_logos,
        'matches_corresponding_urls': matches_corresponding_urls,
    }


class LogoDetectionVectorizer(DictVectorizer):
    """DictVectorizer that first maps every sample through get_heuristics().

    Each public method converts its input samples into heuristic dicts and
    then delegates to the corresponding DictVectorizer method. The ``y``
    argument is accepted but not forwarded.
    """

    @staticmethod
    def _as_heuristics(samples):
        """Return a list holding get_heuristics(sample) for each sample."""
        return [get_heuristics(sample) for sample in samples]

    def fit(self, x, y=None):
        """Fit the vectorizer on the heuristic dicts derived from ``x``."""
        features = self._as_heuristics(x)
        parent = super(LogoDetectionVectorizer, self)
        return parent.fit(features)

    def fit_transform(self, x, y=None):
        """Fit on, and then transform, the heuristic dicts derived from ``x``."""
        features = self._as_heuristics(x)
        parent = super(LogoDetectionVectorizer, self)
        return parent.fit_transform(features)

    def transform(self, x, y=None):
        """Transform the heuristic dicts derived from ``x``."""
        features = self._as_heuristics(x)
        parent = super(LogoDetectionVectorizer, self)
        return parent.transform(features)

This module is part of a machine learning training pipeline. I have a set of logo models that I have already trained; however, when I try to train the model on a large dataset, I get this error:

cv2.error: /io/opencv/modules/core/src/matrix.cpp:436: error: (-215) u != 0

OpenCV also reports that it is out of memory, and I do not understand why: I am simply creating all the cascades once and then using them multiple times. I saw somewhere that I need to call cascade.deallocate() after each iteration and then recreate the cascades. However, Python claims that cascade.deallocate() does not exist, and there is also the obvious performance hit of having to recreate the cascades each time.

Can someone help? Thanks