When I run this program on my computer (macOS Mojave) with the --voice flag, I get a large number of
pid(5054)/euid(501) is calling TIS/TSM in non-main thread environment, ERROR : This is NOT allowed. Please call TIS/TSM in main thread!!!
error messages, followed by this crash:
* Terminating app due to uncaught exception 'NSInternalInconsistencyException', reason: 'not running on AppKit (main) thread' ...
I guess it has something to do with the thread I'm using, but I can't figure out how to solve this. I am not sure that this is an OpenCV-related issue at all, but I ran into similar issues when I used imshow outside the main thread.
import cv2
import numpy as np
import wave
import pyaudio
import speech_recognition as sr
import threading
# Class names the detector can report, one per line of the names file.
# A context manager is used so the file handle is closed deterministically.
with open("yolo3-320/coco.names") as names_file:
    labels = names_file.read().strip().split("\n")

# Label the user currently wants highlighted. Written by voice_command()
# and compared against each detection's label in main(); None until a
# command has been accepted.
message = None
def voice_command():
    """Listen on the microphone forever and publish the requested label.

    The loop waits for the wake word "detect", plays the "start" effect,
    then listens for a second phrase. If that phrase is one of ``labels``
    it is stored in the module-level ``message``; otherwise the "failed"
    effect is played. The function never returns, so it is meant to be run
    in a background thread.
    """
    global message
    rc = sr.Recognizer()
    # The microphone object is loop-invariant; only the `with` block below
    # needs to run on each iteration.
    mic = sr.Microphone()
    print("I'm waiting for your commands!")
    while True:
        with mic as source:
            try:
                audio = rc.listen(source)
                result = rc.recognize_google(audio).lower()
                if result != "detect":
                    continue

                play_effect("start")
                print("I am listening...")
                audio = rc.listen(source)
                result = rc.recognize_google(audio).lower()
                print(result)
                if result in labels:
                    print("Okay, detecting: " + result)
                    play_effect("end")
                    message = result
                else:
                    print("Sorry, I didn't catch that.")
                    play_effect("failed")
            except sr.UnknownValueError:
                # Speech was unintelligible (silence, noise): expected and
                # frequent, so just keep listening.
                continue
            except sr.RequestError as err:
                # The recognition service could not be reached or rejected
                # the request; report it instead of failing silently.
                print("Speech recognition request failed: {}".format(err))
            except Exception as err:
                # Keep the listener alive (best effort), but make the
                # failure visible rather than swallowing it.
                print("Voice command error: {!r}".format(err))
def _start_command_thread(mode):
    """Start the daemon thread that receives commands for ``mode``, if any."""
    if mode == "consol":
        target = consol_command
    elif mode == "voice":
        target = voice_command
    else:
        return None
    cmd = threading.Thread(target=target)
    cmd.daemon = True  # do not keep the process alive after main() returns
    cmd.start()
    return cmd


def _output_layer_names(net):
    """Return the names of the network's unconnected output layers."""
    names = net.getLayerNames()
    # Depending on the OpenCV version the indices come back as an Nx1 or a
    # flat array; flattening handles both. Indices are 1-based.
    out_indices = np.array(net.getUnconnectedOutLayers()).flatten()
    return [names[i - 1] for i in out_indices]


def _collect_detections(layer_outputs, frame_w, frame_h, min_confidence=0.4):
    """Convert raw network outputs into boxes, confidences and class ids."""
    boxes = []
    confidences = []
    class_ids = []
    scale = np.array([frame_w, frame_h, frame_w, frame_h])
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence <= min_confidence:
                continue
            # The first four values are scaled by the frame size and read
            # as (center x, center y, width, height).
            (center_x, center_y, width, height) = (detection[0:4] * scale).astype("int")
            x = int(center_x - (width / 2))
            y = int(center_y - (height / 2))
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(confidence))
            class_ids.append(int(class_id))
    return boxes, confidences, class_ids


def main(mode):
    """Run the camera loop and highlight objects matching ``message``.

    Args:
        mode: "consol" or "voice" selects which command function is started
            in a background daemon thread; any other value starts none.

    Frames are read from camera 0, passed through the Darknet network, and
    every detection whose label equals the module-level ``message`` is
    drawn on the frame. The loop ends when no frame can be grabbed or when
    Esc is pressed. All window calls (``imshow``/``waitKey``) are made from
    the thread that calls this function.
    """
    _start_command_thread(mode)

    cam = cv2.VideoCapture(0)
    try:
        np.random.seed(42)  # fixed seed so each class keeps the same color
        colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")
        net = cv2.dnn.readNetFromDarknet("./yolo3-320/yolov3.cfg", "./yolo3-320/yolov3.weights")
        layer_names = _output_layer_names(net)
        (W, H) = (None, None)

        while True:
            (grabbed, frame) = cam.read()
            if not grabbed:
                break
            if W is None or H is None:
                (H, W) = frame.shape[:2]

            blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (288, 288), swapRB=True, crop=False)
            net.setInput(blob)
            layer_outputs = net.forward(layer_names)
            boxes, confidences, class_ids = _collect_detections(layer_outputs, W, H)

            indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.4, 0.2)
            # Read the requested label once per frame so a concurrent update
            # from the command thread cannot change it mid-frame.
            wanted = message
            if len(indexes) > 0:
                # np.array(...) copes with builds that return a plain
                # sequence instead of an ndarray.
                for i in np.array(indexes).flatten():
                    label = labels[class_ids[i]]
                    if label != wanted:
                        continue
                    (x, y, w, h) = boxes[i]
                    color = [int(c) for c in colors[class_ids[i]]]
                    cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                    text = "{}: {:.2f}".format(label, confidences[i])
                    cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            cv2.imshow("Breathtaking", frame)
            # Mask to the low byte: some platforms return extra high bits.
            if cv2.waitKey(3) & 0xFF == 27:  # Esc
                break
    finally:
        # Always hand the camera back, even if processing raised.
        cam.release()
    print("Exit program.")
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--voice", help="Enable voice commands", action="store_true")
    args = parser.parse_args()
    # Previously both branches passed "voice", so the flag had no effect.
    # Without --voice, fall back to the other mode main() handles ("consol").
    try:
        main("voice" if args.voice else "consol")
    finally:
        # Close any OpenCV windows even if main() raised.
        cv2.destroyAllWindows()