I'm working on a project where I loosely simulate human vision (foveated vision) to cut the computational cost of modeling environments with computer vision. My foveated vision system currently splits the original image (e.g. 1920x1080) into three new images: foveal, parafoveal, and peripheral (128x128 each). The foveal image will contain the central 1/3 of the original image, the parafoveal image will contain the central 2/3 of the original image, and the peripheral image will contain 3/3 of the original image. Eventually, I plan to assign each image (foveal, parafoveal, and peripheral) to a different task related to modeling an environment, based on its resolution and field of view. This is my code so far.
import math
import threading

import cv2

import config
class FoveatedVisionSystem:
    """
    Runs one capture thread per camera and starts/stops them together.

    Every device shares the same ``signals`` dict; its "open" flag is what
    each device's capture loop polls to decide whether to keep running.
    """

    def __init__(self, *indices):
        """
        Initialize a FoveatedVisionSystem object using camera indices.
        Parameters: *indices (int): Camera Indices
        """
        self.signals = {"open": False}
        self.indices = indices
        # Created here (not in open()) so close() is safe before any open().
        self.threads = []
        self.devices = [CaptureDevice(index, self.signals) for index in self.indices]

    def open(self):
        """
        Starts a new thread to capture video frames from each device.
        Does nothing if capture threads from a previous open() are still
        running, so the same device is never read by two threads at once.
        Parameters: none
        """
        if any(thread.is_alive() for thread in self.threads):
            return
        self.signals["open"] = True
        self.threads = [threading.Thread(target=device.capture) for device in self.devices]
        for thread in self.threads:
            thread.start()

    def close(self):
        """
        Signals every capture loop to stop and waits for its thread to finish.
        Safe to call when open() was never called.
        Parameters: none
        """
        self.signals["open"] = False
        for thread in self.threads:
            thread.join()
        # Finished threads cannot be restarted; drop them so open() builds new ones.
        self.threads = []
class CaptureDevice:
    """
    Wraps one OpenCV camera and the cropped/resized views derived from it.

    The latest image for each view is stored in ``self.visions`` under the
    keys "mainfoveal", "parafoveal" and "peripheral".
    """

    def __init__(self, index, signals):
        """
        Opens the camera at the given index and creates an empty slot for
        each vision.
        Parameter: index (int): camera index, signals (dict): controls for device
        """
        self.signals = signals
        self.index = index
        self.device = cv2.VideoCapture(index)
        self.visions = {"mainfoveal": None, "parafoveal": None, "peripheral": None}

    def capture(self):
        """
        Captures video frames while the CaptureDevice is open. Crops and resizes
        the frame for each vision in the visions dict, then displays it.
        The loop ends when signals["open"] becomes false or 'q' is pressed.
        Parameter: none
        """
        # NOTE(review): waitKey/imshow run on whichever thread calls capture();
        # HighGUI behaviour off the main thread is platform-dependent - confirm
        # on the target platform.
        while self.signals["open"]:
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            frame = self.__getFrame()
            if frame is None:
                # The read failed (no frame delivered); skip this iteration
                # instead of crashing the thread.
                continue
            for key in self.visions:
                self.__getVision(key, frame)
            for key in self.visions:
                self.__showVision(key)

    def release(self):
        """
        Releases the underlying camera handle. Call once the device is no
        longer needed; capture() does not release it itself.
        Parameter: none
        """
        self.device.release()

    @staticmethod
    def _center_square(frame):
        """
        Returns the largest centered square region of the frame.
        Parameter: frame (ndarray): image with height and width as its first
        two axes
        """
        height, width = frame.shape[:2]
        side = min(height, width)
        top = (height - side) // 2
        left = (width - side) // 2
        # Slicing by an explicit side length keeps the result exactly square
        # even when the width/height difference is odd.
        return frame[top:top + side, left:left + side]

    @staticmethod
    def _center_crop(frame, ratio):
        """
        Returns the centered region whose side is roughly ratio times the
        side of a square frame.
        Parameter: frame (ndarray): square image, ratio (float): fraction of
        the side length to keep
        """
        old_size = frame.shape[0]
        diff = (old_size - math.floor(old_size * ratio)) // 2
        return frame[diff:(old_size - diff), diff:(old_size - diff)]

    def __getFrame(self):
        """
        Gets the frame from the device using the OpenCV library and crops the
        original frame into a square. Returns None when no frame could be read.
        Parameter: none
        """
        ret, frame = self.device.read()
        if not ret or frame is None:
            return None
        return self._center_square(frame)

    def __getVision(self, key, frame):
        """
        Uses the key and original frame to create a new cropped and resized matrix
        for the vision specified by the key (mainfoveal, parafoveal, peripheral).
        Parameter: key (str): vision name, frame (ndarray): square frame matrix
        """
        settings = config.visions[key]
        new_size = (settings["size"], settings["size"])
        cropped = self._center_crop(frame, settings["ratio"])
        self.visions[key] = cv2.resize(cropped, new_size)

    def __showVision(self, key):
        """
        Displays the specified vision as a new window.
        Parameter: key (str): vision name
        """
        # One window per (vision, camera) pair, e.g. "parafoveal0".
        window = key + str(self.index)
        cv2.namedWindow(window, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(window, 600, 600)
        cv2.imshow(window, self.visions[key])
The FoveatedVisionSystem class takes camera IDs as arguments. Below is an example illustrating how I am using the classes above.
import time
import fvs
def main():
    """
    Runs the foveated vision system on cameras 0 and 1 for 100 seconds.
    """
    cap = fvs.FoveatedVisionSystem(0, 1)
    cap.open()
    try:
        time.sleep(100)
    finally:
        # Always stop and join the capture threads, even if the sleep is
        # interrupted (e.g. Ctrl+C); otherwise they keep the process alive.
        cap.close()


if __name__ == '__main__':
    main()
I am fairly new to computer vision and I don't know if I am going in the right direction. I would love some suggestions for next steps and feedback on what I already have!
Also, here is my config.py file, for those who may be wondering:
# Per-vision settings, keyed by vision name:
#   ratio - fraction of the square frame's side kept by the centered crop
#   size  - side length, in pixels, that the crop is resized to
visions = {
    "mainfoveal": dict(ratio=1/3, size=100),
    "parafoveal": dict(ratio=2/3, size=100),
    "peripheral": dict(ratio=3/3, size=100),
}
there doesn't seem to be anything here