1234567891011121314151617181920212223242526272829303132333435363738394041 |
- import hashlib
- import logging
- import os
- from embedchain.loaders.base_loader import BaseLoader
- class ImagesLoader(BaseLoader):
- def load_data(self, image_url):
- """
- Loads images from the supplied directory/file and applies CLIP model transformation to represent these images
- in vector form
- :param image_url: The URL from which the images are to be loaded
- """
- # load model and image preprocessing
- from embedchain.models.clip_processor import ClipProcessor
- model = ClipProcessor.load_model()
- if os.path.isfile(image_url):
- data = [ClipProcessor.get_image_features(image_url, model)]
- else:
- data = []
- for filename in os.listdir(image_url):
- filepath = os.path.join(image_url, filename)
- try:
- data.append(ClipProcessor.get_image_features(filepath, model))
- except Exception as e:
- # Log the file that was not loaded
- logging.exception("Failed to load the file {}. Exception {}".format(filepath, e))
- # Get the metadata like Size, Last Modified and Last Created timestamps
- image_path_metadata = [
- str(os.path.getsize(image_url)),
- str(os.path.getmtime(image_url)),
- str(os.path.getctime(image_url)),
- ]
- doc_id = hashlib.sha256((" ".join(image_path_metadata) + image_url).encode()).hexdigest()
- return {
- "doc_id": doc_id,
- "data": data,
- }
|