image.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. import base64
  2. import hashlib
  3. import os
  4. from pathlib import Path
  5. from openai import OpenAI
  6. from embedchain.helpers.json_serializable import register_deserializable
  7. from embedchain.loaders.base_loader import BaseLoader
  8. DESCRIBE_IMAGE_PROMPT = "Describe the image:"
  9. @register_deserializable
  10. class ImageLoader(BaseLoader):
  11. def __init__(self, max_tokens: int = 500, api_key: str = None, prompt: str = None):
  12. super().__init__()
  13. self.custom_prompt = prompt or DESCRIBE_IMAGE_PROMPT
  14. self.max_tokens = max_tokens
  15. self.api_key = api_key or os.environ["OPENAI_API_KEY"]
  16. self.client = OpenAI(api_key=self.api_key)
  17. @staticmethod
  18. def _encode_image(image_path: str):
  19. with open(image_path, "rb") as image_file:
  20. return base64.b64encode(image_file.read()).decode("utf-8")
  21. def _create_completion_request(self, content: str):
  22. return self.client.chat.completions.create(
  23. model="gpt-4-vision-preview", messages=[{"role": "user", "content": content}], max_tokens=self.max_tokens
  24. )
  25. def _process_url(self, url: str):
  26. if url.startswith("http"):
  27. return [{"type": "text", "text": self.custom_prompt}, {"type": "image_url", "image_url": {"url": url}}]
  28. elif Path(url).is_file():
  29. extension = Path(url).suffix.lstrip(".")
  30. encoded_image = self._encode_image(url)
  31. image_data = f"data:image/{extension};base64,{encoded_image}"
  32. return [{"type": "text", "text": self.custom_prompt}, {"type": "image", "image_url": {"url": image_data}}]
  33. else:
  34. raise ValueError(f"Invalid URL or file path: {url}")
  35. def load_data(self, url: str):
  36. content = self._process_url(url)
  37. response = self._create_completion_request(content)
  38. content = response.choices[0].message.content
  39. doc_id = hashlib.sha256((content + url).encode()).hexdigest()
  40. return {"doc_id": doc_id, "data": [{"content": content, "meta_data": {"url": url, "type": "image"}}]}