import json
import re
import PyPDF2
import vertexai
from vertexai.generative_models import (
GenerationConfig,
GenerativeModel,
HarmCategory,
HarmBlockThreshold,
Part,
)
from vertexai.preview.generative_models import GenerativeModel, Part, GenerationConfig, SafetySetting, HarmCategory, HarmBlockThreshold
# Google Cloud project ID handed to vertexai.init() in analyze_local_pdf.
# NOTE(review): 'myId' looks like a placeholder — confirm and replace with a real project ID.
PROJECT_ID='myId'
def analyze_local_pdf(prompt, pdf_path):
    """
    Analyzes a local PDF file using Vertex AI and returns the summarized content.

    The text of every page is extracted locally with PyPDF2, appended to the
    prompt, and sent to the model as a single text part. The span from the
    first ``{`` to the last ``}`` in the model's reply is parsed as JSON and
    the value stored under its ``"text"`` key is returned.

    Args:
        prompt (str): The prompt to send to the model (e.g., "Summarize the following content:").
        pdf_path (str): The path to the local PDF file.

    Returns:
        The value of the "text" key of the JSON object found in the response
        ("" when that key is absent), or None when the request fails, the
        reply contains no JSON object, or the JSON cannot be parsed.

    Raises:
        Errors raised while initializing Vertex AI or while opening/reading
        the PDF are not caught here and propagate to the caller; only the
        request and response handling is wrapped in the try block.
    """
    # Initialize Vertex AI
    vertexai.init(project=PROJECT_ID, location="us-central1")
    # Load the model
    model = GenerativeModel(model_name="gemini-1.5-flash-001")
    # Read the local PDF content
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # `or ""` guards against pages for which no text could be extracted,
        # so join() never receives a non-string value.
        pdf_content = "".join(page.extract_text() or "" for page in reader.pages)
    # The SDK builds parts through factory methods; Part.from_text() wraps the
    # prompt followed by the extracted PDF text as one plain-text part.
    parts = [Part.from_text(f"{prompt}\n\n{pdf_content}")]
    # Create a GenerationConfig object
    generation_config = GenerationConfig(
        temperature=0.3,
        top_p=0.3,
    )
    # Safety settings
    safety_settings = [
        SafetySetting(
            category=HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            threshold=HarmBlockThreshold.BLOCK_ONLY_HIGH,
        ),
        # Add other safety settings as needed
    ]
    # Send the request synchronously
    try:
        print("Sending request to Vertex AI...")
        response = model.generate_content(
            parts, generation_config=generation_config, safety_settings=safety_settings
        )
        # Extract the JSON response (assuming the response is in JSON format).
        # The greedy pattern spans from the first "{" to the last "}".
        json_match = re.search(r'\{[\s\S]*\}', response.text)
        if not json_match:
            print("No JSON object found in the response.")
            return None
        try:
            summary_data = json.loads(json_match.group(0))
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON response: {e}")
            return None
        print("Successfully retrieved summary content from Vertex AI.")
        return summary_data.get("text", "")  # Extract text from summary object
    except Exception as e:
        # Boundary handler: any failure in the request or in reading the
        # response is reported and turned into a None result.
        print(f"Unexpected error during processing: {e}")
        return None
# Example usage: run the summarizer once against a sample PDF.
if __name__ == "__main__":
    pdf_path = "GreatDepression.pdf"  # Replace with the actual path to your PDF
    prompt = "Summarize the following content:"
    summary_text = analyze_local_pdf(prompt, pdf_path)
    if not summary_text:
        # Any falsy result (None or an empty string) is reported as a failure.
        print("Failed to retrieve summary from Vertex AI.")
    else:
        print(f"Summary: {summary_text}")
I'd appreciate any help or advice. Thanks.