139 lines
4.6 KiB
Python
139 lines
4.6 KiB
Python
import re
|
|
from urllib.parse import urlparse
|
|
from mastodon import Mastodon
|
|
from bs4 import BeautifulSoup
|
|
|
|
from telegram import InlineQueryResultPhoto, InlineQueryResultVideo, InlineQueryResultDocument, InputTextMessageContent
|
|
import mimetypes
|
|
import uuid
|
|
|
|
def build_fediverse_inline_results(post_data):
    """
    Build a list of InlineQueryResult* objects from fediverse post data.

    Args:
        post_data: Mapping with an optional ``"post_text"`` string and an
            optional ``"media"`` list of attachment URLs (the shape
            ``extract_content`` returns on success).

    Returns:
        One inline result per media URL, in input order. The result type is
        picked from the MIME type guessed from the URL: photo for ``image/*``,
        video for ``video/*``, document for everything else. An empty list is
        returned when ``post_data`` is not a mapping (``extract_content``
        returns an error *string* on failure) or when it carries no media.
    """
    from collections.abc import Mapping

    # A failed extraction yields a plain error string, not a dict; there is
    # nothing to build from it, so answer with "no results" instead of
    # crashing on ``.get``.
    if not isinstance(post_data, Mapping):
        return []

    # ``or`` also covers keys that are present but explicitly set to None.
    post_text = post_data.get("post_text") or ""
    media_urls = post_data.get("media") or []
    results = []

    for idx, media_url in enumerate(media_urls):
        mime_type, _ = mimetypes.guess_type(media_url)
        is_first = idx == 0

        # Fields shared by every result type. The post text is attached to
        # the first result only.
        # NOTE(review): the Bot API describes input_message_content as the
        # content sent *instead of* the media, so the first result would post
        # text only - confirm this is intended.
        shared = {
            "id": str(uuid.uuid4()),  # unique ID for each result
            "caption": post_text if is_first else None,
            "input_message_content": (
                InputTextMessageContent(post_text) if is_first else None
            ),
        }

        if not mime_type:
            # Fallback in case the MIME type cannot be guessed from the URL.
            # NOTE(review): the Bot API documents only application/pdf and
            # application/zip for document results - TODO confirm that other
            # MIME types are accepted.
            result = InlineQueryResultDocument(
                title="Unknown file",
                document_url=media_url,
                mime_type="application/octet-stream",
                **shared,
            )
        elif mime_type.startswith("image/"):
            result = InlineQueryResultPhoto(
                photo_url=media_url,
                thumbnail_url=media_url,
                **shared,
            )
        elif mime_type.startswith("video/"):
            # The video URL doubles as the thumbnail URL because no separate
            # preview URL is available in post_data.
            result = InlineQueryResultVideo(
                video_url=media_url,
                mime_type=mime_type,
                thumbnail_url=media_url,
                title="Video from post",
                **shared,
            )
        else:
            # Fallback for other types (e.g., audio)
            result = InlineQueryResultDocument(
                title="Media attachment",
                document_url=media_url,
                mime_type=mime_type,
                **shared,
            )

        results.append(result)

    return results
|
|
|
|
|
|
def clean_html(raw_html):
    """Convert the HTML body of a Mastodon/Pleroma post to plain text.

    A bare ``get_text()`` drops ``<br>`` and ``<p>`` boundaries, gluing the
    last word of one paragraph to the first word of the next. Line breaks and
    paragraph breaks are therefore turned into newlines before the tags are
    stripped.

    Args:
        raw_html: HTML fragment of the post; ``None`` or an empty string
            yields an empty result.

    Returns:
        The text content with tags removed, ``<br>`` rendered as a newline,
        paragraphs separated by a blank line, and surrounding whitespace
        trimmed.
    """
    soup = BeautifulSoup(raw_html or "", "html.parser")

    # Replace explicit line breaks with a real newline character.
    for line_break in soup.find_all("br"):
        line_break.replace_with("\n")

    # Terminate every paragraph with a blank line; the separator left after
    # the final paragraph is removed by the strip() below.
    for paragraph in soup.find_all("p"):
        paragraph.append("\n\n")

    return soup.get_text().strip()
|
|
|
|
# Path shapes recognised by parse_post_url; group 1 is always the status id.
_POST_PATH_PATTERNS = (
    # Mastodon format: https://instance/@user/id
    # (the user part may itself contain "@", e.g. /@user@remote.host/id)
    re.compile(r'/@[^/]+/(\d+)'),
    # Mastodon ActivityPub-style format: https://instance/users/user/statuses/id
    re.compile(r'/users/[^/]+/statuses/(\d+)'),
    # Pleroma format: https://instance/notice/id
    re.compile(r'/notice/([^/]+)'),
)


def parse_post_url(url):
    """
    Parse a Mastodon or Pleroma post URL.

    Surrounding whitespace and a trailing slash on the path are ignored.

    Args:
        url: Absolute post URL, e.g. ``https://instance/@user/123`` or
            ``https://instance/notice/AbC``.

    Returns:
        A ``(domain, status_id)`` tuple: the URL's network location and the
        status id as a string.

    Raises:
        ValueError: If ``url`` is not a string, has no host part, or its path
            matches none of the supported post URL formats.
    """
    error_message = "Unsupported or invalid Fediverse post URL format."

    if not isinstance(url, str):
        raise ValueError(error_message)

    parsed = urlparse(url.strip())
    domain = parsed.netloc

    # Without a host (e.g. a bare "/@user/123") there is no instance to query;
    # reject it here rather than hand back an empty domain.
    if not domain:
        raise ValueError(error_message)

    path = parsed.path.rstrip("/")
    for pattern in _POST_PATH_PATTERNS:
        match = pattern.fullmatch(path)
        if match:
            return domain, match.group(1)

    raise ValueError(error_message)
|
|
|
|
def fetch_status(instance, status_id):
    """Look up a single status through the Mastodon-compatible API.

    Args:
        instance: Host name of the instance; it is prefixed with
            ``https://`` to form the API base URL.
        status_id: Identifier of the status to retrieve.

    Returns:
        Whatever ``Mastodon.status`` returns for ``status_id``.
    """
    api_root = "https://{}".format(instance)
    # The client is created per call and no credentials are passed to it.
    return Mastodon(api_base_url=api_root).status(status_id)
|
|
|
|
def extract_content(post_url):
    """Fetch a fediverse post and reduce it to its text and media URLs.

    Args:
        post_url: URL of the post, in a form ``parse_post_url`` accepts.

    Returns:
        On success, a dict with two keys: ``"post_text"`` (a
        ``src -> <url>`` line, a blank line, then the post body as returned
        by ``clean_html``) and ``"media"`` (list of attachment URLs, possibly
        empty). On any failure the error is printed and its message is
        returned as a plain string, so callers must check the return type.
    """
    try:
        instance, status_id = parse_post_url(post_url)
        status = fetch_status(instance, status_id)

        # Source link first, then the post body.
        text = "src -> " + post_url + "\n\n" + clean_html(status['content'])

        attachments = status['media_attachments']
        if attachments:
            media_urls = [attachment["url"] for attachment in attachments]
        else:
            media_urls = []
            print("🖼️ No media found.")

        return {"post_text": text, "media": media_urls}
    except Exception as err:
        # Errors are reported by value: the message is printed and returned
        # in place of the result dict.
        print(f"❌ Error: {err}")
        return str(err)
|
|
|
|
# === Example usage ===
if __name__ == "__main__":
    post_url = "https://aipub.social/@usluck@channels.im/114303377854884481"

    content = extract_content(post_url)

    # extract_content returns an error string instead of a dict when the
    # lookup fails; only a successful extraction can be turned into inline
    # results, otherwise there is nothing to offer.
    inline_query_response = (
        build_fediverse_inline_results(content)
        if isinstance(content, dict)
        else []
    )

    print(content)
|