"""Turn a Mastodon/Pleroma post URL into Telegram inline query results."""

import mimetypes
import re
import uuid
from typing import Any, Dict, List, Optional, Tuple, Union
from urllib.parse import urlparse

from bs4 import BeautifulSoup
from mastodon import Mastodon
from telegram import (
    InlineQueryResultDocument,
    InlineQueryResultPhoto,
    InlineQueryResultVideo,
    InputTextMessageContent,
)

# Mastodon format: https://instance/@user/id (an optional trailing slash is tolerated).
_MASTODON_PATH_RE = re.compile(r'^/@[^/]+/(\d+)/?$')
# Pleroma format: https://instance/notice/id (an optional trailing slash is tolerated).
_PLEROMA_PATH_RE = re.compile(r'^/notice/([^/]+)/?$')

_InlineMediaResult = Union[
    InlineQueryResultPhoto,
    InlineQueryResultVideo,
    InlineQueryResultDocument,
]


def _guess_mime_type(media_url: str) -> Optional[str]:
    """Return the MIME type guessed from *media_url*, or None if unknown."""
    mime_type, _ = mimetypes.guess_type(media_url)
    if mime_type is None:
        # Retry on the path component alone so a query string or fragment
        # (e.g. ".../image.jpg?name=orig") cannot hide the file extension.
        mime_type, _ = mimetypes.guess_type(urlparse(media_url).path)
    return mime_type


def _build_media_result(
    media_url: str,
    caption: Optional[str],
    message_content: Optional[InputTextMessageContent],
) -> _InlineMediaResult:
    """Build the inline result whose type matches the media's guessed MIME type.

    Images become photo results, videos become video results, and everything
    else (including media whose type cannot be guessed) becomes a document.

    NOTE(review): per the Telegram Bot API docs, video results appear to accept
    only "text/html" / "video/mp4" and document results only "application/pdf" /
    "application/zip" as mime_type, and thumbnails are expected to be images --
    confirm whether other values are rejected by the API.
    """
    mime_type = _guess_mime_type(media_url)
    result_id = str(uuid.uuid4())  # unique ID for each result

    if not mime_type:
        # Fallback in case the MIME type is unknown.
        return InlineQueryResultDocument(
            id=result_id,
            title="Unknown file",
            document_url=media_url,
            mime_type="application/octet-stream",
            caption=caption,
            input_message_content=message_content,
        )

    if mime_type.startswith("image/"):
        return InlineQueryResultPhoto(
            id=result_id,
            photo_url=media_url,
            thumbnail_url=media_url,
            caption=caption,
            input_message_content=message_content,
        )

    if mime_type.startswith("video/"):
        return InlineQueryResultVideo(
            id=result_id,
            video_url=media_url,
            mime_type=mime_type,
            thumbnail_url=media_url,
            title="Video from post",
            caption=caption,
            input_message_content=message_content,
        )

    # Fallback for other known types (e.g. audio).
    return InlineQueryResultDocument(
        id=result_id,
        title="Media attachment",
        document_url=media_url,
        mime_type=mime_type,
        caption=caption,
        input_message_content=message_content,
    )


def build_fediverse_inline_results(
    post_data: Union[Dict[str, Any], str],
) -> List[_InlineMediaResult]:
    """Build a list of InlineQueryResult* objects from fediverse post data.

    Args:
        post_data: Mapping with a "post_text" string and a "media" list of
            URLs, as produced by ``extract_content``. Anything without a
            ``get`` method (notably the error string ``extract_content``
            returns on failure) yields an empty list instead of raising.

    Returns:
        One inline result per usable media URL, in order. Only the first
        result carries the post text.
    """
    if not hasattr(post_data, "get"):
        return []

    post_text = post_data.get("post_text", "")
    media_urls = post_data.get("media") or []

    results: List[_InlineMediaResult] = []
    for media_url in media_urls:
        if not media_url:
            # An attachment without a URL cannot be offered as a result.
            continue

        # Include the full post text only in the first emitted result.
        is_first = not results
        caption = post_text if is_first else None
        # NOTE(review): per the Telegram Bot API docs, input_message_content is
        # sent *instead of* the media, so choosing the first result posts the
        # text rather than the attachment -- confirm this is intended.
        # An empty text is skipped because there would be nothing to send.
        message_content = (
            InputTextMessageContent(post_text) if is_first and post_text else None
        )
        results.append(_build_media_result(media_url, caption, message_content))

    return results


def clean_html(raw_html: Optional[str]) -> str:
    """Remove HTML tags from a Mastodon/Pleroma post.

    ``None`` is treated as an empty document and yields an empty string.
    """
    return BeautifulSoup(raw_html or "", "html.parser").get_text()


def parse_post_url(url: str) -> Tuple[str, str]:
    """Parse a Mastodon or Pleroma post URL.

    Args:
        url: Post URL of the form ``https://instance/@user/id`` (Mastodon)
            or ``https://instance/notice/id`` (Pleroma).

    Returns:
        A ``(instance domain, status_id)`` tuple.

    Raises:
        ValueError: If the URL has no host or its path matches neither format.
    """
    parsed = urlparse(url)
    domain = parsed.netloc

    # Without a host there is no instance to query, so reject the URL here.
    if domain:
        for pattern in (_MASTODON_PATH_RE, _PLEROMA_PATH_RE):
            match = pattern.match(parsed.path)
            if match:
                return domain, match.group(1)

    raise ValueError("Unsupported or invalid Fediverse post URL format.")


def fetch_status(instance: str, status_id: str) -> Any:
    """Fetch status using Mastodon-compatible API.

    Args:
        instance: Domain of the instance; it is queried over HTTPS.
        status_id: ID of the status on that instance.

    Returns:
        Whatever ``Mastodon.status`` returns for the given ID.
    """
    client = Mastodon(api_base_url=f"https://{instance}")
    return client.status(status_id)


def extract_content(post_url: str) -> Union[Dict[str, Any], str]:
    """Fetch a post and collect its text and media URLs.

    Args:
        post_url: URL of a Mastodon or Pleroma post.

    Returns:
        On success, a dict with "post_text" (the source URL, a blank line,
        then the post content with HTML stripped) and "media" (list of
        attachment URLs). On any failure the error is printed and its
        message is returned as a plain string, so callers must check the
        type of the result before using it as a dict.
    """
    try:
        instance, status_id = parse_post_url(post_url)
        status = fetch_status(instance, status_id)

        post_text = f"src -> {post_url}\n\n{clean_html(status['content'])}"

        # Attachments without a URL are skipped; they cannot be shared.
        media_urls = [
            media["url"]
            for media in (status['media_attachments'] or [])
            if media.get("url")
        ]
        if not media_urls:
            print("🖼️ No media found.")

        return {"post_text": post_text, "media": media_urls}

    except Exception as e:
        # Top-level boundary: report the problem and hand the message back.
        print(f"❌ Error: {e}")
        return str(e)


# === Example usage ===
if __name__ == "__main__":
    post_url = "https://aipub.social/@usluck@channels.im/114303377854884481"
    content = extract_content(post_url)
    # If extraction failed, content is an error string and this is an empty list.
    inline_query_response = build_fediverse_inline_results(content)
    print(content)