-- =====================================================================
-- Facebook Scraper API - Database Schema
-- =====================================================================
-- Run this ONCE in phpMyAdmin (or the MySQL CLI) after creating an empty
-- database in cPanel. Replace `facebook_scraper` below if your database
-- has a different name (cPanel usually prefixes it, e.g. cpaneluser_fbscraper).
-- =====================================================================

-- If you want the script to create the DB itself, uncomment the next two
-- lines. On shared cPanel hosting you normally create the DB through
-- "MySQL Databases" instead and skip this.
--
-- CREATE DATABASE IF NOT EXISTS `facebook_scraper`
--   CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
-- USE `facebook_scraper`;

SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ---------------------------------------------------------------------
-- posts: one row per scraped Facebook post
-- ---------------------------------------------------------------------
DROP TABLE IF EXISTS `posts`;
CREATE TABLE `posts` (
    `id`               INT UNSIGNED NOT NULL AUTO_INCREMENT,
    `scraped_from_url` TEXT          NULL,
    `post_url`         VARCHAR(1024) NULL,
    `post_url_hash`    CHAR(40)      NULL,   -- SHA1 of post_url for dedup / fast lookup
    `group_name`       VARCHAR(512)  NULL,
    `username`         VARCHAR(255)  NULL,
    `user_href`        VARCHAR(1024) NULL,
    `date_time`        DATETIME      NULL,
    `content`          MEDIUMTEXT    NULL,
    `media_urls`       MEDIUMTEXT    NULL,   -- JSON array of URLs
    `comments_number`  VARCHAR(64)   NULL,   -- kept as string because FB shows things like "1.2K"
    `phones`           TEXT          NULL,   -- JSON array
    `emails`           TEXT          NULL,   -- JSON array
    `raw_payload`      MEDIUMTEXT    NULL,   -- full original JSON, for safety
    `created_at`       TIMESTAMP     NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`),
    UNIQUE KEY `uniq_post_url_hash` (`post_url_hash`),
    KEY `idx_group_name`            (`group_name`),
    KEY `idx_username`              (`username`),
    KEY `idx_date_time`             (`date_time`),
    KEY `idx_created_at`            (`created_at`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

-- ---------------------------------------------------------------------
-- comments: one row per scraped comment, linked to its parent post
-- ---------------------------------------------------------------------
DROP TABLE IF EXISTS `comments`;
CREATE TABLE `comments` (
    `id`         INT UNSIGNED NOT NULL AUTO_INCREMENT,
    `post_id`    INT UNSIGNED NOT NULL,
    `username`   VARCHAR(255)  NULL,
    `content`    MEDIUMTEXT    NULL,
    `user_href`  VARCHAR(1024) NULL,
    `phones`     TEXT          NULL,   -- JSON array
    `emails`     TEXT          NULL,   -- JSON array
    `created_at` TIMESTAMP     NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`),
    KEY `idx_post_id`  (`post_id`),
    KEY `idx_username` (`username`),
    CONSTRAINT `fk_comments_post`
        FOREIGN KEY (`post_id`) REFERENCES `posts` (`id`)
        ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

-- ---------------------------------------------------------------------
-- groups: one row per scraped Facebook group page
-- ---------------------------------------------------------------------
DROP TABLE IF EXISTS `groups`;
CREATE TABLE `groups` (
    `id`            INT UNSIGNED NOT NULL AUTO_INCREMENT,
    `group_name`    VARCHAR(512)  NULL,
    `group_link`    VARCHAR(1024) NULL,
    `group_link_hash` CHAR(40)    NULL,   -- SHA1 of group_link for dedup
    `members`       VARCHAR(255)  NULL,   -- raw text like "12K members"
    `description`   MEDIUMTEXT    NULL,
    `raw_payload`   MEDIUMTEXT    NULL,
    `created_at`    TIMESTAMP     NOT NULL DEFAULT CURRENT_TIMESTAMP,
    `updated_at`    TIMESTAMP     NOT NULL DEFAULT CURRENT_TIMESTAMP
                                  ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`),
    UNIQUE KEY `uniq_group_link_hash` (`group_link_hash`),
    KEY `idx_group_name`              (`group_name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

-- ---------------------------------------------------------------------
-- ingest_log: optional debugging log of every payload received
-- ---------------------------------------------------------------------
DROP TABLE IF EXISTS `ingest_log`;
CREATE TABLE `ingest_log` (
    `id`         INT UNSIGNED NOT NULL AUTO_INCREMENT,
    `endpoint`   VARCHAR(64)  NOT NULL,    -- "posts" or "groups"
    `item_count` INT UNSIGNED NOT NULL DEFAULT 0,
    `status`     VARCHAR(32)  NOT NULL,    -- "ok", "partial", "error"
    `message`    TEXT         NULL,
    `ip`         VARCHAR(64)  NULL,
    `created_at` TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`),
    KEY `idx_endpoint_created` (`endpoint`, `created_at`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

SET FOREIGN_KEY_CHECKS = 1;
