{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "5f0276fe",
   "metadata": {},
   "source": [
    "# Sharing On-Demand Logs with RAI\n",
    "\n",
    "## Overview\n",
    "\n",
    "This notebook creates and shares a [secure-share view](https://docs.snowflake.com/en/user-guide/data-sharing-intro) with RAI to provide access to on-demand logs. It's designed to run as a [Snowflake Notebook](https://docs.snowflake.com/en/user-guide/ui-snowsight/notebooks), but it can be adapted to run as a local Jupyter notebook by replacing the call to `get_active_session()` with code to generate a `Session` object locally. See [Snowflake's documentation](https://docs.snowflake.com/en/developer-guide/snowpark/python/creating-session) for details.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ebb4e3e7",
   "metadata": {
    "language": "python"
   },
   "outputs": [],
   "source": [
    "from datetime import datetime, timedelta, date\n",
    "\n",
    "def get_date_range(start_date, end_date):\n",
    "    \"\"\"Return a '|'-joined (regex alternation) string of every date from\n",
    "    ``start_date`` through ``end_date``, inclusive, in YYYY-MM-DD format.\n",
    "\n",
    "    Raises ValueError if ``end_date`` is earlier than ``start_date``: the\n",
    "    empty alternation produced in that case would make the downstream COPY\n",
    "    file pattern match every staged file instead of none.\n",
    "    \"\"\"\n",
    "    start = datetime.strptime(start_date, \"%Y-%m-%d\")\n",
    "    end = datetime.strptime(end_date, \"%Y-%m-%d\")\n",
    "    if end < start:\n",
    "        raise ValueError(\"end_date must not be earlier than start_date\")\n",
    "    dates = [(start + timedelta(days=i)).strftime(\"%Y-%m-%d\")\n",
    "             for i in range((end - start).days + 1)]\n",
    "    return \"|\".join(dates)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a7dfc426",
   "metadata": {},
   "source": [
    "Edit the following cell with your specific values:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ccf4e700",
   "metadata": {
    "language": "python"
   },
   "outputs": [],
   "source": [
    "# Configuration: every value below is substituted into the SQL template in a later cell.\n",
    "\n",
    "# Override this to the date you want to get logs starting from in the format YYYY-MM-DD:\n",
    "start_date = date.today().strftime(\"%Y-%m-%d\")\n",
    "\n",
    "# Override this to the date you want to get logs until in the format YYYY-MM-DD\n",
    "end_date = date.today().strftime(\"%Y-%m-%d\")\n",
    "\n",
    "# Regex alternation of every date in [start_date, end_date]; used in the file patterns below.\n",
    "date_range = get_date_range(start_date, end_date)\n",
    "\n",
    "# Override this to True if you want to share spcs_control_plane logs\n",
    "include_erps_logs = False\n",
    "\n",
    "# Override this to the engine name (ex, 'testEngine') or leave it as it is if you want to get logs for all engines\n",
    "engine_name = \".*\"\n",
    "\n",
    "# Override this to the warehouse the sharing queries should run with\n",
    "warehouse = \"\"\n",
    "\n",
    "# Override this to a unique id and share it with RAI\n",
    "# NOTE(review): 'id' shadows the Python builtin of the same name; kept as-is\n",
    "# because later cells substitute it into the SQL template under this name.\n",
    "id = \"\"\n",
    "\n",
    "# The account you want to share the logs with -- it should be in the format org.account_name\n",
    "event_sharing_account = \"\"\n",
    "\n",
    "# Your native app name (usually relationalai)\n",
    "native_app_name = \"relationalai\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3f9f6f6",
   "metadata": {
    "language": "python"
   },
   "outputs": [],
   "source": [
    "# Build the regex used as the COPY INTO ... PATTERN to select staged log files.\n",
    "engine_file_pattern = (\n",
    "    f'{engine_name}/clientlogs-.*({date_range}).*\\\\.json'\n",
    "    f'|{engine_name}/clientlogs-engine.json'\n",
    ")\n",
    "erp_file_pattern = f'|clientlogs-cp-({date_range}).*\\\\.json|.*clientlogs-cp.json'\n",
    "\n",
    "# The spcs_control_plane (erp) alternatives are appended only when requested;\n",
    "# erp_file_pattern already carries its leading '|' separator.\n",
    "selected_patterns = engine_file_pattern + (erp_file_pattern if include_erps_logs else '')\n",
    "file_pattern = f'.*({selected_patterns}).*'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a304d9a",
   "metadata": {
    "language": "python"
   },
   "outputs": [],
   "source": [
    "# SQL template for creating the share and loading the logs. It is run in a\n",
    "# single session.sql(...) call by the next cell; the '&name' tokens are\n",
    "# placeholders filled in via str.replace at the bottom of this cell.\n",
    "sql_query = \"\"\"\n",
    "BEGIN\n",
    "USE ROLE ACCOUNTADMIN;\n",
    "USE WAREHOUSE &warehouse; -- Update to use another warehouse if necessary.\n",
    "\n",
    "CREATE DATABASE IF NOT EXISTS TELEMETRY_SHARING;\n",
    "USE DATABASE TELEMETRY_SHARING;\n",
    "CREATE SCHEMA IF NOT EXISTS LOGS;\n",
    "USE SCHEMA LOGS;\n",
    "\n",
    "--*****--\n",
    "-- Load staged data files to temporary tables\n",
    "--*****--\n",
    "CREATE OR REPLACE TABLE TELEMETRY_SHARING.LOGS.TELEMETRY_LOAD_TABLE_&id (\n",
    "  LOG_RECORD VARCHAR\n",
    ");\n",
    "\n",
    "CREATE OR REPLACE TABLE TELEMETRY_SHARING.LOGS.TELEMETRY_SHARE_TABLE_&id  (\n",
    "  TIMESTAMP TIMESTAMP,\n",
    "  OBSERVED_TIMESTAMP TIMESTAMP,\n",
    "  SPAN_ID VARCHAR,\n",
    "  TRACE_ID VARCHAR,\n",
    "  MESSAGE VARCHAR,\n",
    "  LOG_RECORD VARCHAR\n",
    ");\n",
    "\n",
    "--*****--\n",
    "-- Create secure view from table with target log records\n",
    "--*****--\n",
    "CREATE OR REPLACE SECURE VIEW TELEMETRY_SHARING.LOGS.TELEMETRY_SHARE_VIEW_&id\n",
    "COMMENT = 'View containing telemetry records to share with the RAI provider account'\n",
    "AS\n",
    "    SELECT *\n",
    "    FROM TELEMETRY_SHARING.LOGS.TELEMETRY_SHARE_TABLE_&id ;\n",
    "\n",
    "--*****--\n",
    "-- Share secure view with the RAI provider account\n",
    "--*****--\n",
    "CREATE OR REPLACE SHARE TELEMETRY_SHARE_&id;\n",
    "\n",
    "CREATE DATABASE ROLE IF NOT EXISTS TELEMETRY_SHARE_ROLE;\n",
    "GRANT USAGE ON DATABASE TELEMETRY_SHARING TO SHARE TELEMETRY_SHARE_&id;\n",
    "GRANT USAGE ON SCHEMA TELEMETRY_SHARING.LOGS TO DATABASE ROLE TELEMETRY_SHARE_ROLE;\n",
    "GRANT SELECT ON VIEW TELEMETRY_SHARE_VIEW_&id TO DATABASE ROLE TELEMETRY_SHARE_ROLE;\n",
    "GRANT DATABASE ROLE TELEMETRY_SHARE_ROLE TO SHARE TELEMETRY_SHARE_&id;\n",
    "\n",
    "ALTER SHARE TELEMETRY_SHARE_&id ADD ACCOUNTS = &event_sharing_account;\n",
    "\n",
    "COPY INTO TELEMETRY_SHARING.LOGS.TELEMETRY_LOAD_TABLE_&id\n",
    "FROM (\n",
    "    SELECT\n",
    "        $1 AS log_record\n",
    "    FROM @&native_app_name.app_state.client_log_stage\n",
    ")\n",
    "PATTERN =  '&file_pattern'\n",
    "FILE_FORMAT = (TYPE = JSON)\n",
    "ON_ERROR = CONTINUE; -- This will skip any log records that are invalid JSON.\n",
    "-- The output of the query will indicate if any records were skipped due to errors.\n",
    "\n",
    "\n",
    "-- Copy from TELEMETRY_LOAD_TABLE_&id into TELEMETRY_SHARE_TABLE_&id and remove safe logs\n",
    "INSERT INTO TELEMETRY_SHARING.LOGS.TELEMETRY_SHARE_TABLE_&id\n",
    "    SELECT\n",
    "        to_timestamp(timeUnixNano) as timestamp,\n",
    "        to_timestamp(observedTimeUnixNano) as observed_timestamp,\n",
    "        spanId,\n",
    "        traceId,\n",
    "        a.value:value:stringValue as message,\n",
    "        log_record\n",
    "    FROM (SELECT\n",
    "            value:timeUnixNano as timeUnixNano,\n",
    "            value:observedTimeUnixNano as observedTimeUnixNano,\n",
    "            value:spanId as spanId,\n",
    "            value:traceId as traceId,\n",
    "            value as log_record\n",
    "        FROM TELEMETRY_SHARING.LOGS.TELEMETRY_LOAD_TABLE_&id, LATERAL FLATTEN( INPUT => TRY_PARSE_JSON($1):resourceLogs[0]:scopeLogs[0]:logRecords, OUTER => TRUE )),\n",
    "        LATERAL FLATTEN( INPUT => log_record:body:kvlistValue:values, OUTER => TRUE) a, LATERAL FLATTEN( INPUT => log_record:attributes, OUTER => TRUE) b\n",
    "    WHERE a.VALUE:key = 'message'\n",
    "    and\n",
    "        (\n",
    "            ( -- engine unsafe logs\n",
    "                b.value:key = 'log.file.name'\n",
    "                and b.value:value:stringValue ='engine-unsafe.log'\n",
    "            )\n",
    "            or\n",
    "            (\n",
    "                -- erps unsafe logs\n",
    "                log_record not like '%___safe_to_log%'\n",
    "                and log_record not like '%engine-safe.log%'\n",
    "                and log_record like '%spcs_control_plane%'\n",
    "\n",
    "            )\n",
    "        );\n",
    "END;\n",
    "\"\"\"\n",
    "# Substitute the configuration values into the template.\n",
    "# NOTE(review): '&date_range' never occurs in the template above (the date\n",
    "# range is already baked into file_pattern), so that replace is a no-op.\n",
    "sql_query = (\n",
    "    sql_query\n",
    "        .replace('&warehouse', warehouse)\n",
    "        .replace('&id', id)\n",
    "        .replace('&event_sharing_account', event_sharing_account)\n",
    "        .replace('&date_range', date_range)\n",
    "        .replace('&native_app_name', native_app_name)\n",
    "        .replace('&file_pattern', file_pattern)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "32bfc249",
   "metadata": {
    "language": "python"
   },
   "outputs": [],
   "source": [
    "# if running as a Snowflake notebook:\n",
    "# (get_active_session is not a builtin -- it must be imported explicitly)\n",
    "from snowflake.snowpark.context import get_active_session\n",
    "\n",
    "session = get_active_session()\n",
    "\n",
    "# # if running as a local Jupyter notebook:\n",
    "# from snowflake.snowpark import Session\n",
    "#\n",
    "# connection_parameters = {\n",
    "#    \"account\": \"<your snowflake account>\",\n",
    "#    \"user\": \"<your snowflake user>\",\n",
    "#    \"password\": \"<your snowflake password>\",\n",
    "#    \"role\": \"<your snowflake role>\",  # optional\n",
    "#    \"warehouse\": \"<your snowflake warehouse>\",  # optional\n",
    "#    \"database\": \"<your snowflake database>\",  # optional\n",
    "#    \"schema\": \"<your snowflake schema>\",  # optional\n",
    "# }\n",
    "#\n",
    "# session = Session.builder.configs(connection_parameters).create()\n",
    "\n",
    "# Run the whole sharing script; .collect() forces execution.\n",
    "session.sql(sql_query).collect()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "da34db12",
   "metadata": {},
   "source": [
    "Run the following cell to confirm that the logs are copied to the secure share:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f1b0261",
   "metadata": {
    "language": "python"
   },
   "outputs": [],
   "source": [
    "# Sanity check: preview a few of the rows that were copied into the share table.\n",
    "session.sql(\"USE ROLE ACCOUNTADMIN\").collect()\n",
    "session.sql(f\"USE WAREHOUSE {warehouse}\").collect()\n",
    "\n",
    "preview_query = f\"SELECT * FROM TELEMETRY_SHARING.LOGS.TELEMETRY_SHARE_TABLE_{id} LIMIT 5\"\n",
    "# Last expression: the collected rows are displayed as the cell output.\n",
    "session.sql(preview_query).collect()"
   ]
  },
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Streamlit Notebook",
   "name": "streamlit"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
