I have 2 tables
I want to insert new records into condensed_log_table but to condense the table into a smaller version i need to read the latest logs from the raw table and UNION it with the condensed_log_table since the algorithm on which logs to remove is based on previous logs.
Question:
How would I write this in Dataform? This is one of my many attempts
.sqlx file where I do the inserts into condensed_log_table
config {
type: "operations",
hasOutput: true,
}
pre_operations {
DECLARE timestamp_checkpoint TIMESTAMP;
...logic for getting latest timestamp
};
WITH old_and_new_states AS (
SELECT
id, insert_time, state
FROM ${ref("raw_logs")} AS state_log
WHERE inserted_at > timestamp_checkpoint
latest_events AS logs
UNION ALL
SELECT
id, insert_time, state
FROM ${ref("condensed_log_table")}
)
SELECT
*
FROM
old_and_new_states
QUALIFY state != LAG(state) OVER (PARTITION BY id ORDER BY inserted_at ASC)
OR LAG(state) OVER (PARTITION BY id ORDER BY inserted_at ASC) IS NULL
.sqlx file where I declare the condensed_log_table
config {
type: "operations",
name: "condensed_log_table",
hasOutput: true,
tags: [
"my_tag",
]
}
CREATE TABLE IF NOT EXISTS ${self()} (
state STRING,
inserted_at TIMESTAMP,
id STRING,
)
One of the many errors I get is