Symptoms
1st of every month is offset by 1
Expected
Semantically it should be a whole month (i.e. from the first day to the last day) in the configured timezone, at least it is what BigQuery’s date_diff
function do.
with data_srouce as (
select id, timestamp_seconds(timestamp) as timestamp
from `bigquery-public-data.samples.wikipedia`
where date(timestamp_seconds(timestamp), 'Asia/Tokyo') between date(2009, 12, 1) and date(2010, 4, 2)
)
select date(timestamp, 'Asia/Tokyo') as date
, date_diff(date(timestamp, 'Asia/Tokyo'), date(1970, 1, 1), month) as date_diff
, count(*) as count
from data_srouce
group by 1, 2
order by 1, 2
Screenshot
Environment
https://toreta.jp.looker.com
BigQuery
Asia/Tokyo
Custom Dimension
diff_months(date(1970, 1, 1), ${calendar.timestamp_date})
LookML
view: calendar {
derived_table: {
sql:
select id, timestamp
from `bigquery-public-data.samples.wikipedia`
where date(timestamp_seconds(timestamp), 'Asia/Tokyo') between date(2009, 12, 1) and date(2010, 4, 2)
;;
}
dimension: id {
label: "id"
type: number
primary_key: yes
sql: ${TABLE}.id ;;
}
dimension_group: timestamp {
label: "timestamp"
type: time
timeframes: [
raw,
time,
date,
week,
month,
quarter,
year
]
sql: timestamp_seconds(${TABLE}.timestamp) ;;
}
measure: measure_count {
type: count
}
}
I could be wrong but I think whenever you’ve put date(timestamp, 'Asia/Tokyo')
, you’re taking a UTC timestamp and telling BigQuery it’s an Asia/Tokyo timestamp. BigQuery is then converting it back to UTC.
Try not specifying timezone in the LookML - Looker will automatically convert it to your timezone at the point of explore/query.
In my understanding a timestamp is always stored in UTC (*1). The date(timestamp, 'Asia/Tokyo')
statement means “Extracts the DATE from a TIMESTAMP expression in the specified timezone.” (*2).
The diff_months()
function should respect the user timezone (apparently it is), but the problem here is that the date is somehow offset by 1 day. The same goes to extract_date
function.
References:
Could you provide the generated SQL from the Explore? by clicking the SQL tab on the screenshot you’ve provided
Sure.
-- raw sql results do not include filled-in values for 'calendar.timestamp_date'
WITH calendar AS (select id, timestamp
from `bigquery-public-data.samples.wikipedia`
where date(timestamp_seconds(timestamp), 'Asia/Tokyo') between date(2009, 12, 1) and date(2010, 4, 2)
)
SELECT * FROM (
SELECT *, DENSE_RANK() OVER (ORDER BY z___min_rank) as z___pivot_row_rank, RANK() OVER (PARTITION BY z__pivot_col_rank ORDER BY z___min_rank) as z__pivot_col_ordering, CASE WHEN z___min_rank = z___rank THEN 1 ELSE 0 END AS z__is_highest_ranked_cell FROM (
SELECT *, MIN(z___rank) OVER (PARTITION BY calendar_timestamp_date) as z___min_rank FROM (
SELECT *, RANK() OVER (ORDER BY calendar_timestamp_date ASC, z__pivot_col_rank) AS z___rank FROM (
SELECT *, DENSE_RANK() OVER (ORDER BY CASE WHEN diff_months IS NULL THEN 1 ELSE 0 END, diff_months) AS z__pivot_col_rank FROM (
SELECT
CAST((DATE_DIFF(CAST((TIMESTAMP(FORMAT_TIMESTAMP('%F %T', TIMESTAMP_TRUNC(CAST(TIMESTAMP(FORMAT_TIMESTAMP('%F %T', timestamp_seconds(calendar.timestamp) , 'Japan')) AS TIMESTAMP), DAY)), 'Japan')) AS DATE), CAST(CAST(CONCAT(CAST(CAST(1970 AS INT64) AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST(' 00:00:00' AS STRING)) AS TIMESTAMP) AS DATE), MONTH) + CASE WHEN TIMESTAMP_DIFF((TIMESTAMP(FORMAT_TIMESTAMP('%F %T', TIMESTAMP_TRUNC(CAST(TIMESTAMP(FORMAT_TIMESTAMP('%F %T', timestamp_seconds(calendar.timestamp) , 'Japan')) AS TIMESTAMP), DAY)), 'Japan')), TIMESTAMP_TRUNC(CAST((TIMESTAMP(FORMAT_TIMESTAMP('%F %T', TIMESTAMP_TRUNC(CAST(TIMESTAMP(FORMAT_TIMESTAMP('%F %T', timestamp_seconds(calendar.timestamp) , 'Japan')) AS TIMESTAMP), DAY)), 'Japan')) AS TIMESTAMP), MONTH), SECOND) = TIMESTAMP_DIFF(CAST(CONCAT(CAST(CAST(1970 AS INT64) AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST(' 00:00:00' AS STRING)) AS TIMESTAMP), TIMESTAMP_TRUNC(CAST(CAST(CONCAT(CAST(CAST(1970 AS INT64) AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST(' 00:00:00' AS STRING)) AS TIMESTAMP) AS TIMESTAMP), MONTH), SECOND) THEN 0 WHEN TIMESTAMP_DIFF((TIMESTAMP(FORMAT_TIMESTAMP('%F %T', TIMESTAMP_TRUNC(CAST(TIMESTAMP(FORMAT_TIMESTAMP('%F %T', timestamp_seconds(calendar.timestamp) , 'Japan')) AS TIMESTAMP), DAY)), 'Japan')), TIMESTAMP_TRUNC(CAST((TIMESTAMP(FORMAT_TIMESTAMP('%F %T', TIMESTAMP_TRUNC(CAST(TIMESTAMP(FORMAT_TIMESTAMP('%F %T', timestamp_seconds(calendar.timestamp) , 'Japan')) AS TIMESTAMP), DAY)), 'Japan')) AS TIMESTAMP), MONTH), SECOND) < TIMESTAMP_DIFF(CAST(CONCAT(CAST(CAST(1970 AS INT64) AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST(' 00:00:00' AS STRING)) AS TIMESTAMP), TIMESTAMP_TRUNC(CAST(CAST(CONCAT(CAST(CAST(1970 AS INT64) AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST(' 00:00:00' AS STRING)) AS TIMESTAMP) AS TIMESTAMP), MONTH), SECOND) THEN CASE WHEN CAST(CONCAT(CAST(CAST(1970 AS INT64) AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST(' 00:00:00' AS STRING)) AS TIMESTAMP) < (TIMESTAMP(FORMAT_TIMESTAMP('%F %T', TIMESTAMP_TRUNC(CAST(TIMESTAMP(FORMAT_TIMESTAMP('%F %T', timestamp_seconds(calendar.timestamp) , 'Japan')) AS TIMESTAMP), DAY)), 'Japan')) THEN -1 ELSE 0 END ELSE CASE WHEN CAST(CONCAT(CAST(CAST(1970 AS INT64) AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST('-' AS STRING), CAST(LPAD(CAST(CAST(1 AS INT64) AS STRING), 2, '0') AS STRING), CAST(' 00:00:00' AS STRING)) AS TIMESTAMP) > (TIMESTAMP(FORMAT_TIMESTAMP('%F %T', TIMESTAMP_TRUNC(CAST(TIMESTAMP(FORMAT_TIMESTAMP('%F %T', timestamp_seconds(calendar.timestamp) , 'Japan')) AS TIMESTAMP), DAY)), 'Japan')) THEN 1 ELSE 0 END END) AS INT64) AS diff_months,
CAST(TIMESTAMP(FORMAT_TIMESTAMP('%F %T', timestamp_seconds(calendar.timestamp) , 'Japan')) AS DATE) AS calendar_timestamp_date,
COUNT(*) AS calendar_measure_count
FROM calendar
GROUP BY 1,2) ww
) bb WHERE z__pivot_col_rank <= 16384
) aa
) xx
) zz
WHERE (z__pivot_col_rank <= 50 OR z__is_highest_ranked_cell = 1) AND (z___pivot_row_rank <= 500 OR z__pivot_col_ordering = 1) ORDER BY z___pivot_row_rank