本方案的特点
- 周一开始,周日结束算一周
- 跨年的周,以周三所在年份作为这一周所在的年份
- 填写 END_DATE 和 START_DATE 两个参数,即可生成这段时间内的日历表。
- 节假日和调休工作日采用手动维护的方式。每年维护更新一次即可。
```SQL
create table if not exists dim.dim_calendar (
the_date DATE COMMENT '日期',
the_day STRING COMMENT '日',
the_month STRING COMMENT '月',
the_year STRING COMMENT '年',
week_name STRING COMMENT '周的名称',
nth_week_of_year STRING COMMENT '该年内第几周',
week_year STRING COMMENT '按周判断年份,跨年的周按照周三所在的年',
week_month STRING COMMENT '按周判断月份,跨月的周按照周三所在的月',
nth_day_of_week STRING COMMENT '周内第几天',
dayname_of_week STRING COMMENT '星期几',
is_weekday STRING COMMENT '是否非周末',
start_date_of_week STRING COMMENT '周开始日期',
end_date_of_week STRING COMMENT '周结束日期',
month_name STRING COMMENT '月份的名称',
start_date_of_month STRING COMMENT '月开始日期',
end_date_of_month STRING COMMENT '月结束日期',
quarter_name STRING COMMENT '季度的名称',
nth_quarter_of_year STRING COMMENT '该年内第几季度',
start_date_of_quarter STRING COMMENT '季度的开始日期',
end_date_of_quarter STRING COMMENT '季度的结束日期',
start_date_of_year STRING COMMENT '年的开始日期',
end_date_of_year STRING COMMENT '年的结束日期',
festival_name STRING COMMENT '节假日名称',
not_work STRING COMMENT '节假日放假和调休安排',
is_workday STRING COMMENT '是否工作日(已处理节假日)'
) COMMENT '日历表'
STORED AS orcfile
;
with festival as (
-- 手动维护好每年的节假日信息。
select
f.`date` as festival_date,
max(f.name) as festival_name,
max(f.not_work) as not_work
from (
-- 节假日和调休工作日
select array(
named_struct('date','2021-01-01','name','元旦','not_work','放假'),
named_struct('date','2021-01-02','name','元旦','not_work','放假'),
named_struct('date','2021-01-03','name','元旦','not_work','放假'),
named_struct('date','2021-02-11','name','春节','not_work','放假'),
named_struct('date','2021-02-12','name','春节','not_work','放假'),
named_struct('date','2021-02-13','name','春节','not_work','放假'),
named_struct('date','2021-02-14','name','春节','not_work','放假'),
named_struct('date','2021-02-15','name','春节','not_work','放假'),
named_struct('date','2021-02-16','name','春节','not_work','放假'),
named_struct('date','2021-02-17','name','春节','not_work','放假'),
named_struct('date','2021-02-07','name','春节','not_work','上班'),
named_struct('date','2021-02-20','name','春节','not_work','上班')
-- 可以不断往下补充新的节假日和调休日期,此处省略重复的内容
) as arr
)a
lateral view explode(arr) t as f
group by f.`date`
)
,base as (
-- 准备一些基础的日期数据
select
t1.the_date
,day(t1.the_date) as the_day
,month(t1.the_date) as the_month
,concat(year(t1.the_date), if(length(month(t1.the_date)) = 1, '年0', '年'), month(t1.the_date) , '月') as month_name -- 月份名称,格式 '2023年10月'
,year(t1.the_date) as the_year
,floor((month(the_date)-1)/3) + 1 as nth_quarter_of_year -- 一年中第几个季度
,concat(year(t1.the_date), '第', floor((month(the_date)-1)/3) + 1, '季度') as quarter_name
,weekofyear(the_date) as week_of_year -- 一年中第几周
,date_format(the_date, 'u') as day_of_week -- 一周中第几天,一周从周一开始
,datediff(the_date, '1900-01-01') as day_of_all -- 从 1900-01-01(刚好周一)以来第几天
,floor(datediff(the_date, '1900-01-01')/7) as week_of_all -- 从 1900-01-01(刚好周一)以来第几周
from (
SELECT date_add('${START_DATE}', t.pos) as the_date
from(select posexplode(split(repeat(',', datediff('${END_DATE}', '${START_DATE}')), ','))) t
) t1
)
insert overwrite table dim.dim_calendar
select
base.the_date
,base.the_day
,base.the_month
,base.the_year
,if(length(base.week_of_year) = 1 , concat(b.the_year, '年第0', base.week_of_year, '周'),concat(b.the_year, '年第', base.week_of_year, '周')) as week_name
,base.week_of_year as nth_week_of_year
,b.the_year as week_year
,b.the_month as week_month
,base.day_of_week as nth_day_of_week
,concat('周',
case
when base.day_of_week = 1 then '一'
when base.day_of_week = 2 then '二'
when base.day_of_week = 3 then '三'
when base.day_of_week = 4 then '四'
when base.day_of_week = 5 then '五'
when base.day_of_week = 6 then '六'
when base.day_of_week = 7 then '日'
end ) as dayname_of_week -- 星期几,格式 ‘周一’
,if(base.day_of_week in (6,7), 0 ,1) as is_weekday
,min(the_date) over(partition by base.week_of_all) as start_date_of_week
,max(the_date) over(partition by base.week_of_all) as end_date_of_week
,month_name
,min(the_date) over(partition by base.month_name) as start_date_of_month
,max(the_date) over(partition by base.month_name) as end_date_of_month
,base.quarter_name
,base.nth_quarter_of_year
,min(the_date) over(partition by base.quarter_name) as start_date_of_quarter
,max(the_date) over(partition by base.quarter_name) as end_date_of_quarter
,min(the_date) over(partition by base.the_year) as start_date_of_year
,max(the_date) over(partition by base.the_year) as end_date_of_year
,festival.festival_name
,festival.not_work
,case
when festival.not_work = '上班' then 1
when festival.not_work = '放假' then 0
else if(base.day_of_week in (6,7), 0 ,1)
end as is_workday
from base
left join (select
the_year
,the_month
,week_of_year
,week_of_all
from base
-- 以周三所在的年作为这一周所在的年
where day_of_week = 3) b on base.week_of_all = b.week_of_all
left join festival on base.the_date = festival.festival_date
;
```
- 参考了
https://blog.csdn.net/a_void/article/details/103680890?ydreferer=aHR0cHM6Ly93d3cuZ29vZ2xlLmNvbS8%3D?ydreferer=aHR0cHM6Ly93d3cuZ29vZ2xlLmNvbS8%3D