Hive综合应用案例——用户学历查询
[Hive综合应用案例——用户学历查询---------- 禁止修改 ----------drop database if exists mydb cascade;---------- 禁止修改 -------------------- begin -------------创建mydb数据库create database if not exists mydb;---使用mydb数据库use my
·
Hive综合应用案例——用户学历查询
---------- DO NOT MODIFY ----------
DROP DATABASE IF EXISTS mydb CASCADE;
---------- DO NOT MODIFY ----------
---------- begin ----------
-- Exercise 1: for every user, the number of days from the birth date
-- up to 2019-06-10.

-- Recreate the working database and make it the current one.
CREATE DATABASE IF NOT EXISTS mydb;
USE mydb;

-- Comma-delimited text table; every field is kept as a raw string.
-- Column meanings:
--   id               user id
--   sex              gender (f = female, m = male)
--   time             birth date
--   education        education level
--   occupation       occupation
--   income           income
--   area             region of birth
--   desired_area     region the user would like to live in
--   city_countryside city / countryside
CREATE TABLE usertab (
    id               STRING,
    sex              STRING,
    time             STRING,
    education        STRING,
    occupation       STRING,
    income           STRING,
    area             STRING,
    desired_area     STRING,
    city_countryside STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

-- Load the local file /root/data.txt into the table.
LOAD DATA LOCAL INPATH '/root/data.txt' INTO TABLE usertab;

-- The '/' separators in the birth date are rewritten to '-' before the
-- value is handed to datediff (presumably the file stores yyyy/MM/dd --
-- confirm against the data).
SELECT
    id,
    DATEDIFF('2019-06-10', REGEXP_REPLACE(time, '/', '-'))
FROM usertab;
---------- end ----------
---------- DO NOT MODIFY ----------
DROP DATABASE IF EXISTS mydb CASCADE;
---------- DO NOT MODIFY ----------
---------- begin ----------
-- Exercise 2: the highest income within each (area, education) group.

-- Recreate the working database and make it the current one.
CREATE DATABASE IF NOT EXISTS mydb;
USE mydb;

-- Comma-delimited text table; id is numeric, all other fields are strings.
CREATE TABLE usertab1 (
    id               INT,
    sex              STRING,
    time             STRING,
    education        STRING,
    occupation       STRING,
    income           STRING,
    area             STRING,
    desired_area     STRING,
    city_countryside STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

-- Load the local file /root/data1.txt into the table.
LOAD DATA LOCAL INPATH '/root/data1.txt' INTO TABLE usertab1;

-- Rank the rows of every (area, education) group from highest to lowest
-- income and keep only the top row of each group.
-- income is stored as a string, so ordering on it directly would compare
-- text ('9000' sorts above '10000'). The ranking therefore orders on the
-- numeric value, while the selected column is still the stored string.
-- NOTE(review): assumes income holds plain numeric text -- confirm against
-- the data file.
-- The derived table must carry an alias (ranked): Hive requires every
-- subquery in FROM to be named.
SELECT
    area,
    education,
    income
FROM (
    SELECT
        area,
        education,
        income,
        ROW_NUMBER() OVER (
            PARTITION BY area, education
            ORDER BY CAST(income AS DOUBLE) DESC
        ) AS rn
    FROM usertab1
) ranked
WHERE ranked.rn = 1;
---------- end ----------
---------- DO NOT MODIFY ----------
DROP DATABASE IF EXISTS mydb CASCADE;
set hive.mapred.mode=nonstrict;
---------- DO NOT MODIFY ----------
---------- begin ----------
-- Exercise 3: share of all users held by each education level, shown as a
-- percentage rounded to two decimal places.

-- Recreate the working database and make it the current one.
CREATE DATABASE IF NOT EXISTS mydb;
USE mydb;

-- Comma-delimited text table; id is numeric, all other fields are strings.
CREATE TABLE usertab2 (
    id               INT,
    sex              STRING,
    time             STRING,
    education        STRING,
    occupation       STRING,
    income           STRING,
    area             STRING,
    desired_area     STRING,
    city_countryside STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

-- Load the local file /root/data.txt into the table.
LOAD DATA LOCAL INPATH '/root/data.txt' INTO TABLE usertab2;

-- per_level holds one row per education level with its head count;
-- overall holds a single row with the total head count. The two are
-- combined with a Cartesian product so each level row sees the total
-- (the nonstrict mode set at the top presumably exists to allow this
-- product and the ORDER BY without LIMIT -- confirm for the target Hive).
SELECT
    CONCAT(ROUND(per_level.level_cnt * 100 / overall.total_cnt, 2), '%'),
    per_level.education
FROM (
    SELECT
        COUNT(*) AS level_cnt,
        education
    FROM usertab2
    GROUP BY education
) per_level
CROSS JOIN (
    SELECT
        COUNT(*) AS total_cnt
    FROM usertab2
) overall
ORDER BY per_level.education;
---------- end ----------
开放原子开发者工作坊旨在鼓励更多人参与开源活动,与志同道合的开发者们相互交流开发经验、分享开发心得、获取前沿技术趋势。工作坊有多种形式的开发者活动,如meetup、训练营等,主打技术交流,干货满满,真诚地邀请各位开发者共同参与!
更多推荐
已为社区贡献1条内容
所有评论(0)