Hive综合应用案例——用户学历查询

[Hive综合应用案例——用户学历查询---------- 禁止修改 ----------drop database if exists mydb cascade;---------- 禁止修改 -------------------- begin -------------创建mydb数据库create database if not exists mydb;---使用mydb数据库use my

Deng872347348

8880人浏览 · 2020-12-14 10:56:25

Deng872347348 · 2020-12-14 10:56:25 发布

Hive综合应用案例——用户学历查询

在这里插入图片描述

---------- 禁止修改 ----------
 drop database if exists mydb cascade;
---------- 禁止修改 ----------


---------- begin ----------
-- Create the mydb database (no-op when it already exists).
create database if not exists mydb;

-- Switch to mydb so the unqualified table names below resolve inside it.
use mydb;

-- Create table usertab. Every field is kept as a string and rows are read
-- as comma-delimited text.
-- `time` is back-quoted because TIME is a reserved keyword starting with
-- Hive 3.0.0; the quoted form is also accepted by earlier releases.
create table usertab (
    id               string,
    sex              string,
    `time`           string,
    education        string,
    occupation       string,
    income           string,
    area             string,
    desired_area     string,
    city_countryside string
)
row format delimited
fields terminated by ',';

-- Load the local file /root/data.txt into usertab.
load data local inpath '/root/data.txt' into table usertab;

-- For each user, the number of days from the date stored in `time` up to
-- 2019-06-10. '/' separators are rewritten to '-' before calling datediff.
-- NOTE(review): presumably `time` holds the birth date as yyyy/MM/dd --
-- confirm against the data file.
select
    id,
    datediff('2019-06-10', regexp_replace(`time`, '/', '-'))
from usertab;




---------- end ----------
-- create table  if not exists usertab(
--     id string comment '用户id',
--     sex string comment '性别,f:女性,m:男性',
--     time string comment '出生日期',
--     education string comment '学历',
--     occupation string comment '职业',
--     income string comment '收入',
--     area string comment '出生地区',
--     desired_area string comment '向往地区',
--     city_countryside string comment '城市/农村'
-- )row format delimited fields terminated by ',';

在这里插入图片描述

---------- 禁止修改 ----------
 drop database if exists mydb cascade;
---------- 禁止修改 ----------


---------- begin ----------
-- Create the mydb database (no-op when it already exists).
create database if not exists mydb;

-- Switch to mydb so the unqualified table names below resolve inside it.
use mydb;

-- Create table usertab1; rows are read as comma-delimited text.
-- `time` is back-quoted because TIME is a reserved keyword starting with
-- Hive 3.0.0; the quoted form is also accepted by earlier releases.
create table usertab1 (
    id               int,
    sex              string,
    `time`           string,
    education        string,
    occupation       string,
    income           string,
    area             string,
    desired_area     string,
    city_countryside string
)
row format delimited
fields terminated by ',';

-- Load the local file /root/data1.txt into usertab1.
load data local inpath '/root/data1.txt' into table usertab1;

-- Highest income for each (area, education) pair.
-- income is declared as string, so ordering on it directly compares text
-- ('9000' would rank above '10000'). Rank on the numeric value first and
-- use the raw string only as a tie-breaker; when a value is not numeric the
-- cast yields NULL and the ordering falls back to the plain string order.
-- NOTE(review): assumes income holds plain numbers -- confirm against the
-- data file.
select
    ranked.area,
    ranked.education,
    ranked.income
from (
    select
        area,
        education,
        income,
        row_number() over (
            partition by area, education
            order by cast(income as double) desc, income desc
        ) as rn
    from usertab1
) ranked
where ranked.rn = 1;





---------- end ----------
-- select area,education,income from(
--     select area,education,income,
--     row_number() over(partition by area,education order by income desc) as rn
--     from usertab1
-- ) a where a.rn=1;

在这里插入图片描述

---------- 禁止修改 ----------

 drop database if exists mydb cascade;
 set hive.mapred.mode=nonstrict;
---------- 禁止修改 ----------


---------- begin ----------
-- Create the mydb database (no-op when it already exists).
create database if not exists mydb;

-- Switch to mydb so the unqualified table names below resolve inside it.
use mydb;

-- Create table usertab2; rows are read as comma-delimited text.
-- `time` is back-quoted because TIME is a reserved keyword starting with
-- Hive 3.0.0; the quoted form is also accepted by earlier releases.
create table usertab2 (
    id               int,
    sex              string,
    `time`           string,
    education        string,
    occupation       string,
    income           string,
    area             string,
    desired_area     string,
    city_countryside string
)
row format delimited
fields terminated by ',';

-- Load the local file /root/data.txt into usertab2.
load data local inpath '/root/data.txt' into table usertab2;

-- Share of users at each education level, as a percentage of all users,
-- rounded to two decimal places and suffixed with '%'.
-- per_level has one row per education value; overall has exactly one row,
-- so the cross join only attaches the grand total to every per-level row.
select
    concat(round(per_level.cnted * 100 / overall.cnt, 2), '%'),
    per_level.education
from (
    select
        count(*) as cnted,
        education
    from usertab2
    group by education
) per_level
cross join (
    select count(*) as cnt
    from usertab2
) overall
order by per_level.education;



-- select concat(round(a.cnt*100/a.cnt_total,2),'%') as ct,education from(
--     select count(*) over(partition by education) as cnt,
-- count(*) over() as cnt_total,education,row_number() over(partition by education) as rn
--     from usertab2
-- )a where a.rn=1 order by education;

---------- end ----------

开放原子开发者工作坊

开放原子开发者工作坊旨在鼓励更多人参与开源活动，与志同道合的开发者们相互交流开发经验、分享开发心得、获取前沿技术趋势。工作坊有多种形式的开发者活动，如meetup、训练营等，主打技术交流，干货满满，真诚地邀请各位开发者共同参与！

更多推荐

第二届openEuler生态大会（中国·湖南）成功举办

10月30日，第二届openEuler生态大会（中国·湖南）成功举办。

开放原子开发者工作坊

“小满”安全车控操作系统正式在AtomGit开源

10月24日，由中国汽车工业协会指导，普华基础软件股份有限公司主办的“小满”安全车控操作系统开源发布会暨共建计划说明会成功举行。普华基础软件宣布将安全车控操作系统“小满”（简称“小满”）V24.10源代码正式在开放原子开源基金会（简称“基金会”）旗下AtomGit开源协作平台开源，并在AtomGit平