自学内容网 自学内容网

数据仓库技术及应用(练习1)

1.创表

(1)customers.csv

CREATE EXTERNAL TABLE IF NOT EXISTS customers (  
customer_id int,  
customer_fname varchar(45),  
customer_lname varchar(45),  
customer_email varchar(45),  
customer_password varchar(45),  
customer_street varchar(255),   
customer_city varchar(45),
customer_state varchar(45),  
customer_zipcode varchar(45) 
)
ROW FORMAT  serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")  
LOCATION '/data/retail_db/customers';

(2)categories.csv

CREATE EXTERNAL TABLE IF NOT EXISTS categories (  
category_id int,  
category_department_id int,  
category_name varchar(45)
) 
ROW FORMAT  serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")  
LOCATION '/data/retail_db/categories';
CREATE EXTERNAL TABLE IF NOT EXISTS departments (  
department_id int,
department_name varchar(45)
) 
ROW FORMAT  serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")  
LOCATION '/data/retail_db/departments';

(3)order_items.csv

CREATE EXTERNAL TABLE IF NOT EXISTS order_items (  
order_item_id int,
order_item_order_id int,  
order_item_product_id int,  
order_item_quantity int,  
order_item_subtotal float,  
order_item_product_price float)
ROW FORMAT  serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")  
LOCATION '/data/retail_db/order_items';

(4)orders.csv

CREATE EXTERNAL TABLE IF NOT EXISTS orders (  
order_id int,  
order_date date,  
order_customer_id int,  
order_status varchar(45)
)
ROW FORMAT  serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")  
LOCATION '/data/retail_db/orders';

(5)products.csv

CREATE EXTERNAL TABLE IF NOT EXISTS products (  
product_id int,  
product_category_id int,
product_name varchar(45),  
product_description varchar(255),  
product_price float,
product_image varchar(255)) 
ROW FORMAT  serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar"=",")  
LOCATION '/data/retail_db/products';

2.Hive内置函数操作

  • 在商品表products中,使用字符函数对商品名称product_name进行截取并输出
  • 要求商品名称不高于x个字符
  • 其中x取第10个字符向后最近空格的位置

关键步骤:

Substr()可以实现字符截取

Locate()可以实现x值的获取 

select substr(a.product_name,0,a.loc)
from
(select product_name,locate(" ",product_name,10) as loc
from products) a limit 5;
  • 使用Streaming将顾客表中的顾客名称转换为大写

关键步骤: 运用Shell Sed完成操作转换 sed’/s[a-z]/\u&/g’,可将匹配的小写字符转换为大写

select transform(customer_lname) 
using "/bin/sed '/s[a-z]/\u&/g' " as lname from customers limit 10;


原文地址:https://blog.csdn.net/2301_79744714/article/details/139626253

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!