查询哪种电影类型最多(利用两次查询):
利用切片将电影类型进行分割:
SELECT explode(split(genres,'\\|'))as type from movies;
利用计数统计最多的电影类型:
SELECT type,COUNT(1)
FROM(
SELECT explode(split(genres,'\\|'))as type from movies
)t
GROUP BY type ORDER BY type DESC;
查询评分最高的电影:
先求电影平均分:
SELECT movieId, AVG(rating) as ratings
FROM ratings
GROUP BY movieId
ORDER BY ratings DESC LIMIT 10;
通过movieId连接movies表和ratings表进行连接统计查询:
SELECT * FROM(
SELECT movieId,AVG(rating) as avgratings
FROM ratings
GROUP BY movieId
ORDER BY avgratings DESC LIMIT 10
)t
LEFT JOIN movies m ON t.movieId = m.movieId;
查询评论人数最多的电影:
SELECT * FROM(
SELECT movieId,COUNT(rating) AS ratingcount FROM ratings
GROUP BY movieId
ORDER BY ratingcount
DESC LIMIT 10
)t
LEFT JOIN movies m on t.movieId = m.movieId;
查询年份(利用正则表达式去掉其余类似年份数据):
原始数据为
SELECT year,COUNT(1)
FROM(
SELECT regexp_extract(title,"\\((\\d{4})\\)") FROM movie_year WHERE title regexp'\\d{4}'
)t
GROUP BY year HAVING year<2019 AND year>1900;
查询评分最高的电影类型:
SELECT * FROM(
SELECT movieId,AVG(rating) as avgratings
FROM (SELECT movies.*,r.rating FROM(SELECT *,explode(split(genres,'\\|'))as type from movies) LEFT JOIN ratings r ON t.movieId = r.movieId)
GROUP BY type
ORDER BY avgratings DESC LIMIT 10;
高频词统计:
SELECT tag,COUNT(1) as total
FROM(
SELECT explode(split(tag,'\\s'))as tag from tags
)t
GROUP BY tag
ORDER BY total
DESC LIMIT 10;
版权声明:本文为qq_61786801原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。