-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlayoffs.sql
More file actions
125 lines (97 loc) · 2.48 KB
/
layoffs.sql
File metadata and controls
125 lines (97 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
select *
from noah1.layoffs;
create table noah1.layoffs_1
like noah1.layoffs;
insert noah1.layoffs_1
select *
from noah1.layoffs;
-- Identifying and removing duplicates --
with temp as
(select *,
row_number() over(partition by company, location, industry, total_laid_off, percentage_laid_off, date, stage, country, funds_raised_millions) as row_num
from noah1.layoffs_1)
select *
from temp
where row_num > '1';
use noah1;
create table `layoffs_2` (
`company` text,
`location` text,
`industry` text,
`total_laid_off` int DEFAULT NULL,
`percentage_laid_off` text,
`date` text,
`stage` text,
`country` text,
`funds_raised_millions` int DEFAULT NULL,
`row_num` int
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
insert into noah1.layoffs_2
select *,
row_number() over(partition by company, location, industry, total_laid_off, percentage_laid_off, date, stage, country, funds_raised_millions) as row_num
from noah1.layoffs_1;
DELETE
from noah1.layoffs_2
where row_num > '1';
select *
from noah1.layoffs_2;
-- Data standardization and formatting --
select *
from noah1.layoffs_2;
select company, trim(company)
from noah1.layoffs_2;
update noah1.layoffs_2
set company = trim(company);
select distinct industry
from noah1.layoffs_2
order by 1;
select *
from noah1.layoffs_2
where industry like '%Crypto%';
update layoffs_2
set industry = 'Crypto'
where industry LIKE '%Crypto%';
select distinct country
from layoffs_2
order by 1;
select distinct country, trim(trailing '.' from country)
from layoffs_2
order by 1;
update layoffs_2
set country = trim(trailing '.' from country);
select date,
str_to_date(date, '%m/%d/%Y')
from layoffs_2;
update layoffs_2
set date = str_to_date(date, '%m/%d/%Y');
alter table layoffs_2
modify column date date;
-- Handling null and blank values --
select *
from layoffs_2
where industry is null
or industry = '' ;
update layoffs_2
set industry = null
where industry = '';
select temp1.industry, temp2.industry
from layoffs_2 as temp1
join layoffs_2 as temp2
on temp1.company = temp2.company
where temp1.industry is not null
and temp2.industry is null;
update layoffs_2 temp1
join layoffs_2 as temp2
on temp1.company = temp2.company
set temp2.industry = temp1.industry
where temp1.industry is not null
and temp2.industry is null;
-- Removing rows and colums not needed --
delete
from layoffs_2
where total_laid_off is null
and percentage_laid_off is null;
alter table layoffs_2
drop column row_num;
select *
from layoffs_2