1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
|
from dataclasses import dataclass
import datetime
import pymysql
import os
@dataclass
class PayGapDatabase:
host: str = "db"
user: str = "root"
passwd: str = None
db: str = "paygap"
port: int = 3306
def __enter__(self):
if self.passwd is None:
self.passwd = os.environ["MYSQL_ROOT_PASSWORD"]
try:
self.__connection = self.__get_connection()
except Exception as e:
print(e)
if e.args[0] == 1049:
self.__connection = self.__build_db()
return self
def __exit__(self, type, value, traceback):
self.__connection.close()
def __get_connection(self):
return pymysql.connect(
host = self.host,
port = self.port,
user = self.user,
passwd = self.passwd,
charset = "utf8mb4",
database = self.db
)
def __build_db(self):
print("Building database...")
self.__connection = pymysql.connect(
host = self.host,
port = self.port,
user = self.user,
passwd = self.passwd,
charset = "utf8mb4",
)
with self.__connection.cursor() as cursor:
# unsafe:
cursor.execute("CREATE DATABASE %s" % self.db)
cursor.execute("USE %s" % self.db)
cursor.execute("""
CREATE TABLE sic_sections(
sic_section_id CHAR(1) NOT NULL PRIMARY KEY,
sic_section_name VARCHAR(128) NOT NULL
);
""")
cursor.execute("""
CREATE TABLE sic(
sic_code INT UNSIGNED NOT NULL PRIMARY KEY,
sic_description VARCHAR(512) NOT NULL,
sic_section CHAR(1) NOT NULL,
FOREIGN KEY (sic_section) REFERENCES sic_sections(sic_section_id)
);
""")
cursor.execute("""
CREATE TABLE employer(
company_number CHAR(8) NOT NULL PRIMARY KEY,
name VARCHAR(512) NOT NULL,
address TEXT NOT NULL,
postcode VARCHAR(8) NOT NULL,
policy_link VARCHAR(256) NULL,
responsible_person VARCHAR(128) NOT NULL,
size VARCHAR(20) NOT NULL,
current_name VARCHAR(512) NULL,
status VARCHAR(32) NULL,
type_ VARCHAR(128) NULL,
incorporated DATETIME NULL
)
""")
cursor.execute("""
CREATE TABLE employer_sic(
company_number CHAR(8) NOT NULL,
sic_code INT UNSIGNED NOT NULL,
PRIMARY KEY (company_number, sic_code),
FOREIGN KEY (company_number) REFERENCES employer(company_number),
FOREIGN KEY (sic_code) REFERENCES sic(sic_code)
);
""")
cursor.execute("""
CREATE TABLE pay(
pay_id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
company_number CHAR(8) NOT NULL,
source VARCHAR(64) NOT NULL,
date_submitted DATETIME NOT NULL,
DiffMeanHourlyPercent DECIMAL(8,3) NOT NULL,
DiffMedianHourlyPercent DECIMAL(8,3) NOT NULL,
DiffMeanBonusPercent DECIMAL(8,3) NOT NULL,
DiffMedianBonusPercent DECIMAL(8,3) NOT NULL,
MaleBonusPercent DECIMAL(8,3) NOT NULL,
FemaleBonusPercent DECIMAL(8,3) NOT NULL,
MaleLowerQuartile DECIMAL(8,3) NOT NULL,
FemaleLowerQuartile DECIMAL(8,3) NOT NULL,
MaleLowerMiddleQuartile DECIMAL(8,3) NOT NULL,
FemaleLowerMiddleQuartile DECIMAL(8,3) NOT NULL,
MaleUpperMiddleQuartile DECIMAL(8,3) NOT NULL,
FemaleUpperMiddleQuartile DECIMAL(8,3) NOT NULL,
MaleTopQuartile DECIMAL(8,3) NOT NULL,
FemaleTopQuartile DECIMAL(8,3) NOT NULL,
FOREIGN KEY (company_number) REFERENCES employer(company_number)
);
""")
self.__connection.commit()
return self.__connection
def append_sic_sections(self, section_id, description):
# print("Section ID: '%s', Description: '%s'" % (section_id, description))
with self.__connection.cursor() as cursor:
cursor.execute("""
INSERT INTO sic_sections VALUES (%s, %s) ON DUPLICATE KEY UPDATE sic_section_name = %s;
""", (section_id, description, description))
self.__connection.commit()
def append_sic(self, code, description, section_id):
print("Appended code %d (%s) under section %s" % (code, description, section_id))
with self.__connection.cursor() as cursor:
cursor.execute("""
INSERT INTO sic VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE sic_description = %s, sic_section = %s;
""", (code, description, section_id, description, section_id))
self.__connection.commit()
def append_employer(self, company_number, name, address, postcode, policy_link, responsible_person, size, current_name, \
status, type_, incorporated, sics):
# print("incorporated: %s" % str(incorporated))
# print("sics", sics)
# print("name: %s" % name)
with self.__connection.cursor() as cursor:
cursor.execute("""
INSERT INTO employer (company_number, name, address, postcode, policy_link, responsible_person, size, current_name, status, type_, incorporated)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON DUPLICATE KEY UPDATE
name = %s, address = %s, postcode = %s, policy_link = %s, responsible_person = %s, size = %s,
current_name = %s, status = %s, type_ = %s, incorporated = %s;
""", (
company_number, name, address, postcode, policy_link, responsible_person, size, current_name, status, type_, incorporated,
name, address, postcode, policy_link, responsible_person, size, current_name, status, type_, incorporated
))
# sql = """INSERT INTO employer (company_number, name, address, postcode, policy_link, responsible_person, size, current_name, status, type_, incorporated)
# VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');""" % (
# company_number, name, address, postcode, policy_link, responsible_person, size, current_name, status, type_, incorporated
# )
# print(sql)
self.append_employer_sics(company_number, sics)
self.__connection.commit()
def append_pay_info(self, company_number, source, date_submitted, diff_mean_hourly_percent, diff_median_hourly_percent, \
diff_mean_bonus_percent, diff_median_bonus_percent, male_bonus_percent, female_bonus_percent, male_lower_quartile, \
female_lower_quartile, male_lower_middle_quartile, female_lower_middle_quartile, male_upper_middle_quartile, \
female_upper_middle_quartile, male_top_quartile, female_top_quartile):
try:
float(diff_mean_hourly_percent)
except ValueError:
diff_mean_hourly_percent = None
with self.__connection.cursor() as cursor:
cursor.execute("DELETE FROM pay WHERE company_number = %s AND source = %s;", (company_number, source))
try:
cursor.execute("""
INSERT INTO pay (company_number, source, date_submitted, DiffMeanHourlyPercent, DiffMedianHourlyPercent,
DiffMeanBonusPercent, DiffMedianBonusPercent, MaleBonusPercent, FemaleBonusPercent, MaleLowerQuartile,
FemaleLowerQuartile, MaleLowerMiddleQuartile, FemaleLowerMiddleQuartile, MaleUpperMiddleQuartile,
FemaleUpperMiddleQuartile, MaleTopQuartile, FemaleTopQuartile) VALUES (
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
);
""", (
company_number, source, date_submitted, diff_mean_hourly_percent, diff_median_hourly_percent,
diff_mean_bonus_percent, diff_median_bonus_percent, male_bonus_percent, female_bonus_percent, male_lower_quartile,
female_lower_quartile, male_lower_middle_quartile, female_lower_middle_quartile, male_upper_middle_quartile,
female_upper_middle_quartile, male_top_quartile, female_top_quartile
))
except pymysql.err.DataError:
return
self.__connection.commit()
def append_employer_sics(self, company_number, sics):
with self.__connection.cursor() as cursor:
cursor.execute("DELETE FROM employer_sic WHERE company_number = %s", (company_number, ))
for sic in sics:
cursor.execute("SELECT * FROM sic WHERE sic_code = %s", (sic, ))
if cursor.fetchone() != None:
cursor.execute("INSERT INTO employer_sic VALUES (%s, %s);", (company_number, sic))
|